Coretex
utils.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Type, Any, Optional, List, Generator
19 from pathlib import Path
20 from zipfile import ZipFile, ZIP_DEFLATED
21 
22 import logging
23 
24 import git
25 
26 from ..dataset import Dataset, LocalDataset, NetworkDataset, LocalCustomDataset, \
27  CustomDataset, LocalImageDataset, ImageDataset
28 from ..project import ProjectType
29 from ..._folder_manager import folder_manager
30 
31 
def getDatasetType(type_: ProjectType, isLocal: bool) -> Type[Dataset]:
    """
        Picks the dataset class matching a project type

        Parameters
        ----------
        type_ : ProjectType
            project type for which the dataset class is resolved
        isLocal : bool
            if True the Local* variant of the dataset class is returned,
            otherwise the non-local variant is returned

        Returns
        -------
        Type[Dataset] -> LocalImageDataset / ImageDataset for computerVision
        and imageSegmentation projects, LocalCustomDataset / CustomDataset
        for every other project type
    """

    # Used both for ProjectType.other and as the fallback for unmapped types
    customType: Type[Dataset] = LocalCustomDataset if isLocal else CustomDataset

    if type_ == ProjectType.other:
        return customType

    if type_ in (ProjectType.computerVision, ProjectType.imageSegmentation):
        return LocalImageDataset if isLocal else ImageDataset

    logging.getLogger("coretexpylib").debug(f">> [Coretex] ProjectType ({type_}) does not have a dataset type using CustomDataset")

    # Falling back to the custom dataset since type_ doesn't have its own dataset type
    return customType
52 
53 
def fetchDataset(datasetType: Type[Dataset], value: Any) -> Optional[Dataset]:
    """
        Obtains a dataset instance of the requested class

        Parameters
        ----------
        datasetType : Type[Dataset]
            dataset class which should be instantiated or fetched
        value : Any
            forwarded to the constructor for LocalDataset subclasses,
            or to fetchById for NetworkDataset subclasses

        Returns
        -------
        Optional[Dataset] -> the dataset object, or None if datasetType
        is neither a LocalDataset nor a NetworkDataset subclass
    """

    # Local datasets are created directly from the provided value
    if issubclass(datasetType, LocalDataset):
        return datasetType(value)  # type: ignore

    # Anything which is not a network dataset either cannot be fetched
    if not issubclass(datasetType, NetworkDataset):
        return None

    return datasetType.fetchById(value)
62 
63 
def getSnapshotFiles(dirPath: Path, ignoredFiles: List[str]) -> List[Path]:
    """
        Recursively collects the files under dirPath which should
        be part of a snapshot

        A directory which directly contains a ".coretexignore" file is
        skipped together with everything below it. Files whose
        string path is present in ignoredFiles are left out.

        Parameters
        ----------
        dirPath : Path
            directory which is walked
        ignoredFiles : List[str]
            paths of files to exclude, compared against str(path) of
            every file found, so they must be spelled the same way
            as the paths produced by walking dirPath

        Returns
        -------
        List[Path] -> files found under dirPath, depth-first in the
        order in which Path.iterdir yields them
    """

    # Built once for the whole walk: membership checks against the list
    # would be a linear scan for every single file in the tree
    ignored = frozenset(ignoredFiles)

    def collect(directory: Path) -> List[Path]:
        # A marker file excludes the directory and all of its children
        if directory.joinpath(".coretexignore").exists():
            return []

        collected: List[Path] = []

        for path in directory.iterdir():
            if path.is_dir():
                collected.extend(collect(path))
            elif str(path) not in ignored:
                collected.append(path)

        return collected

    return collect(dirPath)
77 
78 
def getDefaultEntryPoint() -> Optional[str]:
    """
        Looks for a default entry point file in the current
        working directory

        Returns
        -------
        Optional[str] -> name of the first existing candidate, checked
        in the order "main.py", "main.r", "main.R", or None if none
        of them exists
    """

    candidates = ("main.py", "main.r", "main.R")

    # First existing candidate wins, None when there is no match
    return next(
        (fileName for fileName in candidates if Path(".", fileName).exists()),
        None
    )
85 
86 
def chunks(lst: List, n: int) -> Generator[List, None, None]:
    """
        Splits a list into consecutive slices of at most n elements

        Parameters
        ----------
        lst : List
            list which is sliced
        n : int
            step between slice starts and maximum length of a slice

        Returns
        -------
        Generator[List, None, None] -> yields lst[0:n], lst[n:2n], ...
        where the last slice may be shorter than n
    """

    # Start offsets are computed once, when the generator is first advanced
    offsets = range(0, len(lst), n)
    yield from (lst[offset:offset + n] for offset in offsets)
90 
91 
def createSnapshot() -> Path:
    """
        Archives the files of the current working directory
        into "snapshot.zip" inside of the temp folder

        Files reported as ignored by git and directories which contain
        a ".coretexignore" file are not added to the archive. Archive
        entries are stored with paths relative to the current
        working directory.

        Returns
        -------
        Path -> path to the created archive

        Raises
        ------
        FileNotFoundError -> if no default entry point is found, or if
        the entry point or "requirements.txt" is missing from the
        current working directory

        Errors raised by git.Repo and Repo.ignored are not handled
        here and propagate to the caller
    """

    entryPoint = getDefaultEntryPoint()
    if entryPoint is None or not Path(".", entryPoint).exists():
        raise FileNotFoundError(">> [Coretex] Entry point file not found")

    # Checked before any other work so that a missing file neither triggers
    # the git scan, nor truncates an already existing snapshot archive
    if not Path(entryPoint).exists() or not Path("requirements.txt").exists():
        raise FileNotFoundError(f">> [Coretex] Required files \"{entryPoint}\" and \"requirements.txt\"")

    workingDir = Path.cwd()

    # Paths are handed to git in batches of 256 instead of all at once
    repo = git.Repo(workingDir, search_parent_directories = True)
    ignoredFiles: List[str] = []
    for paths in chunks(list(workingDir.rglob("*")), 256):
        ignoredFiles.extend(repo.ignored(*paths))

    snapshotPath = folder_manager.temp / "snapshot.zip"

    # Files are gathered before the archive is opened so the walk can't pick up
    # the archive while it is being written. The explicit comparison also drops
    # an archive left by a previous run - this only matters if the temp folder
    # is located inside of the working directory.
    snapshotFiles = [
        path for path in getSnapshotFiles(workingDir, ignoredFiles)
        if path != snapshotPath
    ]

    with ZipFile(snapshotPath, "w", ZIP_DEFLATED) as snapshotArchive:
        for path in snapshotFiles:
            snapshotArchive.write(path.relative_to(workingDir))

    return snapshotPath