Coretex
model.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Any, Dict, Union, Optional
19 from typing_extensions import Self, override
20 from datetime import datetime
21 from zipfile import ZipFile
22 from pathlib import Path
23 
24 import json
25 import logging
26 
27 from ..tag import Taggable, EntityTagType
28 from ..utils import isEntityNameValid
29 from ..._folder_manager import folder_manager
30 from ...networking import networkManager, NetworkObject, ChunkUploadSession, MAX_CHUNK_SIZE, NetworkRequestError
31 from ...codable import KeyDescriptor
32 
33 
class Model(NetworkObject, Taggable):

    """
    Represents a machine learning model object on Coretex.ai
    Contains properties that describe the model

    Properties
    ----------
    name : str
        model name
    createdById : str
        id of the creator of the model (presumably the user id -
        inferred from the attribute name, confirm against the API)
    createdOn : datetime
        date of model creation
    datasetId : int
        dataset id that is used for training the model
    projectId : int
        project id that is used for training the model
    taskId : int
        task id that is used for training the model
    isTrained : bool
        True if model is trained, False otherwise
    isDeleted : bool
        True if model is deleted, False otherwise
    accuracy : float
        model accuracy
    taskRunId : int
        TaskRun id of trained model
        (decoded from the "model_queue_id" key, see _keyDescriptors)
    meta : Dict[str, Any]
        model meta data
    """

    name: str
    createdById: str
    createdOn: datetime
    datasetId: int
    projectId: int
    taskId: int
    isTrained: bool
    isDeleted: bool
    accuracy: float
    taskRunId: int
    meta: Dict[str, Any]

    @property
    def path(self) -> Path:
        """
        Returns
        -------
        Path -> directory inside the models folder, named after the model id
        """

        return folder_manager.modelsFolder / str(self.id)

    @property
    def zipPath(self) -> Path:
        """
        Returns
        -------
        Path -> same location as "path", but with the ".zip" suffix
        """

        return self.path.with_suffix(".zip")

    @property
    def entityTagType(self) -> EntityTagType:
        """
        Returns
        -------
        EntityTagType -> tag type used for Model entities
        """

        return EntityTagType.model

    @classmethod
    def modelDescriptorFileName(cls) -> str:
        """
        Returns
        -------
        str -> name of model descriptor file
        """

        return "model_descriptor.json"

    @classmethod
    def _keyDescriptors(cls) -> Dict[str, KeyDescriptor]:
        descriptors = super()._keyDescriptors()

        # "taskRunId" is stored under a different key ("model_queue_id")
        # in the serialized representation
        descriptors["taskRunId"] = KeyDescriptor("model_queue_id")

        return descriptors

    @classmethod
    def createModel(
        cls,
        name: str,
        projectId: int,
        accuracy: float,
        meta: Optional[Dict[str, Any]] = None
    ) -> Self:

        """
        Creates Model as a result of TaskRun

        Parameters
        ----------
        name : str
            model name
        projectId : int
            Project to which the Model will be added
        accuracy : float
            model accuracy, values outside of the [0, 1] range
            are clipped to that range (a warning is logged)
        meta : Optional[Dict[str, Any]]
            model metadata, empty dictionary is used if None

        Returns
        -------
        Self -> Model object

        Raises
        -------
        ValueError -> If the model name is invalid
        NetworkRequestError -> If model creation failed

        Example
        -------
        >>> from coretex import Model
        >>> model = Model.createModel("model-name", projectId, 0.87)
        """

        if not isEntityNameValid(name):
            raise ValueError(">> [Coretex] Model name is invalid. Requirements: alphanumeric characters (\"a-z\", and \"0-9\") and dash (\"-\") with length between 3 to 50")

        logger = logging.getLogger("coretexpylib")

        if accuracy < 0:
            logger.warning(f">> [Coretex] Invalid value for accuracy: ({accuracy} < 0), clipping to 0.")

        if accuracy > 1:
            logger.warning(f">> [Coretex] Invalid value for accuracy: ({accuracy} > 1), clipping to 1.")

        # Clamp accuracy into the [0, 1] range
        accuracy = max(0, min(accuracy, 1))

        if meta is None:
            meta = {}

        return cls.create(
            name = name,
            project_id = projectId,
            accuracy = accuracy,
            meta = meta
        )

    @classmethod
    def saveModelDescriptor(cls, path: Union[Path, str], contents: Dict[str, Any]) -> None:
        """
        Saves a model descriptor - a JSON file that provides a description of a
        machine learning model. It includes information such as the model's
        architecture, input and output shapes, labels, description and etc.

        The file is written to "path / modelDescriptorFileName()" as
        UTF-8 encoded JSON, indented with 4 spaces.

        Parameters
        ----------
        path : Union[Path, str]
            path to the directory in which the model descriptor will be saved
        contents : Dict[str, Any]
            key-value pairs which will be stored as json

        Example
        -------
        >>> from coretex import currentTaskRun, Model
        >>> model = Model.createModel("model-name", projectId, accuracy)
        >>> model.saveModelDescriptor(modelPath, {
                "project_task": currentTaskRun().projectType,
                "labels": labels,
                "modelName": model.name,
                "description": currentTaskRun().description,

                "input_description": (
                    "Input shape is [x, y]. "
                    "x is actually number of samples in dataset, "
                    "y represents number of unique taxons for selected level in dataset"
                ),
                "input_shape": [x, y],

                "output_description": (
                    "Output shape - [x, z]. "
                    "x is actually number of samples in dataset, "
                    "z represents that output 2d array (table) is going to have only 1 column "
                    "(1 prediction for each sample in dataset)"
                ),
                "output_shape": [x, z]
            })
        """

        if isinstance(path, str):
            path = Path(path)

        modelDescriptorPath = path / cls.modelDescriptorFileName()

        with modelDescriptorPath.open("w", encoding = "utf-8") as file:
            json.dump(contents, file, ensure_ascii = False, indent = 4)

    @override
    def entityUrl(self) -> str:
        return f'model-item?id={self.id}'

    def download(self, path: Optional[Union[Path, str]] = None, ignoreCache: bool = False) -> None:
        """
        Downloads and extracts the model zip file from Coretex.ai

        Nothing is downloaded if the model is deleted, if it is not trained,
        or if "path" already exists and "ignoreCache" is False.

        Parameters
        ----------
        path : Optional[Union[Path, str]]
            directory into which the model will be extracted, the zip file
            is stored next to it (same path with ".zip" suffix),
            if None "self.path" is used
        ignoreCache : bool
            if True the model is downloaded even if "path" already exists

        Raises
        -------
        NetworkRequestError -> if Model download failed
        """

        if path is None:
            path = self.path
        elif isinstance(path, str):
            # Accept plain strings as well, consistent with "upload"
            path = Path(path)

        if self.isDeleted or not self.isTrained:
            return

        if path.exists() and not ignoreCache:
            return

        modelZip = path.with_suffix(".zip")
        response = networkManager.download(f"{self._endpoint()}/download", modelZip, {
            "id": self.id
        })

        if response.hasFailed():
            raise NetworkRequestError(response, "Failed to download Model")

        with ZipFile(modelZip) as zipFile:
            zipFile.extractall(path)

    def upload(self, path: Union[Path, str]) -> None:
        """
        Uploads the provided model folder as zip file to Coretex.ai

        The zip file is created next to the directory (same path with
        ".zip" suffix) and contains every file found recursively inside of
        the directory, stored under its path relative to the directory.

        Parameters
        ----------
        path : Union[Path, str]
            Path to the model directory

        Raises
        -------
        ValueError -> if provided path is not a directory
        NetworkRequestError -> if Model upload failed

        Example
        -------
        >>> from coretex import Model
        >>> model = Model.createModel("model-name", projectId, 0.87)
        >>> model.upload("path/to/model-dir")
        """

        if isinstance(path, str):
            path = Path(path)

        if not path.is_dir():
            raise ValueError("\"path\" must be a directory")

        zipPath = path.with_suffix(".zip")
        with ZipFile(zipPath, "w") as zipFile:
            for value in path.rglob("*"):
                # Only files are archived, directories are implied
                # by the relative paths of the files
                if not value.is_file():
                    continue

                zipFile.write(value, value.relative_to(path))

        uploadSession = ChunkUploadSession(MAX_CHUNK_SIZE, zipPath)
        uploadId = uploadSession.run()

        # Link the uploaded file to this model
        parameters = {
            "id": self.id,
            "file_id": uploadId
        }

        response = networkManager.formData("model/upload", parameters)
        if response.hasFailed():
            raise NetworkRequestError(response, "Failed to upload model")
Self createModel(cls, str name, int projectId, float accuracy, Optional[Dict[str, Any]] meta=None)
Definition: model.py:114
None saveModelDescriptor(cls, Union[Path, str] path, Dict[str, Any] contents)
Definition: model.py:166
None download(self, Optional[Path] path=None, bool ignoreCache=False)
Definition: model.py:219
None upload(self, Union[Path, str] path)
Definition: model.py:244