Coretex
All Classes Functions
network_sample.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import TypeVar, Generic, Dict, Any, List
19 from typing_extensions import override
20 from datetime import datetime
21 from pathlib import Path
22 
23 import os
24 import time
25 import shutil
26 
27 from .sample import Sample
28 from ..project import ProjectType
29 from ..._folder_manager import folder_manager
30 from ...codable import KeyDescriptor
31 from ...networking import NetworkObject, networkManager, NetworkRequestError, \
32  fileChunkUpload, MAX_CHUNK_SIZE, FileData
33 from ...utils import TIME_ZONE
34 from ...cryptography import getProjectKey, aes
35 
36 
# Type variable used to parametrize Sample/NetworkSample over the type of
# data returned when a sample is loaded (see "NetworkSample.load")
SampleDataType = TypeVar("SampleDataType")
38 
39 
def _relinkSample(samplePath: Path) -> None:
    """
        Recreates the hard links which point to the provided sample file.

        Every entry of "folder_manager.datasetsFolder" is checked for a
        child whose name matches the name of the sample file. If such a
        child exists it is removed and replaced with a fresh hard link
        to "samplePath". Entries without a matching child are skipped.

        Parameters
        ----------
        samplePath : Path
            path to the sample file to which the links should point
    """

    sampleName = samplePath.name

    for datasetDir in folder_manager.datasetsFolder.iterdir():
        existingLink = datasetDir.joinpath(sampleName)

        if existingLink.exists():
            # Drop the old link first, os.link fails if the destination exists
            existingLink.unlink()
            os.link(samplePath, existingLink)
48 
49 
class NetworkSample(Generic[SampleDataType], Sample[SampleDataType], NetworkObject):

    """
        Represents a base class for all Sample classes which are
        communicating with Coretex.ai
    """

    # Fields describing the sample (presumably decoded from the
    # Coretex.ai response by NetworkObject - verify against the base class)
    isLocked: bool
    projectId: int
    projectType: ProjectType
    lastModified: datetime
    isEncrypted: bool

    @property
    def path(self) -> Path:
        """
            Returns
            -------
            Path -> path for network sample
        """

        return folder_manager.samplesFolder / str(self.id)

    @property
    def zipPath(self) -> Path:
        """
            Returns
            -------
            Path -> zip path for network sample
        """

        return self.path.with_suffix(".zip")

    @property
    def downloadPath(self) -> Path:
        """
            Returns
            -------
            Path -> path to which the network sample is downloaded to,
            a ".bin" file if the sample is encrypted, otherwise the zip path
        """

        return self.path.with_suffix(".bin") if self.isEncrypted else self.zipPath

    @classmethod
    def _keyDescriptors(cls) -> Dict[str, KeyDescriptor]:
        """
            Extends the descriptors of the parent class with
            descriptors for "projectType" and "lastModified"

            Returns
            -------
            Dict[str, KeyDescriptor] -> descriptors keyed by field name
        """

        descriptors = super()._keyDescriptors()

        # First KeyDescriptor argument is presumably the serialized key name - verify
        descriptors["projectType"] = KeyDescriptor("project_task", ProjectType)
        descriptors["lastModified"] = KeyDescriptor("storage_last_modified", datetime)

        return descriptors

    @classmethod
    def _endpoint(cls) -> str:
        """
            Returns
            -------
            str -> endpoint prefix used for sample network requests
        """

        return "session"

    def modifiedSinceLastDownload(self) -> bool:
        """
            Checking if sample has been modified since last download, if the sample is already
            stored locally

            Returns
            -------
            bool -> False if sample has not changed since last download, True otherwise

            Raises
            ------
            FileNotFoundError -> sample file cannot be found
        """

        if not self.downloadPath.exists():
            raise FileNotFoundError(
                f">> [Coretex] Sample file could not be found at {self.downloadPath}. "
                "Cannot check if file has been modified since last download"
            )

        # Modification time of the local file is compared against "lastModified"
        lastModified = datetime.fromtimestamp(self.downloadPath.stat().st_mtime).astimezone(TIME_ZONE)
        return self.lastModified > lastModified

    def decrypt(self, ignoreCache: bool = False) -> None:
        """
            Decrypts the content of this Sample and caches
            the results. Is ignored if the "isEncrypted" value is False.

            Parameters
            ----------
            ignoreCache : bool
                defines if content should be decrypted if a cache for decryption
                already exists
        """

        if not self.isEncrypted:
            return

        if ignoreCache and self.zipPath.exists():
            # Discard the previously decrypted file so it gets recreated
            self.zipPath.unlink()

        if not ignoreCache and self.zipPath.exists():
            # Decrypted file is already cached
            return

        # Decrypt sample
        aes.decryptFile(getProjectKey(self.projectId), self.downloadPath, self.zipPath)

        # Relink sample to all datasets to which it belongs
        _relinkSample(self.zipPath)

    def _download(self, ignoreCache: bool = False) -> None:
        """
            Downloads the sample file to "downloadPath" unless a
            valid local copy already exists

            Parameters
            ----------
            ignoreCache : bool
                if True the local copy is deleted and downloaded again,
                this is forced if the sample was modified since last download

            Raises
            ------
            NetworkRequestError -> if the download request has failed
        """

        if self.downloadPath.exists() and self.modifiedSinceLastDownload():
            ignoreCache = True

        if ignoreCache:
            # Delete downloadPath file
            self.downloadPath.unlink(missing_ok = True)

        # If the downloadPath exists at this point do not redownload
        if self.downloadPath.exists():
            return

        params = {
            "id": self.id
        }

        response = networkManager.download(f"{self._endpoint()}/export", self.downloadPath, params)
        if response.hasFailed():
            raise NetworkRequestError(response, f"Failed to download Sample \"{self.name}\"")

        # NOTE(review): presumably a non-HEAD response means that new
        # content was written to downloadPath - confirm against networkManager
        if not response.isHead():
            if self.isEncrypted:
                # Delete the zipPath file, if Sample is encrypted
                # downloadPath != zipPath, otherwise they point to same file
                self.zipPath.unlink(missing_ok = True)

            if self.path.exists():
                # Delete the unzipped folder
                shutil.rmtree(self.path)

    @override
    def download(self, decrypt: bool = True, ignoreCache: bool = False) -> None:
        """
            Downloads and optionally decrypts sample from Coretex.ai

            Parameters
            ----------
            decrypt : bool
                if True the sample is decrypted after the download,
                has no effect if the sample is not encrypted
            ignoreCache : bool
                forwarded to the download and decrypt steps, if True
                the locally cached files are not reused

            Raises
            ------
            NetworkRequestError -> if some kind of error happened during
            the download process
        """

        if decrypt and not self.isEncrypted:
            # Change to false if sample is not encrypted
            decrypt = False

        # Download the sample
        self._download(ignoreCache)

        if decrypt:
            # Decrypt the sample
            self.decrypt(ignoreCache)

        # Update sample download time to now (access time is left unchanged)
        os.utime(self.downloadPath, (os.stat(self.downloadPath).st_atime, time.time()))

        # If sample was downloaded successfully relink it to datasets to which it is linked
        _relinkSample(self.downloadPath)

    @override
    def unzip(self, ignoreCache: bool = False) -> None:
        """
            Unzips the sample by calling the parent implementation,
            after verifying that the required files exist

            Parameters
            ----------
            ignoreCache : bool
                forwarded to the parent implementation

            Raises
            ------
            RuntimeError -> if the sample was not downloaded, or if the
            sample is encrypted and was not decrypted
        """

        if not self.downloadPath.exists():
            raise RuntimeError("You must first download the Sample before you can unzip it")

        if not self.zipPath.exists() and self.isEncrypted:
            raise RuntimeError("You must first decrypt the Sample before you can unzip it")

        super().unzip(ignoreCache)

    @override
    def load(self) -> SampleDataType:
        """
            Returns
            -------
            SampleDataType -> value returned by the parent implementation
        """

        return super().load()  # type: ignore

    @override
    def _updateArchive(self) -> None:
        """
            Updates the archive by calling the parent implementation, and
            for encrypted samples encrypts the zip file into "downloadPath"
        """

        super()._updateArchive()

        if self.isEncrypted:
            aes.encryptFile(getProjectKey(self.projectId), self.zipPath, self.downloadPath)

    def _overwriteSample(self, samplePath: Path) -> None:
        """
            Encrypts the file at the provided path and uploads
            it to Coretex.ai for this Sample

            Parameters
            ----------
            samplePath : Path
                path to the file which should be encrypted and uploaded

            Raises
            ------
            RuntimeError -> if the sample is not encrypted
            NetworkRequestError -> if the upload request has failed
        """

        if not self.isEncrypted:
            raise RuntimeError("Only encrypted samples can be overwriten.")

        # Encrypted file must remain available until the upload is finished,
        # therefore the upload is performed inside of the "with" block
        with folder_manager.tempFile() as encryptedPath:
            aes.encryptFile(getProjectKey(self.projectId), samplePath, encryptedPath)

            params: Dict[str, Any] = {
                "id": self.id
            }

            files: List[FileData] = []

            # Use chunk upload if file is larger than MAX_CHUNK_SIZE
            # Use normal upload if file is smaller than MAX_CHUNK_SIZE
            size = encryptedPath.stat().st_size

            if size > MAX_CHUNK_SIZE:
                params["file_id"] = fileChunkUpload(encryptedPath)
            else:
                files.append(FileData.createFromPath("file", encryptedPath))

            response = networkManager.formData(f"{self._endpoint()}/upload", params, files)
            if response.hasFailed():
                raise NetworkRequestError(response, f"Failed to overwrite Sample \"{self.name}\"")
None _download(self, bool ignoreCache=False)
None download(self, bool decrypt=True, bool ignoreCache=False)
None decrypt(self, bool ignoreCache=False)