# Coretex - local_dataset.py
# Copyright (C) 2023 Coretex LLC

# This file is part of Coretex.ai

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
from typing import TypeVar, Generic, Type, Generator, Optional, Union, Any
from pathlib import Path

import logging
import zipfile

from .dataset import Dataset
from ..sample import LocalSample, AnyLocalSample
26 
27 
# Type variable for the sample class stored by a dataset, bound to LocalSample
SampleType = TypeVar("SampleType", bound = "LocalSample")

# Generator which yields samples; it accepts no sent values and returns nothing
SampleGenerator = Generator[SampleType, None, None]


def _generateZippedSamples(path: Path, sampleClass: Type[SampleType]) -> Generator[SampleType, None, None]:
    """
        Lazily creates one sample for every zip archive which is
        a direct child of the provided directory

        Parameters
        ----------
        path : Path
            directory whose direct children are inspected
        sampleClass : Type[SampleType]
            called with the path of each zip archive to create the sample

        Returns
        -------
        Generator[SampleType, None, None] -> generator of created samples
    """

    for samplePath in path.glob("*"):
        # Anything which is not recognized as a zip archive is skipped
        if not zipfile.is_zipfile(samplePath):
            continue

        yield sampleClass(samplePath)
38 
39 
class LocalDataset(Generic[SampleType], Dataset[SampleType]):

    """
        Represents the generic Local Dataset class for all
        LocalDataset classes \n
        Used for working with datasets stored on the local file system

        Properties
        ----------
        name : str
            stem of the dataset path
        samples : List[SampleType]
            samples collected from the generator when the dataset is created
        path : Path
            local path of dataset
    """

    def __init__(self, path: Path, sampleClass: Type[SampleType], generator: Optional[SampleGenerator] = None) -> None:
        """
            Parameters
            ----------
            path : Path
                local path of dataset
            sampleClass : Type[SampleType]
                class used for creating samples
            generator : Optional[SampleGenerator]
                sample generator, if None every zip archive
                directly inside path becomes a sample
        """

        if generator is None:
            generator = _generateZippedSamples(path, sampleClass)

        self.__path = path
        self.__sampleClass = sampleClass

        self.name = path.stem
        # The generator is consumed here, samples are not loaded lazily
        self.samples = list(generator)

    @staticmethod
    def default(path: Path) -> 'LocalDataset':
        """
            Creates Local Dataset object whose samples are LocalSample objects

            Parameters
            ----------
            path : Path
                Local Dataset path

            Returns
            -------
            LocalDataset -> Local Dataset object
        """

        return LocalDataset(path, LocalSample)

    @staticmethod
    def custom(path: Path, generator: SampleGenerator) -> 'LocalDataset':
        """
            Creates Custom Local Dataset object whose samples
            come from the provided generator

            Parameters
            ----------
            path : Path
                Local Dataset path
            generator : SampleGenerator
                sample generator

            Returns
            -------
            LocalDataset -> Local Dataset object
        """

        return LocalDataset(path, AnyLocalSample, generator)

    @property
    def path(self) -> Path:
        """
            Returns
            -------
            Path -> Local Dataset path
        """

        return self.__path

    def download(self, decrypt: bool = True, ignoreCache: bool = False) -> None:
        """
            Local dataset has nothing to download, this only logs a warning

            Parameters
            ----------
            decrypt : bool
                not used by this implementation
            ignoreCache : bool
                not used by this implementation
        """

        logging.getLogger("coretexpylib").warning(">> [Coretex] Local dataset cannot be downloaded")

    def add(self, samplePath: Union[Path, str], sampleName: Optional[str] = None, **metadata: Any) -> SampleType:
        """
            Creates a sample from the provided path and appends it to the dataset

            Parameters
            ----------
            samplePath : Union[Path, str]
                path of the sample, str values are converted to Path
            sampleName : Optional[str]
                accepted but not used by this implementation
            metadata : Any
                accepted but not used by this implementation

            Returns
            -------
            SampleType -> the created sample
        """

        if isinstance(samplePath, str):
            samplePath = Path(samplePath)

        sample = self.__sampleClass(samplePath)
        self.samples.append(sample)

        return sample
# Listing residue: 'LocalDataset' custom(Path path, SampleGenerator generator)