Coretex
dataset.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Optional, TypeVar, Generic, List, Callable
19 from abc import ABC, abstractmethod
20 from pathlib import Path
21 
22 from ..sample import Sample
23 from ..utils import isEntityNameValid
24 
25 
# Generic placeholder for the concrete sample type stored by a dataset
SampleType = TypeVar("SampleType", bound = "Sample")


class Dataset(ABC, Generic[SampleType]):

    """
        Represents the generic class Dataset
        Includes methods that can be used by any instance of Dataset
        and abstract methods that must be implemented by any subclass

        Properties
        ----------
        name : str
            name of dataset
        samples : List[SampleType]
            list of samples
    """

    name: str
    samples: List[SampleType]

    @property
    def count(self) -> int:
        """
            Returns
            -------
            int -> number of samples in this dataset
        """

        return len(self.samples)

    @property
    @abstractmethod
    def path(self) -> Path:
        """
            Must be implemented by every subclass

            Returns
            -------
            Path -> path associated with the dataset
            (presumably the local storage location - confirm against concrete subclasses)
        """

        pass

    @abstractmethod
    def download(self, decrypt: bool = True, ignoreCache: bool = False) -> None:
        """
            Must be implemented by every subclass

            Parameters
            ----------
            decrypt : bool
                presumably controls whether downloaded content is decrypted,
                exact semantics are defined by the subclass
            ignoreCache : bool
                presumably controls whether previously cached content is bypassed,
                exact semantics are defined by the subclass
        """

        pass

    def rename(self, name: str) -> bool:
        """
            Renames the dataset, if the provided name is
            different from the current name

            Parameters
            ----------
            name : str
                new dataset name

            Returns
            -------
            bool -> True if dataset was renamed, False if dataset was not renamed

            Raises
            ------
            ValueError -> if the provided name is rejected by isEntityNameValid
        """

        if not isEntityNameValid(name):
            raise ValueError(">> [Coretex] Dataset name is invalid. Requirements: alphanumeric characters (\"a-z\", and \"0-9\") and dash (\"-\") with length between 3 to 50")

        # Nothing to do when the name is unchanged
        if self.name == name:
            return False

        self.name = name
        return True

    def getSample(self, name: str) -> Optional[SampleType]:
        """
            Retrieves the first sample whose name starts with the provided name

            Parameters
            ----------
            name : str
                name of sample

            Returns
            -------
            Optional[SampleType] -> sample object, or None if no sample matches
        """

        for sample in self.samples:
            # startswith must be used since if we import sample
            # with the same name twice, the second one will have
            # suffix with its serial number
            if sample.name.startswith(name):
                return sample

        return None

    def getSamples(self, filterFunc: Callable[[SampleType], bool]) -> List[SampleType]:
        """
            Retrieves all samples accepted by the provided filter,
            keeping the order in which they appear in the dataset

            Parameters
            ----------
            filterFunc : Callable[[SampleType], bool]
                function called once per sample, sample is kept
                if the returned value is truthy

            Returns
            -------
            List[SampleType] -> new list with the matching samples
        """

        return [sample for sample in self.samples if filterFunc(sample)]
Optional[SampleType] getSample(self, str name)
Definition: dataset.py:90