Coretex
sequence_sample.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Union
19 from pathlib import Path
20 
21 from .local_sequence_sample import LocalSequenceSample
22 from ..network_sample import NetworkSample
23 
24 
26 
27  """
28  Represents the local custom Sample class
29  which is used for working with Other Task locally
30  """
31 
    def __init__(self) -> None:
        """
            Initializes the sample by explicitly invoking the
            NetworkSample initializer on this instance.
        """

        # Only NetworkSample.__init__ is called here, by name rather than
        # through super(); NOTE(review): presumably deliberate so that no other
        # base-class initializer runs - confirm against the class definition.
        NetworkSample.__init__(self)
34 
35  @classmethod
36  def isValidSequenceFile(cls, path: Union[Path, str]) -> bool:
37  """
38  Checks whether the file is a valid sequence file or not.
39  File is a valid sequence file if it ends with any of these extensions:
40  - .fasta
41  - .fastq
42  - .fa
43  - .fq
44  """
45 
46  if not isinstance(path, Path):
47  path = Path(path)
48 
49  supportedExtensions = cls.supportedExtensionssupportedExtensions()
50  supportedExtensions.extend([f"{extension}.gz" for extension in cls.supportedExtensionssupportedExtensions()])
51 
52  return any(path.name.endswith(extension) for extension in supportedExtensions)