from typing import List
from pathlib import Path
from zipfile import ZipFile

import gzip
import logging
import shutil

from ..._folder_manager import folder_manager
from ...entities import TaskRun, CustomSample, CustomDataset
from ...networking import NetworkRequestError
def createSample(name: str, dataset: CustomDataset, path: Path, taskRun: TaskRun, stepName: str, retryCount: int = 0) -> CustomSample:
    """
    Adds "path" to "dataset" under "name" and registers the same path as a
    Qiime artifact of "taskRun" at "{stepName}/{name}".

    If adding the sample raises NetworkRequestError the call is retried by
    invoking this function again with "retryCount" increased by one.

    Args:
        name: name given to the new sample and used in the artifact path
        dataset: dataset the sample is added to
        path: file handed to both the dataset and the artifact call
        taskRun: task run that receives the Qiime artifact
        stepName: first component of the artifact path
        retryCount: number of attempts that have already failed

    Returns:
        The object returned by dataset.add(path, name).

    Raises:
        NetworkRequestError: when adding the sample keeps failing after the
            retry budget is used up.
    """

    # NOTE(review): the retry limit is not visible in this excerpt; 3 is an
    # assumed value - confirm it against the intended behaviour.
    maxRetryCount = 3

    try:
        sample = dataset.add(path, name)
    except NetworkRequestError:
        if retryCount >= maxRetryCount:
            # Out of attempts: surface the original network error instead of
            # recursing forever on a persistent failure
            raise

        logging.info(f">> [Coretex] Retry count: {retryCount}")
        return createSample(name, dataset, path, taskRun, stepName, retryCount + 1)

    taskRun.createQiimeArtifact(f"{stepName}/{name}", path)

    return sample
def compressGzip(source: Path, destination: Path, deleteSource: bool = False) -> None:
    """
    Writes a gzip-compressed copy of "source" to "destination".

    Args:
        source: file whose bytes are compressed
        destination: path of the gzip file that is created or overwritten
        deleteSource: when True, "source" is removed after the compressed
            copy has been written completely
    """

    logging.info(f"{source} -> {destination}")

    # Stream the content in chunks so large inputs are never held in memory
    # as a whole
    with source.open("rb") as sourceFile, gzip.open(destination, "wb") as destinationFile:
        shutil.copyfileobj(sourceFile, destinationFile)

    # Only drop the original once both files are closed, i.e. the archive
    # has been flushed to disk
    if deleteSource:
        source.unlink()
def sampleNumber(sample: CustomSample) -> int:
    """
    Returns the part of the sample name that precedes its first "-",
    converted to int.

    Raises:
        ValueError: when that prefix is not a valid integer literal.
    """

    prefix, _, _ = sample.name.partition("-")
    return int(prefix)
def isFastqMPSample(sample: CustomSample) -> bool:
    """
    Checks whether the loaded sample holds both a sequences file and a
    barcodes file, matched by exact file name (plain or ".gz").

    Returns:
        True only when at least one accepted sequences file name and at
        least one accepted barcodes file name are present.
    """

    acceptedSequenceNames = ["forward.fastq", "forward.fastq.gz", "sequences.fastq", "sequences.fastq.gz"]
    acceptedBarcodeNames = ["barcodes.fastq", "barcodes.fastq.gz"]

    # Collect the names once, the content is checked against both lists
    fileNames = [entry.name for entry in sample.load().folderContent]

    hasSequences = any(fileName in acceptedSequenceNames for fileName in fileNames)
    if not hasSequences:
        return False

    return any(fileName in acceptedBarcodeNames for fileName in fileNames)
def isFastqDPSample(sample: CustomSample) -> bool:
    """
    Checks whether the loaded sample contains at least one entry whose
    suffix is exactly ".fastq".
    """

    for entry in sample.load().folderContent:
        if entry.suffix == ".fastq":
            return True

    return False
def isImportedSample(sample: CustomSample) -> bool:
    """
    Checks whether the loaded sample contains an entry named
    "multiplexed-sequences.qza".
    """

    return any(
        entry.name == "multiplexed-sequences.qza"
        for entry in sample.load().folderContent
    )
def isDemultiplexedSample(sample: CustomSample) -> bool:
    """
    Checks whether the loaded sample contains an entry named "demux.qza".
    """

    fileNames = {entry.name for entry in sample.load().folderContent}
    return "demux.qza" in fileNames
def isDenoisedSample(sample: CustomSample) -> bool:
    """
    Checks whether the loaded sample contains all three of "table.qza",
    "rep-seqs.qza" and "stats.qza".
    """

    requiredNames = {"table.qza", "rep-seqs.qza", "stats.qza"}
    presentNames = {entry.name for entry in sample.load().folderContent}

    return requiredNames.issubset(presentNames)
def isPhylogeneticTreeSample(sample: CustomSample) -> bool:
    """
    Checks whether the loaded sample contains every one of
    "rooted-tree.qza", "unrooted-tree.qza", "aligned-rep-seqs.qza" and
    "masked-aligned-rep-seqs.qza".
    """

    requiredNames = (
        "rooted-tree.qza",
        "unrooted-tree.qza",
        "aligned-rep-seqs.qza",
        "masked-aligned-rep-seqs.qza",
    )
    presentNames = [entry.name for entry in sample.load().folderContent]

    return all(requiredName in presentNames for requiredName in requiredNames)
def getFastqMPSamples(dataset: CustomDataset) -> List[CustomSample]:
    """
    Returns the result of dataset.getSamples called with isFastqMPSample
    (presumably the samples for which that check holds).
    """

    fastqMPSamples = dataset.getSamples(isFastqMPSample)
    return fastqMPSamples
def getFastqDPSamples(dataset: CustomDataset) -> List[CustomSample]:
    """
    Returns the result of dataset.getSamples called with isFastqDPSample
    (presumably the samples for which that check holds).
    """

    fastqDPSamples = dataset.getSamples(isFastqDPSample)
    return fastqDPSamples
def getImportedSamples(dataset: CustomDataset) -> List[CustomSample]:
    """
    Returns the result of dataset.getSamples called with isImportedSample
    (presumably the samples for which that check holds).
    """

    importedSamples = dataset.getSamples(isImportedSample)
    return importedSamples
def getDemuxSamples(dataset: CustomDataset) -> List[CustomSample]:
    """
    Returns the result of dataset.getSamples called with
    isDemultiplexedSample (presumably the samples for which that check
    holds).
    """

    demuxSamples = dataset.getSamples(isDemultiplexedSample)
    return demuxSamples
def getDenoisedSamples(dataset: CustomDataset) -> List[CustomSample]:
    """
    Returns the result of dataset.getSamples called with isDenoisedSample
    (presumably the samples for which that check holds).
    """

    denoisedSamples = dataset.getSamples(isDenoisedSample)
    return denoisedSamples
def getPhylogeneticTreeSamples(dataset: CustomDataset) -> List[CustomSample]:
    """
    Returns the result of dataset.getSamples called with
    isPhylogeneticTreeSample (presumably the samples for which that check
    holds).
    """

    phylogeneticTreeSamples = dataset.getSamples(isPhylogeneticTreeSample)
    return phylogeneticTreeSamples
def getMetadata(sample: CustomSample) -> Path:
    """
    Returns the single ".tsv" file located directly inside sample.path.

    Raises:
        RuntimeError: when the number of ".tsv" files found there is
            anything other than one.
    """

    tsvPaths = [*sample.path.glob("*.tsv")]
    tsvCount = len(tsvPaths)

    if tsvCount == 1:
        return tsvPaths[0]

    raise RuntimeError(f">> [Coretex] Metadata sample must contain one .tsv file. Found {tsvCount}")
def isPairedEnd(sample: CustomSample) -> bool:
    """
    Extracts the first entry of sample.path as a zip archive into a temporary
    folder and reports whether the second line of the first file matching
    "*metadata.yaml" inside it contains the text "PairedEnd".

    The temporary folder is removed before returning, also when extraction
    or reading fails.

    Returns:
        True when "PairedEnd" occurs in the second line of the metadata
        file, False otherwise.

    Raises:
        IndexError: when sample.path has no entries, no metadata file is
            found in the archive, or the metadata file has fewer than two
            lines.
    """

    sampleTemp = folder_manager.createTempFolder("qzaSample")

    try:
        # NOTE(review): assumes the first entry yielded for sample.path is
        # the .qza archive - iterdir() order is arbitrary, confirm that the
        # sample only ever holds that one file
        qzaPath = list(sample.path.iterdir())[0]

        with ZipFile(qzaPath, "r") as qzaFile:
            qzaFile.extractall(sampleTemp)

        metadataPath = list(sampleTemp.rglob("*metadata.yaml"))[0]

        with metadataPath.open("r") as metadata:
            pairedEnd = "PairedEnd" in metadata.readlines()[1]
    finally:
        # Clean up the extracted content no matter how the block above ended
        shutil.rmtree(sampleTemp)

    return pairedEnd