18 from typing
import Optional, Union
19 from pathlib
import Path
23 from .utils
import compressGzip, createSample, getDemuxSamples, getDenoisedSamples, \
24 getFastqDPSamples, getFastqMPSamples, getImportedSamples, getMetadata, getPhylogeneticTreeSamples, \
25 isDemultiplexedSample, isDenoisedSample, isFastqDPSample, isFastqMPSample, \
26 isImportedSample, isPhylogeneticTreeSample, sampleNumber, isPairedEnd
28 from ...utils
import command
35 inputFormat: Optional[str] =
None
39 "qiime",
"tools",
"import",
40 "--type", sequenceType,
41 "--input-path", str(inputPath),
42 "--output-path", str(outputPath)
45 if inputFormat
is not None:
47 "--input-format" , inputFormat
57 perSampleSequences: str,
58 errorCorretctionDetailsPath: str
62 "qiime",
"demux",
"emp-single",
63 "--i-seqs", sequencesPath,
64 "--m-barcodes-file", barcodesPath,
65 "--m-barcodes-column", barcodesColumn,
66 "--o-per-sample-sequences", perSampleSequences,
67 "--o-error-correction-details", errorCorretctionDetailsPath,
73 sequencesPath: Union[str, Path],
74 barcodesPath: Union[str, Path],
76 perSampleSequences: Union[str, Path],
77 errorCorretctionDetailsPath: Union[str, Path]
80 Wrapper for QIIME's demux emp-paired command, which performs demultiplexing of multiplexed
81 sequences in the Earth Microbiome Project amplicon sequencing standard format
82 http://www.earthmicrobiome.org/protocols-and-standards/.
86 sequencesPath : Union[str, Path]
87 Path to the paired-end demultiplexed sequences to be denoised
88 barcodesPath : Union[str, Path]
89 Output path to the resulting feature sequences
90 perSampleSequences : Union[str, Path]
91 Output path to the resulting feature table
92 errorCorretctionDetailsPath : Union[str, Path]
93 Output path to the statistics of the denoising
96 if isinstance(sequencesPath, Path):
97 sequencesPath = str(sequencesPath)
99 if isinstance(barcodesPath, Path):
100 barcodesPath = str(barcodesPath)
102 if isinstance(perSampleSequences, Path):
103 perSampleSequences = str(perSampleSequences)
105 if isinstance(errorCorretctionDetailsPath, Path):
106 errorCorretctionDetailsPath = str(errorCorretctionDetailsPath)
109 "qiime",
"demux",
"emp-paired",
110 "--i-seqs", sequencesPath,
111 "--m-barcodes-file", barcodesPath,
112 "--m-barcodes-column", barcodesColumn,
113 "--o-per-sample-sequences", perSampleSequences,
114 "--o-error-correction-details", errorCorretctionDetailsPath,
119 def demuxSummarize(dataPath: str, visualizationPath: str) ->
None:
121 "qiime",
"demux",
"summarize",
122 "--i-data", dataPath,
123 "--o-visualization", visualizationPath
127 def dada2DenoiseSingle(
128 inputPath: Union[str, Path],
131 representativeSequencesPath: Union[str, Path],
132 tablePath: Union[str, Path],
133 denoisingStatsPath: Union[str, Path],
134 threads: Optional[int] = -1
137 Wrapper for QIIME's DADA2 denoise-single, which performs denoising on the input paired-end reads.
141 inputPath : Union[str, Path]
142 Path to the paired-end demultiplexed sequences to be denoised
144 Position at which read sequences should be trimmed due to low quality.
145 This trims the 5' end of the input sequences, which will be the bases that were
146 sequenced in the first cycles
148 Position at which read sequences should be truncated due to decrease in quality.
149 This truncates the 3' end of the input sequences, which will be the bases that
150 were sequenced in the last cycles. Reads that are shorter than this value will be discarded.
151 After this parameter is applied there must still be at least a 20 nucleotide overlap
152 between the forward and reverse reads. If 0 is provided, no truncation or length
153 filtering will be performed
154 representativeSequencesPath : Union[str, Path]
155 Output path to the resulting feature sequences. Each feature in the feature table will
156 be represented by exactly one sequence, and these sequences will be the
157 joined paired-end sequences
158 tablePath : Union[str, Path]
159 Output path to the resulting feature table
160 denoisingStatsPath : Union[str, Path]
161 Output path to the statistics of the denoising
162 threads : Optional[int]
163 Number of threads to use for the process. If None, multithreading will not be used.
164 Set to -1 (defaul) to use a thread for each CPU core
167 if isinstance(inputPath, Path):
168 inputPath = str(inputPath)
170 if isinstance(representativeSequencesPath, Path):
171 representativeSequencesPath = str(representativeSequencesPath)
173 if isinstance(tablePath, Path):
174 tablePath = str(tablePath)
176 if isinstance(denoisingStatsPath, Path):
177 denoisingStatsPath = str(denoisingStatsPath)
180 "qiime",
"dada2",
"denoise-single",
181 "--i-demultiplexed-seqs", inputPath,
182 "--p-trim-left", str(trimLeft),
183 "--p-trunc-len", str(truncLen),
184 "--o-representative-sequences", representativeSequencesPath,
185 "--o-table", tablePath,
186 "--o-denoising-stats", denoisingStatsPath
190 args.extend([
"--p-n-threads",
"0"])
191 elif threads
is not None:
192 args.extend([
"--p-n-threads", str()])
197 def dada2DenoisePaired(
198 inputPath: Union[str, Path],
203 representativeSequencesPath: Union[str, Path],
204 tablePath: Union[str, Path],
205 denoisingStatsPath: Union[str, Path],
206 threads: Optional[int] = -1
209 Wrapper for QIIME's DADA2 denoise-paired, which performs denoising on the input paired-end reads.
213 inputPath : Union[str, Path]
214 Path to the paired-end demultiplexed sequences to be denoised
216 Position at which forward read sequences should be trimmed due to low quality.
217 This trims the 5' end of the input sequences, which will be the bases that were
218 sequenced in the first cycles
220 Position at which reverse read sequences should be trimmed due to low quality.
221 This trims the 5' end of the input sequences, which will be the bases that were
222 sequenced in the first cycles
224 Position at which forward read sequences should be truncated due to decrease in quality.
225 This truncates the 3' end of the input sequences, which will be the bases that
226 were sequenced in the last cycles. Reads that are shorter than this value will be discarded.
227 After this parameter is applied there must still be at least a 20 nucleotide overlap
228 between the forward and reverse reads. If 0 is provided, no truncation or length
229 filtering will be performed
231 Position at which reverse read sequences should be truncated due to decrease in quality.
232 This truncates the 3' end of the input sequences, which will be the bases that
233 were sequenced in the last cycles. Reads that are shorter than this value will be discarded.
234 After this parameter is applied there must still be at least a 20 nucleotide overlap
235 between the forward and reverse reads. If 0 is provided, no truncation or length
236 filtering will be performed
237 representativeSequencesPath : Union[str, Path]
238 Output path to the resulting feature sequences. Each feature in the feature table will
239 be represented by exactly one sequence, and these sequences will be the
240 joined paired-end sequences
241 tablePath : Union[str, Path]
242 Output path to the resulting feature table
243 denoisingStatsPath : Union[str, Path]
244 Output path to the statistics of the denoising
245 threads : Optional[int]
246 Number of threads to use for the process. If None, multithreading will not be used.
247 Set to -1 (defaul) to use a thread for each CPU core
250 if isinstance(inputPath, Path):
251 inputPath = str(inputPath)
253 if isinstance(representativeSequencesPath, Path):
254 representativeSequencesPath = str(representativeSequencesPath)
256 if isinstance(tablePath, Path):
257 tablePath = str(tablePath)
259 if isinstance(denoisingStatsPath, Path):
260 denoisingStatsPath = str(denoisingStatsPath)
263 "qiime",
"dada2",
"denoise-paired",
264 "--i-demultiplexed-seqs", inputPath,
265 "--p-trim-left-f", str(trimLeftF),
266 "--p-trim-left-r", str(trimLeftR),
267 "--p-trunc-len-f", str(truncLenF),
268 "--p-trunc-len-r", str(truncLenR),
269 "--o-representative-sequences", representativeSequencesPath,
270 "--o-table", tablePath,
271 "--o-denoising-stats", denoisingStatsPath
275 args.extend([
"--p-n-threads",
"0"])
276 elif threads
is not None:
277 args.extend([
"--p-n-threads", str(threads)])
282 def metadataTabulate(inputFile: str, visualizationPath: str) ->
None:
284 "qiime",
"metadata",
"tabulate",
285 "--m-input-file", inputFile,
286 "--o-visualization", visualizationPath
290 def featureTableSummarize(inputPath: str, visualizationPath: str, metadataPath: str) ->
None:
292 "qiime",
"feature-table",
"summarize",
293 "--i-table", inputPath,
294 "--o-visualization", visualizationPath,
295 "--m-sample-metadata-file", metadataPath
299 def featureTableTabulateSeqs(inputPath: str, visualizationPath: str) ->
None:
301 "qiime",
"feature-table",
"tabulate-seqs",
302 "--i-data", inputPath,
303 "--o-visualization", visualizationPath
307 def phylogenyAlignToTreeMafftFasttree(
310 maskedAligmentPath: str,
311 unrootedTreePath: str,
313 threads: Optional[int] = -1
317 "qiime",
"phylogeny",
"align-to-tree-mafft-fasttree",
318 "--i-sequences", sequencesPath,
319 "--o-alignment", aligmentPath,
320 "--o-masked-alignment", maskedAligmentPath,
321 "--o-tree", unrootedTreePath,
322 "--o-rooted-tree", rootedTreePath
326 args.extend([
"--p-n-threads",
"auto"])
327 elif threads
is not None:
328 args.extend([
"--p-n-threads", str(threads)])
333 def diversityCoreMetricsPhylogenetic(
339 threads: Optional[int] = -1
343 "qiime",
"diversity",
"core-metrics-phylogenetic",
344 "--i-phylogeny", phlogenyPath,
345 "--i-table", tablePath,
346 "--p-sampling-depth", str(samplingDepth),
347 "--m-metadata-file", metadataPath,
348 "--output-dir", outputDir
352 args.extend([
"--p-n-jobs-or-threads",
"auto"])
353 elif threads
is not None:
354 args.extend([
"--p-n-jobs-or-threads", str(threads)])
359 def diversityAlphaGroupSignificance(
360 alphaDiversityPath: str,
362 visualizationPath: str
366 "qiime",
"diversity",
"alpha-group-significance",
367 "--i-alpha-diversity", alphaDiversityPath,
368 "--m-metadata-file", metadataPath,
369 "--o-visualization", visualizationPath
373 def diversityBetaGroupSignificance(
374 distanceMatrixPath: str,
377 visualizationPath: str,
382 "qiime",
"diversity",
"beta-group-significance",
383 "--i-distance-matrix", distanceMatrixPath,
384 "--m-metadata-file", metadataPath,
385 "--m-metadata-column", metadataColumn,
386 "--o-visualization", visualizationPath,
387 "--p-pairwise" if pairwise
else ""
395 visualizationPath: str
399 "qiime",
"emperor",
"plot",
400 "--i-pcoa", pcoaPath,
401 "--m-metadata-file", metadataPath,
402 "--p-custom-axes", customAxes,
403 "--o-visualization", visualizationPath
407 def diversityAlphaRarefaction(
412 visualizationPath: str
416 "qiime",
"diversity",
"alpha-rarefaction",
417 "--i-table", tablePath,
418 "--i-phylogeny", phylogenyPath,
419 "--p-max-depth", str(maxDepth),
420 "--m-metadata-file", metadataPath,
421 "--o-visualization", visualizationPath
425 def featureClassifierClassifySklearn(
428 classificationPath: str,
429 threads: Optional[int] = -1
433 "qiime",
"feature-classifier",
"classify-sklearn",
434 "--i-classifier", classifierPath,
435 "--i-reads", readsPath,
436 "--o-classification", classificationPath
440 args.extend([
"--p-n-jobs",
"-1"])
441 elif threads
is not None:
442 args.extend([
"--p-n-jobs", str(threads)])
451 visualizationPath: str
455 "qiime",
"taxa",
"barplot",
456 "--i-table", tablePath,
457 "--i-taxonomy", taxonomyPath,
458 "--m-metadata-file", metadataPath,
459 "--o-visualization", visualizationPath
463 def featureTableFilterSamples(
467 filteredTablePath: str
471 "qiime",
"feature-table",
"filter-samples",
472 "--i-table", tablePath,
473 "--m-metadata-file", metadataPath,
475 "--o-filtered-table", filteredTablePath
479 def compositionAddPseudocount(tablePath: str, compositionTablePath: str) ->
None:
481 "qiime",
"composition",
"add-pseudocount",
482 "--i-table", tablePath,
483 "--o-composition-table", compositionTablePath
487 def compositionAncom(
491 visualizationPath: str
495 "qiime",
"composition",
"ancom",
496 "--i-table", tablePath,
497 "--m-metadata-file", metadataPath,
498 "--m-metadata-column", metadataColumn,
499 "--o-visualization", visualizationPath
507 collapsedTablePath: str
511 "qiime",
"taxa",
"collapse",
512 "--i-table", tablePath,
513 "--i-taxonomy", taxonomyPath,
514 "--p-level", str(level),
515 "--o-collapsed-table", collapsedTablePath
518 def vsearchClusterDeNovo(
519 tablePath: Union[str, Path],
520 representativeSequencesPath: Union[str, Path],
522 clusteredTablePath: Union[str, Path],
523 clusteredSequencesPath: Union[str, Path],
524 threads: Optional[int] = -1
527 Wrapper for QIIME2's vsearch de novo clusteing command.
531 tablePath : Union[str, Path]
532 Path to the feature table generated by DADA2 denoise
533 representativeSequencesPath : Union[str, Path]
534 Path to the represenative sequences generated by DADA2 denoise
536 Percent identiy threshold for the OTU clustering
537 clusteredTablePath : Union[str, Path]
538 Path to the output clustered feature table
539 clusteredSequencesPath : Union[str, Path]
540 Path to the output clustered sequences
541 multithreading : bool
542 Number of threads to use for the process. If None, multithreading will not be used.
543 Set to -1 (defaul) to use a thread for each CPU core
546 if isinstance(tablePath, Path):
547 tablePath = str(tablePath)
549 if isinstance(representativeSequencesPath, Path):
550 representativeSequencesPath = str(representativeSequencesPath)
552 if isinstance(clusteredTablePath, Path):
553 clusteredTablePath = str(clusteredTablePath)
555 if isinstance(clusteredSequencesPath, Path):
556 clusteredSequencesPath = str(clusteredSequencesPath)
559 "qiime",
"vsearch",
"cluster-features-de-novo",
560 "--i-table", tablePath,
561 "--i-sequences", representativeSequencesPath,
562 "--p-perc-identity", str(percIdentity),
563 "--o-clustered-table", clusteredTablePath,
564 "--o-clustered-sequences", clusteredSequencesPath
568 args.extend([
"--p-threads",
"0"])
569 elif threads
is not None:
570 args.extend([
"--p-threads", str(threads)])
575 def vsearchClusterClosedReference(
576 tablePath: Union[str, Path],
577 representativeSequencesPath: Union[str, Path],
578 referenceSequencesPath: Union[str, Path],
580 clusteredTablePath: Union[str, Path],
581 clusteredSequencesPath: Union[str, Path],
582 unmatchedSequencesPath: Union[str, Path],
583 threads: Optional[int] = -1
586 Wrapper for QIIME2's vsearch closed reference clusteing command.
590 tablePath : Union[str, Path]
591 Path to the feature table generated by DADA2 denoise
592 representativeSequencesPath : Union[str, Path]
593 Path to the represenative sequences generated by DADA2 denoise
594 referenceSequencesPath : Union[str, Path]
595 Path to reference OTU sequences
597 Percent identiy threshold for the OTU clustering
598 clusteredTablePath : Union[str, Path]
599 Path to the output clustered feature table
600 clusteredSequencesPath : Union[str, Path]
601 Path to the output clustered sequences
602 unmatchedSequencesPath : Union[str, Path]
603 Path to the output unmatched sequences
604 multithreading : bool
605 Number of threads to use for the process. If None, multithreading will not be used.
606 Set to -1 (defaul) to use a thread for each CPU core
609 if isinstance(tablePath, Path):
610 tablePath = str(tablePath)
612 if isinstance(representativeSequencesPath, Path):
613 representativeSequencesPath = str(representativeSequencesPath)
615 if isinstance(referenceSequencesPath, Path):
616 referenceSequencesPath = str(referenceSequencesPath)
618 if isinstance(clusteredTablePath, Path):
619 clusteredTablePath = str(clusteredTablePath)
621 if isinstance(clusteredSequencesPath, Path):
622 clusteredSequencesPath = str(clusteredSequencesPath)
624 if isinstance(unmatchedSequencesPath, Path):
625 unmatchedSequencesPath = str(unmatchedSequencesPath)
628 "qiime",
"vsearch",
"cluster-features-closed-reference",
629 "--i-table", tablePath,
630 "--i-sequences", representativeSequencesPath,
631 "--i-reference-sequences", referenceSequencesPath,
632 "--p-perc-identity", str(percIdentity),
633 "--o-clustered-table", clusteredTablePath,
634 "--o-clustered-sequences", clusteredSequencesPath,
635 "--o-unmatched-sequences", unmatchedSequencesPath
639 args.extend([
"--p-threads",
"0"])
640 elif threads
is not None:
641 args.extend([
"--p-threads", str(threads)])
646 def vsearchClusterOpenReference(
647 tablePath: Union[str, Path],
648 representativeSequencesPath: Union[str, Path],
649 referenceSequencesPath: Union[str, Path],
651 clusteredTablePath: Union[str, Path],
652 clusteredSequencesPath: Union[str, Path],
653 newReferenceSequencesPath: Union[str, Path],
654 threads: Optional[int] = -1
657 Wrapper for QIIME2's vsearch open reference clusteing command.
661 tablePath : Union[str, Path]
662 Path to the feature table generated by DADA2 denoise
663 representativeSequencesPath : Union[str, Path]
664 Path to the represenative sequences generated by DADA2 denoise
665 referenceSequencesPath : Union[str, Path]
666 Path to reference OTU sequences
668 Percent identiy threshold for the OTU clustering
669 clusteredTablePath : Union[str, Path]
670 Path to the output clustered feature table
671 clusteredSequencesPath : Union[str, Path]
672 Path to the output clustered sequences
673 newReferenceSequencesPath : Union[str, Path]
674 Path to the output new reference sequences
675 multithreading : bool
676 Number of threads to use for the process. If None, multithreading will not be used.
677 Set to -1 (defaul) to use a thread for each CPU core
680 if isinstance(tablePath, Path):
681 tablePath = str(tablePath)
683 if isinstance(representativeSequencesPath, Path):
684 representativeSequencesPath = str(representativeSequencesPath)
686 if isinstance(referenceSequencesPath, Path):
687 referenceSequencesPath = str(referenceSequencesPath)
689 if isinstance(clusteredTablePath, Path):
690 clusteredTablePath = str(clusteredTablePath)
692 if isinstance(clusteredSequencesPath, Path):
693 clusteredSequencesPath = str(clusteredSequencesPath)
695 if isinstance(newReferenceSequencesPath, Path):
696 newReferenceSequencesPath = str(newReferenceSequencesPath)
699 "qiime",
"vsearch",
"cluster-features-open-reference",
700 "--i-table", tablePath,
701 "--i-sequences", representativeSequencesPath,
702 "--i-reference-sequences", referenceSequencesPath,
703 "--p-perc-identity", str(percIdentity),
704 "--o-clustered-table", clusteredTablePath,
705 "--o-clustered-sequences", clusteredSequencesPath,
706 "--o-new-reference-sequences", newReferenceSequencesPath
710 args.extend([
"--p-threads",
"0"])
711 elif threads
is not None:
712 args.extend([
"--p-threads", str(threads)])