def assemble_genomes(self):
    """
    Assemble genomes with skesa

    Creates a skesa.Skesa object from this pipeline object, calls its main() method, and then passes this
    object to metadataprinter.MetadataPrinter.
    """
    skesa.Skesa(inputobject=self).main()
    # Invoke the metadata printer once the assembly step has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def evaluate_assemblies(self):
    """
    Assess the assemblies with Quast

    Creates an evaluate.AssemblyEvaluation object from this pipeline object, calls its main() method, and then
    passes this object to metadataprinter.MetadataPrinter.
    """
    evaluate.AssemblyEvaluation(inputobject=self).main()
    # Invoke the metadata printer once the evaluation step has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def quality_features(self, analysis):
    """
    Pull assembly features (e.g. total genome size, longest contig, and N50) out of the assemblies
    :param analysis: value forwarded unchanged as the 'analysis' argument of quality.QualityFeatures
    """
    quality.QualityFeatures(inputobject=self,
                            analysis=analysis).main()
    # Invoke the metadata printer once feature extraction has returned
    metadataprinter.MetadataPrinter(self)
def contamination_detection(self):
    """
    Determine the levels of contamination present in the reads

    Delegates to the contamination_finder method of self.qualityobject, supplying the report path and the
    debug flag stored on this object.
    """
    find_contamination = self.qualityobject.contamination_finder
    find_contamination(report_path=self.reportpath,
                       debug=self.debug)
    # Invoke the metadata printer once the contamination search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def quality(self):
    """
    Creates quality objects and runs quality assessments and quality processes on the supplied sequences

    The steps are run in a fixed order: FASTQ validation (skipped when self.debug is set), FastQC on the raw
    reads, quality trimming, FastQC on the trimmed reads, error correction, contamination detection, and FastQC
    on the trimmed and corrected reads. The metadata printer is then invoked.
    :raises SystemExit: when self.preprocess is set, i.e. only pre-processing of the data was requested
    """
    # Validate that the FASTQ files are in the proper format, and that there are no issues e.g. different numbers
    # of forward and reverse reads, read length longer than quality score length, proper extension
    if not self.debug:
        self.fastq_validate()
    # Run FastQC on the unprocessed fastq files
    self.fastqc_raw()
    # Perform quality trimming and FastQC on the trimmed files
    self.quality_trim()
    # Run FastQC on the trimmed files
    self.fastqc_trimmed()
    # Perform error correcting on the reads
    self.error_correct()
    # Detect contamination in the reads
    self.contamination_detection()
    # Run FastQC on the processed fastq files
    self.fastqc_trimmedcorrected()
    metadataprinter.MetadataPrinter(inputobject=self)
    # Exit if only pre-processing of data is requested
    if self.preprocess:
        logging.info('Pre-processing complete')
        # Raise SystemExit directly rather than calling quit(): quit() is injected by the site module for
        # interactive use, and is not guaranteed to exist (e.g. when Python is started with -S)
        raise SystemExit
def sixteens(self):
    """
    Perform the 16S analyses

    Creates a BLAST object with the 'sixteens_full' analysis type and a cutoff of 95, and calls its seekr()
    method.
    """
    BLAST(args=self,
          analysistype='sixteens_full',
          cutoff=95).seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def virulence(self):
    """
    Detect virulence genes

    Creates a BLAST object with the 'virulence' analysis type, and calls its seekr() method.
    """
    BLAST(args=self,
          analysistype='virulence').seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def resfinder(self):
    """
    Find resistance genes in the assemblies

    Creates a BLAST object with the 'resfinder_assembled' analysis type, and calls its seekr() method.
    """
    BLAST(args=self,
          analysistype='resfinder_assembled').seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def seqsero(self):
    """
    Perform SeqSero2 analyses on Salmonella samples

    Creates a SeqSero object from this pipeline object, and calls its main() method.
    """
    SeqSero(self).main()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def sistr(self):
    """
    Perform the sistr analyses

    Creates a sistr.Sistr object with the 'sistr' analysis type, and calls its main() method.
    """
    sistr.Sistr(inputobject=self,
                analysistype='sistr').main()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def geneseekr(self):
    """
    Search for genes of interest

    Creates a BLAST object with the 'genesippr' analysis type and a cutoff of 95, and calls its seekr() method.
    """
    BLAST(args=self,
          analysistype='genesippr',
          cutoff=95).seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def helper(self):
    """
    Helper function for file creation (if desired), manipulation, quality assessment, and trimming as well as
    the assembly

    Populates self.runmetadata either with a Basic object (when self.basicassembly is set), or by parsing the
    run files, in which case PhiX information is extracted, placeholder command values are set on every
    sample, and the FASTQ files are moved/linked.
    """
    if not self.basicassembly:
        # Populate the runmetadata object by parsing the SampleSheet.csv, GenerateFASTQRunStatistics.xml, and
        # RunInfo.xml files
        self.runinfo = os.path.join(self.path, 'RunInfo.xml')
        self.runmetadata = runMetadata.Metadata(passed=self)
        # Extract the flowcell ID and the instrument name if the RunInfo.xml file was provided
        self.runmetadata.parseruninfo()
        # Extract PhiX mapping information from the run
        phix.PhiX(inputobject=self).main()
        # No bcl or nohup calls were made, so record 'NA' for both in the metadata of every sample
        for strain in self.runmetadata.samples:
            strain.commands = GenObject()
            strain.commands.nohupcall = 'NA'
            strain.commands.bclcall = 'NA'
        # Move/link the FASTQ files to strain-specific working directories
        fastqmover.FastqMover(inputobject=self)
    else:
        # Simple assembly without requiring accessory files (SampleSheet.csv, etc).
        self.runmetadata = Basic(inputobject=self)
    # Print the metadata to file
    metadataprinter.MetadataPrinter(inputobject=self)
def mash(self):
    """
    Use mash to find the closest refseq genome

    Logs the start of the analyses, and creates a mash.Mash object with the 'mash' analysis type; no further
    method is called on that object here.
    """
    logging.info('Running MASH analyses')
    mash.Mash(inputobject=self,
              analysistype='mash')
    # Invoke the metadata printer once the Mash object has been created
    metadataprinter.MetadataPrinter(inputobject=self)
def run_gdcs(self):
    """
    Establish the presence of genomically-dispersed conserved sequences (genes from MLST, rMLST, and cgMLST
    analyses)

    Creates a GDCS object from this pipeline object, and calls its main() method.
    """
    # Perform the GDCS analysis
    GDCS(inputobject=self).main()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def legacy_vtyper(self):
    """
    Legacy vtyper (uses ePCR)

    Creates a LegacyVtyper object with the 'legacy_vtyper' analysis type and mismatches set to 2, and calls its
    vtyper() method.
    """
    LegacyVtyper(inputobject=self,
                 analysistype='legacy_vtyper',
                 mismatches=2).vtyper()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def prophages(self):
    """
    Detect prophages

    Creates a Prophages object with the 'prophages' analysis type, a cutoff of 90, and unique=True, and calls
    its seekr() method.
    """
    Prophages(args=self,
              analysistype='prophages',
              cutoff=90,
              unique=True).seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def univec(self):
    """
    Search for Univec contamination

    Creates a Univec object with the 'univec' analysis type, a cutoff of 80, and unique=True, and calls its
    seekr() method.
    """
    Univec(args=self,
           analysistype='univec',
           cutoff=80,
           unique=True).seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def coregenome(self):
    """
    Calculate the core genome

    Creates a core.CoreGenome object with the 'coregenome' analysis type and genus_specific=True, calls its
    seekr() method, and then creates a core.AnnotatedCore object from this pipeline object.
    """
    core.CoreGenome(args=self,
                    analysistype='coregenome',
                    genus_specific=True).seekr()
    # AnnotatedCore is only instantiated here; no method is called on the resulting object
    core.AnnotatedCore(inputobject=self)
    # Invoke the metadata printer once both steps have returned
    metadataprinter.MetadataPrinter(inputobject=self)
def rmlst_assembled(self):
    """
    Perform rMLST analyses on the assemblies

    If rmlst.csv already exists in the report path, the report is parsed with ReportParse. Otherwise, a BLAST
    object with the 'rmlst' analysis type and a cutoff of 100 is created, and its seekr() method is called.
    """
    rmlst_report = os.path.join(self.reportpath, 'rmlst.csv')
    if os.path.isfile(rmlst_report):
        # The report is already present: parse it rather than running the analyses
        ReportParse(args=self,
                    analysistype='rmlst').report_parse()
    else:
        BLAST(args=self,
              analysistype='rmlst',
              cutoff=100).seekr()
    # Invoke the metadata printer regardless of the branch taken
    metadataprinter.MetadataPrinter(inputobject=self)
def ec_typer(self):
    """
    Serotyping using the assemblies

    Creates an ECTyper object using the run metadata, report path, number of CPUs, and log file stored on this
    object, with the 'raw_assemblies' folder under self.path as the assembly path, and calls its main() method.
    """
    ECTyper(metadata=self.runmetadata,
            report_path=self.reportpath,
            assembly_path=os.path.join(self.path, 'raw_assemblies'),
            threads=self.cpus,
            logfile=self.logfile).main()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def univec(self):
    """
    Search for Univec contamination

    The search (a Univec object with the 'univec' analysis type, a cutoff of 80, and unique=True) is only run
    when univec.csv is not already present in the report path.
    """
    report_exists = os.path.isfile(os.path.join(self.reportpath, 'univec.csv'))
    if not report_exists:
        Univec(args=self,
               analysistype='univec',
               cutoff=80,
               unique=True).seekr()
    # Invoke the metadata printer whether or not the search was run
    metadataprinter.MetadataPrinter(inputobject=self)
def assembly_stats(self):
    """
    Run basic quality analyses on the assemblies

    Calls, in order, self.quality_features (with analysis='polished'), self.prodigal, and self.clark.
    """
    # Calculate assembly metrics; the analysis type passed on is 'polished'
    self.quality_features(analysis='polished')
    # Detect ORFs
    self.prodigal()
    # Run the CLARK analyses
    self.clark()
    # Invoke the metadata printer once all the steps have returned
    metadataprinter.MetadataPrinter(inputobject=self)
def sixteens(self):
    """
    Perform the 16S analyses

    Creates a SixteensFull object with the 'sixteens_full' analysis type and a cutoff of 0.95, using the
    commit, start time, and home path stored on this object; no further method is called on that object here.
    """
    SixteensFull(
        args=self,
        pipelinecommit=self.commit,
        startingtime=self.starttime,
        scriptpath=self.homepath,
        analysistype='sixteens_full',
        cutoff=0.95,
    )
    # Invoke the metadata printer once the object has been created
    metadataprinter.MetadataPrinter(inputobject=self)
def serosippr(self):
    """
    Perform the serotyping analyses

    Creates a Serotype object with the 'serosippr' analysis type, a cutoff of 0.90, and pipeline=True, using
    the commit, start time, and home path stored on this object; no further method is called on that object
    here.
    """
    Serotype(
        args=self,
        pipelinecommit=self.commit,
        startingtime=self.starttime,
        scriptpath=self.homepath,
        analysistype='serosippr',
        cutoff=0.90,
        pipeline=True,
    )
    # Invoke the metadata printer once the object has been created
    metadataprinter.MetadataPrinter(inputobject=self)
def prophages(self, cutoff=90):
    """
    Detect prophages
    :param cutoff: cutoff value to be used in the analyses

    The Prophages object is always created, but its seekr() method is only called when prophages.csv is not
    already present in the report path.
    """
    prophage_search = Prophages(args=self,
                                analysistype='prophages',
                                cutoff=cutoff,
                                unique=True)
    prophage_report = os.path.join(self.reportpath, 'prophages.csv')
    if not os.path.isfile(prophage_report):
        prophage_search.seekr()
    # Invoke the metadata printer whether or not the search was run
    metadataprinter.MetadataPrinter(inputobject=self)
def serosippr(self):
    """
    Perform the serotyping analyses

    Creates a BLAST object with the 'serosippr' analysis type, a cutoff of 90, genus_specific=True, and
    unique=True, and calls its seekr() method.
    """
    BLAST(args=self,
          analysistype='serosippr',
          cutoff=90,
          genus_specific=True,
          unique=True).seekr()
    # Invoke the metadata printer once the search has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def mob_suite(self):
    """
    Run the MobRecon analyses

    Creates a MobRecon object with the 'mobrecon' analysis type from the samples, reference file path, number
    of CPUs, log file, and report path stored on this object, and calls its mob_recon() method.
    """
    MobRecon(metadata=self.runmetadata.samples,
             analysistype='mobrecon',
             databasepath=self.reffilepath,
             threads=self.cpus,
             logfile=self.logfile,
             reportpath=self.reportpath).mob_recon()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)
def genesippr(self):
    """
    Search for genes of interest

    Creates a GeneSippr object with the 'genesippr' analysis type, a cutoff of 0.95, pipeline=False, and
    revbait=False, using the commit, start time, and home path stored on this object; no further method is
    called on that object here.
    """
    GeneSippr(
        args=self,
        pipelinecommit=self.commit,
        startingtime=self.starttime,
        scriptpath=self.homepath,
        analysistype='genesippr',
        cutoff=0.95,
        pipeline=False,
        revbait=False,
    )
    # Invoke the metadata printer once the object has been created
    metadataprinter.MetadataPrinter(inputobject=self)
def objectprep(self):
    """
    Prepare the metadata objects for the analyses

    When self.datapath is set, self.runmetadata is replaced by a createobject.ObjectCreation object. Otherwise,
    the abundance, classification, and combined attributes of every existing sample are copied to the
    abundancefile, assignmentfile, and fastqfiles attributes, respectively. Finally, self.taxids() is called.
    """
    if not self.datapath:
        # No datapath was provided: reuse the values already stored on each of the samples
        for strain in self.runmetadata.samples:
            strain.general.abundancefile = strain.general.abundance
            strain.general.assignmentfile = strain.general.classification
            # fastqfiles is a single-item list holding the combined attribute
            strain.general.fastqfiles = [strain.general.combined]
    else:
        # Only find the data files if a datapath is provided
        self.runmetadata = createobject.ObjectCreation(self)
    # Print the metadata to file
    metadataprinter.MetadataPrinter(self)
    # Load the results in the csv files into dictionaries
    self.taxids()
def ressippr(self):
    """
    Find resistance genes in the raw reads

    Creates a Resistance object with the 'resfinder' analysis type, a cutoff of 0.7, pipeline=False, and
    revbait=True, using the commit, start time, and home path stored on this object, and calls its main()
    method.
    """
    Resistance(
        args=self,
        pipelinecommit=self.commit,
        startingtime=self.starttime,
        scriptpath=self.homepath,
        analysistype='resfinder',
        cutoff=0.7,
        pipeline=False,
        revbait=True,
    ).main()
    # Invoke the metadata printer once the analysis has returned
    metadataprinter.MetadataPrinter(inputobject=self)