def buildIdbaStats(infile, outfile):
    '''
    summary statistics for the idba assembly:

        * N50
        * number of scaffolds
        * total scaffold length

    The work is delegated to PipelineGenomeAssembly.contig_to_stats,
    which receives infile, outfile and the pipeline PARAMS.
    '''
    compute_stats = PipelineGenomeAssembly.contig_to_stats
    compute_stats(infile, outfile, PARAMS)
def buildMetavelvetStats(infile, outfile):
    '''
    summary statistics for the metavelvet assembly:

        * N50
        * number of scaffolds
        * total scaffold length

    The work is delegated to PipelineGenomeAssembly.contig_to_stats,
    which receives infile, outfile and the pipeline PARAMS.
    '''
    compute_stats = PipelineGenomeAssembly.contig_to_stats
    compute_stats(infile, outfile, PARAMS)
def buildMetaphlanRelativeAbundance(infile, outfile):
    '''
    metaphlan is a program used in metagenomics. It assigns
    reads to clades based on specific genetic markers via
    blastn searching

    Before the command is built, the database configured in
    PARAMS["metaphlan_db"] is checked for the index file that the
    aligner selected by PARAMS["metaphlan_executable"] ("bowtie2" or
    "blast") needs.

    infile: passed to PipelineGenomeAssembly.Metaphlan().build
    outfile: not referenced directly in this function

    Raises AssertionError if the expected database index file is missing.
    '''
    # NOTE(review): P.run() is called without arguments, so the locals
    # to_cluster, method and statement are presumably picked up from this
    # frame by name - confirm against the P module before renaming them.
    to_cluster = True

    # at present the pipeline will take a set of files
    # and compute the abundances of different taxonomic groups
    # based on ALL reads i.e. paired data are combined into
    # a single file for analysis

    if PARAMS["metaphlan_executable"] == "bowtie2":
        _assert_metaphlan_index(".1.bt2", "bowtie2")
        method = "--bowtie2db"
    elif PARAMS["metaphlan_executable"] == "blast":
        # NOTE(review): the suffix is "nin" without a leading dot, exactly
        # as in the original check; BLAST index files are normally named
        # "<db>.nin" - confirm how metaphlan_db is configured.
        _assert_metaphlan_index("nin", "blast")
        method = "--blastdb"
    # NOTE(review): any other metaphlan_executable value leaves `method`
    # undefined, as in the original code.

    statement = PipelineGenomeAssembly.Metaphlan().build(infile, method="rel_ab")
    P.run()


def _assert_metaphlan_index(suffix, aligner):
    '''assert that PARAMS["metaphlan_db"] + suffix exists, naming the file and aligner on failure'''
    # Build the path once so the message reports the file that was tested;
    # "..." % db + suffix would append the suffix to the end of the message
    # because % binds tighter than +.
    index_path = PARAMS["metaphlan_db"] + suffix
    assert os.path.exists(index_path), (
        "missing file %s: Are you sure you have the correct database for %s?"
        % (index_path, aligner))
def runRay(infile, outfile):
    '''
    run Ray on each track

    The command is produced by PipelineGenomeAssembly.Ray().build(infile)
    and P.run() is then called. outfile is not referenced directly in
    this function.
    '''
    # NOTE(review): P.run() is called without arguments, so to_cluster,
    # job_options and statement are presumably read from this frame by
    # name - confirm against the P module before renaming or removing them.
    to_cluster = True
    # presumably a scheduler resource request for 30G of free memory -
    # TODO confirm
    job_options = " -l mem_free=30G"
    statement = PipelineGenomeAssembly.Ray().build(infile)
    P.run()
def runMetavelvet(infile, outfile):
    '''
    run meta-velvet on each track

    The command is produced by
    PipelineGenomeAssembly.Metavelvet().build(infile) and P.run() is then
    called. outfile is not referenced directly in this function.
    '''
    # NOTE(review): P.run() is called without arguments, so to_cluster,
    # job_options and statement are presumably read from this frame by
    # name - confirm against the P module before renaming or removing them.
    to_cluster = True
    # presumably a scheduler resource request for 30G of free memory -
    # TODO confirm
    job_options = " -l mem_free=30G"
    statement = PipelineGenomeAssembly.Metavelvet().build(infile)
    P.run()
def buildContigLengths(infile, outfile):
    '''
    output the length of every contig in each of the assemblies

    The work is delegated to PipelineGenomeAssembly.build_scaffold_lengths,
    which receives infile, outfile and the pipeline PARAMS.
    '''
    write_lengths = PipelineGenomeAssembly.build_scaffold_lengths
    write_lengths(infile, outfile, PARAMS)