def buildIdbaStats(infile, outfile):
    '''
    Build summary statistics for an IDBA assembly.

    The original notes list the statistics as: N50, number of
    scaffolds and total scaffold length. The work itself is delegated
    to PipelineMetagenomeAssembly.contig_to_stats, which receives the
    input file, the output file and the pipeline PARAMS.

    Nothing is returned from this function.
    '''
    compute_stats = PipelineMetagenomeAssembly.contig_to_stats
    compute_stats(infile, outfile, PARAMS)
def filterContigs(infile, outfile):
    '''
    Filter contigs using the "filter" setting from the .ini file.

    When PARAMS["filter"] is unset (falsy) a length of 0 is passed on,
    so no contigs are removed but a new outfile is still produced -
    this is not space efficient and SHOULD BE CHANGED.

    The filtering itself is done by
    PipelineMetagenomeAssembly.filterContigs(infile, outfile, length).
    '''
    # A falsy setting (missing value, 0, empty string) falls back to 0.
    cutoff = PARAMS["filter"] or 0
    PipelineMetagenomeAssembly.filterContigs(infile, outfile, cutoff)
def runSpades(infile, outfile):
    '''
    Run spades on each track.

    The command is built by
    PipelineMetagenomeAssembly.Spades().build(infile) and P.run() is
    then called.

    infile: input handed to the Spades statement builder.
    outfile: not referenced directly in this body.
    '''
    # NOTE(review): `job_options` and `statement` are never passed
    # explicitly; presumably P.run() picks them up from this function's
    # local scope - confirm before renaming or removing either.
    job_options = " -l mem_free=30G"
    statement = PipelineMetagenomeAssembly.Spades().build(infile)
    P.run()
def runSoapdenovo(infile, outfile):
    '''
    Run soapdenovo.

    The command is built by
    PipelineMetagenomeAssembly.SoapDenovo2().build(infile) and P.run()
    is then called.

    infile: input handed to the SoapDenovo2 statement builder.
    outfile: not referenced directly in this body.
    '''
    # NOTE(review): `job_options` and `statement` are never passed
    # explicitly; presumably P.run() picks them up from this function's
    # local scope - confirm before renaming or removing either.
    job_options = "-l mem_free=30G"
    statement = PipelineMetagenomeAssembly.SoapDenovo2().build(infile)
    P.run()
def runIdba(infile, outfile):
    '''
    Run idba on each track.

    The command is built by
    PipelineMetagenomeAssembly.Idba().build(infile) and P.run() is
    then called.

    infile: input handed to the Idba statement builder.
    outfile: not referenced directly in this body.
    '''
    # NOTE(review): `job_options` and `statement` are never passed
    # explicitly; presumably P.run() picks them up from this function's
    # local scope - confirm before renaming or removing either.
    job_options = " -l mem_free=30G"
    statement = PipelineMetagenomeAssembly.Idba().build(infile)
    P.run()
def runMetavelvet(infile, outfile):
    '''
    Run meta-velvet on each track.

    The command is built by
    PipelineMetagenomeAssembly.Metavelvet().build(infile, PARAMS) and
    P.run() is then called.

    infile: input handed to the Metavelvet statement builder.
    outfile: not referenced directly in this body.
    '''
    # NOTE(review): `job_options` and `statement` are never passed
    # explicitly; presumably P.run() picks them up from this function's
    # local scope - confirm before renaming or removing either.
    job_options = " -l mem_free=30G"
    # Unlike the other assembler builders here, this one also takes PARAMS.
    statement = PipelineMetagenomeAssembly.Metavelvet().build(infile, PARAMS)
    P.run()
def poolReadsAcrossConditions(infiles, outfile):
    '''
    Pool reads across conditions.

    The command is built by
    PipelineMetagenomeAssembly.pool_reads(infiles, outfile) and P.run()
    is then called.

    infiles: the inputs to pool, passed through to pool_reads.
    outfile: the pooled output target, passed through to pool_reads.
    '''
    # NOTE(review): `statement` is never passed explicitly; presumably
    # P.run() picks it up from this function's local scope - confirm
    # before renaming it.
    statement = PipelineMetagenomeAssembly.pool_reads(infiles, outfile)
    P.run()
def buildMetaphlanRelativeAbundance(infile, outfile):
    '''
    Build relative abundances with metaphlan.

    metaphlan is a program used in metagenomics. It assigns
    reads to clades based on specific genetic markers via
    blastn searching.

    Before building the command, the database configured in
    PARAMS["metaphlan_db"] is checked for the aligner selected in
    PARAMS["metaphlan_executable"] ("bowtie2" or "blast"). Any other
    executable value skips the check.

    infile: input handed to the Metaphlan statement builder.
    outfile: not referenced directly in this body.

    Raises AssertionError if the expected database file is missing.
    '''
    # NOTE(review): `to_cluster`, `method` and `statement` are never
    # passed explicitly; presumably P.run() picks them up from this
    # function's local scope - confirm before renaming or removing them.
    to_cluster = True

    # at present the pipeline will take a set of files
    # and compute the abundances of different taxonomic groups
    # based on ALL reads i.e. paired data are combined into
    # a single file for analysis
    if PARAMS["metaphlan_executable"] == "bowtie2":
        # The path is parenthesised so that the suffix is part of the
        # reported filename; `%` binds tighter than `+`.
        assert os.path.exists(PARAMS["metaphlan_db"] + ".1.bt2"), (
            "missing file %s: Are you sure you have the correct "
            "database for bowtie2?" % (PARAMS["metaphlan_db"] + ".1.bt2"))
        method = "--bowtie2db"
    elif PARAMS["metaphlan_executable"] == "blast":
        # NOTE(review): the suffix here is "nin" without a leading dot,
        # unlike ".1.bt2" above - confirm the expected form of
        # metaphlan_db for blast databases.
        assert os.path.exists(PARAMS["metaphlan_db"] + "nin"), (
            "missing file %s: Are you sure you have the correct "
            "database for blast?" % (PARAMS["metaphlan_db"] + "nin"))
        method = "--blastdb"

    statement = PipelineMetagenomeAssembly.Metaphlan().build(
        infile, method="rel_ab")
    P.run()
def runRay(infile, outfile):
    '''
    Run Ray on each track.

    The command is built by
    PipelineMetagenomeAssembly.Ray().build(infile) and P.run() is then
    called.

    infile: input handed to the Ray statement builder.
    outfile: not referenced directly in this body.
    '''
    # NOTE(review): `job_options` and `statement` are never passed
    # explicitly; presumably P.run() picks them up from this function's
    # local scope - confirm before renaming or removing either.
    # The options exclude a fixed list of hosts and request the "mpi"
    # parallel environment with 10 slots on queue all.q.
    # NOTE(review): the backslashes inside the literal look like the
    # remains of in-string line continuations - confirm the intended
    # option string.
    job_options = " -l h=!andromeda,h=!cgatgpu1,h=!cgatsmp1,h=!gandalf,h=!saruman \ -pe mpi 10 \ -q all.q "
    statement = PipelineMetagenomeAssembly.Ray().build(infile)
    P.run()
def pool_out(infiles):
    '''
    Return the output filename used for pooled reads.

    infiles: sequence of input files; only the first one is inspected.

    Returns "" when infiles is empty. Otherwise returns
    "pooled_reads.dir/agg-agg-agg.<format>", where <format> is what
    PipelineMetagenomeAssembly.PairedData().getFormat reports for the
    first input file.
    '''
    # AH: patch required when importing pipeline
    if not infiles:
        return ""

    first = infiles[0]

    # The pairing result does not influence the returned name. The call
    # is kept in case checkPairs validates its input - TODO confirm
    # whether it can be dropped.
    PipelineMetagenomeAssembly.PairedData().checkPairs(first)

    # Named `suffix` rather than `format` to avoid shadowing the builtin.
    suffix = PipelineMetagenomeAssembly.PairedData().getFormat(first)
    return "pooled_reads.dir/agg-agg-agg.%s" % suffix
def buildContigLengths(infile, outfile):
    '''
    Output lengths for each contig in each of the assemblies.

    Delegates to PipelineMetagenomeAssembly.build_scaffold_lengths,
    which receives the input file, the output file and the pipeline
    PARAMS. Nothing is returned from this function.
    '''
    write_lengths = PipelineMetagenomeAssembly.build_scaffold_lengths
    write_lengths(infile, outfile, PARAMS)
def buildSoapdenovoConfig(infile, outfile):
    '''
    Build the soapdenovo configuration for a track.

    Delegates to PipelineMetagenomeAssembly.SoapDenovo2().config, which
    receives the input file, the output file and the pipeline PARAMS.
    Nothing is returned from this function.
    '''
    soap = PipelineMetagenomeAssembly.SoapDenovo2()
    soap.config(infile, outfile, PARAMS)