def diamondAlignORF(infile, outfile):
    """Run the DIAMOND command built by ``PipelineAnnotate.runDiamond``.

    Args:
        infile: Input path handed to ``runDiamond``.
        outfile: Output path handed to ``runDiamond``.
    """
    # Resource requests. Neither local is read in this function; presumably
    # P.run picks them up from the calling frame (CGAT-core convention), so
    # the names must not change -- TODO confirm.
    job_memory = "{}G".format(PARAMS["Diamond_memory"])
    job_threads = int(PARAMS["Diamond_threads"])

    # Build the shell command and submit it.
    statement = PipelineAnnotate.runDiamond(infile, outfile, PARAMS)
    P.run(statement)
def detectOrfs(infile, outfile):
    """Build and run the Prodigal ORF-detection command, then gzip outputs.

    FASTQ input is first converted to a temporary FASTA with ``reformat.sh``
    and that temporary file is removed once Prodigal has run. Paired-end
    input is not processed: a message is printed and nothing is run.

    Args:
        infile: Path of the sequence file to scan.
        outfile: Target path; any ".gz" in it is dropped before it is passed
            to ``PipelineAnnotate.runProdigal`` and the result is gzipped
            afterwards.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = "{}G".format(PARAMS["Prodigal_memory"])
    job_threads = int(PARAMS["Prodigal_threads"])

    seqdat = PipelineAssembly.SequencingData(infile)

    # Work with the uncompressed name; compression happens as a final step.
    outfile = outfile.replace(".gz", "")

    # Comparison is kept as "== True" so behaviour matches the original
    # exactly, whatever type ``paired`` has.
    if seqdat.paired == True:
        print("Cannot detect ORFs from paired-end reads.")
        return

    commands = []
    converted = seqdat.fileformat == "fastq"
    if converted:
        # Input must be FASTA: convert to a temporary file first.
        temp_fasta = "orfs.dir/" + seqdat.cleanname + ".fa"
        commands.append("reformat.sh in={} out={}".format(infile, temp_fasta))
        infile = temp_fasta

    commands.append(PipelineAnnotate.runProdigal(infile, outfile, PARAMS))

    if converted:
        # Clean up the temporary FASTA.
        commands.append("rm {}".format(temp_fasta))

    # Compress both the named output and its "positions" counterpart.
    positions_file = outfile.replace("peptides", "positions")
    for produced in (outfile, positions_file):
        commands.append("gzip {}".format(produced))

    statement = " && ".join(commands)
    P.run(statement)
def detectOrfs(infile, outfile):
    """Build and run the Prodigal ORF-detection command for *infile*.

    FASTQ input is first converted to a temporary FASTA with ``reformat.sh``
    and that temporary file is removed once Prodigal has run. Paired-end
    input is not processed: a message is printed and nothing is run.

    Args:
        infile: Path of the sequence file to scan.
        outfile: Output path handed to ``PipelineAnnotate.runProdigal``.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = "{}G".format(PARAMS["Prodigal_memory"])
    job_threads = int(PARAMS["Prodigal_threads"])

    seqdat = PipelineAssembly.SequencingData(infile)

    # Comparison is kept as "== True" so behaviour matches the original
    # exactly, whatever type ``paired`` has.
    if seqdat.paired == True:
        print(
            "Prodigal requires single/merged (i.e. not paired-end) reads for ORF detection."
        )
        return

    commands = []
    converted = seqdat.fileformat == "fastq"
    if converted:
        # Input must be FASTA: convert to a temporary file first.
        temp_fasta = "orfs.dir/" + seqdat.cleanname + ".fa"
        commands.append("reformat.sh in={} out={}".format(infile, temp_fasta))
        infile = temp_fasta

    commands.append(PipelineAnnotate.runProdigal(infile, outfile, PARAMS))

    if converted:
        # Clean up the temporary FASTA.
        commands.append("rm {}".format(temp_fasta))

    statement = " && ".join(commands)
    P.run(statement)
def functionalAnnotSeed(infile, outfile):
    """Run the eggnog-mapper seed step built by ``runEggmapSeed``.

    Args:
        infile: Path passed to ``PipelineAnnotate.runEggmapSeed`` as both its
            first and second argument.
        outfile: Not referenced in the body (NOTE(review): presumably only
            used by the pipeline framework to track the task -- confirm).
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = "{}G".format(PARAMS["Eggnogmapper_memory"])
    job_threads = int(PARAMS["Eggnogmapper_threads"])

    # The eggnog-mapper databases need an older DIAMOND release, so that
    # module is loaded before the mapper command runs.
    load_diamond = "module load bio/diamond/0.8.22 && "
    seed_command = PipelineAnnotate.runEggmapSeed(infile, infile, PARAMS)

    statement = load_diamond + seed_command
    P.run(statement)
def krakenAlignContig(infile, outfile):
    """Run the Kraken command built by ``PipelineAnnotate.runKraken``.

    Args:
        infile: Input path handed to ``runKraken``.
        outfile: Output path handed to ``runKraken``.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = str(PARAMS["Kraken_memory"]) + "G"
    job_threads = int(PARAMS["Kraken_threads"])

    # translateKraken2.py sits in the "scripts" directory that is a sibling of
    # the directory holding this file. Take the parent directory explicitly:
    # str.rstrip("pipelines") removes any trailing run of the characters
    # p/i/e/l/n/s rather than the suffix "pipelines", which corrupts the path
    # whenever the directory is not named exactly "pipelines".
    parent_dir = os.path.dirname(os.path.dirname(__file__))
    translate_script = os.path.join(parent_dir, "scripts",
                                    "translateKraken2.py")

    # generate call to kraken
    statement = PipelineAnnotate.runKraken(infile, outfile, PARAMS,
                                           translate_script)
    P.run(statement)
def functionalAnnotSeed(infile, outfile):
    """Run the eggnog-mapper seed step and write a marker log file.

    The command chain is: optional preload command, the command returned by
    ``PipelineAnnotate.runEggmapSeed``, then an ``echo`` that writes a
    "Made file ..." message into *outfile*.

    Args:
        infile: Path whose ".log" is removed before it is passed to
            ``runEggmapSeed`` as both its first and second argument.
        outfile: Path of the marker file written once the mapper succeeds.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = str(PARAMS["Eggnogmapper_memory"]) + "G"
    job_threads = int(PARAMS["Eggnogmapper_threads"])

    commands = []

    # Optional commands to load specific versions of diamond, python2 etc.
    # "" and "false" both mean "no preload".
    preload = PARAMS["Eggnogmapper_preload"]
    if preload not in ["", "false"]:
        commands.append("{}".format(preload))

    # generate call to eggnog-mapper
    seed_path = infile.replace(".log", "")
    commands.append(
        PipelineAnnotate.runEggmapSeed(seed_path, seed_path, PARAMS))

    # Record completion in the marker file the pipeline tracks.
    commands.append('echo "Made file {}." > {}'.format(
        outfile.replace(".log", ""), outfile))

    # Join every step with " && " so the preload is separated from the mapper
    # call the same way as the other steps (previously "<preload> &&" was
    # concatenated directly onto the command with no space).
    statement = " && ".join(commands)
    P.run(statement)
def krakenAlignContig(infile, outfile):
    """Run the Kraken command from ``runKraken`` and gzip its outputs.

    Args:
        infile: Input path handed to ``PipelineAnnotate.runKraken``.
        outfile: Target path; ".gz" is removed before it is passed to
            ``runKraken`` and the resulting files are gzipped afterwards.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = str(PARAMS["Kraken_memory"]) + "G"
    job_threads = int(PARAMS["Kraken_threads"])

    # translateKraken2.py sits in the "scripts" directory that is a sibling of
    # the directory holding this file. Take the parent directory explicitly:
    # str.rstrip("pipelines") removes any trailing run of the characters
    # p/i/e/l/n/s rather than the suffix "pipelines", which corrupts the path
    # whenever the directory is not named exactly "pipelines".
    parent_dir = os.path.dirname(os.path.dirname(__file__))
    translate_script = os.path.join(parent_dir, "scripts",
                                    "translateKraken2.py")

    uncompressed = outfile.replace(".gz", "")

    # generate call to kraken
    statementlist = [
        PipelineAnnotate.runKraken(infile, uncompressed, PARAMS,
                                   translate_script)
    ]

    # compress outputs: the file named by swapping "translated.gz" for "out",
    # then the uncompressed target itself
    statementlist.append("gzip {}".format(
        outfile.replace("translated.gz", "out")))
    statementlist.append("gzip {}".format(uncompressed))

    statement = " && ".join(statementlist)
    P.run(statement)
def functionalAnnotChunks(infile, outfile):
    """Run the eggnog-mapper annotation step built by ``runEggmapAnnot``.

    When ``PARAMS["Eggnogmapper_scratch"]`` is "true", eggnog.db is first
    copied to ``$SCRATCH_DIR`` and that location is used as the data
    directory; otherwise ``PARAMS["Eggnogmapper_eggdata"]`` is used directly.

    Args:
        infile: Input path handed to ``PipelineAnnotate.runEggmapAnnot``.
        outfile: Target path; ".emapper.annotations" is removed before it is
            passed to ``runEggmapAnnot``.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = str(PARAMS["Eggnogmapper_memory_annot"]) + "G"
    job_threads = int(str(PARAMS["Eggnogmapper_threads_annot"]))

    commands = []
    if PARAMS["Eggnogmapper_scratch"] == "true":
        # copy the db into fast local SSD. os.path.join yields the same path
        # as plain concatenation when the configured directory ends in "/",
        # and a valid one when the trailing slash is missing.
        db_source = os.path.join(PARAMS["Eggnogmapper_eggdata"], "eggnog.db")
        commands.append("cp {} $SCRATCH_DIR/eggnog.db".format(db_source))
        datadir = "$SCRATCH_DIR"
    else:
        datadir = PARAMS["Eggnogmapper_eggdata"]

    # get annotation from seeds
    commands.append(
        PipelineAnnotate.runEggmapAnnot(
            infile, outfile.replace(".emapper.annotations", ""), PARAMS,
            datadir))

    # run the annotation step
    statement = " && ".join(commands)
    P.run(statement)
def functionalAnnotChunks(infile, outfile):
    """Run the eggnog-mapper annotation step and write a marker log file.

    The command chain is: optional preload command, optional copy of
    eggnog.db to ``$SCRATCH_DIR``, the command returned by
    ``PipelineAnnotate.runEggmapAnnot``, then an ``echo`` that writes a
    "Made file ..." message into *outfile*.

    Args:
        infile: Path whose ".log" is removed before it is passed to
            ``runEggmapAnnot``.
        outfile: Path of the marker file; ".emapper.annotations.log" is
            removed from it to form the second ``runEggmapAnnot`` argument.
    """
    # Resource requests. Not read here; presumably consumed by P.run from the
    # caller's frame (CGAT-core convention) -- keep the names, TODO confirm.
    job_memory = str(PARAMS["Eggnogmapper_memory_annot"]) + "G"
    job_threads = int(str(PARAMS["Eggnogmapper_threads_annot"]))

    commands = []

    # Optional commands to load specific versions of diamond, python2 etc.
    # "" and "false" both mean "no preload".
    if PARAMS["Eggnogmapper_preload"] not in ["", "false"]:
        commands.append("{}".format(PARAMS["Eggnogmapper_preload"]))

    if PARAMS["Eggnogmapper_scratch"] == "true":
        # copy the db into fast local SSD. os.path.join yields the same path
        # as plain concatenation when the configured directory ends in "/",
        # and a valid one when the trailing slash is missing.
        db_source = os.path.join(PARAMS["Eggnogmapper_eggdata"], "eggnog.db")
        commands.append("cp {} $SCRATCH_DIR/eggnog.db".format(db_source))
        datadir = "$SCRATCH_DIR"
    else:
        datadir = PARAMS["Eggnogmapper_eggdata"]

    # get annotation from seeds
    commands.append(
        PipelineAnnotate.runEggmapAnnot(
            infile.replace(".log", ""),
            outfile.replace(".emapper.annotations.log", ""), PARAMS,
            datadir))

    # Record completion in the marker file the pipeline tracks.
    commands.append('echo "Made file {}." > {}'.format(
        outfile.replace(".log", ""), outfile))

    # run the annotation step
    statement = " && ".join(commands)
    P.run(statement)
def meganAnnot(infile, outfile):
    """Run the blast2lca command built by ``PipelineAnnotate.runBlast2Lca``.

    Args:
        infile: Input path handed to ``runBlast2Lca``.
        outfile: Output path handed to ``runBlast2Lca``.
    """
    # Resource requests. Neither local is read in this function; presumably
    # P.run picks them up from the calling frame (CGAT-core convention), so
    # the names must not change -- TODO confirm.
    job_memory = "{}G".format(PARAMS["Blast2lca_memory"])
    job_threads = int(PARAMS["Blast2lca_threads"])

    # Build the shell command and submit it.
    statement = PipelineAnnotate.runBlast2Lca(infile, outfile, PARAMS)
    P.run(statement)