def diamondAlignORF(infile, outfile):
    """Build the DIAMOND command for ``infile`` and submit it with ``P.run``.

    The command string is produced by ``PipelineAnnotate.runDiamond``; the
    memory and thread settings come from the ``Diamond_memory`` and
    ``Diamond_threads`` entries of ``PARAMS``.
    """
    # NOTE(review): job_memory / job_threads are never read in this function;
    # presumably P.run picks them up from the caller's locals, so the names
    # must stay exactly as they are -- confirm against the pipeline framework.
    job_memory = "{}G".format(PARAMS["Diamond_memory"])
    job_threads = int(PARAMS["Diamond_threads"])
    P.run(PipelineAnnotate.runDiamond(infile, outfile, PARAMS))
def detectOrfs(infile, outfile):
    """Detect ORFs in ``infile`` with Prodigal and gzip the results.

    FASTQ input is first converted with ``reformat.sh`` into a temporary
    FASTA under ``orfs.dir/``; that file is removed after Prodigal has run.
    Paired-end input is not processed: a message is printed and nothing is
    submitted.

    infile: path of the sequence file to scan.
    outfile: target path; Prodigal is given this path with ``.gz`` removed,
        and that file plus its ``peptides`` -> ``positions`` counterpart are
        gzipped afterwards.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = "{}G".format(PARAMS["Prodigal_memory"])
    job_threads = int(PARAMS["Prodigal_threads"])
    # inspect the input (pairing, file format, clean name)
    seqdat = PipelineAssembly.SequencingData(infile)
    # Prodigal writes the uncompressed file; compression is a later step
    outfile = outfile.replace(".gz", "")

    if seqdat.paired == True:
        print("Cannot detect ORFs from paired-end reads.")
        return

    steps = []
    is_fastq = seqdat.fileformat == "fastq"
    if is_fastq:
        # Prodigal needs FASTA, so convert to a throwaway file first
        tmp_fasta = "orfs.dir/" + seqdat.cleanname + ".fa"
        steps.append("reformat.sh in={} out={}".format(infile, tmp_fasta))
        infile = tmp_fasta
    steps.append(PipelineAnnotate.runProdigal(infile, outfile, PARAMS))
    if is_fastq:
        # the converted FASTA is only needed for the Prodigal call
        steps.append("rm {}".format(tmp_fasta))
    # compress both output files
    for path in (outfile, outfile.replace("peptides", "positions")):
        steps.append("gzip {}".format(path))
    P.run(" && ".join(steps))
def detectOrfs(infile, outfile):
    """Detect ORFs in ``infile`` with Prodigal.

    FASTQ input is first converted with ``reformat.sh`` into a temporary
    FASTA under ``orfs.dir/``; that file is removed after Prodigal has run.
    Paired-end input is not processed: a message is printed and nothing is
    submitted.

    infile: path of the sequence file to scan.
    outfile: output path handed to ``PipelineAnnotate.runProdigal``.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = "{}G".format(PARAMS["Prodigal_memory"])
    job_threads = int(PARAMS["Prodigal_threads"])
    # inspect the input (pairing, file format, clean name)
    seqdat = PipelineAssembly.SequencingData(infile)

    if seqdat.paired == True:
        print(
            "Prodigal requires single/merged (i.e. not paired-end) reads for ORF detection."
        )
        return

    steps = []
    is_fastq = seqdat.fileformat == "fastq"
    if is_fastq:
        # Prodigal needs FASTA, so convert to a throwaway file first
        tmp_fasta = "orfs.dir/" + seqdat.cleanname + ".fa"
        steps.append("reformat.sh in={} out={}".format(infile, tmp_fasta))
        infile = tmp_fasta
    steps.append(PipelineAnnotate.runProdigal(infile, outfile, PARAMS))
    if is_fastq:
        # the converted FASTA is only needed for the Prodigal call
        steps.append("rm {}".format(tmp_fasta))
    P.run(" && ".join(steps))
def functionalAnnotSeed(infile, outfile):
    """Run the eggnog-mapper seed step for ``infile`` via ``P.run``.

    The command from ``PipelineAnnotate.runEggmapSeed`` is prefixed with a
    ``module load`` of DIAMOND 0.8.22, the older release required by the
    eggnog-mapper databases.

    NOTE(review): ``outfile`` is not used; ``infile`` is passed to
    ``runEggmapSeed`` as both arguments -- confirm this is intended.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = "{}G".format(PARAMS["Eggnogmapper_memory"])
    job_threads = int(PARAMS["Eggnogmapper_threads"])
    load_diamond = "module load bio/diamond/0.8.22 && "
    P.run(load_diamond + PipelineAnnotate.runEggmapSeed(infile, infile, PARAMS))
def krakenAlignContig(infile, outfile):
    """Build the Kraken command for ``infile`` and submit it with ``P.run``.

    The command string comes from ``PipelineAnnotate.runKraken``, which is
    also given the path of ``scripts/translateKraken2.py``. That path is
    derived from this file's directory with a trailing ``pipelines``
    component removed.

    infile: input path handed to ``runKraken``.
    outfile: output path handed to ``runKraken``.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = str(PARAMS["Kraken_memory"]) + "G"
    job_threads = int(PARAMS["Kraken_threads"])
    # Locate the translate script. str.rstrip("pipelines") would strip any
    # trailing run of the characters p/i/e/l/n/s rather than the literal
    # suffix, so remove the suffix explicitly instead.
    here = os.path.dirname(__file__)
    suffix = "pipelines"
    if here.endswith(suffix):
        root = here[:-len(suffix)]
    else:
        # not inside a "pipelines" directory: look for scripts/ next to
        # this file rather than mangling the directory name
        root = os.path.join(here, "")
    translate_script = "{}scripts/translateKraken2.py".format(root)
    # generate call to kraken
    statement = PipelineAnnotate.runKraken(infile, outfile, PARAMS,
                                           translate_script)
    P.run(statement)
def functionalAnnotSeed(infile, outfile):
    """Run the eggnog-mapper seed step and write a small log file.

    ``infile`` with ``.log`` removed is passed to
    ``PipelineAnnotate.runEggmapSeed`` as both arguments. When
    ``PARAMS["Eggnogmapper_preload"]`` is neither ``""`` nor ``"false"`` its
    value is run first (e.g. commands loading specific DIAMOND / Python
    versions). After a successful run a "Made file ..." line is written to
    ``outfile``.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = "{}G".format(PARAMS["Eggnogmapper_memory"])
    job_threads = int(PARAMS["Eggnogmapper_threads"])

    # optional environment set-up to run ahead of eggnog-mapper
    preload = PARAMS["Eggnogmapper_preload"]
    setup = "" if preload in ["", "false"] else "{} &&".format(preload)

    seed_target = infile.replace(".log", "")
    mapper_call = PipelineAnnotate.runEggmapSeed(seed_target, seed_target,
                                                 PARAMS)
    # record completion in the .log file
    log_call = ' && echo "Made file {}." > {}'.format(
        outfile.replace(".log", ""), outfile)
    P.run(setup + mapper_call + log_call)
def krakenAlignContig(infile, outfile):
    """Run Kraken on ``infile`` and gzip its outputs via ``P.run``.

    ``PipelineAnnotate.runKraken`` is given ``outfile`` without ``.gz`` and
    the path of ``scripts/translateKraken2.py``. That path is derived from
    this file's directory with a trailing ``pipelines`` component removed.
    Two files are gzipped afterwards: ``outfile`` with ``translated.gz``
    replaced by ``out``, and ``outfile`` without ``.gz``.

    infile: input path handed to ``runKraken``.
    outfile: path of the gzipped translated output.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = str(PARAMS["Kraken_memory"]) + "G"
    job_threads = int(PARAMS["Kraken_threads"])
    # Locate the translate script. str.rstrip("pipelines") would strip any
    # trailing run of the characters p/i/e/l/n/s rather than the literal
    # suffix, so remove the suffix explicitly instead.
    here = os.path.dirname(__file__)
    suffix = "pipelines"
    if here.endswith(suffix):
        root = here[:-len(suffix)]
    else:
        # not inside a "pipelines" directory: look for scripts/ next to
        # this file rather than mangling the directory name
        root = os.path.join(here, "")
    translate_script = "{}scripts/translateKraken2.py".format(root)
    uncompressed = outfile.replace(".gz", "")
    # generate call to kraken
    statementlist = [
        PipelineAnnotate.runKraken(infile, uncompressed, PARAMS,
                                   translate_script)
    ]
    # compress outputs
    statementlist.append("gzip {}".format(
        outfile.replace("translated.gz", "out")))
    statementlist.append("gzip {}".format(uncompressed))
    statement = " && ".join(statementlist)
    P.run(statement)
def functionalAnnotChunks(infile, outfile):
    """Run the eggnog-mapper annotation step for ``infile`` via ``P.run``.

    When ``PARAMS["Eggnogmapper_scratch"]`` is ``"true"`` the ``eggnog.db``
    file is first copied into ``$SCRATCH_DIR`` and the annotation reads the
    database from there; otherwise ``PARAMS["Eggnogmapper_eggdata"]`` is used
    as the data directory.

    infile: seed file to annotate.
    outfile: annotation output; ``.emapper.annotations`` is removed before
        the path is passed to ``PipelineAnnotate.runEggmapAnnot``.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = "{}G".format(PARAMS["Eggnogmapper_memory_annot"])
    job_threads = int(str(PARAMS["Eggnogmapper_threads_annot"]))

    steps = []
    use_scratch = PARAMS["Eggnogmapper_scratch"] == "true"
    eggdata = PARAMS["Eggnogmapper_eggdata"]
    if use_scratch:
        # stage the database on fast local SSD
        steps.append(
            "cp {}eggnog.db $SCRATCH_DIR/eggnog.db".format(eggdata))
    datadir = "$SCRATCH_DIR" if use_scratch else eggdata

    # annotate from the seeds
    out_prefix = outfile.replace(".emapper.annotations", "")
    steps.append(
        PipelineAnnotate.runEggmapAnnot(infile, out_prefix, PARAMS, datadir))
    P.run(" && ".join(steps))
def functionalAnnotChunks(infile, outfile):
    """Run the eggnog-mapper annotation step and write a small log file.

    Steps, joined with ``&&`` and submitted through ``P.run``:

    1. the value of ``PARAMS["Eggnogmapper_preload"]``, unless it is ``""``
       or ``"false"`` (e.g. commands loading specific tool versions);
    2. a copy of ``eggnog.db`` into ``$SCRATCH_DIR`` when
       ``PARAMS["Eggnogmapper_scratch"]`` is ``"true"``;
    3. the command from ``PipelineAnnotate.runEggmapAnnot``;
    4. an ``echo`` writing a "Made file ..." line to ``outfile``.

    infile: seed log path; ``.log`` is removed before use.
    outfile: annotation log path; ``.emapper.annotations.log`` is removed to
        form the output argument passed to ``runEggmapAnnot``.
    """
    # NOTE(review): job_memory / job_threads are never read here; presumably
    # P.run reads them from the caller's locals -- keep the names unchanged.
    job_memory = "{}G".format(PARAMS["Eggnogmapper_memory_annot"])
    job_threads = int(str(PARAMS["Eggnogmapper_threads_annot"]))

    steps = []
    # optional environment set-up to run ahead of eggnog-mapper
    preload = PARAMS["Eggnogmapper_preload"]
    if preload not in ["", "false"]:
        steps.append("{}".format(preload))

    use_scratch = PARAMS["Eggnogmapper_scratch"] == "true"
    eggdata = PARAMS["Eggnogmapper_eggdata"]
    if use_scratch:
        # stage the database on fast local SSD
        steps.append(
            "cp {}eggnog.db $SCRATCH_DIR/eggnog.db".format(eggdata))
    datadir = "$SCRATCH_DIR" if use_scratch else eggdata

    # annotate from the seeds
    seed_file = infile.replace(".log", "")
    out_prefix = outfile.replace(".emapper.annotations.log", "")
    steps.append(
        PipelineAnnotate.runEggmapAnnot(seed_file, out_prefix, PARAMS,
                                        datadir))
    # record completion in the .log file
    steps.append('echo "Made file {}." > {}'.format(
        outfile.replace(".log", ""), outfile))
    P.run(" && ".join(steps))
def meganAnnot(infile, outfile):
    """Build the blast2lca command for ``infile`` and submit it with ``P.run``.

    The command string is produced by ``PipelineAnnotate.runBlast2Lca``; the
    memory and thread settings come from the ``Blast2lca_memory`` and
    ``Blast2lca_threads`` entries of ``PARAMS``.
    """
    # NOTE(review): job_memory / job_threads are never read in this function;
    # presumably P.run picks them up from the caller's locals, so the names
    # must stay exactly as they are -- confirm against the pipeline framework.
    job_memory = "{}G".format(PARAMS["Blast2lca_memory"])
    job_threads = int(PARAMS["Blast2lca_threads"])
    P.run(PipelineAnnotate.runBlast2Lca(infile, outfile, PARAMS))