Exemplo n.º 1
0
    def fastqc(self, datalocation, analysislocation, options):
        """Run FastQC on the trimmed copy of every FASTQ file in *datalocation*.

        The bundled ``fastqc_v0.11.3.zip`` is unpacked into the SPARTA
        ``QC_analysis`` folder the first time this runs.  For each input
        file whose name looks like a FASTQ file, FastQC is run on
        ``<analysislocation>/QC/trimmed<name>``.

        Args:
            datalocation: Directory whose listing determines which read
                files get assessed.
            analysislocation: Analysis output directory; the trimmed reads
                are read from its ``QC`` sub-directory.
            options: Parsed command-line options.  ``options.verbose``
                decides whether FastQC is run with ``--quiet``; the whole
                object is also passed to ``CheckDependencies.getSPARTAdir``.

        Side effects:
            Changes the process working directory to the FastQC folder.
        """

        cd = check_dependencies_linux.CheckDependencies()
        qcroot = os.path.join(cd.getSPARTAdir(options), "QC_analysis")
        os.chdir(qcroot)
        if not os.path.lexists(os.path.join(qcroot, "FastQC")):
            # Close the devnull handle deterministically instead of leaking it.
            with open(os.devnull, 'wb') as devnull:
                subprocess.call(["unzip", "fastqc_v0.11.3.zip"],
                                stdout=devnull)
        os.chdir(os.path.join(cd.getpwd(), "FastQC"))
        subprocess.call(["chmod", "755", "fastqc"])
        # Parenthesised single argument: valid as a Python 2 print statement
        # and as a Python 3 function call, with identical output.
        print("FastQC is assessing your data set for overall quality")

        if options.verbose:
            fastqccmd = ["./fastqc"]
        else:
            fastqccmd = ["./fastqc", "--quiet"]

        for readfile in os.listdir(datalocation):
            # The *second* dot-separated field is inspected, so names such as
            # "sample.fastq.gz" match as well as "sample.fastq".  Entries
            # without any dot (sub-directories, README, ...) are skipped
            # instead of raising IndexError.
            nameparts = readfile.split(".")
            if len(nameparts) < 2 or nameparts[1] not in ("fastq", "fq"):
                continue
            # Argument list instead of a shell string, so paths containing
            # spaces or shell metacharacters reach FastQC unchanged.
            subprocess.call(fastqccmd + [
                os.path.join(analysislocation, "QC", "trimmed" + readfile)
            ])
        return
Exemplo n.º 2
0
    def trimmomatic(self, datalocation, analysislocation, options):
        """Run Trimmomatic in single-end mode on every FASTQ file in *datalocation*.

        The bundled ``Trimmomatic-0.33.zip`` is unpacked into the SPARTA
        ``QC_analysis`` folder the first time this runs.  Each trimmed file
        is written to ``<analysislocation>/QC`` with the prefix ``trimmed``
        added to the original file name.

        Args:
            datalocation: Directory containing the raw read files.
            analysislocation: Analysis output directory; results go to its
                ``QC`` sub-directory.
            options: Parsed command-line options.  Reads ``threads``,
                ``illuminaclip``, ``leading``, ``trailing``,
                ``slidingwindow`` and ``minlentrim``; the whole object is
                also passed to ``CheckDependencies.getSPARTAdir``.

        Side effects:
            Changes the process working directory to the Trimmomatic folder.
        """

        cd = check_dependencies_linux.CheckDependencies()
        qcroot = os.path.join(cd.getSPARTAdir(options), "QC_analysis")
        os.chdir(qcroot)
        if not os.path.lexists(os.path.join(qcroot, "Trimmomatic-0.33")):
            #This will be a problem for Windows. Just distribute with unzipped binaries?
            # Close the devnull handle deterministically instead of leaking it.
            with open(os.devnull, 'wb') as devnull:
                subprocess.call(["unzip", "Trimmomatic-0.33.zip"],
                                stdout=devnull)
        os.chdir(os.path.join(cd.getpwd(), "Trimmomatic-0.33"))

        # The trimming steps do not depend on the individual read file, so
        # they are assembled once.  The adapter file is resolved relative to
        # the Trimmomatic folder that was just entered.
        trimsteps = [
            "ILLUMINACLIP:" + cd.getpwd() +
            "/adapters/{0}".format(options.illuminaclip),
            "LEADING:{0}".format(options.leading),
            "TRAILING:{0}".format(options.trailing),
            "SLIDINGWINDOW:{0}".format(options.slidingwindow),
            "MINLEN:{0}".format(options.minlentrim),
        ]
        basecmd = ["java", "-jar", "trimmomatic-0.33.jar", "SE",
                   "-threads", str(options.threads)]

        for readfile in os.listdir(datalocation):
            # The *second* dot-separated field is inspected, so names such as
            # "sample.fastq.gz" match as well as "sample.fastq".  Entries
            # without any dot are skipped instead of raising IndexError.
            nameparts = readfile.split(".")
            if len(nameparts) < 2 or nameparts[1] not in ("fastq", "fq"):
                continue
            # Argument list instead of a shell string, so paths containing
            # spaces or shell metacharacters are passed through unchanged.
            subprocess.call(basecmd + [
                os.path.join(datalocation, readfile),
                os.path.join(analysislocation, "QC", "trimmed" + readfile),
            ] + trimsteps)

        return
Exemplo n.º 3
0
__author__ = 'benkjohnson'

import check_dependencies_linux
import optparse
import sys

cd = check_dependencies_linux.CheckDependencies()

# Command-line interface of SPARTA: the parser itself, followed by the
# boolean switches that are currently supported.
optParser = optparse.OptionParser(
    usage="python SPARTA.py [options]",
    description="Simple Program for Automated reference-based bacterial "
                "RNA-seq Transcriptome Analysis (SPARTA)",
    epilog="Written by Benjamin K. Johnson ([email protected]), Michigan "
           "State University Department of Microbiology and Molecular "
           "Genetics. (c) 2015",
)

#Future functionality
# optParser.add_option("--SE", help="Single-end read input. Default input choice is single-end if nothing is specified",
#                     action="store_true", default="True", dest="seqtype")
# optParser.add_option("--PE", help="Paired-end read input. Must have the exact same file name and end with _F for the forward read and _R for the reverse read",
#                     action="store_false", default="False", dest="seqtype")

# --cleanup: delete intermediate files once they are no longer needed.
optParser.add_option(
    "--cleanup",
    dest="cleanup",
    action="store_true",
    help="Clean up the intermediate files to save space. "
         "Default action is to retain the intermediate files.",
)

# --verbose: let the wrapped tools print their own progress output.
optParser.add_option(
    "--verbose",
    dest="verbose",
    action="store_true",
    help="Display more output for each step of the analysis.",
)
    def bowtie(self, datalocation, analysislocation, options):
        """Map the trimmed single-end reads to the reference genome with Bowtie.

        Steps, in order:

        1. Copy the reference genome into ``<analysislocation>/Bowtie`` and
           the annotation file into ``<analysislocation>/HTSeq``.
        2. Unpack the bundled Bowtie 1.1.1 archive if it has not been
           unpacked yet.
        3. Stage every file from ``<analysislocation>/QC`` in the Bowtie
           folder (``.gz`` files are decompressed, everything else copied).
        4. If ``options.cleanup`` is set, delete the ``.gz``/``.fq``/
           ``.fastq`` files from the QC folder.
        5. Build the Bowtie index from the reference genome.
        6. Map every ``.fq``/``.fastq`` file in the Bowtie folder and write
           the alignment to ``align<name>.sam``, where ``<name>`` is the
           file name minus its first ``len('trimmed')`` characters and its
           extension.

        Args:
            datalocation: Directory searched for the reference genome and
                annotation file (via ``QC_analysis.findreferencefiles``).
            analysislocation: Analysis output directory containing the
                ``QC``, ``Bowtie`` and ``HTSeq`` sub-directories.
            options: Parsed command-line options.  Reads ``cleanup``,
                ``verbose``, ``threads``, ``procs``, ``mismatch`` and
                ``otherbowtieoptions``.

        Side effects:
            Changes the process working directory to the Bowtie folder.
        """
        # Local import: only needed to split the free-form option string.
        import shlex

        cd = check_dependencies_linux.CheckDependencies()
        qc = qc_analysis.QC_analysis()
        gff, genref = qc.findreferencefiles(datalocation)

        qcdir = os.path.join(analysislocation, "QC")
        bowtiedir = os.path.join(analysislocation, "Bowtie")

        copy(genref, bowtiedir)
        genrefname = os.path.basename(genref)
        copy(gff, os.path.join(analysislocation, "HTSeq"))

        mappingdir = os.path.join(cd.getSPARTAdir(options),
                                  "Mapping_and_counting")
        os.chdir(mappingdir)
        if not os.path.lexists(os.path.join(mappingdir, "bowtie-1.1.1")):
            #This will be a problem for Windows users. Distribute with unzipped binaries?
            # Close the devnull handle deterministically instead of leaking it.
            with open(os.devnull, 'wb') as devnull:
                subprocess.call(["unzip", "bowtie-1.1.1-linux-x86_64.zip"],
                                stdout=devnull)
        os.chdir(os.path.join(cd.getpwd(), "bowtie-1.1.1"))

        # Stage the trimmed reads next to the reference genome.
        for readfile in os.listdir(qcdir):
            source = os.path.join(qcdir, readfile)
            if readfile.split(".")[-1] == "gz":
                target = os.path.join(bowtiedir,
                                      os.path.splitext(readfile)[0])
                # Redirect gunzip's stdout in Python rather than through a
                # shell, so paths with spaces work.
                with open(target, 'wb') as unzipped:
                    subprocess.call(["gunzip", "-c", source],
                                    stdout=unzipped)
            else:
                copy(source, bowtiedir)

        if options.cleanup:
            for readfile in os.listdir(qcdir):
                # Previously compared against the single string "fq, fastq",
                # so uncompressed read files were never cleaned up.
                if readfile.split(".")[-1] in ("gz", "fq", "fastq"):
                    subprocess.call(["rm", os.path.join(qcdir, readfile)])

        # Derive the index prefix from the reference genome itself.  The old
        # glob for "*.fa*" also matched the staged "*.fastq" read files, and
        # splitting the full path on "." broke when a directory name
        # contained a dot.
        indexprefix = os.path.join(bowtiedir, genrefname.split(".")[0])

        # Parenthesised single argument: valid as a Python 2 print statement
        # and as a Python 3 function call, with identical output.
        print("Building the Bowtie index from the reference genome")
        buildcmd = ["./bowtie-build"]
        if not options.verbose:
            buildcmd.append("-q")
        buildcmd += ["-f", os.path.join(bowtiedir, genrefname), indexprefix]
        subprocess.call(buildcmd)

        for ebwtfile in glob.glob("*.ebwt"):
            copy(ebwtfile, bowtiedir)

        print("Mapping reads to the reference genome with Bowtie")
        # Compose the flags once.  The old if/elif chain could never reach
        # the "mismatch AND other options" branch, and --verbose silently
        # dropped both settings; every single-option case below yields the
        # same flags as before.
        # NOTE(review): a numeric mismatch value of 0 is falsy and is
        # therefore not forwarded as "-v 0" (unchanged behaviour) - confirm
        # whether that is intended.
        if options.verbose or options.mismatch or options.otherbowtieoptions:
            bowtiecmd = ["./bowtie", "-S",
                         "--threads", str(options.threads),
                         "-p", str(options.procs)]
            if options.mismatch:
                bowtiecmd += ["-v", str(options.mismatch)]
            if options.otherbowtieoptions:
                bowtiecmd += shlex.split(options.otherbowtieoptions)
        else:
            bowtiecmd = ["./bowtie", "-S", "--quiet",
                         "--threads", str(options.threads)]

        for readfile in os.listdir(bowtiedir):
            fname, extension = os.path.splitext(readfile)
            if extension not in (".fq", ".fastq"):
                continue
            strippedfile = fname[len('trimmed'):]
            samfile = os.path.join(bowtiedir,
                                   "align" + strippedfile + ".sam")
            # Bowtie writes the SAM records to stdout; capture them in the
            # output file directly instead of via shell redirection.
            with open(samfile, 'wb') as samout:
                subprocess.call(
                    bowtiecmd + [indexprefix,
                                 os.path.join(bowtiedir, readfile)],
                    stdout=samout)
        return
    def htseq(self, analysislocation, options):
        """Run htseq-count on every Bowtie alignment to count gene features.

        The bundled ``HTSeq-0.6.1.tar.gz`` is unpacked on first use and
        ``python setup.py build install --user`` is run on every call.
        Each ``*.sam`` file in ``<analysislocation>/Bowtie`` is counted
        against the first ``*.g*`` file found in
        ``<analysislocation>/HTSeq``.  The counts are written to
        ``<analysislocation>/HTSeq/map<name>.sam``, where ``<name>`` is the
        alignment file name minus its first ``len('align')`` characters and
        its extension.

        Args:
            analysislocation: Analysis output directory containing the
                ``Bowtie`` and ``HTSeq`` sub-directories.
            options: Parsed command-line options.  Reads ``verbose``,
                ``mode``, ``stranded``, ``order``, ``type``, ``minqual``,
                ``idattr`` and ``cleanup``.

        Raises:
            IndexError: If no ``*.g*`` annotation file exists in the
                ``HTSeq`` sub-directory.

        Side effects:
            Changes the process working directory; when ``options.cleanup``
            is set, removes every file in the ``Bowtie`` sub-directory.
        """

        cd = check_dependencies_linux.CheckDependencies()
        mappingdir = os.path.join(cd.getSPARTAdir(options),
                                  "Mapping_and_counting")
        bowtiedir = os.path.join(analysislocation, "Bowtie")
        htseqdir = os.path.join(analysislocation, "HTSeq")

        os.chdir(mappingdir)
        # One devnull handle for all silenced commands, closed when done
        # (previously three handles were opened and never closed).
        with open(os.devnull, 'wb') as devnull:
            if not os.path.lexists(os.path.join(mappingdir, "HTSeq-0.6.1")):
                subprocess.call(["tar", "-zxf", "HTSeq-0.6.1.tar.gz"],
                                stdout=devnull)
            # htseqcheck = cd.checkhtseq()
            # if htseqcheck == False:
            os.chdir(os.path.join(cd.getpwd(), "HTSeq-0.6.1"))
            subprocess.call(
                ["python", "setup.py", "build", "install", "--user"],
                stdout=devnull,
                stderr=devnull)
        os.chdir(os.path.join(cd.getpwd(), "build", "scripts-2.7"))

        gff = glob.glob(os.path.join(htseqdir, "*.g*"))[0]

        # Parenthesised single argument: valid as a Python 2 print statement
        # and as a Python 3 function call, with identical output.
        print("Counting gene features with HTSeq")

        # The verbose and quiet invocations differ only in "--quiet", so the
        # command is assembled once instead of being duplicated.
        countcmd = ["./htseq-count"]
        if not options.verbose:
            countcmd.append("--quiet")
        countcmd += [
            "--mode={0}".format(options.mode),
            "--stranded={0}".format(options.stranded),
            "--order={0}".format(options.order),
            "--type={0}".format(options.type),
            "-a", str(options.minqual),
            "--idattr={0}".format(options.idattr),
        ]

        for mapfile in os.listdir(bowtiedir):
            fname, extension = os.path.splitext(mapfile)
            if extension != ".sam":
                continue
            strippedmapfile = fname[len('align'):]
            countsfile = os.path.join(htseqdir,
                                      "map" + strippedmapfile + ".sam")
            # htseq-count prints the counts to stdout; capture them in the
            # output file directly rather than via shell redirection, so
            # paths with spaces work.
            with open(countsfile, 'wb') as counts:
                subprocess.call(
                    countcmd + [os.path.join(bowtiedir, mapfile), gff],
                    stdout=counts)

        if options.cleanup:
            for leftover in os.listdir(bowtiedir):
                subprocess.call(["rm", os.path.join(bowtiedir, leftover)])

        return
Exemplo n.º 6
0
 def __init__(self):
     """Remember the desktop path reported by ``CheckDependencies``.

     NOTE(review): only these lines of the constructor are visible in this
     excerpt; confirm against the full class whether more set-up follows.
     """
     # The dependency checker is instantiated here only to query the path.
     cd = check_dependencies_linux.CheckDependencies()
     self._mydesktoppath = cd.getdesktoppath()