Exemplo n.º 1
0
def _prepare_folder_structure(assembler, assemblyDirectory):
    """Report whether the assembly folder for *assembler* is already there.

    Returns 1 (after printing a notice) when
    ``common.directory_exists(assemblyDirectory)`` is truthy, otherwise 0.
    """
    # NOTE(review): any side effect of common.directory_exists (e.g. creating
    # the folder when it is missing) is not visible from here -- confirm.
    if not common.directory_exists(assemblyDirectory):
        return 0
    notice = "Assembler {} asumer already computed as folder {} exists".format(
        assembler, assemblyDirectory)
    print(notice)
    return 1
Exemplo n.º 2
0
def _run_abyss_mergePairs(global_config, sample_config,
                          sorted_libraries_by_insert):
    """Run abyss-mergepairs once for every paired "innie" library.

    The work happens inside an ``abyss_mergePairs`` sub-directory of the
    current working directory; the original working directory is restored
    before returning, even when an error occurs.

    Args:
        global_config: mapping in which
            ``global_config["Tools"]["abyss_mergePairs"]`` provides the
            ``"bin"`` (executable) and ``"options"`` (iterable of arguments).
        sample_config: per-sample mapping; an ``"abyss_mergePairs"`` entry,
            when present, replaces the options from ``global_config``.
        sorted_libraries_by_insert: handed to
            ``common.prepare_folder_structure`` and then iterated as
            ``(name, info)`` pairs, where ``info`` has the keys ``"pair1"``,
            ``"pair2"`` and ``"orientation"``.

    Returns:
        ``sample_config``, unchanged.
    """
    print("running abyss-mergepairs ...")
    assembler = "abyss_mergePairs"
    currentDirectory = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): presumably common.directory_exists creates the directory
    # when it is missing (otherwise the chdir below would fail) -- confirm.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory)  # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(
            sorted_libraries_by_insert)
        programBIN = global_config["Tools"][assembler]["bin"]
        program_options = global_config["Tools"][assembler]["options"]
        if assembler in sample_config:
            program_options = sample_config[assembler]

        # Part of the command line shared by every library.
        base_command = [programBIN]
        base_command.extend(program_options)

        for _library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            # Only paired-end "innie" libraries can be merged.
            if libraryInfo["orientation"] != "innie" or read2 is None:
                continue

            # Output prefix: first two '_'-separated fields of read1's
            # file name.
            nameParts = read1.split('/')[-1].split('_')
            outputName = "{}_{}".format(nameParts[0], nameParts[1])

            # Build a fresh list per library: appending to the shared base
            # command would carry the previous libraries' "-o name r1 r2"
            # arguments into every later invocation.
            currentCommand = base_command + ['-o', outputName, read1, read2]

            stdOutName = "mergePairs_{}.stdOut".format(outputName)
            stdErrName = "mergePairs_{}.stdErr".format(outputName)
            print(currentCommand)
            with open(stdOutName, "a") as abyss_stdOut, \
                    open(stdErrName, "a") as abyss_stdErr:
                subprocess.call(currentCommand,
                                stdout=abyss_stdOut,
                                stderr=abyss_stdErr)
            # The captured stderr is kept as <outputName>.txt.
            subprocess.call(["mv", stdErrName, "{}.txt".format(outputName)])
    finally:
        os.chdir(currentDirectory)
    return sample_config
Exemplo n.º 3
0
def _run_abyss_mergePairs(global_config, sample_config,
        sorted_libraries_by_insert):
    """Run abyss-mergepairs once for every paired "innie" library.

    The work happens inside an ``abyss_mergePairs`` sub-directory of the
    current working directory; the original working directory is restored
    before returning, even when an error occurs.

    Args:
        global_config: mapping in which
            ``global_config["Tools"]["abyss_mergePairs"]`` provides the
            ``"bin"`` (executable) and ``"options"`` (iterable of arguments).
        sample_config: per-sample mapping; an ``"abyss_mergePairs"`` entry,
            when present, replaces the options from ``global_config``.
        sorted_libraries_by_insert: handed to
            ``common.prepare_folder_structure`` and then iterated as
            ``(name, info)`` pairs, where ``info`` has the keys ``"pair1"``,
            ``"pair2"`` and ``"orientation"``.

    Returns:
        ``sample_config``, unchanged.
    """
    print("running abyss-mergepairs ...")
    assembler = "abyss_mergePairs"
    currentDirectory = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): presumably common.directory_exists creates the directory
    # when it is missing (otherwise the chdir below would fail) -- confirm.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory)  # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(
            sorted_libraries_by_insert)
        programBIN = global_config["Tools"][assembler]["bin"]
        program_options = global_config["Tools"][assembler]["options"]
        if assembler in sample_config:
            program_options = sample_config[assembler]

        # Part of the command line shared by every library.
        base_command = [programBIN]
        base_command.extend(program_options)

        for _library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            # Only paired-end "innie" libraries can be merged.
            if libraryInfo["orientation"] != "innie" or read2 is None:
                continue

            # Output prefix: first two '_'-separated fields of read1's
            # file name.
            nameParts = read1.split('/')[-1].split('_')
            outputName = "{}_{}".format(nameParts[0], nameParts[1])

            # Build a fresh list per library: appending to the shared base
            # command would carry the previous libraries' "-o name r1 r2"
            # arguments into every later invocation.
            currentCommand = base_command + ['-o', outputName, read1, read2]

            stdOutName = "mergePairs_{}.stdOut".format(outputName)
            stdErrName = "mergePairs_{}.stdErr".format(outputName)
            print(currentCommand)
            with open(stdOutName, "a") as abyss_stdOut, \
                    open(stdErrName, "a") as abyss_stdErr:
                subprocess.call(currentCommand, stdout=abyss_stdOut,
                        stderr=abyss_stdErr)
            # The captured stderr is kept as <outputName>.txt.
            subprocess.call(["mv", stdErrName, "{}.txt".format(outputName)])
    finally:
        os.chdir(currentDirectory)
    return sample_config
Exemplo n.º 4
0
def _prepare_folder_structure(assembler, assemblyDirectory):
    """Tell the caller whether *assemblyDirectory* is already present.

    Prints a notice and returns 1 when
    ``common.directory_exists(assemblyDirectory)`` is truthy; returns 0
    otherwise.
    """
    # NOTE(review): whether common.directory_exists also creates a missing
    # folder cannot be seen from this function -- confirm against `common`.
    already_there = common.directory_exists(assemblyDirectory)
    if not already_there:
        return 0
    template = "Assembler {} asumer already computed as folder {} exists"
    print(template.format(assembler, assemblyDirectory))
    return 1
Exemplo n.º 5
0
def _run_trinity(global_config, sample_config, sorted_libraries_by_insert):
    """Run Trinity, then align_and_estimate_abundance.pl, and copy results.

    The work happens inside a ``trinity`` sub-directory of the current
    working directory. Trinity writes into a nested ``trinity`` output
    folder; afterwards ``Trinity.fasta``, ``RSEM.isoforms.results`` and
    ``RSEM.genes.results`` are copied next to it, prefixed with
    ``sample_config["output"]``. The original working directory is restored
    on every exit path.

    Args:
        global_config: mapping in which ``global_config["Tools"]["trinity"]``
            provides ``"bin"`` (a prefix to which ``"Trinity"`` and
            ``"util/align_and_estimate_abundance.pl"`` are appended) and
            ``"options"``.
        sample_config: per-sample mapping; ``"output"`` is required,
            ``"threads"`` is optional.
        sorted_libraries_by_insert: handed to
            ``common.prepare_folder_structure`` and then iterated as
            ``(name, info)`` pairs, where ``info`` has the keys ``"pair1"``,
            ``"pair2"`` and ``"orientation"``.

    Returns:
        ``sample_config``, unchanged. Returns early, without running
        anything, when a paired library is not oriented ``"innie"``.
    """
    print("running trinity ...")
    assembler = "trinity"
    outputName = sample_config["output"]
    currentDirectory = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): presumably common.directory_exists creates the directory
    # when it is missing (otherwise the chdir below would fail) -- confirm.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory)  # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(
            sorted_libraries_by_insert)
        toolsBin = global_config["Tools"][assembler]["bin"]
        # NOTE(review): the options are looked up but never added to either
        # command line below -- confirm whether they should be.
        program_options = global_config["Tools"][assembler]["options"]
        if assembler in sample_config:
            program_options = sample_config[assembler]

        command = [toolsBin + "Trinity"]
        command.extend(["--seqType", "fq"])
        command.extend(["--JM", "100G"])
        if "threads" in sample_config:
            command.extend(["--CPU", str(sample_config["threads"])])

        for _library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            orientation = libraryInfo["orientation"]
            if read2 is None:
                command.extend(["--single", "{}".format(read1)])
            elif orientation == "innie":
                command.extend(["--left", "{}".format(read1)])
                command.extend(["--right", "{}".format(read2)])
            else:
                print("trinity: somthing wrong or unexpected in the sample",
                        "config file")
                # The finally clause below restores the working directory.
                return sample_config
        command.extend(["--output", "trinity"])

        # Both steps log to the same pair of files, which are closed as soon
        # as the second step finishes.
        with open("trinity.stdOut", "w") as assembler_stdOut, \
                open("trinity.stdErr", "w") as assembler_stdErr:
            print(" ".join(command))
            # NOTE: the command is joined into one shell string, so
            # arguments containing spaces or shell metacharacters are
            # re-interpreted by the shell.
            subprocess.call(" ".join(command), stdout=assembler_stdOut,
                    stderr=assembler_stdErr, shell=True)

            # now align reads back to transcripts and estimate abundance
            os.chdir("trinity")
            command = [toolsBin + "util/align_and_estimate_abundance.pl"]
            command.extend(["--transcripts", "Trinity.fasta"])
            command.extend(["--seqType", "fq"])
            for _library, libraryInfo in sorted_libraries_by_insert:
                read1 = libraryInfo["pair1"]
                read2 = libraryInfo["pair2"]
                orientation = libraryInfo["orientation"]
                if read2 is not None and orientation == "innie":
                    # The reads are passed with their last extension removed.
                    command.extend(["--left",
                        "{}".format(os.path.splitext(read1)[0])])
                    command.extend(["--right",
                        "{}".format(os.path.splitext(read2)[0])])

            command.extend(["--aln_method", "bowtie"])
            command.extend(["--est_method", "RSEM"])
            command.append("--debug")
            command.append("--trinity_mode")
            command.append("--prep_reference")
            if "threads" in sample_config:
                command.extend(["--thread_count",
                    str(sample_config["threads"])])
            print(" ".join(command))
            subprocess.call(" ".join(command), stdout=assembler_stdOut,
                    stderr=assembler_stdErr, shell=True)

        # now copy results
        os.chdir("..")
        subprocess.call(["cp", "trinity/Trinity.fasta",
            "{}.fasta".format(outputName)])
        subprocess.call(["cp", "trinity/RSEM.isoforms.results",
            "{}.isoforms.results".format(outputName)])
        subprocess.call(["cp", "trinity/RSEM.genes.results",
            "{}.genes.results".format(outputName)])
    finally:
        os.chdir(currentDirectory)
    return sample_config
Exemplo n.º 6
0
def _run_trinity(global_config, sample_config, sorted_libraries_by_insert):
    """Run Trinity, then align_and_estimate_abundance.pl, and copy results.

    The work happens inside a ``trinity`` sub-directory of the current
    working directory. Trinity writes into a nested ``trinity`` output
    folder; afterwards ``Trinity.fasta``, ``RSEM.isoforms.results`` and
    ``RSEM.genes.results`` are copied next to it, prefixed with
    ``sample_config["output"]``. The original working directory is restored
    on every exit path.

    Args:
        global_config: mapping in which ``global_config["Tools"]["trinity"]``
            provides ``"bin"`` (a prefix to which ``"Trinity"`` and
            ``"util/align_and_estimate_abundance.pl"`` are appended) and
            ``"options"``.
        sample_config: per-sample mapping; ``"output"`` is required,
            ``"threads"`` is optional.
        sorted_libraries_by_insert: handed to
            ``common.prepare_folder_structure`` and then iterated as
            ``(name, info)`` pairs, where ``info`` has the keys ``"pair1"``,
            ``"pair2"`` and ``"orientation"``.

    Returns:
        ``sample_config``, unchanged. Returns early, without running
        anything, when a paired library is not oriented ``"innie"``.
    """
    print("running trinity ...")
    assembler = "trinity"
    outputName = sample_config["output"]
    currentDirectory = os.getcwd()
    assemblyDirectory = os.path.join(currentDirectory, assembler)
    # NOTE(review): presumably common.directory_exists creates the directory
    # when it is missing (otherwise the chdir below would fail) -- confirm.
    if common.directory_exists(assemblyDirectory):
        return sample_config
    os.chdir(assemblyDirectory)  # now I am in the assembly directory
    try:
        sorted_libraries_by_insert = common.prepare_folder_structure(
            sorted_libraries_by_insert)
        toolsBin = global_config["Tools"][assembler]["bin"]
        # NOTE(review): the options are looked up but never added to either
        # command line below -- confirm whether they should be.
        program_options = global_config["Tools"][assembler]["options"]
        if assembler in sample_config:
            program_options = sample_config[assembler]

        command = [toolsBin + "Trinity"]
        command.extend(["--seqType", "fq"])
        command.extend(["--JM", "100G"])
        if "threads" in sample_config:
            command.extend(["--CPU", str(sample_config["threads"])])

        for _library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            orientation = libraryInfo["orientation"]
            if read2 is None:
                command.extend(["--single", "{}".format(read1)])
            elif orientation == "innie":
                command.extend(["--left", "{}".format(read1)])
                command.extend(["--right", "{}".format(read2)])
            else:
                print("trinity: somthing wrong or unexpected in the sample",
                      "config file")
                # The finally clause below restores the working directory.
                return sample_config
        command.extend(["--output", "trinity"])

        # Both steps log to the same pair of files, which are closed as soon
        # as the second step finishes.
        with open("trinity.stdOut", "w") as assembler_stdOut, \
                open("trinity.stdErr", "w") as assembler_stdErr:
            print(" ".join(command))
            # NOTE: the command is joined into one shell string, so
            # arguments containing spaces or shell metacharacters are
            # re-interpreted by the shell.
            subprocess.call(" ".join(command),
                            stdout=assembler_stdOut,
                            stderr=assembler_stdErr,
                            shell=True)

            # now align reads back to transcripts and estimate abundance
            os.chdir("trinity")
            command = [toolsBin + "util/align_and_estimate_abundance.pl"]
            command.extend(["--transcripts", "Trinity.fasta"])
            command.extend(["--seqType", "fq"])
            for _library, libraryInfo in sorted_libraries_by_insert:
                read1 = libraryInfo["pair1"]
                read2 = libraryInfo["pair2"]
                orientation = libraryInfo["orientation"]
                if read2 is not None and orientation == "innie":
                    # The reads are passed with their last extension removed.
                    command.extend(
                        ["--left", "{}".format(os.path.splitext(read1)[0])])
                    command.extend(
                        ["--right", "{}".format(os.path.splitext(read2)[0])])

            command.extend(["--aln_method", "bowtie"])
            command.extend(["--est_method", "RSEM"])
            command.append("--debug")
            command.append("--trinity_mode")
            command.append("--prep_reference")
            if "threads" in sample_config:
                command.extend(
                    ["--thread_count", str(sample_config["threads"])])
            print(" ".join(command))
            subprocess.call(" ".join(command),
                            stdout=assembler_stdOut,
                            stderr=assembler_stdErr,
                            shell=True)

        # now copy results
        os.chdir("..")
        subprocess.call(
            ["cp", "trinity/Trinity.fasta", "{}.fasta".format(outputName)])
        subprocess.call([
            "cp", "trinity/RSEM.isoforms.results",
            "{}.isoforms.results".format(outputName)
        ])
        subprocess.call([
            "cp", "trinity/RSEM.genes.results",
            "{}.genes.results".format(outputName)
        ])
    finally:
        os.chdir(currentDirectory)
    return sample_config