Пример #1
0
def runSeqGen(workingFile, srp_hap_file, srp_tree_file, debug):
    """Run seq-gen until every simulated sequence is unique, then export FASTA.

    seq-gen reads ``<workingFile>_seqgen.phylip`` on stdin.  It is re-run until
    ``check_unique_sequences`` accepts its output; after 100 unsuccessful
    attempts BSSC is re-run (which rewrites the seq-gen input) and the
    counter starts over.  The accepted output is written to
    ``<workingFile>_seqgen_out.phylip``, converted to ``<workingFile>.fasta``
    and copied to ``srp_hap_file``.

    Args:
        workingFile: Path prefix shared by all intermediate files.
        srp_hap_file: Destination for the final FASTA file.
        srp_tree_file: Forwarded to ``runBSSC`` when BSSC has to be re-run.
        debug: Forwarded to ``runBSSC`` only; seq-gen itself is always run
            with ``0``.
    """
    seqgen_infile = workingFile + "_seqgen.phylip"
    seqgen = runExtProg(seqgenDir + "./seq-gen", pdir=seqgenDir, length=3)
    # NOTE(review): per the seq-gen manual these select the HKY model, a
    # transition/transversion ratio of 2 and input sequence 1 as ancestor --
    # confirm against the installed seq-gen version.
    seqgen.set_param_at("-mHKY", 1)
    seqgen.set_param_at("-t2", 2)
    seqgen.set_param_at("-k1", 3)
    seqgen.set_stdin(seqgen_infile)

    all_unique = False
    repeat = 0
    while not all_unique:
        if repeat == 100:
            # Give up on the current tree/ancestor and simulate a new one.
            runBSSC(workingFile, srp_tree_file, debug)
            # Single-argument parenthesised form prints identically on
            # Python 2 and Python 3.
            print("==========rerun BSSC========")
            repeat = 0
        repeat += 1
        seqgen.run(0)
        all_unique = check_unique_sequences(seqgen)

    seqgen_outfile = workingFile + "_seqgen_out.phylip"
    # The context manager closes (and flushes) the file even if write() fails.
    with open(seqgen_outfile, "w") as temp_handle:
        temp_handle.write(seqgen.output)

    fasta_file = workingFile + ".fasta"
    SeqIO.convert(seqgen_outfile, "phylip", fasta_file, "fasta")
    shutil.copy(fasta_file, srp_hap_file)
Пример #2
0
def runSeqGen(workingFile, srp_hap_file, srp_tree_file, debug):
    """Run seq-gen until every simulated sequence is unique, then export FASTA.

    seq-gen reads ``<workingFile>_seqgen.phylip`` on stdin.  It is re-run until
    ``check_unique_sequences`` accepts its output; after 100 unsuccessful
    attempts BSSC is re-run (which rewrites the seq-gen input) and the
    counter starts over.  The accepted output is written to
    ``<workingFile>_seqgen_out.phylip``, converted to ``<workingFile>.fasta``
    and copied to ``srp_hap_file``.

    Args:
        workingFile: Path prefix shared by all intermediate files.
        srp_hap_file: Destination for the final FASTA file.
        srp_tree_file: Forwarded to ``runBSSC`` when BSSC has to be re-run.
        debug: Forwarded to ``runBSSC`` only; seq-gen itself is always run
            with ``0``.
    """
    seqgen_infile = workingFile + "_seqgen.phylip"
    seqgen = runExtProg(seqgenDir + "./seq-gen", pdir=seqgenDir, length=3)
    # NOTE(review): per the seq-gen manual these select the HKY model, a
    # transition/transversion ratio of 2 and input sequence 1 as ancestor --
    # confirm against the installed seq-gen version.
    seqgen.set_param_at("-mHKY", 1)
    seqgen.set_param_at("-t2", 2)
    seqgen.set_param_at("-k1", 3)
    seqgen.set_stdin(seqgen_infile)

    all_unique = False
    repeat = 0
    while not all_unique:
        if repeat == 100:
            # Give up on the current tree/ancestor and simulate a new one.
            runBSSC(workingFile, srp_tree_file, debug)
            # Single-argument parenthesised form prints identically on
            # Python 2 and Python 3.
            print("==========rerun BSSC========")
            repeat = 0
        repeat += 1
        seqgen.run(0)
        all_unique = check_unique_sequences(seqgen)

    seqgen_outfile = workingFile + "_seqgen_out.phylip"
    # The context manager closes (and flushes) the file even if write() fails.
    with open(seqgen_outfile, "w") as temp_handle:
        temp_handle.write(seqgen.output)

    fasta_file = workingFile + ".fasta"
    SeqIO.convert(seqgen_outfile, "phylip", fasta_file, "fasta")
    shutil.copy(fasta_file, srp_hap_file)
Пример #3
0
def runSam2Fasta(workingFile,
                 result_file_prefix,
                 samfile_prefix,
                 srp_read_prefix,
                 error_free_model=True,
                 debug=False):
    """Run sam2fasta_mod.py on the SAM file(s) and copy the results out.

    The script is invoked with three positional parameters: the consensus
    ``<workingFile>.cons``, a SAM file, and the scratch output
    ``<workingFile>_temp.fasta``.  After each run the SAM file and the scratch
    FASTA are copied under ``result_file_prefix``.  When ``error_free_model``
    is true a second run handles the ``_errFree`` SAM file the same way.

    Args:
        workingFile: Path prefix of the intermediate files.
        result_file_prefix: Path prefix for the copied result files.
        samfile_prefix: Text inserted between the prefix and ``.sam``.
        srp_read_prefix: Text inserted before ``samfile_prefix`` in the
            copied FASTA file name.
        error_free_model: Also process the ``_errFree`` SAM file.
        debug: Forwarded to the external program runner.
    """
    consensus_path = workingFile + ".cons"
    fasta_scratch = workingFile + "_temp.fasta"

    # "" is the regular pass; "_errFree" the optional error-free pass.
    tags = ["", "_errFree"] if error_free_model else [""]

    converter = runExtProg("./sam2fasta_mod.py", length=3)
    initial_params = (consensus_path,
                      workingFile + samfile_prefix + ".sam",
                      fasta_scratch)
    for slot, value in enumerate(initial_params, 1):
        converter.set_param_at(value, slot)

    for tag in tags:
        sam_path = workingFile + samfile_prefix + tag + ".sam"
        if tag:
            # Only the SAM input changes between passes.
            converter.set_param_at(sam_path, 2)
        converter.run(debug)

        shutil.copy(sam_path,
                    result_file_prefix + samfile_prefix + tag + ".sam")
        shutil.copy(fasta_scratch,
                    result_file_prefix + srp_read_prefix + samfile_prefix +
                    tag + ".fasta")
Пример #4
0
def runBSSC(workingFile, srp_tree_file, debug):
    """Run BSSC and derive the reference, seq-gen input and tree files.

    BSSC is run (repeatedly, if necessary) until ``<workingFile>_BSSC.paup``
    exists.  The ``true_tree_1`` newick tree and the first aligned sequence
    are read from that file, matching tree node names are rewritten to
    ``hap_<n-1>``, and three files are written:

    * ``<workingFile>.cons`` -- the first sequence as FASTA record ``Ref``;
    * ``<workingFile>_seqgen.phylip`` -- that sequence as ``ancestor``
      followed by the tree;
    * ``srp_tree_file`` -- the tree in newick format.

    Args:
        workingFile: Path prefix shared by all intermediate files.
        srp_tree_file: Destination path for the newick tree.
        debug: Accepted for signature compatibility; not used in this
            function.

    Raises:
        ValueError: If the PAUP file contains no ``tree true_tree_1`` line,
            or such a line lacks the ``U]`` marker.
    """
    bssc = runExtProg(bsscDir + "BSSC_original", pdir=bsscDir, length=3)
    bssc.set_param_at("-f", 1)
    bssc.set_param_at(workingFile + "_BSSC.par", 2)
    bssc.set_param_at(1, 3)

    bssc_paup_result = workingFile + "_BSSC.paup"

    # Drop any stale result so the loop below only accepts fresh output;
    # a missing file is the normal case.
    try:
        os.remove(bssc_paup_result)
    except OSError:
        pass

    while not os.path.exists(bssc_paup_result):
        bssc.run()

    tree = None
    # "rU" (universal newlines) is kept as in the original Python 2 code.
    with open(bssc_paup_result, "rU") as input_handle:
        for line in input_handle:
            # Substring test also matches at column 0, which the former
            # ``find(...) > 0`` check silently skipped.
            if "tree true_tree_1" in line:
                line = line.strip()
                # The newick string starts after the "U] " marker.
                start = line.index("U]") + 3
                tree = Phylo.read(StringIO(line[start:]), 'newick')

    if tree is None:
        raise ValueError(
            "no 'tree true_tree_1' entry found in %s" % bssc_paup_result)

    for clade in tree.find_clades():
        if clade.name:
            match = re.match(MATCH_TREE_NODE, clade.name)
            if match:
                # Rename to a zero-based haplotype label.
                index = match.group(2)
                clade.name = "hap_" + str(int(index) - 1)

    with open(bssc_paup_result, "rU") as input_handle:
        sequences = AlignIO.read(input_handle, "nexus")
    seq = sequences[0]

    with open(workingFile + ".cons", "w") as ref_handle:
        ref_handle.write(">%s\n%s\n" % ("Ref", seq.seq))

    with open(workingFile + "_seqgen.phylip", "w") as output_handle:
        # NOTE(review): the header hard-codes a length of 1200; presumably
        # this must equal len(seq.seq) -- confirm against the BSSC .par file.
        output_handle.write("1 1200\n")
        output_handle.write("%s %s\n" % ("ancestor", seq.seq))
        output_handle.write("1\n")
        Phylo.write(tree, output_handle, "newick")

    with open(srp_tree_file, "w") as output_handle:
        Phylo.write(tree, output_handle, "newick")
Пример #5
0
def runBSSC(workingFile, srp_tree_file, debug):
    """Run BSSC and derive the reference, seq-gen input and tree files.

    BSSC is run (repeatedly, if necessary) until ``<workingFile>_BSSC.paup``
    exists.  The ``true_tree_1`` newick tree and the first aligned sequence
    are read from that file, matching tree node names are rewritten to
    ``hap_<n-1>``, and three files are written:

    * ``<workingFile>.cons`` -- the first sequence as FASTA record ``Ref``;
    * ``<workingFile>_seqgen.phylip`` -- that sequence as ``ancestor``
      followed by the tree;
    * ``srp_tree_file`` -- the tree in newick format.

    Args:
        workingFile: Path prefix shared by all intermediate files.
        srp_tree_file: Destination path for the newick tree.
        debug: Accepted for signature compatibility; not used in this
            function.

    Raises:
        ValueError: If the PAUP file contains no ``tree true_tree_1`` line,
            or such a line lacks the ``U]`` marker.
    """
    bssc = runExtProg(bsscDir + "BSSC_original", pdir=bsscDir, length=3)
    bssc.set_param_at("-f", 1)
    bssc.set_param_at(workingFile + "_BSSC.par", 2)
    bssc.set_param_at(1, 3)

    bssc_paup_result = workingFile + "_BSSC.paup"

    # Drop any stale result so the loop below only accepts fresh output;
    # a missing file is the normal case.
    try:
        os.remove(bssc_paup_result)
    except OSError:
        pass

    while not os.path.exists(bssc_paup_result):
        bssc.run()

    tree = None
    # "rU" (universal newlines) is kept as in the original Python 2 code.
    with open(bssc_paup_result, "rU") as input_handle:
        for line in input_handle:
            # Substring test also matches at column 0, which the former
            # ``find(...) > 0`` check silently skipped.
            if "tree true_tree_1" in line:
                line = line.strip()
                # The newick string starts after the "U] " marker.
                start = line.index("U]") + 3
                tree = Phylo.read(StringIO(line[start:]), 'newick')

    if tree is None:
        raise ValueError(
            "no 'tree true_tree_1' entry found in %s" % bssc_paup_result)

    for clade in tree.find_clades():
        if clade.name:
            match = re.match(MATCH_TREE_NODE, clade.name)
            if match:
                # Rename to a zero-based haplotype label.
                index = match.group(2)
                clade.name = "hap_" + str(int(index) - 1)

    with open(bssc_paup_result, "rU") as input_handle:
        sequences = AlignIO.read(input_handle, "nexus")
    seq = sequences[0]

    with open(workingFile + ".cons", "w") as ref_handle:
        ref_handle.write(">%s\n%s\n" % ("Ref", seq.seq))

    with open(workingFile + "_seqgen.phylip", "w") as output_handle:
        # NOTE(review): the header hard-codes a length of 1200; presumably
        # this must equal len(seq.seq) -- confirm against the BSSC .par file.
        output_handle.write("1 1200\n")
        output_handle.write("%s %s\n" % ("ancestor", seq.seq))
        output_handle.write("1\n")
        Phylo.write(tree, output_handle, "newick")

    with open(srp_tree_file, "w") as output_handle:
        Phylo.write(tree, output_handle, "newick")
Пример #6
0
def runShrimp(workingFile, shrimp_infile_prefix, shrimp_prefix, debug):
    """Run SHRiMP's gmapper-ls and save its output as a SAM file.

    The reads file ``workingFile + shrimp_infile_prefix`` is mapped against
    ``<workingFile>.cons``; the program output is written to
    ``workingFile + shrimp_prefix + ".sam"``.

    Args:
        workingFile: Path prefix shared by all intermediate files.
        shrimp_infile_prefix: Text appended to ``workingFile`` to form the
            reads file name.
        shrimp_prefix: Text inserted between ``workingFile`` and ``.sam``
            in the output file name.
        debug: Forwarded to the external program runner.
    """
    reference_file = workingFile + ".cons"

    shrimp_infile = workingFile + shrimp_infile_prefix
    sam_outfile = workingFile + shrimp_prefix + ".sam"

    shrimp = runExtProg(shrimpDir + "gmapper-ls", length=3)
    shrimp.set_param_at(shrimp_infile, 1)
    shrimp.set_param_at(reference_file, 2)
    shrimp.set_param_at("-h 25%", 3)
    shrimp.add_switch("--qv-offset 33")  # for ART output
    shrimp.run(debug)

    # Close the handle explicitly: the original left it open, so buffered
    # SAM data was only flushed whenever the object happened to be collected.
    with open(sam_outfile, "w") as outfile_handle:
        outfile_handle.write(shrimp.output)
Пример #7
0
def runMetaSim(workingFile, workingDir, debug):
    """Run MetaSim in 454 mode on ``<workingFile>.fasta``.

    Any existing ``<workingFile>-454.fna`` is removed first, then MetaSim is
    started from ``metaSimDir`` with nine positional parameters ending in the
    output directory and the input FASTA file.

    Args:
        workingFile: Path prefix of the input FASTA and the old reads file.
        workingDir: Directory passed to MetaSim after ``-d``.
        debug: Forwarded to the external program runner.
    """
    fasta_input = workingFile + ".fasta"

    previous_reads = workingFile + "-454.fna"
    if os.path.exists(previous_reads):
        os.remove(previous_reads)

    simulator = runExtProg(metaSimDir + "./MetaSim", pdir=metaSimDir, length=9)
    arguments = (
        "cmd",
        "--454",
        "-f250",
        "-t25",
        "-r2000",
        "-c",
        "-d",
        workingDir,
        fasta_input,
    )
    # Parameter slots are 1-based.
    for slot, argument in enumerate(arguments, 1):
        simulator.set_param_at(argument, slot)
    simulator.run(debug)
Пример #8
0
def runShrimp(workingFile, shrimp_infile_prefix, shrimp_prefix, debug):
    """Run SHRiMP's gmapper-ls and save its output as a SAM file.

    The reads file ``workingFile + shrimp_infile_prefix`` is mapped against
    ``<workingFile>.cons``; the program output is written to
    ``workingFile + shrimp_prefix + ".sam"``.

    Args:
        workingFile: Path prefix shared by all intermediate files.
        shrimp_infile_prefix: Text appended to ``workingFile`` to form the
            reads file name.
        shrimp_prefix: Text inserted between ``workingFile`` and ``.sam``
            in the output file name.
        debug: Forwarded to the external program runner.
    """
    reference_file = workingFile + ".cons"

    shrimp_infile = workingFile + shrimp_infile_prefix
    sam_outfile = workingFile + shrimp_prefix + ".sam"

    shrimp = runExtProg(shrimpDir + "gmapper-ls", length=3)
    shrimp.set_param_at(shrimp_infile, 1)
    shrimp.set_param_at(reference_file, 2)
    shrimp.set_param_at("-h 25%", 3)
    shrimp.add_switch("--qv-offset 33")  # for ART output
    shrimp.run(debug)

    # Close the handle explicitly: the original left it open, so buffered
    # SAM data was only flushed whenever the object happened to be collected.
    with open(sam_outfile, "w") as outfile_handle:
        outfile_handle.write(shrimp.output)
Пример #9
0
def runART(workingFile, art_output_prefix, error_free_model=True, debug=False):
    """Run art_illumina on ``<workingFile>.fasta``.

    The program is started from ``artDir`` with ``-sam`` as its first
    parameter, the switches ``-i <input>``, ``-l 75`` and ``-f 20``, the
    parameter ``-o <workingFile><art_output_prefix>`` and, when
    ``error_free_model`` is true, the additional switch ``-ef``.

    Args:
        workingFile: Path prefix of the input FASTA and of the output.
        art_output_prefix: Text appended to ``workingFile`` for ``-o``.
        error_free_model: Add the ``-ef`` switch.
        debug: Forwarded to the external program runner.
    """
    fasta_input = workingFile + ".fasta"
    output_target = "-o %s%s" % (workingFile, art_output_prefix)

    simulator = runExtProg(artDir + "./art_illumina", pdir=artDir)
    simulator.set_param_at("-sam", 1)

    for switch in ("-i %s" % fasta_input, "-l 75", "-f 20"):
        simulator.add_switch(switch)
    simulator.add_param(output_target)

    if error_free_model:
        simulator.add_switch("-ef")

    simulator.run(debug)
Пример #10
0
def runART(workingFile, art_output_prefix, error_free_model=True, debug=False):
    """Configure and run art_illumina for ``<workingFile>.fasta``.

    Options are applied in a fixed order: ``-sam`` at position 1, the
    switches ``-i <input>``, ``-l 75``, ``-f 20``, the parameter
    ``-o <workingFile><art_output_prefix>``, and finally ``-ef`` if
    ``error_free_model`` is true.

    Args:
        workingFile: Path prefix of the input FASTA and of the output.
        art_output_prefix: Text appended to ``workingFile`` for ``-o``.
        error_free_model: Add the ``-ef`` switch.
        debug: Forwarded to the external program runner.
    """
    art = runExtProg(artDir + "./art_illumina", pdir=artDir)
    art.set_param_at("-sam", 1)

    # (method, argument) pairs, executed strictly in this order.
    steps = [
        (art.add_switch, "-i %s" % (workingFile + ".fasta")),
        (art.add_switch, "-l 75"),
        (art.add_switch, "-f 20"),
        (art.add_param, "-o %s%s" % (workingFile, art_output_prefix)),
    ]
    if error_free_model:
        steps.append((art.add_switch, "-ef"))

    for configure, option in steps:
        configure(option)

    art.run(debug)
Пример #11
0
def runMetaSim(workingFile, workingDir, debug):
    """Simulate 454 reads from ``<workingFile>.fasta`` with MetaSim.

    A leftover ``<workingFile>-454.fna`` is deleted before MetaSim is started
    from ``metaSimDir``; the command line consists of nine positional
    parameters whose last two are ``workingDir`` and the input FASTA file.

    Args:
        workingFile: Path prefix of the input FASTA and the old reads file.
        workingDir: Directory passed to MetaSim after ``-d``.
        debug: Forwarded to the external program runner.
    """
    metaSim_infile = workingFile + ".fasta"
    old_output = workingFile + "-454.fna"

    if os.path.exists(old_output):
        os.remove(old_output)

    fixed_options = ["cmd", "--454", "-f250", "-t25", "-r2000", "-c", "-d"]
    command_line = fixed_options + [workingDir, metaSim_infile]

    metaSim = runExtProg(metaSimDir + "./MetaSim", pdir=metaSimDir, length=9)
    # Slots 1..9, one per command-line element.
    for position, value in zip(range(1, 10), command_line):
        metaSim.set_param_at(value, position)
    metaSim.run(debug)
Пример #12
0
def runSam2Fasta(workingFile, result_file_prefix, samfile_prefix, srp_read_prefix, error_free_model=True, debug=False):
    """Convert SAM alignments with sam2fasta_mod.py and copy the results.

    The script receives ``<workingFile>.cons``, a SAM file and the scratch
    file ``<workingFile>_temp.fasta`` as positional parameters.  After the
    run, the SAM file and the scratch FASTA are copied under
    ``result_file_prefix``.  If ``error_free_model`` is true, the same is
    repeated for the ``_errFree`` SAM file.

    Args:
        workingFile: Path prefix of the intermediate files.
        result_file_prefix: Path prefix for the copied result files.
        samfile_prefix: Text inserted between the prefix and ``.sam``.
        srp_read_prefix: Text inserted before ``samfile_prefix`` in the
            copied FASTA file name.
        error_free_model: Also process the ``_errFree`` SAM file.
        debug: Forwarded to the external program runner.
    """
    scratch_fasta = workingFile + "_temp.fasta"

    def convert_and_export(sam_path, label):
        # Run the converter as currently configured, then publish both files.
        tool.run(debug)
        shutil.copy(sam_path, result_file_prefix + label + ".sam")
        shutil.copy(scratch_fasta,
                    result_file_prefix + srp_read_prefix + label + ".fasta")

    tool = runExtProg("./sam2fasta_mod.py", length=3)
    plain_sam = workingFile + samfile_prefix + ".sam"
    tool.set_param_at(workingFile + ".cons", 1)
    tool.set_param_at(plain_sam, 2)
    tool.set_param_at(scratch_fasta, 3)
    convert_and_export(plain_sam, samfile_prefix)

    if not error_free_model:
        return

    # Second pass: only the SAM input differs.
    clean_sam = workingFile + samfile_prefix + "_errFree.sam"
    tool.set_param_at(clean_sam, 2)
    convert_and_export(clean_sam, samfile_prefix + "_errFree")