Exemplo n.º 1
0
def get_para_1004():
    """Run ``dsh.extract_parameter`` for the "gtr" model and then the "nest" model.

    ``dot_result_folder``, ``para_path``, ``gtr_list_para`` and
    ``nest_list_para`` are read from the enclosing module scope.

    :return: None
    """
    # get parameters from fly data . model : gtr, nest, parameter: likelihood, aic, w, psi

    # The parenthesised single-argument form prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("---- ")
    dsh.extract_parameter(dot_result_folder, para_path, ["gtr"], gtr_list_para)
    dsh.extract_parameter(dot_result_folder, para_path, ["nest"],
                          nest_list_para)
Exemplo n.º 2
0
 def test_gap_check_traversal(self):
     """Expect ``dsh.idSequenceUnKnow`` when no gene id source is supplied.

     ``dsh.gap_check_traversal`` is called with only the current directory
     and an output file name; the test fails if that call does not raise.
     """
     folder_input = os.path.abspath(os.curdir)
     output_gap_lst = "gap_report.txt"
     try:
         dsh.gap_check_traversal(folder_input, output_gap_lst)
     except dsh.idSequenceUnKnow:
         # Parenthesised form keeps the Python 2 output unchanged and is
         # also valid Python 3 syntax.
         print("ok , exception raisen")
     else:
         self.fail("nope , no exception when there should have some ")
Exemplo n.º 3
0
    def test_available_traversal(self):
        """Exercise ``dsh.report_traversal_available_site`` without and with job ids.

        The first call omits ``jobids`` and must raise
        ``dsh.idSequenceUnKnow``; the second call passes ``jobids`` and the
        report file must exist afterwards.
        """
        scan_dir = os.path.abspath(os.curdir)
        report_name = "available_sites.txt"
        known_ids = ["tmp"]

        # Phase 1: without job ids the call has to raise.
        raised_unknown_ids = False
        try:
            dsh.report_traversal_available_site(input_folder=scan_dir,
                                                output_file=report_name)
        except dsh.idSequenceUnKnow:
            raised_unknown_ids = True
        if not raised_unknown_ids:
            self.fail("id unknow")

        # Phase 2: with explicit job ids the report file is expected on disk.
        dsh.report_traversal_available_site(input_folder=scan_dir,
                                            output_file=report_name,
                                            jobids=known_ids)

        self.assertTrue(os.path.exists(report_name))
Exemplo n.º 4
0
def main1():
    """Run ``dsh.transfer_for_r`` for "gtr", "gu" and "nest" and compare results.

    The "gtr" result is the reference; a message is printed each time the
    "gu" or "nest" result does not compare equal to it.

    :return: None
    """
    paras = ["likelihood", "aic", "w"]
    path_working = "/home/zerodel/Workspace/former10"
    mid = "10"

    gene_all = dsh.transfer_for_r("gtr", mid, paras, path_working)

    # Same call order as before (gu, then nest), with the duplicated
    # compare-and-report code folded into one loop.
    for model in ("gu", "nest"):
        gene_all_tmp = dsh.transfer_for_r(model, mid, paras, path_working)
        # NOTE(review): the message mentions length, but the check is a full
        # equality comparison of the two results - confirm the intent.
        if not gene_all == gene_all_tmp:
            # Parenthesised form prints identically on Python 2 and is also
            # valid on Python 3.
            print("not same length")
Exemplo n.º 5
0
def gap_check_traversal(input_folder, output_file, given_sequence_file=""):
    """
    Count gaps for every job id listed in "given_sequence_file" and write one
    tab-separated "gaps<TAB>full" line per job id to "output_file".

    For each job id, "<jobid>.input" inside "input_folder" is used when it
    exists, otherwise "<jobid>.aln".

    :param input_folder: folder holding the ".input" / ".aln" files
    :param output_file: path of the report to write (overwritten)
    :param given_sequence_file: lst file passed to
        dsh.get_gene_id_sequnce_from_lst to obtain the job ids
    :return: None
    :raises dsh.idSequenceUnKnow: if no given_sequence_file is supplied
    :raises dsh.WrongFileTypeForGapCheck: if a job id has neither an
        ".input" nor an ".aln" file in input_folder
    """
    curdir_abs = os.path.abspath(os.curdir)

    # The original test was inverted: it tried to read the id list from an
    # empty path and raised idSequenceUnKnow when a file WAS supplied.
    if not given_sequence_file:
        raise dsh.idSequenceUnKnow
    jobids = dsh.get_gene_id_sequnce_from_lst(given_sequence_file)

    with open(output_file, "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            input_name = os.path.join(input_folder, jobid + ".input")
            aln_name = os.path.join(input_folder, jobid + ".aln")
            if os.path.exists(input_name):
                num_gap, full_nt_length = gap_counting_input(input_name)
            elif os.path.exists(aln_name):
                num_gap, full_nt_length = gap_counting_input(aln_name)
            else:
                raise dsh.WrongFileTypeForGapCheck
            writer.write("%s\t%s\n" % (str(num_gap), str(full_nt_length)))

    # Return to the directory the call started in, in case a helper moved.
    os.chdir(curdir_abs)
Exemplo n.º 6
0
def snp_count_species(lst_file, folder_rnasnp_file):
    """Write a per-gene SNP count table next to ``lst_file``.

    Gene ids are read from ``lst_file`` with
    ``dsh.get_gene_id_sequnce_from_lst``. For each id, the file
    ``<id>.rnasnp`` in ``folder_rnasnp_file`` is passed to
    ``count_snp_per_file`` (criterion "pvalue2", threshold 0.1,
    ``is_greater_than`` False) and the result is divided by 3.0. Ids with no
    such file get "NA". The table is written to ``<stem>SnpCount.lst`` in the
    directory that contains ``lst_file``.

    :param lst_file: path to the lst file holding the gene ids
    :param folder_rnasnp_file: folder containing the ``.rnasnp`` files
    :return: None
    """
    # read gene id sequence
    lst_abs = os.path.abspath(lst_file)
    path_dir = os.path.dirname(lst_abs)
    # Use only the file's own stem. The previous code kept any directory
    # prefix of a relative lst_file and then joined it onto path_dir again,
    # duplicating the directory (e.g. "data/x.lst" -> ".../data/data/x...").
    lst_name_bare = os.path.splitext(os.path.basename(lst_abs))[0]

    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)

    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    gene_snp_count = []
    for jobid in jobids:
        single_file = os.path.join(folder_rnasnp_file,
                                   ".".join([jobid, "rnasnp"]))
        if os.path.exists(single_file):
            gene_snp_count.append(
                str(
                    count_snp_per_file(single_file, is_greater_than, threshold,
                                       data_criterion) / 3.0))
        else:
            gene_snp_count.append("NA")

    # writer a new lst file for snp count in some species
    with open(os.path.join(path_dir, lst_name_bare + "SnpCount.lst"),
              "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, snp_count in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, snp_count))
Exemplo n.º 7
0
def main1():
    """Write a "gaps<TAB>full" table for the ids listed in ``gtr.lst``.

    Changes the working directory to the hard-coded alignment folder, reads
    the job ids with ``dsh.get_gene_id_sequnce_from_lst`` and, for each id,
    writes the two values returned by ``gap_counting_aln("<id>.aln")`` to
    ``gapyeast.lst``.

    :return: None
    """
    ## get the gaps number and full_length .
    aln_path = "/home/zerodel/Workspace/Yeast/result/main_full_length"
    # The ".aln" names below are relative, so they resolve inside aln_path.
    os.chdir(aln_path)
    # The unused directory listing of *.aln files was dropped; the ids come
    # from the lst file only.
    jobids = dsh.get_gene_id_sequnce_from_lst(os.path.join("/home/zerodel/Workspace/Yeast/result/ExtractedParameter", "gtr.lst"))
    with open("/home/zerodel/Workspace/Yeast/result/ExtractedParameter/gapyeast.lst", "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_aln(jobid + ".aln")
            writer.write("%s\t%s\n" % (str(num_gap), str(full_nt_length)))
Exemplo n.º 8
0
def main3():
    """Write a "gaps<TAB>full" table for the ids listed in ``nest2.lst``.

    Changes the working directory to the hard-coded folder, reads the job ids
    with ``dsh.get_gene_id_sequnce_from_lst`` and, for each id, writes the
    two values returned by ``gap_counting_input("<id>.input")`` to
    ``gap10.lst``.

    :return: None
    """
    ## get the gaps number and full_length .
    aln_path = "/home/zerodel/Workspace/sp2"
    # The ".input" names below are relative, so they resolve inside aln_path.
    os.chdir(aln_path)
    # The unused directory listing of *.aln files was dropped; the ids come
    # from the lst file only.
    jobids = dsh.get_gene_id_sequnce_from_lst(os.path.join("/home/zerodel/GitProjects/python-rna-structure/data/para", "nest2.lst"))
    with open("/home/zerodel/GitProjects/python-rna-structure/data/para/gap10.lst", "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_input(jobid + ".input")
            writer.write("%s\t%s\n" % (str(num_gap), str(full_nt_length)))
Exemplo n.º 9
0
def main3():
    """Grep the "psi" lines out of the nest result files, then call ``dsh.transfer_for_r``.

    The working directory is switched to the hard-coded result folder before
    the shell command runs and is not switched back in the visible code.

    :return: None
    """
    result_dir = "/home/zerodel/Workspace/merge0914/2sp"

    # NOTE(review): the starting directory is captured but never used here -
    # presumably it was meant to be restored afterwards; confirm.
    start_dir = os.path.abspath(os.curdir)

    os.chdir(result_dir)
    para, model, mid = "psi", "nest", "2"
    # Expands to: grep -i psi *nest.result >../nest2psi.txt
    grep_command = "".join([
        "grep -i ", para, " *", model, ".result >",
        "../", model, mid, para, ".txt",
    ])
    os.system(grep_command)

    working_dir = "/home/zerodel/Workspace/merge0914"
    gene_all = dsh.transfer_for_r("nest", mid, ["psi"], working_dir)
Exemplo n.º 10
0
def snp_count_species(lst_file, folder_rnasnp_file):
    """Export SNP counts per gene to ``<stem>SnpCount.lst`` beside ``lst_file``.

    Gene ids come from ``dsh.get_gene_id_sequnce_from_lst(lst_file)``. Each
    id maps to ``<id>.rnasnp`` inside ``folder_rnasnp_file``; when that file
    exists, ``count_snp_per_file`` is called on it (criterion "pvalue2",
    threshold 0.1, ``is_greater_than`` False) and the result is divided by
    3.0, otherwise "NA" is recorded.

    :param lst_file: path to the lst file holding the gene ids
    :param folder_rnasnp_file: folder containing the ``.rnasnp`` files
    :return: None
    """
    # read gene id sequence
    path_dir = os.path.dirname(os.path.abspath(lst_file))
    # Strip any directory part before taking the stem: joining a relative
    # "dir/name" stem onto path_dir used to repeat "dir" in the output path.
    lst_name_bare = os.path.splitext(os.path.basename(lst_file))[0]
    output_path = os.path.join(path_dir, lst_name_bare + "SnpCount.lst")

    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)
    file_name_vector = [os.path.join(folder_rnasnp_file, ".".join([jobid, "rnasnp"])) for jobid in jobids]

    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    gene_snp_count = []
    for single_file in file_name_vector:
        if os.path.exists(single_file):
            gene_snp_count.append(str(count_snp_per_file(single_file, is_greater_than, threshold, data_criterion)/3.0))
        else:
            gene_snp_count.append("NA")

    # writer a new lst file for snp count in some species
    with open(output_path, "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, snp_count in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, snp_count))
Exemplo n.º 11
0
def prepare4r():
    """Call ``dsh.transfer_for_r`` for "gtr" and then "nest" on one folder.

    Both calls pass an empty string as the second argument and take their
    parameter lists (``gtr_list_para`` / ``nest_list_para``) from module
    scope.

    :return: None
    """
    extracted_dir = "/home/zerodel/Workspace/Fly/ExtractedParameters"
    dsh.transfer_for_r("gtr", "", gtr_list_para, extracted_dir)
    dsh.transfer_for_r("nest", "", nest_list_para, extracted_dir)
Exemplo n.º 12
0
def test_aln():
    """Run ``dsh.test_gene_species_match`` on the hard-coded fly alignment folder.

    The second argument is the list of twelve names obtained by splitting
    the comma-separated string below.
    """
    species_csv = "dyak,dere,dpse,dper,dgri,dmoj,dvir,dwil,dana,dsec,dsim,dmel"
    alignment_dir = "/home/zerodel/Workspace/Fly/align_fly/fly_12_species"
    dsh.test_gene_species_match(alignment_dir, species_csv.split(","))
Exemplo n.º 13
0
def get_para_1004():
    """Extract parameters for the "gtr" and "nest" models via ``dsh.extract_parameter``.

    Uses the module-level ``dot_result_folder``, ``para_path``,
    ``gtr_list_para`` and ``nest_list_para``.

    :return: None
    """
    # get parameters from fly data . model : gtr, nest, parameter: likelihood, aic, w, psi

    # Written as a parenthesised expression so the line is valid on both
    # Python 2 and Python 3 and prints the same text.
    print("---- ")
    dsh.extract_parameter(dot_result_folder, para_path, ["gtr"], gtr_list_para)
    dsh.extract_parameter(dot_result_folder, para_path, ["nest"], nest_list_para)
Exemplo n.º 14
0
def prepare4r():
    """Run ``dsh.transfer_for_r`` for the "gtr" model, then the "nest" model.

    Each call receives an empty second argument, the matching module-level
    parameter list and the same hard-coded folder.

    :return: None
    """
    fly_parameter_folder = "/home/zerodel/Workspace/Fly/ExtractedParameters"
    dsh.transfer_for_r("gtr", "", gtr_list_para, fly_parameter_folder)
    dsh.transfer_for_r("nest", "", nest_list_para, fly_parameter_folder)
Exemplo n.º 15
0
def test_aln():
    """Run ``dsh.test_gene_species_match`` on the hard-coded mouse alignment folder.

    The second argument is the list of eight names obtained by splitting the
    comma-separated string below.
    """
    species_csv = "macaque,chimp,human,dog,cow,horse,rat,mouse"
    alignment_dir = "/home/zerodel/Workspace/mouse/mouse_8_species"
    dsh.test_gene_species_match(alignment_dir, species_csv.split(","))
Exemplo n.º 16
0
def test_aln():
    """Check the fly alignment folder with ``dsh.test_gene_species_match``.

    The names to match are produced by splitting one comma-separated string.
    """
    expected_names = "dyak,dere,dpse,dper,dgri,dmoj,dvir,dwil,dana,dsec,dsim,dmel".split(",")
    fly_alignment_folder = "/home/zerodel/Workspace/Fly/align_fly/fly_12_species"
    dsh.test_gene_species_match(fly_alignment_folder, expected_names)
Exemplo n.º 17
0
 def test_gap(self):
     """Check the pair returned by ``dsh.sum_available_sites`` for ``./tmp.aln``.

     The second element must equal 4 and the first must equal 3.
     """
     site_counts = dsh.sum_available_sites("./tmp.aln")
     usable_sites, total_sites = site_counts
     self.assertEqual(total_sites, 4)
     self.assertEqual(usable_sites, 3)
Exemplo n.º 18
0
 def test_gap_traversal2(self):
     """Call ``dsh.gap_check_traversal`` with explicit job ids and check the report exists."""
     report_name = "gap_report.txt"
     call_arguments = {
         "input_folder": os.path.abspath(os.curdir),
         "output_file": report_name,
         "jobids": ["tmp"],
     }
     dsh.gap_check_traversal(**call_arguments)
     self.assertTrue(os.path.exists(report_name))
Exemplo n.º 19
0
def test_aln():
    """Check the mouse alignment folder with ``dsh.test_gene_species_match``.

    The names to match are produced by splitting one comma-separated string.
    """
    expected_names = "macaque,chimp,human,dog,cow,horse,rat,mouse".split(",")
    mouse_alignment_folder = "/home/zerodel/Workspace/mouse/mouse_8_species"
    dsh.test_gene_species_match(mouse_alignment_folder, expected_names)