def snp_count_species(lst_file, folder_rnasnp_file):
    """
    Count SNPs per gene from ".rnasnp" files and export them as a new lst file.

    Gene ids are read from "lst_file"; for each id the file
    "<folder_rnasnp_file>/<id>.rnasnp" is looked up.  The result is written
    next to "lst_file" as "<lst_file without extension>SnpCount.lst", with one
    tab-separated "geneid / snpCountPerGene" row per gene id.  Genes whose
    ".rnasnp" file does not exist get the value "NA".

    NOTE(review): this function is defined a second time further down in the
    file; the later definition is the one that stays bound to the name.

    :param lst_file: path of the lst file holding the gene id sequence
    :param folder_rnasnp_file: folder containing the "<geneid>.rnasnp" files
    :return: None
    """
    # selection settings handed to count_snp_per_file
    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)

    gene_snp_count = []
    for jobid in jobids:
        rnasnp_path = os.path.join(folder_rnasnp_file, jobid + ".rnasnp")
        if not os.path.exists(rnasnp_path):
            gene_snp_count.append("NA")
            continue
        raw_count = count_snp_per_file(rnasnp_path, is_greater_than,
                                       threshold, data_criterion)
        # NOTE(review): the reason for scaling by 3.0 is not visible here.
        gene_snp_count.append(str(raw_count / 3.0))

    # Build the output path from the absolute path of the lst file, so that a
    # relative "lst_file" with a directory part (e.g. "data/x.lst") does not
    # get its directory duplicated in the output path.
    output_path = os.path.splitext(os.path.abspath(lst_file))[0] + "SnpCount.lst"

    # write a new lst file for the snp count
    with open(output_path, "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, snp_count in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, snp_count))
def gap_check_traversal(input_folder, output_file, given_sequence_file=""):
    """
    Check gaps for every gene id listed in "given_sequence_file" and export
    the result as a lst file to "output_file".

    For each gene id, "<input_folder>/<id>.input" is used when it exists,
    otherwise "<input_folder>/<id>.aln".  The output has a "gaps / full"
    header followed by one tab-separated row per gene id, in the order of the
    id sequence (the gene id itself is not written).

    :param input_folder: folder holding the ".input" / ".aln" files
    :param output_file: path of the lst file to write
    :param given_sequence_file: lst file providing the gene id sequence;
        required, although it has an empty default
    :return: None
    :raises dsh.idSequenceUnKnow: when no "given_sequence_file" is given
    :raises dsh.WrongFileTypeForGapCheck: when a gene id has neither an
        ".input" nor an ".aln" file in "input_folder"
    """
    curdir_abs = os.path.abspath(os.curdir)

    # Without a sequence file the order of gene ids is unknown.  (The original
    # condition was inverted: it tried to read the empty path and raised
    # whenever a sequence file was actually supplied.)
    if not given_sequence_file:
        raise dsh.idSequenceUnKnow
    jobids = dsh.get_gene_id_sequnce_from_lst(given_sequence_file)

    try:
        with open(output_file, "w") as writer:
            writer.write("gaps\tfull\n")
            for jobid in jobids:
                input_name = os.path.join(input_folder, jobid + ".input")
                aln_name = os.path.join(input_folder, jobid + ".aln")
                if os.path.exists(input_name):
                    num_gap, full_nt_length = gap_counting_input(input_name)
                elif os.path.exists(aln_name):
                    # NOTE(review): ".aln" files are parsed with
                    # gap_counting_input here, while main1 uses
                    # gap_counting_aln for them - confirm which is intended.
                    num_gap, full_nt_length = gap_counting_input(aln_name)
                else:
                    raise dsh.WrongFileTypeForGapCheck
                writer.write("%s\t%s\n" % (num_gap, full_nt_length))
    finally:
        # restore the working directory recorded on entry, also on errors
        os.chdir(curdir_abs)
def main1():
    """
    Export the gap number and full length of the yeast ".aln" files.

    Gene ids are read from "gtr.lst"; for each id, "<id>.aln" inside the
    alignment folder is passed to gap_counting_aln, and the two returned
    values are written as one tab-separated row of "gapyeast.lst".  All paths
    are hard-coded.

    The working directory is changed to the alignment folder and is not
    restored afterwards.

    :return: None
    """
    aln_path = "/home/zerodel/Workspace/Yeast/result/main_full_length"
    # the ".aln" files below are addressed relative to this folder
    os.chdir(aln_path)

    jobids = dsh.get_gene_id_sequnce_from_lst(os.path.join("/home/zerodel/Workspace/Yeast/result/ExtractedParameter", "gtr.lst"))
    with open("/home/zerodel/Workspace/Yeast/result/ExtractedParameter/gapyeast.lst", "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_aln(jobid + ".aln")
            writer.write("%s\t%s\n" % (num_gap, full_nt_length))
def main3():
    """
    Export the gap number and full length of the ".input" files in "sp2".

    Gene ids are read from "nest2.lst"; for each id, "<id>.input" inside the
    input folder is passed to gap_counting_input, and the two returned values
    are written as one tab-separated row of "gap10.lst".  All paths are
    hard-coded.

    The working directory is changed to the input folder and is not restored
    afterwards.

    :return: None
    """
    aln_path = "/home/zerodel/Workspace/sp2"
    # the ".input" files below are addressed relative to this folder
    os.chdir(aln_path)

    jobids = dsh.get_gene_id_sequnce_from_lst(os.path.join("/home/zerodel/GitProjects/python-rna-structure/data/para", "nest2.lst"))
    with open("/home/zerodel/GitProjects/python-rna-structure/data/para/gap10.lst", "w") as writer:
        writer.write("gaps\tfull\n")
        for jobid in jobids:
            num_gap, full_nt_length = gap_counting_input(jobid + ".input")
            writer.write("%s\t%s\n" % (num_gap, full_nt_length))
def snp_count_species(lst_file, folder_rnasnp_file):
    """
    Count SNPs per gene from ".rnasnp" files and export them as a new lst file.

    Gene ids are read from "lst_file"; for each id the file
    "<folder_rnasnp_file>/<id>.rnasnp" is looked up.  The result is written
    next to "lst_file" as "<lst_file without extension>SnpCount.lst", with one
    tab-separated "geneid / snpCountPerGene" row per gene id.  Genes whose
    ".rnasnp" file does not exist get the value "NA".

    NOTE(review): a function with the same name is defined earlier in this
    file; this later definition replaces it.

    :param lst_file: path of the lst file holding the gene id sequence
    :param folder_rnasnp_file: folder containing the "<geneid>.rnasnp" files
    :return: None
    """
    # selection settings handed to count_snp_per_file
    is_greater_than = False
    data_criterion = "pvalue2"
    threshold = 0.1

    jobids = dsh.get_gene_id_sequnce_from_lst(lst_file)

    gene_snp_count = []
    for jobid in jobids:
        single_file = os.path.join(folder_rnasnp_file, jobid + ".rnasnp")
        if os.path.exists(single_file):
            counted = count_snp_per_file(single_file, is_greater_than, threshold, data_criterion)
            # NOTE(review): the reason for scaling by 3.0 is not visible here.
            gene_snp_count.append(str(counted / 3.0))
        else:
            gene_snp_count.append("NA")

    # Derive the output path from the absolute lst path: joining the absolute
    # directory with a relative "lst_file" that already contains a directory
    # part (e.g. "data/x.lst") would repeat that directory.
    lst_stem_abs = os.path.splitext(os.path.abspath(lst_file))[0]

    # write a new lst file for the snp count
    with open(lst_stem_abs + "SnpCount.lst", "w") as exporter:
        exporter.write("geneid\tsnpCountPerGene\n")
        for jobid, count_text in zip(jobids, gene_snp_count):
            exporter.write("%s\t%s\n" % (jobid, count_text))