Пример #1
0
def check_srna(input, output):
    """
    Check whether the input species are in the SSU rRNA (SILVA) database.

    The names read from ``input`` are handed to ``check_organism`` together
    with "kegg_to_silva_id.txt"; the names it reports as unmatched are
    printed and written to "PhySpeTree_srna_checked.txt" inside ``output``.

    :param input: the species names
    :param output: the directory that receives the check result
                   (created when it does not exist)
    """
    if not os.path.exists(output):
        os.makedirs(output)
    open_path = os.path.join(output, "PhySpeTree_srna_checked.txt")

    # Text mode ('w', not 'wb'): everything written below is str, which a
    # binary handle rejects on Python 3. The with-block also closes the
    # report when one of the helpers raises.
    with open(open_path, 'w') as fw:
        input_path = checkFile(input)
        input_list = readIputFile(input_path)

        # Only the unmatched names are reported; the matched ones are unused.
        _in_match, no_match = check_organism(input_list, "kegg_to_silva_id.txt")

        if not no_match:
            print(print_style('INFO: ', fore='green') + "All species are match in SILVA DATABASE")
            fw.write("All species are match in SILVA DATABASE")
        else:
            print(print_style('WARNING: ', fore='red') + "The following species are not supported by SILVA DATABASE:")
            fw.write("The following species are not supported by SILVA DATABASE:\n")
            for line in no_match:
                print(line)
                fw.write(line + "\n")

    print("Checked  whether the input species names in SILVA DATABASE completed.")
    print("Checked result is store in {0}".format(open_path))
Пример #2
0
def annotatingLabels(input, output):
    """
    Write a "labels.txt" annotation file that maps abbreviated names to the
    second field of the matching ``taxlist()`` entry.

    The file starts with the 'LABELS / SEPARATOR TAB / DATA' header and then
    holds one tab-separated line per match, in input order.

    :param input: input a files contain abb names
    :param output: output directory path (created when it does not exist)
    :return: None
    """
    if not os.path.exists(output):
        os.makedirs(output)
    open_path = os.path.join(output, "labels.txt")

    # Text mode ('w', not 'wb'): only str is written, which a binary handle
    # rejects on Python 3. The with-block closes the file on any exit path.
    with open(open_path, 'w') as fw:
        fw.write('LABELS\nSEPARATOR TAB\nDATA\n')
        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)
        tax_list = taxlist()
        for i in input_list:
            for j in tax_list:
                # j[0] is compared with the input name, j[1] is the label
                # that gets written next to it.
                if i == j[0]:
                    fw.write("{0}\t{1}\n".format(j[0], j[1]))

    print("Change abbreviation names to full names complete")
    print("change labels file was save in {0}".format(open_path))
Пример #3
0
def check_hcp(input, output):
    """
    Report which input species are missing from the hcp (KEGG) database.

    The unmatched names returned by ``check_organism`` for
    "organism_kegg_to_tax.txt" are echoed to stdout and stored in
    "PhySpeTree_hcp_checked.txt" below ``output``.

    :param input: the species names
    :param output: the directory contains check result
    """
    if not os.path.exists(output):
        os.makedirs(output)
    report_path = os.path.join(output, "PhySpeTree_hcp_checked.txt")
    report = open(report_path, 'w')

    species = readIputFile(checkFile(input))
    _matched, missing = check_organism(species, "organism_kegg_to_tax.txt")

    if len(missing) != 0:
        # at least one species is unknown: list every one of them
        warning_tag = print_style('WARNING: ', fore='red')
        print(warning_tag + "The following species are not supported by KEGG DATABASE:")
        report.write("The following species are not supported by KEGG DATABASE:\n")
        for name in missing:
            print(name)
            report.write(name + "\n")
    else:
        info_tag = print_style('INFO: ', fore='green')
        print(info_tag + "All species are match in KEGG DATABASE.")
        report.write("All species are match in KEGG DATABASE")

    print("Checked  whether the input species names in KEGG DATABASE completed.")
    print("Checked result is store in {0}".format(report_path))
    report.close()
Пример #4
0
def check_ehcp(input, output):
    """
    Check the highly conserved proteins will be prepared.

    For every entry of the second value returned by ``getspecies`` a line
    "'<hcp name>' ---> p<N>.fasta" (N counting from 1) is printed and written
    to "PhySpeTree_echp_extend.txt" inside ``output``.

    :param input: the species abbreviated names
    :param output: a directory contain check result (created when missing)
    """
    # prepare a file write result
    if not os.path.exists(output):
        os.makedirs(output)
    open_path = os.path.join(output, "PhySpeTree_echp_extend.txt")

    # Text mode ('w', not 'wb'): the messages are str, which a binary handle
    # rejects on Python 3. The with-block closes the file on any exit path.
    with open(open_path, 'w') as fw:
        # check input names
        input_path = checkFile(input)
        input_list = readIputFile(input_path)
        colname = getcolname()
        # Only the matched KO entries are needed here.
        _relist, match_ko = getspecies(input_list, colname)
        for p, line in enumerate(match_ko, 1):
            hcpname = hcp_name(line.strip())
            message = "'{0}' ----------------------------------> p{1}.fasta\n".format(hcpname, p)
            print(message)
            fw.write(message)

    print("Checked extend highly conserved proteins is completed.")
    print("Checked result was store in {0}".format(open_path))
Пример #5
0
def check_ehcp(input, output):
    """
    List the highly conserved proteins that will be prepared.

    Each matched entry is turned into a "'<name>' ---> p<N>.fasta" line
    (N starts at 1) that is printed and stored in
    "PhySpeTree_echp_extend.txt" below ``output``.

    :param input: the species abbreviated names
    :param output: a directory contain check result
    """
    # make sure the result directory exists, then open the result file
    if not os.path.exists(output):
        os.makedirs(output)
    result_path = os.path.join(output, "PhySpeTree_echp_extend.txt")
    result = open(result_path, 'w')

    # resolve the input names to the matched KO entries
    names = readIputFile(checkFile(input))
    _species, matched_kos = getspecies(names, getcolname())

    template = "'{0}' ----------------------------------> p{1}.fasta\n"
    for number, ko in enumerate(matched_kos, 1):
        entry = template.format(hcp_name(ko.strip()), str(number))
        print(entry)
        result.write(entry)

    print("Checked extend highly conserved proteins is completed.")
    print("Checked result was store in {0}".format(result_path))
    result.close()
Пример #6
0
def starting_esrna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                   args_clustalw_p, args_gblocks, args_raxml, args_raxml_p,
                   args_fasttree, args_fasttree_p, args_thread,
                   args_extenddata):
    """
    Reconstruct phylogenetic tree by ssu rna extend method.

    Steps: retrieve the SSU rRNA sequences, append the user supplied extend
    data to 'rna_sequence.fasta', align, run Gblocks, convert to phylip and
    build the tree.

    :param in_put: input passed to ``checkSilvaOrganism``
    :param out_put: output directory handed to every step
    :param args_muscle: kept for interface compatibility; muscle is used
                        whenever clustalw is not selected
    :param args_muscle_p: parameters passed to ``domuscle``
    :param args_clustalw: truthy to align with clustalw
    :param args_clustalw_p: parameters passed to ``doclustalw``
    :param args_gblocks: parameters passed to ``dogblocks``
    :param args_raxml: truthy to build the tree with RAxML
    :param args_raxml_p: parameters passed to ``doraxml``
    :param args_fasttree: truthy to build the tree with FastTree
                          (takes precedence over RAxML)
    :param args_fasttree_p: parameters passed to ``doFastTree``
    :param args_thread: thread setting passed to the tree builders
    :param args_extenddata: file whose lines are appended to the retrieved
                            sequences
    """
    extend_check = checkFile(args_extenddata)
    ssu_input = checkSilvaOrganism(in_put)
    out_retrieve = retrieve16srna(ssu_input, out_put)
    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')

    # Append in text mode: the extend file is read as text, and writing str
    # to an 'ab' handle fails on Python 3.
    with open(retrieve_srna_path, 'a') as fw, open(extend_check) as read:
        fw.writelines(read)

    # set default aligned by muscle if not specify clustalw
    # ('else' rather than 'elif args_muscle' so out_alg is always bound)
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    else:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # gblocks: an untouched protein default is swapped for the DNA default
    if args_gblocks is gblockspara_pro:
        args_gblocks = gblockspara_dna
    out_gblock = dogblocks(out_alg, args_gblocks)
    out_f2p = fasta2phy(out_gblock)

    # reconstruct tree
    if args_fasttree:
        # '-nt' is prepended to the FastTree parameters for this RNA run
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_raxml:
        if args_raxml_p is raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        # Run RAxML for custom parameters too; the call used to sit inside
        # the default check above and was skipped otherwise.
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
Пример #7
0
def colorLabel(input, output, taxon):
    """
    Produce the label colouring annotation file for the given taxon level.

    Writes "labels_color_by_<taxon>.txt" below ``output``: the ``cite_label``
    text, the 'TREE_COLORS / SEPARATOR TAB / DATA' header and one
    "<name>\\tlabel\\t<color>" line per matched input entry.

    :param input: input file names
    :param output: output directory path
    :param taxon: choice annotation by ['kingdom', 'phylum', 'class', 'order']
    :return: no
    """
    if not os.path.exists(output):
        os.makedirs(output)

    # annotation file named after the chosen taxon level
    annotation_path = os.path.join(output, "labels_color_by_" + taxon + ".txt")
    annotation = open(annotation_path, 'w')

    # header: citation text followed by the colour section markers
    annotation.write(cite_label)
    annotation.write('\n')
    annotation.write('TREE_COLORS\nSEPARATOR TAB\nDATA\n')

    # resolve the input names and their annotation colours
    names = readIputFile(checkFile(input))
    matches, colors = matchInput(names, taxon)

    # one line per match: name, the literal 'label', colour of its group
    for entry in matches:
        group_color = colors[entry[1]]
        annotation.write("{0}\tlabel\t{1}\n".format(entry[0], group_color))
    annotation.close()

    print("Color labels by {0} was complete.".format(taxon))
    print("Color labels annotation was save in {0}".format(annotation_path))
Пример #8
0
def annotatingLabels(input, output):
    """
    Write a "labels.txt" annotation file that maps the input names to the
    second field of the matching taxonomy entry.

    The file holds the ``cite`` text, the 'LABELS / SEPARATOR TAB / DATA'
    header and one tab-separated line per match, in input order. The lookup
    table is ``taxlistid()`` when the first input entry is a number
    (per ``is_number``) and ``taxlist()`` otherwise.

    :param input: input a files contain abb names
    :param output: output directory path (created when it does not exist)
    :return: None
    """
    if not os.path.exists(output):
        os.makedirs(output)
    open_path = os.path.join(output, "labels.txt")

    # The with-block closes the file even when a helper raises.
    with open(open_path, 'w') as fw:
        fw.write(cite)
        fw.write('\n')
        fw.write('LABELS\nSEPARATOR TAB\nDATA\n')
        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)

        # Guard the [0] access: an empty input list used to raise
        # IndexError; now it simply yields a header-only file.
        if input_list and is_number(input_list[0]):
            tax_list = taxlistid()
        else:
            tax_list = taxlist()
        for i in input_list:
            for j in tax_list:
                if i == j[0]:
                    fw.write("{0}\t{1}\n".format(j[0], j[1]))

    print("Change abbreviation names to full names complete")
    print("change labels file was save in {0}".format(open_path))
Пример #9
0
def colorRange(input, output, taxon):
    """
    Main function to color range.

    Writes "range_color_by_<taxon>.txt" below ``output``: the
    'TREE_COLORS / SEPARATOR TAB / DATA' header followed by one
    "<name>\\trange\\t<color>\\t<group>" line per matched input entry.

    :param input: input file names
    :param output: output directory path (created when it does not exist)
    :param taxon: choice annotation by ['kingdom', 'phylum', 'class', 'order']
    :return: no
    """
    if not os.path.exists(output):
        os.makedirs(output)
    # write file name
    fw_name = "range_color_by_" + taxon + ".txt"
    open_path = os.path.join(output, fw_name)

    # Text mode ('w', not 'wb'): only str is written, which a binary handle
    # rejects on Python 3. The with-block closes the file on any exit path.
    with open(open_path, 'w') as fw:
        # write annotation range head
        fw.write('TREE_COLORS\nSEPARATOR TAB\nDATA\n')
        # check and get input list
        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)
        # get match list and annotation dict to annotation
        match_list, anno_dict = matchInput(input_list, taxon)
        # write the annotation: line[0] is the name, line[1] the group whose
        # color is looked up in anno_dict
        for line in match_list:
            color = anno_dict[line[1]]
            fw.write("{0}\trange\t{1}\t{2}\n".format(line[0], color, line[1]))

    print("Color range by {0} was complete.".format(taxon))
    print("Color range annotation was save in {0}".format(open_path))
Пример #10
0
def starting_esrna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                   args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
                   args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
                   args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
                   args_iqtree_p, args_thread, args_extenddata, args_db):
    """
    Reconstruct phylogenetic tree by ssu rna extend method.

    Steps: retrieve the SSU rRNA sequences (timed and logged), append the
    user supplied extend data to 'rna_sequence.fasta', align, trim and build
    the tree (timed and logged together).

    :param in_put: input passed to ``checkSilvaOrganism``
    :param out_put: output directory handed to every step
    :param args_muscle, args_clustalw, args_mafft: aligner switches, checked
        in the order clustalw, mafft, muscle
    :param args_muscle_p, args_clustalw_p, args_mafft_p: parameters for the
        selected aligner
    :param args_gblocks, args_trimal: trimming switches, trimal is checked
        first
    :param args_gblocks_p, args_trimal_p: parameters for the selected trimmer
    :param args_raxml, args_fasttree, args_iqtree: tree builder switches,
        checked in the order fasttree, iqtree, raxml
    :param args_raxml_p, args_fasttree_p, args_iqtree_p: parameters for the
        selected tree builder
    :param args_thread: thread setting passed to the tree builders
    :param args_extenddata: file whose lines are appended to the retrieved
        sequences
    :param args_db: database argument passed to ``retrieve16srna``
    """
    extend_check = checkFile(args_extenddata)
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)
    start = time.time()
    out_retrieve = retrieve16srna(ssu_input, out_put, args_db)
    end = time.time()
    auto_build_log.info(
        'Retrieving SSU rRNA sequences used time: {} Seconds'.format(end -
                                                                     start))
    # Truthiness covers an empty list as well as an empty dict.
    if recovery_dic:
        recovery_silva(out_retrieve, recovery_dic, ssu_input)
    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')

    # The with-block closes both handles even when the copy fails.
    with open(retrieve_srna_path, 'a') as fw, open(extend_check) as read:
        fw.writelines(read)

    # alignment: clustalw, then mafft, then muscle
    start2 = time.time()
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    elif args_muscle:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # trimming: trimal if requested, otherwise gblocks
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    elif args_gblocks:
        # an untouched protein default is swapped for the DNA default
        if args_gblocks_p is gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        # Run Gblocks for custom parameters too; the call used to sit inside
        # the default check above, leaving out_gblock undefined otherwise.
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # reconstruct tree
    if args_fasttree:
        # '-nt' is prepended to the FastTree parameters for this RNA run
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        if args_raxml_p is raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        # Same fix as for Gblocks: RAxML must also run with custom parameters.
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end2 = time.time()
    auto_build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end2 -
                                                                 start2))
Пример #11
0
def starting_esrna(in_put, out_put,
                   args_muscle, args_muscle_p,
                   args_clustalw, args_clustalw_p,
                   args_mafft, args_mafft_p,
                   args_gblocks, args_gblocks_p,
                   args_trimal, args_trimal_p,
                   args_raxml, args_raxml_p,
                   args_fasttree, args_fasttree_p,
                   args_iqtree, args_iqtree_p,
                   args_thread, args_extenddata):
    """
    Reconstruct phylogenetic tree by ssu rna extend method.

    Steps: retrieve the SSU rRNA sequences, append the user supplied extend
    data to 'rna_sequence.fasta', align, trim and build the tree.

    :param in_put: input passed to ``checkSilvaOrganism``
    :param out_put: output directory handed to every step
    :param args_muscle, args_clustalw, args_mafft: aligner switches, checked
        in the order clustalw, mafft, muscle
    :param args_muscle_p, args_clustalw_p, args_mafft_p: parameters for the
        selected aligner
    :param args_gblocks, args_trimal: trimming switches, trimal is checked
        first
    :param args_gblocks_p, args_trimal_p: parameters for the selected trimmer
    :param args_raxml, args_fasttree, args_iqtree: tree builder switches,
        checked in the order fasttree, iqtree, raxml
    :param args_raxml_p, args_fasttree_p, args_iqtree_p: parameters for the
        selected tree builder
    :param args_thread: thread setting passed to the tree builders
    :param args_extenddata: file whose lines are appended to the retrieved
        sequences
    """
    extend_check = checkFile(args_extenddata)
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)
    out_retrieve = retrieve16srna(ssu_input, out_put)
    # Only run the recovery step when there is something to recover.
    if recovery_dic:
        recovery(out_retrieve, recovery_dic)
    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')

    # Append in text mode: the extend file is read as text, and writing str
    # to an 'ab' handle fails on Python 3.
    with open(retrieve_srna_path, 'a') as fw, open(extend_check) as read:
        fw.writelines(read)

    # alignment: clustalw, then mafft, then muscle
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    elif args_muscle:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # trimming: trimal if requested, otherwise gblocks
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    elif args_gblocks:
        # an untouched protein default is swapped for the DNA default
        if args_gblocks_p is gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        # Run Gblocks for custom parameters too; the call used to sit inside
        # the default check above, leaving out_gblock undefined otherwise.
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # reconstruct tree
    if args_fasttree:
        # '-nt' is prepended to the FastTree parameters for this RNA run
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        if args_raxml_p is raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        # Same fix as for Gblocks: RAxML must also run with custom parameters.
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)