Exemplo n.º 1
0
def run_parsimony_algorithms(current_tree, nodelist):
    """Run Fitch, "my" and Sankoff parsimony on copies of a tree and evaluate.

    Every algorithm works on its own deep copy of ``current_tree`` and
    ``nodelist``, so the objects passed in by the caller are never handed to
    an algorithm directly. After each stage the module-level ``CURRENT_TIME``
    is replaced by the value returned from ``print_time``.

    Args:
        current_tree: tree object exposing a ``clade`` attribute.
        nodelist: node list that belongs to ``current_tree``.

    Returns:
        The result of ``evaluation`` called with the original node list and
        the three per-algorithm node lists (Fitch, my, Sankoff — in that
        order).
    """
    global CURRENT_TIME

    def announce(title):
        # All section banners of this function are printed in green.
        print(colored(title, "green"))

    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    announce("---------------- Fitch parsimony ----------------")
    fitch_tree, fitch_nodes = deepcopy(current_tree), deepcopy(nodelist)
    fitch_parsimony(fitch_tree.clade, fitch_nodes, 3)
    CURRENT_TIME = print_time(CURRENT_TIME)

    announce("---------------- my parsimony ----------------")
    my_tree, my_nodes = deepcopy(current_tree), deepcopy(nodelist)
    my_parsimony(my_tree.clade, my_nodes)
    CURRENT_TIME = print_time(CURRENT_TIME)

    announce("---------------- Sankoff parsimony ----------------")
    sankoff_tree, sankoff_nodes = deepcopy(current_tree), deepcopy(nodelist)
    # Unlike the two calls above, Sankoff receives the tree object itself,
    # not its root clade.
    sankoff_parsimony(sankoff_tree, sankoff_nodes)
    CURRENT_TIME = print_time(CURRENT_TIME)

    # --------------------------------------------------------
    announce("-------- evaluation --------")
    differences = evaluation(nodelist, fitch_nodes, my_nodes, sankoff_nodes)
    CURRENT_TIME = print_time(CURRENT_TIME)
    announce("--------------------------------")
    return differences
def main():
    """Read the Eukaryota subtree and its castor node list, prepare and save the tree.

    Steps, each followed by a ``print_time`` report that updates the
    module-level ``CURRENT_TIME``:

    1. read ``./data/subtree/Eukaryota.tre`` (newick) with ``Phylo.read``;
    2. append the first three columns of every non-empty data row of
       ``./data/nodelist/Eukaryota-castor.csv`` to the module-level
       ``nodelist``;
    3. call ``prepare_tree`` on the root clade;
    4. write the tree to ``./results/Eukaryota_tree-castor.tre`` (newick).

    Raises:
        IndexError: if a non-empty CSV row has fewer than three columns.
    """
    global CURRENT_TIME
    global nodelist

    print(colored("---------------- read tree ----------------", "green"))
    subtree_path = './data/subtree/Eukaryota.tre'
    tree = Phylo.read(subtree_path, 'newick')
    CURRENT_TIME = print_time(CURRENT_TIME)

    print(colored("---------------- read nodelist ----------------", "green"))
    nodelist_path = './data/nodelist/Eukaryota-castor.csv'
    #                0    1              2       3       4           5
    # nodelist    - [id, originaltag, finaltag, depth, heights, nr_children]
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for files passed to csv.reader.
    with open(nodelist_path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        next(reader, None)      # skip the header
        for row in reader:
            if not row:
                # blank line in the file
                continue
            # only id, originaltag and finaltag are kept
            nodelist.append([row[0], row[1], row[2]])
    CURRENT_TIME = print_time(CURRENT_TIME)

    print(colored("---------------- prepare tree ----------------", "green"))
    prepare_tree(tree.clade)
    print(colored("---------------- Save tree ----------------", "green"))
    Phylo.write(tree, './results/Eukaryota_tree-castor.tre', 'newick')
    CURRENT_TIME = print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return
Exemplo n.º 3
0
def run_parsimony_algorithms(current_tree, nodelist):
    """Run the four Fitch parsimony variants on copies of a tree and evaluate.

    For each variant number 1..4 the tree and node list are deep-copied,
    ``fitch_parsimony`` is called with the copy's root clade, the copied node
    list and the variant number, and the elapsed time is reported via
    ``print_time`` (which also updates the module-level ``CURRENT_TIME``).

    Args:
        current_tree: tree object exposing a ``clade`` attribute.
        nodelist: node list that belongs to ``current_tree``.

    Returns:
        The result of ``evaluation`` called with the original node list
        followed by the node lists of variants 1, 2, 3 and 4.
    """
    global CURRENT_TIME
    CURRENT_TIME = datetime.datetime.now().replace(microsecond=0)

    variant_nodelists = []
    for variant in (1, 2, 3, 4):
        banner = "---------------- Fitch{} parsimony ----------------".format(
            variant)
        print(colored(banner, "green"))
        tree_copy = deepcopy(current_tree)
        nodes_copy = deepcopy(nodelist)
        fitch_parsimony(tree_copy.clade, nodes_copy, variant)
        variant_nodelists.append(nodes_copy)
        CURRENT_TIME = print_time(CURRENT_TIME)

    # --------------------------------------------------------
    print(colored("-------- evaluation --------", "green"))
    differences = evaluation(nodelist, *variant_nodelists)
    CURRENT_TIME = print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return differences
Exemplo n.º 4
0
def main():
    """Build the node list for the subtree named by ``subtree_name``.

    Reads the free-living and parasite tag lists, loads the newick subtree
    ``./data/subtree/<subtree_name>.tre``, tags it through
    ``fill_tree_with_tags`` and prints the module-level counters that call
    is expected to fill. Afterwards the counters and the module-level
    ``nodelist`` are reset.

    Raises:
        IndexError: if ``nodelist`` is still empty after tagging.
    """
    global START_TIME, CURRENT_TIME
    global freelivings, parasites
    global nr_leave_nodes, nr_used_freelivings, nr_used_parasites, unknown
    global nodelist, doubleTagged

    def banner(text):
        # Section headers are printed in green.
        print(colored(text, "green"))

    def blue(value):
        return colored(value, 'blue')

    banner("------------------------ build nodelists ------------------------")
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = print_time(START_TIME)

    banner("---------------- read parasites and freelivings ----------------")
    print("Freelivings:")
    freelivings = read_tags(path_freelivings)
    print("Parasites:")
    parasites = read_tags(path_parasites)
    CURRENT_TIME = print_time(CURRENT_TIME)

    banner("---------------- read tree ----------------")
    subtree_path = './data/subtree/' + subtree_name + '.tre'
    print("Build nodelist for:", subtree_name)
    tree = Phylo.read(subtree_path, 'newick')

    banner("---------------- tag tree ----------------")
    fill_tree_with_tags(tree.clade, 0)
    print(blue(nr_leave_nodes), "leave nodes are in the tree")
    print(blue(nr_used_freelivings), "freeliving tags were used,",
          blue(nr_used_parasites), "parasite tags were used =>",
          blue(unknown), "unknown leave nodes")
    print(
        "Rootnode, Depth, Heigths: [Min, Max, Mean], Originaltag, Finaltag, Nr_children"
    )
    print(nodelist[0])
    print(doubleTagged, "are tagged as P, but could also be FL!")

    # ---- reset countings ----
    nr_leave_nodes = nr_used_freelivings = nr_used_parasites = unknown = 0
    nodelist = []

    CURRENT_TIME = print_time(CURRENT_TIME)
    banner("--------------------------------")
    return
Exemplo n.º 5
0
def get_random_tagged_tree(number_leafnodes, percentage_parasites,
                           percentage_unknown, p_multifurcation,
                           beta_distribution_parameters):
    """Build a random tree and tag it until the parasite share is acceptable.

    A randomized tree is created once with
    ``Phylo.BaseTree.Tree.randomized``. A fresh deep copy of it is then
    tagged with ``tag_tree`` over and over until the resulting parasite
    fraction lies strictly within ``permitted_deviation`` of
    ``percentage_parasites``.

    Arguments:
      number_leafnodes                - passed to ``Tree.randomized``
      percentage_parasites            - target parasite fraction
      percentage_unknown              - forwarded to ``tag_tree``
      p_multifurcation                - stored in the module-level
                                        ``percentage_multifurcation``
      beta_distribution_parameters    - forwarded to ``tag_tree``
                                        ([A_FL, B_FL, A_P, B_P])

    Returns:
      ``[tagged_tree, nodelist]`` of the first accepted attempt.

    NOTE(review): the loop has no retry limit, and the ratio below divides
    by zero when both leaf counters are 0.
    """
    global percentage_multifurcation
    percentage_multifurcation = p_multifurcation

    started = datetime.datetime.now().replace(microsecond=0)
    print("---- randomized tree ----")
    # randomized(cls, taxa, branch_length=1.0, branch_stdev=None)
    #   Create a randomized bifurcating tree given a list of taxa.
    #   https://github.com/biopython/biopython/blob/master/Bio/Phylo/BaseTree.py
    base_tree = Phylo.BaseTree.Tree.randomized(number_leafnodes)
    base_tree.clade.name = 'root'
    last_stamp = print_time(started)

    print("---- tag tree ----")
    while True:
        candidate = deepcopy(base_tree)
        # third argument: father_tag = 0 -> free living
        tagging = tag_tree(candidate.clade, [], 0, [0, 0],
                           percentage_parasites, percentage_unknown,
                           beta_distribution_parameters)
        tagged_nodes = tagging[1]
        leaf_counts = tagging[2]
        # presumably leaf_counts is [#FL, #P], i.e. this is #P / (#FL + #P)
        parasite_share = leaf_counts[1] / (leaf_counts[0] + leaf_counts[1])
        print("tried", parasite_share * 100,
              "% of parasites")  # 40% parasites?
        lower_bound = percentage_parasites - permitted_deviation
        upper_bound = percentage_parasites + permitted_deviation
        if lower_bound < parasite_share < upper_bound:
            break

    print("----")
    last_stamp = print_time(last_stamp)
    print("----")
    return [candidate, tagged_nodes]
Exemplo n.º 6
0
def main():
    """Run castor Sankoff parsimony and save the resulting node list as CSV.

    The node list returned by ``sankoff_parsimony(-1)`` is written, below a
    fixed header row, to ``./data/nodelist/<subtree_name>-castor.csv``.
    ``print_time`` is called after each stage and its return value stored in
    the module-level ``CURRENT_TIME``.
    """
    global CURRENT_TIME

    print('Run castor - Sankoff parsimony - for ', subtree_name)

    print(colored("---------------- Sankoff parsimony ----------------", "green"))
    nodelist = sankoff_parsimony(-1)
    CURRENT_TIME = print_time(CURRENT_TIME)

    print(colored("---------------- Save nodelist ----------------", "green"))
    nodelist_path = './data/nodelist/' + subtree_name + '-castor.csv'
    header = ['ott_id', 'originaltag', 'finaltag', 'depth', 'heights', 'nr_children']
    # newline='' is required by the csv module; without it every record is
    # followed by an empty line on platforms that translate '\n' to '\r\n'.
    with open(nodelist_path, 'w', newline='') as nodelist_file:
        writer = csv.writer(nodelist_file, delimiter=',')
        writer.writerow(header)
        writer.writerows(nodelist)
    CURRENT_TIME = print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))
    return
Exemplo n.º 7
0
def main():
    """Reduce the free-living and parasite species lists and save them.

    Reads both tag lists, runs ``delete_archaea_or_bacteria`` on the Bacteria
    and Archaea subtrees and ``delete_internal_nodes`` on the Eukaryota
    subtree, prints the module-level counters, and finally writes the
    module-level ``freelivings`` and ``parasites`` lists, each as one single
    CSV row, to ``./data/interaction_data/reduced_freelivings.csv`` and
    ``./data/interaction_data/reduced_parasites.csv``.

    NOTE(review): the two delete helpers presumably shrink ``freelivings`` /
    ``parasites`` and update the counters; that is not visible here.
    """
    global START_TIME
    global CURRENT_TIME
    global freelivings
    global parasites
    global archaea_or_bacteria
    global internal_parasite
    global internal_freeliving

    print(
        colored(
            "------------------------ edit species lists ------------------------",
            "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = print_time(START_TIME)

    print(
        colored(
            "---------------- read parasites and freelivings ----------------",
            "green"))
    print("Freelivings:")
    freelivings = read_tags(path_freelivings)
    print("Parasites:")
    parasites = read_tags(path_parasites)
    CURRENT_TIME = print_time(CURRENT_TIME)

    print(
        colored(
            "---------------- delete Archaea and Bacteria ----------------",
            "green"))
    for item in ('Bacteria', 'Archaea'):
        print(item, ':')
        subtree_path = './data/subtree/' + item + '.tre'
        tree = Phylo.read(subtree_path, 'newick')
        delete_archaea_or_bacteria(tree.clade)
        CURRENT_TIME = print_time(CURRENT_TIME)

    print(
        colored("---------------- delete internal Eukaryota ----------------",
                "green"))
    subtree_path = './data/subtree/Eukaryota.tre'
    tree = Phylo.read(subtree_path, 'newick')
    delete_internal_nodes(tree.clade)

    CURRENT_TIME = print_time(CURRENT_TIME)
    print(colored("--------------------------------", "green"))

    print(colored(archaea_or_bacteria, 'blue'),
          "archaea or bacteria found and deleted")
    print(colored(internal_freeliving,
                  'blue'), "internal freeliving tags found and",
          colored(internal_parasite, 'blue'),
          "internal parasite tags found and deleted")

    print("Freelivings:", len(freelivings))
    print("Parasites:", len(parasites))

    # -------------------------------------------------
    outputs = (
        ('./data/interaction_data/reduced_freelivings.csv', freelivings),
        ('./data/interaction_data/reduced_parasites.csv', parasites),
    )
    for csv_title, species in outputs:
        # newline='' as required by the csv module (prevents '\r\r\n' line
        # endings on Windows).
        with open(csv_title, 'w', newline='') as species_file:
            writer = csv.writer(species_file, quoting=csv.QUOTE_ALL)
            # the whole list is written as one quoted row
            writer.writerow(species)
    # -------------------------------------------------
    return
Exemplo n.º 8
0
def main():
    """Simulate random trees and compare Fitch, My and Sankoff parsimony.

    For each of ``number_trees`` iterations a random tagged tree is built and
    multifurcated via ``buildTree``, then ``run_parsimony_algorithms`` is
    run on it. The first three values of every per-tree result are averaged
    (rounded to two decimals) and appended as one row
    ``[percentage_unknown, percentage_multifurcation, fitch, my, sankoff]``
    to ``data/simulation/<parasites%>-unknown-multifurcation.csv``.

    All parameters are read from module-level names.

    Raises:
        ZeroDivisionError: if ``number_trees`` is 0.
    """
    global START_TIME
    global CURRENT_TIME
    print(
        colored(
            "------------------------ start simulation ------------------------",
            "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes and",
          colored(percentage_multifurcation * 100, 'blue'),
          "% multifurcation of internal nodes.")
    beta_distribution_parameters = decide_for_beta_distribution_parameters()
    print(beta_distribution_parameters)

    # First entry is a header row; one result row per tree follows.
    diffs = [["Fitch", "My", "Sankoff"]]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(
            colored("---------------- get random tree ----------------",
                    "green"))
        result = buildTree.get_random_tagged_tree(
            number_leafnodes, percentage_parasites, percentage_unknown,
            percentage_multifurcation, beta_distribution_parameters)
        current_tree = result[0]
        nodelist = result[1]
        print(
            colored("---------------- multifurcate tree ----------------",
                    "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = print_time(CURRENT_TIME)
        print(
            colored(
                "---------------- maximum parsimony algorithms ----------------",
                "green"))
        diff_percentage = run_parsimony_algorithms(current_tree, nodelist)
        diffs.append(diff_percentage)
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))

    # Average each algorithm's column over all trees (header row skipped).
    per_tree = diffs[1:]
    f_dif = round(sum(float(r[0]) for r in per_tree) / number_trees, 2)
    m_dif = round(sum(float(r[1]) for r in per_tree) / number_trees, 2)
    s_dif = round(sum(float(r[2]) for r in per_tree) / number_trees, 2)

    row = [percentage_unknown, percentage_multifurcation, f_dif, m_dif, s_dif]
    csv_title = "data/simulation/" + str(int(
        percentage_parasites * 100)) + "-unknown-multifurcation.csv"
    # Append mode: every run adds one line to the shared results file.
    # The context manager closes the file even if writing fails, and
    # newline='' is what the csv module expects for its file objects.
    with open(csv_title, 'a', newline='') as fp:
        csv.writer(fp).writerow(row)
    print("saved in:")
    print(csv_title)

    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
          colored(number_leafnodes, 'blue'), "leafnodes",
          colored(percentage_parasites * 100, 'blue'), "% parasites and",
          colored(percentage_unknown * 100, 'blue'), "% unknown leafnodes and",
          colored(percentage_multifurcation * 100, 'blue'),
          "% of multifurcation of the internal nodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch / My / Sankoff")
    percentage_correctly_predicted = "| " + str(f_dif) + " % | " + str(
        m_dif) + " % | " + str(s_dif) + " % |"
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))
    return
Exemplo n.º 9
0
def main():
    """Simulate random trees and compare the four Fitch parsimony variants.

    For each of ``number_trees`` iterations a random tagged tree is built and
    multifurcated via ``buildTree``, then ``run_parsimony_algorithms`` is
    run on it. The first four values of every per-tree result are averaged
    (rounded to two decimals) and appended as one row
    ``[percentage_unknown, percentage_multifurcation, f1, f2, f3, f4]`` to
    ``data/simulation/fitch_<parasites%>-unknown-multifurcation.csv``.

    All parameters are read from module-level names.

    Raises:
        ZeroDivisionError: if ``number_trees`` is 0.
    """
    global START_TIME
    global CURRENT_TIME
    print(colored("------------------------ start simulation ------------------------", "green"))
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    CURRENT_TIME = print_time(START_TIME)
    print(colored("---------------- metadata ----------------", "green"))
    metadata()
    print(colored("---------------- parameters ----------------", "green"))
    print("Simulate", colored(number_trees, 'blue'), "random trees with",
        colored(number_leafnodes, 'blue'), "leafnodes",
        colored(percentage_parasites*100, 'blue'), "% parasites and",
        colored(percentage_unknown*100, 'blue'), "% unknown leafnodes and",
        colored(percentage_multifurcation*100, 'blue'), "% of multifurcation of the internal nodes.")
    beta_distribution_parameters = decide_for_beta_distribution_parameters(percentage_parasites)
    print(beta_distribution_parameters)

    # First entry is a header row; one result row per tree follows.
    diffs = [["Fitch1", "Fitch2", "Fitch3", "Fitch4"]]
    for i in range(1, number_trees + 1):
        print("Tree", colored(i, 'red'))
        print(colored("---------------- get random tree ----------------", "green"))
        result = buildTree.get_random_tagged_tree(number_leafnodes, percentage_parasites, percentage_unknown, percentage_multifurcation, beta_distribution_parameters)
        current_tree = result[0]
        nodelist = result[1]
        print(colored("---------------- multifurcate tree ----------------", "green"))
        buildTree.get_non_binary_tree(current_tree.clade, nodelist)
        CURRENT_TIME = print_time(CURRENT_TIME)
        print(colored("---------------- maximum parsimony algorithms ----------------", "green"))
        diff_percentage = run_parsimony_algorithms(current_tree, nodelist)
        diffs.append(diff_percentage)
        time_new = datetime.datetime.now().replace(microsecond=0)
        print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print("whole time needed:", time_new - START_TIME)
        print(colored("--------------------------------", "red"))

    # Average each Fitch variant's column over all trees (header row skipped).
    per_tree = diffs[1:]
    f_dif1 = round(sum(float(r[0]) for r in per_tree) / number_trees, 2)
    f_dif2 = round(sum(float(r[1]) for r in per_tree) / number_trees, 2)
    f_dif3 = round(sum(float(r[2]) for r in per_tree) / number_trees, 2)
    f_dif4 = round(sum(float(r[3]) for r in per_tree) / number_trees, 2)

    row = [percentage_unknown, percentage_multifurcation, f_dif1, f_dif2, f_dif3, f_dif4]
    csv_title = "data/simulation/fitch_" + str(int(percentage_parasites*100)) + "-unknown-multifurcation.csv"
    # Append mode: every run adds one line to the shared results file.
    # The context manager closes the file even if writing fails, and
    # newline='' is what the csv module expects for its file objects.
    with open(csv_title, 'a', newline='') as fp:
        csv.writer(fp).writerow(row)
    print("saved in:")
    print(csv_title)

    print(colored("--------------------------------", "green"))
    print(colored(number_trees, 'blue'), " trees simulated with",
        colored(number_leafnodes, 'blue'), "leafnodes",
        colored(percentage_parasites*100, 'blue'), "% parasites and",
        colored(percentage_unknown*100, 'blue'), "% unknown leafnodes and",
        colored(percentage_multifurcation*100, 'blue'), "% of multifurcation of the internal nodes.")
    print("correctly predicted (including already known leaf nodes):")
    print("differences Fitch1 / Fitch2 / Fitch3 / Fitch4")
    # Every cell is separated by " | "; the original dropped the space in
    # front of the fourth value.
    percentage_correctly_predicted = "| {} % | {} % | {} % | {} % |".format(
        f_dif1, f_dif2, f_dif3, f_dif4)
    print(colored(percentage_correctly_predicted, 'red'))
    print(colored("--------------------------------", "green"))
    return