Пример #1
0
def split_randomly_into_two_sets(list_of_child):
    """Distribute child nodes between two new trees at a random cut point.

    A cut index is drawn with ``random.choice`` from
    ``range(0, len(list_of_child))``.  Children located before the cut are
    added to the seed node of the left tree, all others to the seed node of
    the right tree.  A side whose size check matches is replaced by ``None``
    before both sides are handed to ``tree_operations.collapse_edges``.

    Args:
        list_of_child: indexable, sized collection of nodes to distribute.
            An empty collection makes ``random.choice`` raise ``IndexError``.

    Returns:
        Tuple ``(left, right)`` holding the ``collapse_edges`` result for
        each side.
    """
    total = len(list_of_child)
    left_tree, right_tree = tr.Tree(), tr.Tree()
    cut = random.choice(range(0, total))

    for position, child in enumerate(list_of_child):
        target = left_tree if position < cut else right_tree
        target.seed_node.add_child(child)

    # cut == 0 means nothing was attached to the left side.
    if cut == 0:
        left_tree = None
    # The cut is drawn from range(0, total), so it is always below total and
    # this check does not trigger for values produced above.
    if cut == total:
        right_tree = None

    collapsed_left = tree_operations.collapse_edges(left_tree)
    collapsed_right = tree_operations.collapse_edges(right_tree)
    return collapsed_left, collapsed_right
Пример #2
0
def create_tree_for_color_noise(parameters):
    """Create colour-noise replicates of the saved baseline data set.

    Entries read from ``parameters``:
        ``[0]`` noise, used as a percentage of the number of leaves,
        ``[2]`` colour table passed to ``color_tree`` for G,
        ``[3]`` colour table passed to ``color_tree`` for S,
        ``[6]`` number of leaves.

    For each of ``random_for_precentage`` replicates the baseline
    ``sigma``/``colors`` files and both trees are reloaded from ``path``,
    ``change_color_of_vertex`` is applied while the change counter is below
    ``number_of_leaves * (noise / 100)``, and the result is written with
    ``save_data`` under ``path + '/color'``.

    Relies on the module-level names ``path``, ``random_for_precentage`` and
    ``compare``.  Returns ``None``.
    """
    print('Creating noise in colors')
    noise = parameters[0]
    G_internal_colors = parameters[2]
    S_colors = parameters[3]
    number_of_leaves = parameters[6]

    number_of_random_changes = number_of_leaves * (noise / 100)
    random_for_prec = random_for_precentage
    for rand_num in range(0, random_for_prec):
        # NOTE(review): eval() executes whatever the data files contain.
        # If they only ever hold literals, ast.literal_eval would be the
        # safer reader - confirm the file format before switching.
        sigma = []
        with open(path + '/0/sigma0.0' + '.txt', 'r') as sigma_file:
            for line in sigma_file:
                sigma.append(eval(line))
        # Only the value parsed from the first line is used.
        sigma = sigma[0]

        colors = []
        with open(path + '/0/colors0.0' + '.txt', 'r') as colors_file:
            for line in colors_file:
                colors.append(eval(line))
        colors = colors[0]

        S = tr.Tree.get_from_path(path + "/phyliptree(binary,all).phy", schema="newick")
        G = tr.Tree.get_from_path(path + "/GeneTree(binary)_local.txt", schema="newick")

        S = utiles.init_internal_labels(S, 'x', sigma, path)
        G = utiles.init_internal_labels(G, 'u', sigma, path)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(S, G, sigma)

        sigma, old_sigma = inits.update_sigma(S, G, 0, sigma, False, path, True, S_labels_table, G_labels_table)
        # old_colors from this call is replaced after the noise loop below.
        colors, old_colors = inits.update_colors(S, colors, True)

        i = 0
        # number_of_random_changes may be fractional, hence the explicit
        # counter comparison instead of range().
        while i < number_of_random_changes:
            G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma)
            S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)
            number_of_nodes = tree_operations.number_of_leafs(G, 'G')
            random_vertex_to_change_color = randome_leave_from_tree(G, number_of_nodes, True)
            colors = change_color_of_vertex(random_vertex_to_change_color, colors, None, sigma, True)
            i += 1
        old_colors = return_color_to_taxon(S, colors)
        save_data(old_sigma, old_colors, {}, noise, rand_num, compare, path + '/color')
Пример #3
0
def init_internal_labels(tree, char, old_sigma, path):
    """Label every node of ``tree`` in post-order and save a key file.

    Each node visited by ``tree.postorder_node_iter()`` gets the label
    ``char`` followed by a running number starting at 1.  For nodes that
    carry a taxon, a line ``"<label> : <taxon label>"`` is recorded; when
    ``char == 'u'`` the line additionally contains
    ``old_sigma[<taxon label>]`` in parentheses.

    The directory ``path + '/saved_data'`` is created if needed,
    ``tree_operations.collapse_edges`` is called on the tree, and the
    recorded lines are written to ``G_keys.txt`` (``char == 'u'``) or
    ``S_keys.txt`` (``char == 'x'``) inside that directory.  Any other
    ``char`` writes no file.

    Args:
        tree: tree object providing ``postorder_node_iter()``.
        char: label prefix; also selects which key file is written.
        old_sigma: mapping from taxon label to a string, read only when
            ``char == 'u'``.
        path: base directory for the ``saved_data`` folder.

    Returns:
        The same ``tree`` object that was passed in.
    """
    key_lines = []
    for counter, nd in enumerate(tree.postorder_node_iter(), start=1):
        nd.label = char + str(counter)
        if nd.taxon:
            entry = nd.label + ' : ' + nd.taxon.label
            if char == 'u':
                entry += ' (' + old_sigma[nd.taxon.label] + ') '
            key_lines.append(entry + '\n')
    # Join once instead of growing a string inside the loop.
    dic = ''.join(key_lines)

    to_create = path + '/saved_data/'
    os.makedirs(os.path.dirname(to_create), exist_ok=True)
    tree_operations.collapse_edges(tree)

    key_file_names = {'u': 'G_keys.txt', 'x': 'S_keys.txt'}
    if char in key_file_names:
        # The context manager closes the file even if write() fails.
        with open(path + '/saved_data/' + key_file_names[char], 'w') as key_file:
            key_file.write(dic)
    return tree
Пример #4
0
def RSAM_finder_multithread(parameters):
    """Score vertices for every random replicate of one noise setting.

    ``parameters`` is unpacked as ``(noise_level, noise_in, p1, p2)``.  For
    each of ``random_for_prec`` replicates the function loads the gene and
    species trees plus the replicate's ``sigma``/``colors`` files, builds the
    hypergraph, and - when ``hypergraph.assign_probabilities`` returns a
    hypergraph - runs ``find_Pattern`` for ``p1`` and ``p2``, weights and
    normalises a new ``nx.DiGraph`` and stores the vertices reported by
    ``pattern_identify.find_signi_distance``.  Replicates without a
    hypergraph are stored as an empty dict.

    Relies on module-level names including ``path``, ``random_for_prec``,
    ``geneExt``, ``speciesTreespecification``, ``k``, ``test``,
    ``exact_names``, ``D_cost``, ``S_cost``, ``HT_cost``, ``alpha``,
    ``gamma``, ``save_data`` and ``number_of_planted_vertices``.

    Returns:
        Tuple ``(utiles.average_of_list(scores_per_replicate, replicates),
        noise_in)``; ``noise_in`` stays ``''`` when there are no replicates.
    """
    noise_in = ''

    list_of_scores_for_rand_num = {}
    # The two score lists are deliberately created outside the loop: a
    # replicate only refreshes one of them and reuses the other as left by
    # earlier replicates.
    max_score_p1_list = []
    max_score_p1_and_p2_list = []
    random_for_prec_curr = random_for_prec
    for rand_num in range(0, random_for_prec_curr):

        noise_level, noise_in, p1, p2 = parameters
        path_change_in = path + '/' + noise_in
        # NOTE(review): os.path.dirname() drops the last path component, so
        # these two calls create the parents of the named paths (presumably
        # `path` and `path/noise_in`), not `.../saved_data` itself - confirm
        # that is intended.
        os.makedirs(os.path.dirname(path_change_in), exist_ok=True)
        path_change_in = path_change_in + '/saved_data'
        os.makedirs(os.path.dirname(path_change_in), exist_ok=True)
        path_change_in = path + '/' + noise_in

        S_dis_matrix = {}
        nodes_table = {}
        S_colors = {}
        all_vertices = {}
        new_G = nx.DiGraph()

        nCr_lookup_table = {}
        fact_lookup_table = {}

        G = tr.Tree.get_from_path(path + "/GeneTree(binary," + geneExt +
                                  ")_local.txt",
                                  schema="newick")
        S = tr.Tree.get_from_path(path + "/phyliptree(binary," +
                                  speciesTreespecification + ").phy",
                                  schema="newick")

        # NOTE(review): eval() executes whatever the data files contain.
        # If they only ever hold literals, ast.literal_eval would be the
        # safer reader - confirm the file format before switching.
        sigma = []
        with open(
                path_change_in + '/' + str(noise_level) + '/sigma' +
                str(noise_level) + '.' + str(rand_num) + '.txt',
                'r') as sigma_file:
            for line in sigma_file:
                sigma.append(eval(line))
        # Only the value parsed from the first line is used.
        sigma = sigma[0]

        colors = []
        with open(
                path_change_in + '/' + str(noise_level) + '/colors' +
                str(noise_level) + '.' + str(rand_num) + '.txt',
                'r') as colors_file:
            for line in colors_file:
                colors.append(eval(line))
        colors = colors[0]
        G.prune_taxa_with_labels(
            tree_operations.remove_unsigma_genes(G, sigma, True))

        S = utiles.init_internal_labels(S, 'x', sigma, path)
        G = utiles.init_internal_labels(G, 'u', sigma, path)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(
            S, G, sigma)
        sigma, old_sigma = inits.update_sigma(S, G, k, sigma, test,
                                              path_change_in, exact_names,
                                              S_labels_table, G_labels_table)
        G.prune_taxa_with_labels(
            tree_operations.remove_unsigma_genes(G, sigma, False))
        colors, old_colors = inits.update_colors(S, colors, exact_names)

        S_dis_matrix = inits.init_distance_S(S_dis_matrix, k, test, path,
                                             speciesTreespecification)
        nodes_table = inits.init_nodes_table(S, G, nodes_table)

        H, H_number_of_nodes, nodes_table = hypergraph.build_hyper_garph(
            S, G, test, k, nodes_table, D_cost, S_cost, HT_cost,
            path_change_in, alpha, sigma, save_data)

        H, max_prob = hypergraph.assign_probabilities(S, G, H, gamma)
        if H is None:
            # No hypergraph for this replicate: record an empty result.
            list_of_scores_for_rand_num.update({rand_num: {}})
        else:
            ##      PROBABILITIES, COLORS, PATTERN      ##

            S_colors = tree_operations.color_tree(S, 'S', S_colors, colors,
                                                  sigma)

            interesting_vertices_p1, nCr_lookup_table, fact_lookup_table = find_Pattern(
                H, S, S_dis_matrix, nCr_lookup_table, fact_lookup_table, p1,
                S_colors)
            interesting_vertices_p2, nCr_lookup_table, fact_lookup_table = find_Pattern(
                H, S, S_dis_matrix, nCr_lookup_table, fact_lookup_table, p2,
                S_colors)

            max_S_d_of_HT = tree_operations.find_max_d_of_HT(
                S_dis_matrix, interesting_vertices_p1, p1)

            new_G = tree_operations.weight_G_based_on_same_color_HT(
                G, new_G, interesting_vertices_p1, interesting_vertices_p2,
                max_S_d_of_HT, p1, p2, False)

            new_G = tree_operations.number_of_edges_in_subtree(new_G)
            new_G = tree_operations.normlize_weights(new_G, k, p1, 'p1')
            new_G = tree_operations.normlize_weights(new_G, k, p2, 'p2')

            # Only one of the two score lists is refreshed per replicate,
            # depending on whether a second pattern was supplied.
            if p2[0] is None:
                max_score_p1_list = tree_operations.find_max_scores(
                    new_G, number_of_planted_vertices, 'p1', p1[3])
            else:
                max_score_p1_and_p2_list = tree_operations.find_max_scores(
                    new_G, number_of_planted_vertices, 'p2', p1[3])

            marked_nodes, all_vertices = pattern_identify.find_signi_distance(
                new_G, all_vertices, p1, p2, max_score_p1_list,
                max_score_p1_and_p2_list, False)

            list_of_scores_for_rand_num.update({rand_num: all_vertices})
    return (utiles.average_of_list(list_of_scores_for_rand_num,
                                   random_for_prec_curr), noise_in)
Пример #5
0
                                  schema="newick")

        print("     Reading file " + path_curr + "/sigma.txt'...")
        input = open(path_curr + '/0/sigma0.0.txt', 'r')
        sigma = []
        for line in input:
            sigma.append(eval(line))
        sigma = sigma[0]
        print("     Finished reading file  " + path_curr + "0/sigma0.0.txt'")
        G.prune_taxa_with_labels(
            tree_operations.remove_unsigma_genes(G, sigma, True))

        S = utiles.init_internal_labels(S, 'x', sigma, path_curr)
        G = utiles.init_internal_labels(G, 'u', sigma, path_curr)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(
            S, G, sigma)
        sigma, old_sigma = inits.update_sigma(S, G, k, sigma, False, path_curr,
                                              True, S_labels_table,
                                              G_labels_table)
        G.prune_taxa_with_labels(
            tree_operations.remove_unsigma_genes(G, sigma, False))
        S_dis_matrix = inits.init_distance_S({}, k, False, path_curr,
                                             speciesTreespecification)
        nodes_table = inits.init_nodes_table(S, G, {})

        start = datetime.now()
        effi.build_hyper_garph(S, G, False, k, nodes_table, D_cost, S_cost,
Пример #6
0
def main(S, G, number_of_leaves, path, k, running_time, number_of_planted_vertices):
    """Generate a random data set, plant vertices in it and create noisy copies.

    Steps, in order:
      1. Build a random tree with ``number_of_leaves`` leaves, colour it,
         copy it into ``G`` with renamed leaves and print both trees.
      2. Create ``sigma``, export the edge list, reload both trees from the
         files under ``path`` and initialise labels, sigma and colours.
      3. When ``running_time`` is falsy: call ``choose_planted_vertex`` up to
         ``number_of_planted_vertices`` times, saving the data after every
         successful round, then run the three noise generators over
         ``noise_level`` in a pool of 15 workers.
      4. When ``running_time`` is truthy: only save the data.

    Args:
        S, G: ignored on entry; both names are rebound before first use.
        number_of_leaves: number of leaves of the generated tree.
        path: directory used for all files read and written.
        k: forwarded to ``weight_G_based_on_same_color_HT`` and
            ``choose_planted_vertex``.
        running_time: truthy to skip planting and noise generation.
        number_of_planted_vertices: upper bound on planting rounds.

    Relies on module-level names such as ``sym``, ``on_lab``, ``compare`` and
    ``noise_level``.  Calls ``quit()`` as soon as a planting round reports
    failure.  Returns ``None``.
    """
    global random_for_precentage,all_edges,TH_edges_in_subtree,compare_subtrees,TH_pattern_in_subtree,TH_compare_subtrees,both,TH_both,accur
    starting_time = datetime.now()
    new_G = nx.DiGraph()
    noise = 0
    number_of_HT_under_planted = 10
    # The S argument is discarded here; a fresh tree is populated below.
    S = Tree()
    sigma = {}
    nCr_lookup_table = {}
    fact_lookup_table = {}
    colors = {}
    S_dis_matrix = {}
    S_colors = {}
    G_internal_colors = {}
    sol = {}

    names = [sym + str(i) for i in range(0, number_of_leaves)]
    S.populate(number_of_leaves, names_library=names)
    count_nodes_and_update_internal_names(S)
    colors = random_colors(S, colors)

    # G starts as a copy of S whose leaf names get a "Gene" prefix.
    G = S.copy("newick")
    for leaf in G.iter_leaves():
        if leaf.name[:6] == 'Specie':
            leaf.name = "Gene" + leaf.name[6:]
        else:
            leaf.name = "GeneI" + leaf.name[8:]
    print_tree(G, 'G', path)
    print_tree(S, 'S', path)

    sigma = create_sigme(number_of_leaves, sigma)
    utils.newick2edgelist.main(path)
    save_edgelist(S_dis_matrix, path)

    # From here on S and G are the trees re-read from the files under path.
    S = tr.Tree.get_from_path(path + "/phyliptree(binary,all).phy", schema="newick")
    G = tr.Tree.get_from_path(path + "/GeneTree(binary)_local.txt", schema="newick")

    S = utiles.init_internal_labels(S, 'x', sigma, path)
    G = utiles.init_internal_labels(G, 'u', sigma, path)

    G = tree_operations.collapse_edges(G)
    S = tree_operations.collapse_edges(S)

    S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(S, G, sigma)

    sigma, old_sigma = inits.update_sigma(S, G, 0, sigma, False, path, True, S_labels_table, G_labels_table)
    colors, old_colors = inits.update_colors(S, colors, True)
    max_dis = tree_operations.max_dis(S_dis_matrix)

    flag = True
    j = 0
    all_random_sources_red_to_red = []
    all_random_sources_black_to_black = []
    all_random_nutral = []
    all_random_sources = (all_random_sources_red_to_red, all_random_sources_black_to_black, all_random_nutral)
    new_G = tree_operations.weight_G_based_on_same_color_HT(G, new_G, [],
                                                            [], [], [], 0, False,
                                                            'HT', False, k)
    new_G = tree_operations.number_of_edges_in_subtree(new_G)

    S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)
    G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma)
    if not on_lab:
        draw.draw_S_and_G(S, G, old_sigma, colors, sigma, path, None, '_rand_before')

    if not running_time:
        while j < number_of_planted_vertices:
            print(
                '                                                                 *****         %sth vertex            ******' % str(
                    j))
            sol[j] = {}
            (nCr_lookup_table, fact_lookup_table,
             (sol[j]['Marked'], sol[j]['list_of_couples']),
             colors) = choose_planted_vertex(
                S_dis_matrix, new_G, S, G,
                G_internal_colors,
                TH_edges_in_subtree,
                compare_subtrees,
                TH_compare_subtrees,
                sigma,
                k,
                both,
                TH_both, j, sol, accur,
                nCr_lookup_table,
                fact_lookup_table,
                all_random_sources,
                colors,
                S_colors, max_dis)
            # Kept as an equality test on purpose: `== False` and `not`
            # differ for values such as None.
            if sol[j]['Marked'] == False:
                flag = flag and sol[j]['Marked']
            else:
                sigma, old_sigma, y = change_sigma(sigma, old_sigma, S, G, sol[j]['list_of_couples'],
                                                   number_of_HT_under_planted, S_labels_table, G_labels_table)
                S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)
                G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma)
            j += 1
            if not flag:
                # A planting round failed: persist what exists and stop.
                if not on_lab:
                    draw.draw_S_and_G(S, G, old_sigma, colors, sigma, path, None, '_rand')
                old_colors = return_color_to_taxon(S, colors)
                save_data(old_sigma, old_colors, sol, noise, 0, compare, path)
                # running_time is known to be falsy in this branch, so the
                # former inner check was always true.
                quit()
            print('Planted vertices:%s' % str(sol))
            if not on_lab:
                draw.draw_S_and_G(S, G, old_sigma, colors, sigma, path, sol, '_rand' + str(noise) + '.' + str(0))
            old_colors = return_color_to_taxon(S, colors)
            save_data(old_sigma, old_colors, sol, noise, 0, compare, path)
            return_planted_nodes_new_name(sol, G, path)

        parameters = [(noise_level[i], number_of_HT_under_planted, G_internal_colors, S_colors,
                       nCr_lookup_table, fact_lookup_table, number_of_leaves)
                      for i in range(0, len(noise_level))]
        p = Pool(15)
        try:
            p.map(create_tree_for_HT_and_colors_noise, parameters)
            p.map(create_tree_for_color_noise, parameters)
            p.map(create_tree_for_HT_noise, parameters)
        finally:
            # Release the worker processes instead of leaving them to the
            # garbage collector / interpreter exit.
            p.close()
            p.join()

    else:
        save_data(old_sigma, old_colors, sol, noise, 0, compare, path)
    print('Running time: %s' % str(datetime.now() - starting_time))
Пример #7
0
def create_tree_for_HT_and_colors_noise(parameters):
    """Create replicates with noise in both sigma (via ``change_sigma``) and colours.

    Entries read from ``parameters``:
        ``[0]`` noise, used as a percentage of the number of leaves,
        ``[1]`` value forwarded to ``change_sigma``,
        ``[2]`` colour table passed to ``color_tree`` for G,
        ``[3]`` colour table passed to ``color_tree`` for S,
        ``[4]``/``[5]`` lookup tables threaded through ``check_if_enriched``,
        ``[6]`` number of leaves.

    For each of ``random_for_precentage`` replicates the baseline
    ``sigma``/``colors`` files and both trees are reloaded from ``path``.
    Each round picks a random source and target vertex, calls
    ``change_sigma`` on that pair and recolours one vertex; rounds continue
    until the sum of the ``changed`` values returned by ``change_sigma``
    reaches ``number_of_leaves * (noise / 100)``.  The replicate is written
    with ``save_data`` under ``path + '/colors_and_HT'``.

    Relies on the module-level names ``path``, ``random_for_precentage``,
    ``accur`` and ``compare``.  Returns ``None``.
    """
    print('Creating noise in HT and colors')
    noise = parameters[0]
    number_of_HT_under_planted = parameters[1]
    G_internal_colors = parameters[2]
    S_colors = parameters[3]
    nCr_lookup_table = parameters[4]
    fact_lookup_table = parameters[5]
    number_of_leaves = parameters[6]

    number_of_random_changes = number_of_leaves * (noise / 100)
    random_for_prec = random_for_precentage
    for rand_num in range(0, random_for_prec):
        # NOTE(review): eval() executes whatever the data files contain.
        # If they only ever hold literals, ast.literal_eval would be the
        # safer reader - confirm the file format before switching.
        sigma = []
        with open(path + '/0/sigma0.0' + '.txt', 'r') as sigma_file:
            for line in sigma_file:
                sigma.append(eval(line))
        # Only the value parsed from the first line is used.
        sigma = sigma[0]

        colors = []
        with open(path + '/0/colors0.0' + '.txt', 'r') as colors_file:
            for line in colors_file:
                colors.append(eval(line))
        colors = colors[0]

        S = tr.Tree.get_from_path(path + "/phyliptree(binary,all).phy", schema="newick")
        G = tr.Tree.get_from_path(path + "/GeneTree(binary)_local.txt", schema="newick")

        S = utiles.init_internal_labels(S, 'x', sigma, path)
        G = utiles.init_internal_labels(G, 'u', sigma, path)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(S, G, sigma)

        sigma, old_sigma = inits.update_sigma(S, G, 0, sigma, False, path, True, S_labels_table, G_labels_table)
        # old_colors from this call is replaced after the noise loop below.
        colors, old_colors = inits.update_colors(S, colors, True)

        i = 0
        # These lists are filled per replicate but not read again inside
        # this function.
        all_random_sources_red_to_red = []
        all_random_sources_black_to_black = []
        all_random_nutral = []
        while i < number_of_random_changes:
            G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma)
            S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)
            number_of_nodes = tree_operations.number_of_leafs(G, 'G')
            random_source = random_vertex_in_tree(number_of_nodes, G)
            random_target = random_vertex_in_tree(number_of_nodes, G)
            random_vertex_to_change_color = randome_leave_from_tree(G, number_of_nodes, True)
            sigma, old_sigma, changed = change_sigma(sigma, old_sigma, S, G,
                                                     [(random_source.label, random_target.label)],
                                                     number_of_HT_under_planted, S_labels_table, G_labels_table)
            colors = change_color_of_vertex(random_vertex_to_change_color, colors, None, sigma, True)
            nCr_lookup_table, fact_lookup_table, enriched = check_if_enriched(S, G, random_source, random_target,
                                                                              nCr_lookup_table, fact_lookup_table,
                                                                              accur, G_internal_colors, S_colors)
            if enriched == 'red-to-red':
                all_random_sources_red_to_red.append(random_source)
            # NOTE(review): the spelling 'black_to_balck' is kept verbatim;
            # it presumably has to match what check_if_enriched returns.
            elif enriched == 'black_to_balck':
                all_random_sources_black_to_black.append(random_source)
            else:
                all_random_nutral.append(random_source)
            # The counter advances by whatever change_sigma reported, so a
            # round that changed nothing is repeated.
            i += changed
        old_colors = return_color_to_taxon(S, colors)
        save_data(old_sigma, old_colors, {}, noise, rand_num, compare, path + "/colors_and_HT")