def split_randomly_into_two_sets(list_of_child):
    """Split children into a random-length prefix tree and a suffix tree.

    A cut index is drawn with ``random.choice`` from
    ``range(0, len(list_of_child))``. Children before the cut are attached
    to the seed node of a new left ``tr.Tree``; the rest go to a new right
    ``tr.Tree``. A side that ends up with no children is replaced by
    ``None``.

    Returns a 2-tuple: ``tree_operations.collapse_edges`` applied to the
    left result and to the right result, in that order.

    An empty ``list_of_child`` makes ``random.choice`` raise ``IndexError``.

    NOTE(review): the cut is drawn from ``range(0, n)`` and therefore never
    equals ``n``, so the ``right_tree = None`` branch looks unreachable.
    Confirm whether an inclusive upper bound was intended.
    """
    left_tree = tr.Tree()
    right_tree = tr.Tree()
    total = len(list_of_child)
    cut = random.choice(range(0, total))

    for position, child in enumerate(list_of_child):
        destination = left_tree if position < cut else right_tree
        destination.seed_node.add_child(child)

    if cut == 0:
        left_tree = None
    if cut == total:
        right_tree = None

    collapsed_left = tree_operations.collapse_edges(left_tree)
    collapsed_right = tree_operations.collapse_edges(right_tree)
    return collapsed_left, collapsed_right
def create_tree_for_color_noise(parameters):
    """Create color-noised variants of the saved baseline and store them.

    ``parameters`` is an indexable sequence. This function reads index 0
    (noise), 2 (G internal colors), 3 (S colors) and 6 (number of leaves);
    the other positions are not used here.

    For each repetition ``rand_num`` in ``range(random_for_precentage)``:

    * the baseline ``sigma`` and ``colors`` are re-read from
      ``path + '/0/sigma0.0.txt'`` and ``path + '/0/colors0.0.txt'``;
    * the species and gene trees are reloaded from ``path``, relabelled
      and collapsed;
    * while an integer counter is below
      ``number_of_leaves * (noise / 100)``, the trees are recolored and the
      vertex returned by ``randome_leave_from_tree`` has its color changed
      through ``change_color_of_vertex``;
    * the result is written with ``save_data`` under ``path + '/color'``.

    Relies on module-level names not defined in this block
    (``path``, ``random_for_precentage``, ``compare``, ``tr``, ``utiles``,
    ``tree_operations``, ``inits`` and the helper functions called below).
    Returns ``None``.
    """
    print('Creating noise in colors')
    noise = parameters[0]
    G_internal_colors = parameters[2]
    S_colors = parameters[3]
    number_of_leaves = parameters[6]
    number_of_random_changes = number_of_leaves * (noise / 100)
    random_for_prec = random_for_precentage
    for rand_num in range(0, random_for_prec):
        # NOTE(review): every line of both files goes through eval(), which
        # executes arbitrary expressions -- the files must be trusted. If
        # they only ever hold literals, ast.literal_eval would be a safer
        # replacement; confirm before switching.
        # The `with` blocks close the handles, which the previous version
        # leaked on every repetition.
        with open(path + '/0/sigma0.0' + '.txt', 'r') as sigma_file:
            sigma_rows = []
            for line in sigma_file:
                sigma_rows.append(eval(line))
        sigma = sigma_rows[0]

        with open(path + '/0/colors0.0' + '.txt', 'r') as colors_file:
            color_rows = []
            for line in colors_file:
                color_rows.append(eval(line))
        colors = color_rows[0]

        S = tr.Tree.get_from_path(path + "/phyliptree(binary,all).phy", schema="newick")
        G = tr.Tree.get_from_path(path + "/GeneTree(binary)_local.txt", schema="newick")

        S = utiles.init_internal_labels(S, 'x', sigma, path)
        G = utiles.init_internal_labels(G, 'u', sigma, path)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(S, G, sigma)
        sigma, old_sigma = inits.update_sigma(S, G, 0, sigma, False, path, True,
                                              S_labels_table, G_labels_table)
        colors, old_colors = inits.update_colors(S, colors, True)

        i = 0
        while i < number_of_random_changes:
            G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma)
            S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)
            number_of_nodes = tree_operations.number_of_leafs(G, 'G')
            random_vertex_to_change_color = randome_leave_from_tree(G, number_of_nodes, True)
            colors = change_color_of_vertex(random_vertex_to_change_color, colors, None, sigma, True)
            i += 1

        old_colors = return_color_to_taxon(S, colors)
        save_data(old_sigma, old_colors, {}, noise, rand_num, compare, path + '/color')
def init_internal_labels(tree, char, old_sigma, path):
    """Label every node of ``tree`` and write a key file for its taxa.

    Nodes are visited in postorder and labelled ``char + '1'``,
    ``char + '2'``, ... For each node that has a taxon, one line
    ``"<label> : <taxon label>"`` is recorded. When ``char == 'u'`` the
    line also carries ``" (<old_sigma[taxon label]>) "``.

    The directory ``path + '/saved_data'`` is created if missing,
    ``tree_operations.collapse_edges(tree)`` is called, and the recorded
    lines are written to ``G_keys.txt`` (``char == 'u'``) or ``S_keys.txt``
    (``char == 'x'``) in that directory. Any other ``char`` writes no file.

    Parameters:
        tree: object exposing ``postorder_node_iter()``; its nodes get a
            new ``label`` attribute.
        char: label prefix; also selects which key file is written.
        old_sigma: mapping indexed by taxon label, read only when
            ``char == 'u'``; its values are concatenated to strings.
        path: base directory for ``saved_data``.

    Returns:
        The same ``tree`` object that was passed in.

    NOTE(review): the original indentation was lost; the newline is assumed
    to terminate each taxon entry (one entry per line).
    """
    key_lines = []
    for counter, nd in enumerate(tree.postorder_node_iter(), start=1):
        nd.label = char + str(counter)
        if nd.taxon:
            entry = nd.label + ' : ' + nd.taxon.label
            if char == 'u':
                entry = entry + ' (' + old_sigma[nd.taxon.label] + ') '
            key_lines.append(entry + '\n')
    # Joined once, to avoid the quadratic `+=` string build.
    keys_text = ''.join(key_lines)

    to_create = path + '/saved_data/'
    os.makedirs(os.path.dirname(to_create), exist_ok=True)
    tree_operations.collapse_edges(tree)

    # Context managers guarantee the handle is closed even if write() fails.
    if char == 'u':
        with open(path + '/saved_data/G_keys.txt', 'w') as keys_file:
            keys_file.write(keys_text)
    if char == 'x':
        with open(path + '/saved_data/S_keys.txt', 'w') as keys_file:
            keys_file.write(keys_text)
    return tree
def RSAM_finder_multithread(parameters):
    """Score one noise configuration across all random repetitions.

    ``parameters`` unpacks to ``(noise_level, noise_in, p1, p2)``.

    For each ``rand_num`` in ``range(random_for_prec)`` the gene and species
    trees are loaded from ``path``, the matching ``sigma`` / ``colors``
    files are read from ``path/<noise_in>/<noise_level>/``, a hypergraph is
    built and assigned probabilities, and (when the hypergraph is not
    ``None``) pattern scores are computed and stored under ``rand_num``.
    When the hypergraph is ``None`` an empty dict is stored instead.

    Returns:
        ``(utiles.average_of_list(scores_by_rand_num, random_for_prec),
        noise_in)``. ``noise_in`` is ``''`` if the loop never ran.

    Relies on module-level names not defined in this block (``path``,
    ``random_for_prec``, ``geneExt``, ``speciesTreespecification``, ``k``,
    ``test``, ``exact_names``, ``D_cost``, ``S_cost``, ``HT_cost``,
    ``alpha``, ``gamma``, ``save_data``, ``number_of_planted_vertices``,
    ``find_Pattern`` and the imported project modules).
    """
    noise_in = ''
    list_of_scores_for_rand_num = {}
    # Initialised once, outside the loop: the list that is not recomputed in
    # a given iteration keeps whatever value it had from an earlier one.
    max_score_p1_list = []
    max_score_p1_and_p2_list = []
    random_for_prec_curr = random_for_prec
    for rand_num in range(0, random_for_prec_curr):
        noise_level, noise_in, p1, p2 = parameters

        # NOTE(review): os.path.dirname() strips the last path component, so
        # these two calls create `path` and `path/<noise_in>` but not
        # `.../saved_data` itself -- confirm that is intended.
        path_change_in = path + '/' + noise_in
        os.makedirs(os.path.dirname(path_change_in), exist_ok=True)
        path_change_in = path_change_in + '/saved_data'
        os.makedirs(os.path.dirname(path_change_in), exist_ok=True)
        path_change_in = path + '/' + noise_in

        S_dis_matrix = {}
        nodes_table = {}
        S_colors = {}
        all_vertices = {}
        new_G = nx.DiGraph()
        nCr_lookup_table = {}
        fact_lookup_table = {}

        G = tr.Tree.get_from_path(path + "/GeneTree(binary," + geneExt + ")_local.txt",
                                  schema="newick")
        S = tr.Tree.get_from_path(path + "/phyliptree(binary," + speciesTreespecification + ").phy",
                                  schema="newick")

        # NOTE(review): each line is passed to eval(), which executes
        # arbitrary expressions -- the files must be trusted. If they only
        # hold literals, ast.literal_eval would be safer; confirm first.
        # The `with` blocks close the handles, which the previous version
        # leaked on every iteration.
        with open(path_change_in + '/' + str(noise_level) + '/sigma' + str(noise_level)
                  + '.' + str(rand_num) + '.txt', 'r') as sigma_file:
            sigma_rows = []
            for line in sigma_file:
                sigma_rows.append(eval(line))
        sigma = sigma_rows[0]

        with open(path_change_in + '/' + str(noise_level) + '/colors' + str(noise_level)
                  + '.' + str(rand_num) + '.txt', 'r') as colors_file:
            color_rows = []
            for line in colors_file:
                color_rows.append(eval(line))
        colors = color_rows[0]

        G.prune_taxa_with_labels(
            tree_operations.remove_unsigma_genes(G, sigma, True))
        S = utiles.init_internal_labels(S, 'x', sigma, path)
        G = utiles.init_internal_labels(G, 'u', sigma, path)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(
            S, G, sigma)
        sigma, old_sigma = inits.update_sigma(S, G, k, sigma, test,
                                              path_change_in, exact_names,
                                              S_labels_table, G_labels_table)
        G.prune_taxa_with_labels(
            tree_operations.remove_unsigma_genes(G, sigma, False))
        colors, old_colors = inits.update_colors(S, colors, exact_names)

        S_dis_matrix = inits.init_distance_S(S_dis_matrix, k, test, path,
                                             speciesTreespecification)
        nodes_table = inits.init_nodes_table(S, G, nodes_table)

        H, H_number_of_nodes, nodes_table = hypergraph.build_hyper_garph(
            S, G, test, k, nodes_table, D_cost, S_cost, HT_cost,
            path_change_in, alpha, sigma, save_data)

        H, max_prob = hypergraph.assign_probabilities(S, G, H, gamma)
        if H is None:
            list_of_scores_for_rand_num.update({rand_num: {}})
        else:
            ## PROBABILITIES, COLORS, PATTERN ##
            S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)

            interesting_vertices_p1, nCr_lookup_table, fact_lookup_table = find_Pattern(
                H, S, S_dis_matrix, nCr_lookup_table, fact_lookup_table, p1,
                S_colors)
            interesting_vertices_p2, nCr_lookup_table, fact_lookup_table = find_Pattern(
                H, S, S_dis_matrix, nCr_lookup_table, fact_lookup_table, p2,
                S_colors)

            max_S_d_of_HT = tree_operations.find_max_d_of_HT(
                S_dis_matrix, interesting_vertices_p1, p1)
            new_G = tree_operations.weight_G_based_on_same_color_HT(
                G, new_G, interesting_vertices_p1, interesting_vertices_p2,
                max_S_d_of_HT, p1, p2, False)
            new_G = tree_operations.number_of_edges_in_subtree(new_G)
            new_G = tree_operations.normlize_weights(new_G, k, p1, 'p1')
            new_G = tree_operations.normlize_weights(new_G, k, p2, 'p2')

            if p2[0] is None:
                max_score_p1_list = tree_operations.find_max_scores(
                    new_G, number_of_planted_vertices, 'p1', p1[3])
            else:
                max_score_p1_and_p2_list = tree_operations.find_max_scores(
                    new_G, number_of_planted_vertices, 'p2', p1[3])

            marked_nodes, all_vertices = pattern_identify.find_signi_distance(
                new_G, all_vertices, p1, p2, max_score_p1_list,
                max_score_p1_and_p2_list, False)
            list_of_scores_for_rand_num.update({rand_num: all_vertices})

    return (utiles.average_of_list(list_of_scores_for_rand_num,
                                   random_for_prec_curr), noise_in)
schema="newick") print(" Reading file " + path_curr + "/sigma.txt'...") input = open(path_curr + '/0/sigma0.0.txt', 'r') sigma = [] for line in input: sigma.append(eval(line)) sigma = sigma[0] print(" Finished reading file " + path_curr + "0/sigma0.0.txt'") G.prune_taxa_with_labels( tree_operations.remove_unsigma_genes(G, sigma, True)) S = utiles.init_internal_labels(S, 'x', sigma, path_curr) G = utiles.init_internal_labels(G, 'u', sigma, path_curr) G = tree_operations.collapse_edges(G) S = tree_operations.collapse_edges(S) S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table( S, G, sigma) sigma, old_sigma = inits.update_sigma(S, G, k, sigma, False, path_curr, True, S_labels_table, G_labels_table) G.prune_taxa_with_labels( tree_operations.remove_unsigma_genes(G, sigma, False)) S_dis_matrix = inits.init_distance_S({}, k, False, path_curr, speciesTreespecification) nodes_table = inits.init_nodes_table(S, G, {}) start = datetime.now() effi.build_hyper_garph(S, G, False, k, nodes_table, D_cost, S_cost,
# main: build a random species tree and a matching gene tree, run the
# vertex-planting loop, save the result, and hand noise generation to a
# multiprocessing pool.
#
# Parameters (as used by the visible code):
#   S, G  -- both are reassigned before being read (S = Tree(),
#            G = S.copy("newick")), so the values passed in are ignored.
#   number_of_leaves -- number of leaf names generated (sym + str(i)); also
#            passed to S.populate, create_sigme and the pool parameters.
#   path  -- directory used for the tree files, the edge list, the drawings
#            and the saved data.
#   k     -- forwarded to weight_G_based_on_same_color_HT and
#            choose_planted_vertex.
#   running_time -- the planting `while` loop is entered only when this is
#            falsy; quit() is also guarded by `if not running_time`.
#   number_of_planted_vertices -- upper bound of the planting loop counter.
#
# Reads module-level names not defined in this block: sym, on_lab, compare,
# noise_level, and the names listed in the `global` statement.
#
# The pool (Pool(15)) maps create_tree_for_HT_and_colors_noise,
# create_tree_for_color_noise and create_tree_for_HT_noise over one
# parameter tuple per entry of noise_level.
#
# NOTE(review): this function's statements have lost their line breaks and
# indentation, so the nesting cannot be read off the text. In particular it
# is unclear which `if` the trailing `else: save_data(...)` pairs with, and
# whether `if not flag:` sits inside the first `if not running_time:`.
# Restore the original layout before changing any logic here.
def main(S,G,number_of_leaves,path,k,running_time,number_of_planted_vertices): global random_for_precentage,all_edges,TH_edges_in_subtree,compare_subtrees,TH_pattern_in_subtree,TH_compare_subtrees,both,TH_both,accur starting_time = datetime.now() new_G = nx.DiGraph() noise = 0 number_of_HT_under_planted = 10 S = Tree() sigma = {} nCr_lookup_table = {} fact_lookup_table = {} colors = {} S_dis_matrix = {} names = [] S_colors = {} G_internal_colors = {} sol = {} for i in range(0, number_of_leaves): names.append(sym + str(i)) S.populate(number_of_leaves, names_library=names) count_nodes_and_update_internal_names(S) #S = random_again(S, number_of_leaves / 4) colors = random_colors(S, colors) G = S.copy("newick") for leaf in G.iter_leaves(): if leaf.name[:6] == 'Specie': leaf.name = "Gene" + leaf.name[6:] else: leaf.name = "GeneI" + leaf.name[8:] print_tree(G,'G',path) print_tree(S,'S',path) sigma = create_sigme(number_of_leaves, sigma) utils.newick2edgelist.main(path) save_edgelist(S_dis_matrix,path) S = tr.Tree.get_from_path(path + "/phyliptree(binary,all).phy", schema="newick") G = tr.Tree.get_from_path(path + "/GeneTree(binary)_local.txt", schema="newick") S = utiles.init_internal_labels(S, 'x', sigma, path) G = utiles.init_internal_labels(G, 'u', sigma, path) G = tree_operations.collapse_edges(G) S = tree_operations.collapse_edges(S) S_labels_table, G_labels_table,sigma = inits.init_taxon_to_label_table(S, G, sigma) sigma, old_sigma = inits.update_sigma(S, G, 0, sigma, False, path, True, S_labels_table, G_labels_table) colors, old_colors = inits.update_colors(S, colors, True) max_dis = tree_operations.max_dis(S_dis_matrix) flag = True j = 0 all_random_sources_red_to_red = [] all_random_sources_black_to_black = [] all_random_nutral = [] all_random_sources = (all_random_sources_red_to_red, all_random_sources_black_to_black, all_random_nutral) new_G = tree_operations.weight_G_based_on_same_color_HT(G, new_G, [], [],[],[], 0, False, 'HT', False, k) new_G = 
tree_operations.number_of_edges_in_subtree(new_G) S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma) G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma) if not on_lab: draw.draw_S_and_G(S, G, old_sigma, colors, sigma, path, None, '_rand_before') if not running_time: while j < number_of_planted_vertices: print( ' ***** %sth vertex ******' % str( j)) sol[j] = {} nCr_lookup_table, fact_lookup_table, ( sol[j]['Marked'], sol[j]['list_of_couples']), colors = choose_planted_vertex(S_dis_matrix,new_G, S, G, G_internal_colors, TH_edges_in_subtree, compare_subtrees, TH_compare_subtrees, sigma, k, both, TH_both, j, sol, accur, nCr_lookup_table, fact_lookup_table, all_random_sources, colors, S_colors, max_dis) if sol[j]['Marked'] == False: flag = flag and sol[j]['Marked'] else: sigma, old_sigma, y = change_sigma(sigma, old_sigma, S, G, sol[j]['list_of_couples'], number_of_HT_under_planted,S_labels_table,G_labels_table) S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma) G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma) j += 1 if not flag: if not on_lab: draw.draw_S_and_G(S, G, old_sigma, colors, sigma, path, None, '_rand') old_colors = return_color_to_taxon(S, colors) save_data(old_sigma, old_colors, sol, noise, 0, compare,path) if not running_time: quit() print('Planted vertices:%s' % str(sol)) if not on_lab: draw.draw_S_and_G(S, G, old_sigma, colors, sigma, path, sol, '_rand' + str(noise) + '.' 
+ str(0)) old_colors = return_color_to_taxon(S, colors) save_data(old_sigma, old_colors, sol, noise, 0,compare,path) return_planted_nodes_new_name(sol,G,path) p = Pool(15) parameters = [(noise_level[i],number_of_HT_under_planted,G_internal_colors,S_colors,nCr_lookup_table,fact_lookup_table,number_of_leaves) for i in range(0,len(noise_level))] p.map(create_tree_for_HT_and_colors_noise, parameters) p.map(create_tree_for_color_noise, parameters) p.map(create_tree_for_HT_noise, parameters) else: save_data(old_sigma, old_colors, sol, noise, 0, compare, path) print('Running time: %s' % str(datetime.now() - starting_time))
def create_tree_for_HT_and_colors_noise(parameters):
    """Create variants of the saved baseline with noise in both HT and colors.

    ``parameters`` is an indexable sequence read as: 0 noise,
    1 number of HT under a planted vertex, 2 G internal colors, 3 S colors,
    4 nCr lookup table, 5 factorial lookup table, 6 number of leaves.

    For each repetition ``rand_num`` in ``range(random_for_precentage)``:

    * the baseline ``sigma`` and ``colors`` are re-read from
      ``path + '/0/sigma0.0.txt'`` and ``path + '/0/colors0.0.txt'``;
    * the species and gene trees are reloaded from ``path``, relabelled
      and collapsed;
    * while the counter ``i`` is below ``number_of_leaves * (noise / 100)``,
      a source and target vertex are drawn with ``random_vertex_in_tree``,
      ``change_sigma`` is applied to that pair, one vertex returned by
      ``randome_leave_from_tree`` is recolored, and the source is classified
      by ``check_if_enriched``. ``i`` advances by the ``changed`` value
      returned from ``change_sigma``;
    * the result is written with ``save_data`` under
      ``path + "/colors_and_HT"``.

    Relies on module-level names not defined in this block (``path``,
    ``random_for_precentage``, ``accur``, ``compare`` and the helper
    functions / project modules called below). Returns ``None``.
    """
    print('Creating noise in HT and colors')
    noise = parameters[0]
    number_of_HT_under_planted = parameters[1]
    G_internal_colors = parameters[2]
    S_colors = parameters[3]
    nCr_lookup_table = parameters[4]
    fact_lookup_table = parameters[5]
    number_of_leaves = parameters[6]
    number_of_random_changes = number_of_leaves * (noise / 100)
    random_for_prec = random_for_precentage
    for rand_num in range(0, random_for_prec):
        # NOTE(review): every line of both files goes through eval(), which
        # executes arbitrary expressions -- the files must be trusted. If
        # they only ever hold literals, ast.literal_eval would be a safer
        # replacement; confirm before switching.
        # The `with` blocks close the handles, which the previous version
        # leaked on every repetition.
        with open(path + '/0/sigma0.0' + '.txt', 'r') as sigma_file:
            sigma_rows = []
            for line in sigma_file:
                sigma_rows.append(eval(line))
        sigma = sigma_rows[0]

        with open(path + '/0/colors0.0' + '.txt', 'r') as colors_file:
            color_rows = []
            for line in colors_file:
                color_rows.append(eval(line))
        colors = color_rows[0]

        S = tr.Tree.get_from_path(path + "/phyliptree(binary,all).phy", schema="newick")
        G = tr.Tree.get_from_path(path + "/GeneTree(binary)_local.txt", schema="newick")

        S = utiles.init_internal_labels(S, 'x', sigma, path)
        G = utiles.init_internal_labels(G, 'u', sigma, path)

        G = tree_operations.collapse_edges(G)
        S = tree_operations.collapse_edges(S)

        S_labels_table, G_labels_table, sigma = inits.init_taxon_to_label_table(S, G, sigma)
        sigma, old_sigma = inits.update_sigma(S, G, 0, sigma, False, path, True,
                                              S_labels_table, G_labels_table)
        colors, old_colors = inits.update_colors(S, colors, True)

        i = 0
        all_random_sources_red_to_red = []
        all_random_sources_black_to_black = []
        all_random_nutral = []
        while i < number_of_random_changes:
            G_internal_colors = tree_operations.color_tree(G, 'G', G_internal_colors, colors, sigma)
            S_colors = tree_operations.color_tree(S, 'S', S_colors, colors, sigma)
            number_of_nodes = tree_operations.number_of_leafs(G, 'G')

            random_source = random_vertex_in_tree(number_of_nodes, G)
            random_target = random_vertex_in_tree(number_of_nodes, G)
            random_vertex_to_change_color = randome_leave_from_tree(G, number_of_nodes, True)

            sigma, old_sigma, changed = change_sigma(
                sigma, old_sigma, S, G,
                [(random_source.label, random_target.label)],
                number_of_HT_under_planted, S_labels_table, G_labels_table)
            colors = change_color_of_vertex(random_vertex_to_change_color, colors, None, sigma, True)

            nCr_lookup_table, fact_lookup_table, enriched = check_if_enriched(
                S, G, random_source, random_target, nCr_lookup_table,
                fact_lookup_table, accur, G_internal_colors, S_colors)
            # The tag strings are compared verbatim with what
            # check_if_enriched returns; do not "fix" their spelling here.
            if enriched == 'red-to-red':
                all_random_sources_red_to_red.append(random_source)
            elif enriched == 'black_to_balck':
                all_random_sources_black_to_black.append(random_source)
            else:
                all_random_nutral.append(random_source)

            # Only sigma changes that change_sigma reports count as progress.
            i += changed

        old_colors = return_color_to_taxon(S, colors)
        save_data(old_sigma, old_colors, {}, noise, rand_num, compare, path + "/colors_and_HT")