def test_upgma(self):
    """Check that UPGMA tree building reproduces the expected topology.

    A tree is built with ``do_tree(..., method="upgma")`` from a flat list
    of ten distances over five labels and compared against a reference
    Newick tree. Both edge-overlap ratios reported by ``compare`` must be
    equal to 1 within a tolerance of 1e-5.
    """
    dists = [1.45, 1.51, 1.57, 2.98, 2.94, 3.04, 7.51, 7.55, 7.39, 7.10]
    labels = ["H", "C", "G", "O", "R"]
    tree = do_tree(dists, labels, method="upgma")

    newick_tree = "((((H,C):0.73,G):0.77,O):1.49,R):3.69;"
    expected_tree = Tree(newick_tree)
    result = expected_tree.compare(tree)

    # abs() is essential here: a bare ``value - 1 < tol`` is true for every
    # value at or below 1, so a mismatching topology could never fail.
    assert abs(result["source_edges_in_ref"] - 1) < 0.00001
    assert abs(result["ref_edges_in_source"] - 1) < 0.00001
def test_nj(self):
    """Check that neighbor-joining tree building reproduces the expected topology.

    A tree is built with ``do_tree(..., method="nj")`` from a flat list of
    ten distances over five labels and compared, as an unrooted tree,
    against a reference Newick tree. Both edge-overlap ratios reported by
    ``compare`` must be equal to 1 within a tolerance of 1e-5.
    """
    dists = [5, 9, 10, 9, 10, 8, 8, 9, 7, 3]
    labels = ["A", "B", "C", "D", "E"]
    tree = do_tree(dists, labels, method="nj")

    newick_tree = "(C:2,((A:2,B:3):3,(D:2,E:1):2):2);"
    expected_tree = Tree(newick_tree)
    result = expected_tree.compare(tree, unrooted=True)

    # abs() is essential here: a bare ``value - 1 < tol`` is true for every
    # value at or below 1, so a mismatching topology could never fail.
    assert abs(result["source_edges_in_ref"] - 1) < 0.00001
    assert abs(result["ref_edges_in_source"] - 1) < 0.00001
def ete_compare(usr_tree_str, ref_tree_str, outgroup_id=None):
    """Compare a user tree against a reference tree.

    Both inputs are passed to ``Tree(...)``. When ``outgroup_id`` is truthy,
    ``set_outgroup(outgroup_id)`` is called on both trees and the comparison
    is run with ``unrooted=False``; otherwise the comparison is run with
    ``unrooted=True``.

    Args:
        usr_tree_str: Input for the query (source) tree constructor.
        ref_tree_str: Input for the reference tree constructor.
        outgroup_id: Optional outgroup passed to ``set_outgroup`` on both
            trees.

    Returns:
        An 8-tuple ``(qt, rt, norm_rf, rf, max_rf, source_edges_in_ref,
        ref_edges_in_source, treeko_dist)`` where ``qt`` and ``rt`` are the
        query and reference tree objects and the remaining items are read
        from the dictionary returned by ``qt.compare(rt, ...)``.
    """
    qt = Tree(usr_tree_str)
    rt = Tree(ref_tree_str)

    if outgroup_id:
        qt.set_outgroup(outgroup_id)
        rt.set_outgroup(outgroup_id)

    # The comparison is rooted only when an outgroup was applied above.
    res = qt.compare(rt, unrooted=not outgroup_id)

    # Only the fields that are returned are read; the other entries of the
    # comparison result were previously bound to locals and never used.
    return (
        qt,
        rt,
        res["norm_rf"],
        res["rf"],
        res["max_rf"],
        res["source_edges_in_ref"],
        res["ref_edges_in_source"],
        res["treeko_dist"],
    )
def compare_trees(args):
    """Compare several trees to a reference tree and write a TSV report.

    Args:
        args: Mapping with three keys:
            ``'ref'``    - input for the reference ``Tree`` constructor;
            ``'all'``    - iterable of inputs (strings) for the trees to
                           compare, also used as the row label;
            ``'output'`` - path of the tab-separated report to write.

    Every tree is compared with ``unrooted=True``. All comparisons are
    computed before the output file is opened, so a failing comparison does
    not leave a partially written report behind.

    The report has one header line and one line per tree with seven
    tab-separated fields. Rows are built with ``"\\t".join`` so they no
    longer end with a stray tab, which previously gave each row one more
    field than the header.
    """
    tree_ref = Tree(args['ref'])
    comparisons = [
        (name, tree_ref.compare(Tree(name), unrooted=True))
        for name in args['all']
    ]

    # Result keys, in the same order as the header columns after "Tree".
    columns = (
        'effective_tree_size',
        'norm_rf',
        'rf',
        'max_rf',
        'ref_edges_in_source',
        'source_edges_in_ref',
    )

    with open(args['output'], "w") as f_out:
        f_out.write("Tree\tEffective_Tree_Size\tRF-normalized\tRF\tRF-max\t% edges in Src\t% edges in Ref\n")
        for name, comparison in comparisons:
            fields = [name] + [str(comparison[key]) for key in columns]
            f_out.write("\t".join(fields) + "\n")
def run_ete_compare_py(src, ref, format=0, use_collateral=False,
                       min_support_source=0.0, min_support_ref=0.0,
                       has_duplications=False, expand_polytomies=False,
                       unrooted=False, max_treeko_splits_to_be_artifact=1000,
                       ref_tree_attr='name', source_tree_attr='name'):
    """Compare two Newick trees and return the scalar comparison metrics.

    Args:
        src: Newick input for the source tree.
        ref: Newick input for the reference tree.
        format: Newick format passed to both ``Tree`` constructors.
        use_collateral, min_support_source, min_support_ref,
        has_duplications, expand_polytomies, unrooted,
        max_treeko_splits_to_be_artifact, ref_tree_attr, source_tree_attr:
            Forwarded unchanged to ``Tree.compare``.

    Returns:
        A dict with the keys ``rf``, ``max_rf``, ``ref_edges_in_source``,
        ``source_edges_in_ref``, ``effective_tree_size``, ``norm_rf``,
        ``treeko_dist`` and ``source_subtrees`` taken from the comparison
        result.

    Note:
        Earlier versions accepted these parameters but always called
        ``Tree.compare`` with hard-coded default literals, so any
        non-default argument was silently ignored. The arguments are now
        honoured; calls relying on the defaults behave as before.
    """
    src_tree = Tree(newick=src, format=format)
    ref_tree = Tree(newick=ref, format=format)

    compare_result = Tree.compare(
        src_tree,
        ref_tree,
        use_collateral=use_collateral,
        min_support_source=min_support_source,
        min_support_ref=min_support_ref,
        has_duplications=has_duplications,
        expand_polytomies=expand_polytomies,
        unrooted=unrooted,
        max_treeko_splits_to_be_artifact=max_treeko_splits_to_be_artifact,
        ref_tree_attr=ref_tree_attr,
        source_tree_attr=source_tree_attr,
    )

    # The comparison result also carries edge collections, e.g.
    #   'common_edges': {('a', 'b', 'c'), ('a', 'b', 'c', 'g')},
    #   'source_edges': {('a', 'b'), ('a', 'b', 'c'), ('a', 'b', 'c', 'g')},
    #   'ref_edges':    {('a', 'c'), ('a', 'b', 'c'), ('a', 'b', 'c', 'g')}
    # Those are deliberately left out; only the keys below are returned.
    reported_keys = (
        'rf',
        'max_rf',
        'ref_edges_in_source',
        'source_edges_in_ref',
        'effective_tree_size',
        'norm_rf',
        'treeko_dist',
        'source_subtrees',
    )
    return {key: compare_result[key] for key in reported_keys}
# Script fragment: writes FBA_tree with format=1 to a hard-coded absolute path.
# NOTE(review): the text after the `#%%` cell marker opens a ''' string that is
# not closed within this fragment. It appears to be disabled code that renames
# GK_tree / JD_tree leaves through the `tr` mapping, writes both trees, and
# compares each against NCBI_tree with unrooted=True -- confirm against the
# full file before re-enabling any of it.
FBA_tree.write(format=1, outfile="/home/acabbia/Documents/Muscle_Model/GSMM-distance/FBA_tree.nw") #%% ''' # dictionary to translate between model_taxonomy.index (GK and JD trees) and NCBI_id (NCBI tree) idx_str = [str(i) for i in list(models_taxonomy.index)] NCBI_str = [str(i) for i in NCBI_ID] tr = dict(zip(idx_str, NCBI_str)) #Annotate GK and JD trees with NCBI id's for leaf in GK_tree: leaf.name = tr[leaf.name] for leaf in JD_tree: leaf.name = tr[leaf.name] # Write GK_tree.write( format=1, outfile="/home/acabbia/Documents/Muscle_Model/GSMM-distance/GK_tree.nw") JD_tree.write( format=1, outfile="/home/acabbia/Documents/Muscle_Model/GSMM-distance/JD_tree.nw") ### Compare trees with reference taxonomy (NCBI) resultGK = GK_tree.compare(NCBI_tree, unrooted=True) resultJD = JD_tree.compare(NCBI_tree, unrooted=True)
# Script fragment (begins inside an enclosing try/loop and ends inside a try
# whose handler is not visible here).
# Visible steps: delete non-leaf descendants of tree2 whose dist is
# <= 1.00000050002909e-06; compare tree1 (source) with tree2 (reference);
# derive true/false positive and false negative counts from the
# common/source/ref edge sets; compute sensitivity and precision.
# NOTE(review): the print that follows `results = tree1.compare(tree2)` sits in
# the try body, i.e. on the success path, yet its message says the calculation
# "failed" -- presumably it should report success; confirm and correct.
# NOTE(review): both handlers are bare `except:` clauses that print and re-raise.
node_tree1.delete() for node_tree2 in tree2.get_descendants(): #print(node2.dist) #if not node2.is_leaf() and round(node2.dist,4) <= 1: if not node_tree2.is_leaf( ) and node_tree2.dist <= 1.00000050002909e-06: node_tree2.delete() print("Trees read successfully.") except: print("Trees couldn't be loaded.") raise # Calculate Robinson-Foulds distance between two trees REF and test try: print("Calculating robinson-foulds distance...") results = tree1.compare(tree2) print("Calculation of robin-foulds distance failed.") except: print("Robinson-foulds distance calculation failed.") raise true_positives = len(results["common_edges"]) false_positives = len(results["source_edges"] - results["ref_edges"]) false_negatives = len(results["ref_edges"] - results["source_edges"]) sensitivity = calculate_sensitivity(true_positives, false_negatives) precision = calculate_precision(true_positives, false_positives) #Create ids dictionary with challenges_ids and sample ids from tree leaves. try: metrics.update(challenges_ids=arguments.challenges_ids)
def main():
    """Compare reconstructed rooted trees with the simulated guest trees.

    For every step index in ``range(0, 150)`` the ``trees_BL`` directory is
    searched for a file whose name ends with ``_<step>.fasta``. The second
    and third dot-separated parts of that file name select the matching
    ``RAxML_nodeLabelledRootedTree.*`` file, which is loaded with
    ``format=1`` and compared against ``guestTrees_step_<step>``.

    The Robinson-Foulds distance (first item returned by
    ``robinson_foulds``) and the ``norm_rf`` value from ``compare`` are
    collected for each step; their mean and variance are printed and a
    histogram of each series is shown.

    All paths are hard-coded absolute paths.
    """
    directory1 = "/Users/williamlin/Desktop/IW/IW/phyloSim-master/trees_BL/"
    directory2 = "/Users/williamlin/Desktop/IW/IW/phyloSim-master/trees_rooted/RAxML_nodeLabelledRootedTree."
    directory3 = "/Users/williamlin/Desktop/IW/IW/phyloSim-master/simulated_guestTrees/"

    # The directory listing does not depend on the step index, so read it
    # once instead of once per step.
    file_names = os.listdir(directory1)

    # Maps step index -> "<part1>.<part2>" of the matching file name. If
    # several files match one step, the last one in listing order wins.
    output = {}
    for i in range(0, 150):
        suffix = "_" + str(i) + ".fasta"
        for fileName in file_names:
            if fileName.endswith(suffix):
                parts = fileName.split(".")
                output[i] = parts[1] + "." + parts[2]

    rf_values = []
    compare = []
    for step, joined in output.items():
        print(directory2 + joined)
        t1 = Tree(directory2 + joined, format=1)
        t2 = Tree(directory3 + "guestTrees_step_" + str(step), format=1)
        rf = t1.robinson_foulds(t2)
        rfN = t1.compare(t2)["norm_rf"]
        rf_values.append(rf[0])
        compare.append(rfN)

    # NOTE(review): the two pairs of messages below use identical labels
    # although the first pair reports raw RF values and the second pair
    # reports normalized RF values; the strings are left unchanged here.
    print("Average Distance: " + str(np.average(rf_values)))
    print("Variance of Distance " + str(np.var(rf_values)))
    print("Average Distance: " + str(np.average(compare)))
    print("Variance of Distance " + str(np.var(compare)))

    plt.hist(rf_values, bins=[0, 20, 40, 60, 80, 100, 120, 140])
    plt.title('Distribution of Robinson-Foulds Distances')
    plt.xlabel('Robinson-Foulds Distances')
    plt.ylabel('Number of Reconstructed Trees')
    plt.xlim((0, 150))
    plt.ylim((0, 60))
    plt.show()

    plt.hist(compare, bins=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
    plt.title('Distribution of Normalized Robinson-Foulds Distances')
    plt.xlabel('Robinson-Foulds Distances')
    plt.ylabel('Number of Reconstructed Trees')
    plt.show()
# Script fragment (the final try block is cut off before its handler).
# Visible steps: for each name in `participants`, build
# "<name>_canonical.nwk" under arguments.benchmark_trees_path and keep the
# paths that exist; for each kept path, load the tree, pass it through
# midpoint_root and collapse_nodes (threshold 1.00000050002909e-06), compare
# tree_test against it and append the resulting "norm_rf" to participant_row.
# NOTE(review): in the handler, `sys.exit(1)` follows `raise`. Depending on the
# original line breaks it is either unreachable (after a bare `raise`) or it
# raises SystemExit in place of the original exception -- confirm the intent.
# NOTE(review): the handler is a bare `except:` clause.
try: print ("Reading public participants trees...") for public_participant in participants: tree_file = public_participant + "_canonical.nwk" part_fullpath = os.path.join(arguments.benchmark_trees_path,tree_file) if os.path.isfile(part_fullpath): tree_files.append(part_fullpath) for participant in tree_files: print("Reading public participant tree :" + participant) tree = Tree(participant) print("Setting root on midpoint...") tree = midpoint_root (tree) print("Collapsing nodes with branch distance = 0...") tree = collapse_nodes (tree, 1.00000050002909e-06 ) results = tree_test.compare(tree) participant_row.append(results["norm_rf"]) print("Public participants trees read and analyzed successfully.") except: print("Public participants trees couldn't be analyzed.") raise sys.exit(1) #Create ids dictionary with challenges_ids and sample ids from tree leaves. try: print("Updating benchmark data with new participant..") for row in benchmark_data: row.append(0) benchmark_data.append(participant_row) data.update(participants=participants)
def rf_dist(t1, t2):
    """Return the ``norm_rf`` value from comparing two trees.

    Args:
        t1: Input for the first ``Tree`` constructor (the tree whose
            ``compare`` method is called).
        t2: Input for the second ``Tree`` constructor (the tree passed to
            ``compare``).

    Returns:
        The ``'norm_rf'`` entry of the comparison result.
    """
    comparison = Tree(t1).compare(Tree(t2))
    return comparison['norm_rf']
# Script fragment (cut off inside the dict literal passed to append()).
# Visible steps, per entry of tree_file_list: read a mask percentage from a
# "mask<digits>" match in the file name (0 when there is no match); load the
# tree with format=1; call add_support_and_subtypes and set_outgroup(outgroup);
# compare ref_tree against it; then, for every common edge whose node in `tree`
# has a `bootstrap` attribute, record both bootstrap values on the matching
# ref_tree node and in shared_edge_support_values.
# NOTE(review): the attribute name `suport_symbol` looks like a misspelling of
# `support_symbol` -- check every reader of this attribute before renaming.
shared_edge_support_values = [] ref_only_edge_support_values = [] for tree_file in tree_file_list: pct_mask_match = re.search(r"mask(\d+)", tree_file) if pct_mask_match is not None: pct_mask = int(pct_mask_match.groups()[0]) else: pct_mask = 0 print("Adding {} bootstrap values to tree from {}...".format( tree_file, ref_tree_file)) pct_masks.append(pct_mask) tree = Tree(tree_file, format=1) add_support_and_subtypes(tree) tree.set_outgroup(outgroup) comparison = ref_tree.compare(tree) print("{}/{} common/total edges, normRF {:0.2f} for {} vs {}".format( len(comparison["common_edges"]), len(comparison["source_edges"]), comparison["norm_rf"], tree_file, ref_tree_file)) for common_edge in comparison["common_edges"]: tree_node = tree.get_common_ancestor(common_edge) if hasattr(tree_node, "bootstrap"): ref_tree_node = ref_tree.get_common_ancestor(common_edge) ref_tree_node.barchart_values[pct_mask] = tree_node.bootstrap ref_tree_node.barchart_values[0] = ref_tree_node.bootstrap ref_tree_node.suport_symbol = get_support_symbol( ref_tree_node.bootstrap, tree_node.bootstrap) shared_edge_support_values.append({ ref_bs_label: ref_tree_node.bootstrap, tree_bs_label: tree_node.bootstrap, "Percent Mask": pct_mask,
def compare_tree(tree1, tree2):
    """Return the ``rf`` value of an unrooted comparison of two trees.

    Args:
        tree1: Tree passed as the first (source) argument of
            ``Tree.compare``.
        tree2: Tree passed as the second (reference) argument.

    Returns:
        The ``'rf'`` entry of the comparison result.
    """
    comparison = Tree.compare(tree1, tree2, unrooted=True)
    return comparison['rf']