def main():
    """Display a Newick tree, compare it with a second tree, and draw a clade.

    Steps, in order:
      1. Read the first tree found in ``PATH_EXAMPLE`` with Bio.Phylo, print
         whatever ``tree_info`` returns for it, and draw it.
      2. Load ``PATH_EXAMPLE`` and ``PATH_BIF`` with DendroPy under one shared
         taxon namespace and print their Robinson-Foulds and Euclidean
         distances.
      3. Look up the common ancestor of the clades named "C" and "D" in the
         Phylo tree, set its color to blue, print it, and draw it.
    """
    # Phylo.parse yields trees one at a time; only the first is used.
    tree_object = next(Phylo.parse(PATH_EXAMPLE, 'newick'))

    print(tree_info(tree_object))
    Phylo.draw(tree_object)

    # Both trees share one namespace so their taxa refer to the same objects.
    shared_taxa = dendropy.TaxonNamespace()
    tre_one, tre_two = (
        Tree.get_from_path(tree_path, 'newick', taxon_namespace=shared_taxa)
        for tree_path in (PATH_EXAMPLE, PATH_BIF)
    )
    euclidean_distance = treecompare.euclidean_distance(tre_one, tre_two)
    robinson_distance = treecompare.robinson_foulds_distance(tre_one, tre_two)
    print("Robinson Foulds distance: ", robinson_distance)
    print("Euclidean distance: ", euclidean_distance)

    # Highlight the clade rooted at the common ancestor of "C" and "D".
    ancestor_clade = tree_object.common_ancestor({"name": "C"}, {"name": "D"})
    ancestor_clade.color = "blue"
    print("COMMON ANCESTOR: ", ancestor_clade)
    Phylo.draw(ancestor_clade)
def _collapse_short_edges(tree, threshold=0.0000000001):
    """Collapse every edge of ``tree`` whose length is below ``threshold``."""
    for edge in tree.postorder_edge_iter():
        # An edge with no length is treated as short. This matches the old
        # Python 2 ordering (None < float) and avoids a TypeError on Python 3.
        if edge.length is None or edge.length < threshold:
            edge.collapse()


def collapse_short_analyses(dirstub, num):
    """Compare simulated and inferred trees after collapsing near-zero edges.

    For each run directory ``<dirstub>1`` .. ``<dirstub><num>`` the function
    loads ``scaledtree.tre`` and the inferred tree, collapses edges shorter
    than 1e-10 in both, and records the Euclidean and unweighted
    Robinson-Foulds distances between them. The mean of each metric is
    printed after all runs.

    Args:
        dirstub: Path prefix of the run directories; the 1-based run index is
            appended to it.
        num: Number of runs to process.

    Returns:
        dict mapping 'Euclidean' and 'RF' to the list of per-run distances.

    Raises:
        ZeroDivisionError: if ``num`` is 0, because the mean of an empty list
            is computed.
    """
    inf_dict = {'Euclidean': [], 'RF': []}
    for i in range(1, num + 1):
        diri = "{}{}".format(dirstub, i)
        tns = dendropy.TaxonNamespace()
        inputtree = dendropy.Tree.get_from_path(
            "{}/scaledtree.tre".format(diri),
            schema="newick",
            taxon_namespace=tns)
        _collapse_short_edges(inputtree)

        # The original body used ``trestr`` without ever defining it. The
        # source below mirrors how perform_analyses() obtains the inferred
        # tree string.
        # NOTE(review): assumes the RAxML output of perform_analyses() already
        # exists in each run directory -- confirm.
        with open("{}/RAxML_bestTree.val".format(diri)) as tree_file:
            trestr = tree_file.readline().replace('sim_', '')
        inferred = dendropy.Tree.get_from_string(trestr,
                                                 schema="newick",
                                                 taxon_namespace=tns)
        # The original iterated over the undefined name ``inferredtree``.
        _collapse_short_edges(inferred)

        inputtree.encode_bipartitions()
        inferred.encode_bipartitions()
        inf_dict['Euclidean'].append(
            treecompare.euclidean_distance(inputtree, inferred))
        inf_dict['RF'].append(
            treecompare.unweighted_robinson_foulds_distance(
                inputtree, inferred))

    for key in inf_dict:
        mean = sum(inf_dict[key]) / len(inf_dict[key])
        print(key)
        print(mean)
    return (inf_dict)


#perform_sims(dirstub = "validation/short_fix/run", refloc = "example/short_ref.fasta")
#perform_analyses("validation/short_fix/run")
def calcDistance(self):
    """Compute distances between the two selected trees and show two of them.

    Does nothing unless both ``self.path1`` and ``self.path2`` are non-empty.
    Each file's extension (without the dot) is passed to DendroPy as the
    schema. The loaded trees and all four distance values are stored on
    ``self``; only the Euclidean and Robinson-Foulds values are written to
    the ``res1`` / ``res2`` widgets.
    """
    if self.path1 == '' or self.path2 == '':
        return

    # The extension without its leading dot doubles as the schema name.
    self.fileEx1 = os.path.splitext(self.path1)[1][1:]
    self.fileEx2 = os.path.splitext(self.path2)[1][1:]

    shared_taxa = dendropy.TaxonNamespace()
    self.tree1 = dendropy.Tree.get_from_path(
        self.path1, self.fileEx1, taxon_namespace=shared_taxa)
    self.tree2 = dendropy.Tree.get_from_path(
        self.path2, self.fileEx2, taxon_namespace=shared_taxa)

    self.tree1.encode_bipartitions()
    self.tree2.encode_bipartitions()
    first, second = self.tree1, self.tree2
    print(treecompare.false_positives_and_negatives(first, second))

    # Compute the distances.
    self.symDist = treecompare.symmetric_difference(first, second)
    self.fpnDist = treecompare.false_positives_and_negatives(first, second)
    self.eucDist = treecompare.euclidean_distance(first, second)
    self.rfDist = treecompare.robinson_foulds_distance(first, second)

    # Show the results.
    self.res1.setText(str(self.eucDist))  # Euclidean distance
    self.res2.setText(str(self.rfDist))  # Robinson-Foulds distance
def euclidean_distance(tree1, tree2, edge_length_attr="length", value_type=float):
    """Deprecated alias for ``treecompare.euclidean_distance``.

    Emits a DendroPy deprecation warning, then forwards to the new function.
    ``edge_length_attr`` is passed on as ``edge_weight_attr``.
    """
    warning_fields = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treecalc.euclidean_distance()' function has moved to 'dendropy.calculate.treecompare.euclidean_distance()'.",
        "old_construct": "from dendropy import treecalc\nd = treecalc.euclidean_distance(...)",
        "new_construct": "from dendropy.calculate import treecompare\nd = treecompare.euclidean_distance(...)",
    }
    deprecate.dendropy_deprecation_warning(**warning_fields)
    forwarded = treecompare.euclidean_distance(
        tree1=tree1,
        tree2=tree2,
        edge_weight_attr=edge_length_attr,
        value_type=value_type,
    )
    return forwarded
def euclidean_distance(tree1, tree2, edge_length_attr="length", value_type=float):
    """Warn that this entry point is deprecated, then delegate.

    The call is forwarded to ``treecompare.euclidean_distance`` with
    ``edge_length_attr`` supplied under its new keyword ``edge_weight_attr``.
    """
    moved_notice = "Deprecated since DendroPy 4: The 'dendropy.treecalc.euclidean_distance()' function has moved to 'dendropy.calculate.treecompare.euclidean_distance()'."
    old_usage = "from dendropy import treecalc\nd = treecalc.euclidean_distance(...)"
    new_usage = "from dendropy.calculate import treecompare\nd = treecompare.euclidean_distance(...)"
    deprecate.dendropy_deprecation_warning(
        preamble=moved_notice,
        old_construct=old_usage,
        new_construct=new_usage)

    delegate_kwargs = dict(
        tree1=tree1,
        tree2=tree2,
        edge_weight_attr=edge_length_attr,
        value_type=value_type)
    return treecompare.euclidean_distance(**delegate_kwargs)
def compute_dist_matrix(self, dendropy=False, weighted=False, resolve=True, overwrite=False):
    """Return the pairwise tree-distance matrix, caching it in the HDF5 file.

    The matrix is stored as the array ``/dist_matrix`` in ``self.h5name``.
    It is recomputed when that node is missing or ``overwrite`` is true;
    otherwise the stored array is read back.

    Args:
        dendropy: If true, distances are computed with DendroPy; otherwise
            ``self.compare_trees`` is used for every pair.
        weighted: DendroPy mode only. If true use
            ``treecompare.euclidean_distance``, else
            ``treecompare.symmetric_difference``.
        resolve: Unused by this method; kept for interface compatibility.
        overwrite: Recompute and replace any stored matrix.

    Returns:
        A square, symmetric numpy array with one row and column per tree
        returned by ``self.grab_trees``.

    NOTE(review): the previous body ran ``import dendropy`` as its first
    statement, which rebound the ``dendropy`` parameter to the module. The
    flag was therefore always truthy and the ``compare_trees`` branch could
    never run. Callers relying on the default silently using DendroPy must
    now pass ``dendropy=True``.
    """
    use_dendropy = bool(dendropy)

    db = tables.open_file(self.h5name, mode="a")
    try:
        trees, _intvals = self.grab_trees(db)
        has_cached = "/dist_matrix" in db
        if overwrite or not has_cached:
            n_trees = len(trees)
            D = np.zeros((n_trees, n_trees))
            if not use_dendropy:
                for n in range(n_trees - 1):
                    for nn in range(n + 1, n_trees):
                        D[n, nn] = self.compare_trees(trees[n], trees[nn])
                        D[nn, n] = D[n, nn]
            else:
                # Imported under an alias so the boolean parameter is not
                # shadowed, and only when DendroPy is actually requested.
                import dendropy as dendropy_lib
                from dendropy.calculate import treecompare

                T = dendropy_lib.TreeList([
                    dendropy_lib.Tree.get(data=t.write(), schema='newick')
                    for t in trees
                ])
                if weighted:
                    tree_distance = treecompare.euclidean_distance
                else:
                    tree_distance = treecompare.symmetric_difference
                for n in range(n_trees - 1):
                    for nn in range(n + 1, n_trees):
                        w_rf = tree_distance(T[n], T[nn])
                        D[n, nn] = w_rf
                        D[nn, n] = w_rf
            # Remove a stale matrix only if one exists; remove_node is
            # PyTables' documented call for deleting a node.
            if has_cached:
                db.remove_node("/", "dist_matrix")
            db.create_array("/", "dist_matrix", D)
        else:
            D = np.array([
                np.array(row)
                for row in db.get_node("/dist_matrix", classname="Array")
            ])
        db.flush()
    finally:
        # Always release the HDF5 handle, even if a comparison fails.
        db.close()
    return D
def distance(file_path, file_format, file_path2):
    """Print four DendroPy comparison metrics for two tree files.

    Both files are read with the same ``file_format`` schema and a shared
    taxon namespace. All metrics are computed before anything is printed.
    """
    shared_taxa = dendropy.TaxonNamespace()
    tree1, tree2 = (
        dendropy.Tree.get_from_path(tree_path,
                                    file_format,
                                    taxon_namespace=shared_taxa)
        for tree_path in (file_path, file_path2)
    )

    sym_diff = treecompare.symmetric_difference(tree1, tree2)
    euc_dis = treecompare.euclidean_distance(tree1, tree2)
    false_pos = treecompare.false_positives_and_negatives(tree1, tree2)
    robinson_dis = treecompare.robinson_foulds_distance(tree1, tree2)

    report = (
        ("Symetric difference: ", sym_diff),
        ("Robinson Foulds distance: ", robinson_dis),
        ("False positives and negatives: ", false_pos),
        ("Euclidean distance: ", euc_dis),
    )
    for label, value in report:
        print(label, value)
def main(tree_path_1, tree_path_2):
    """Load two trees via ``read_tree`` and print leaf counts and distances.

    Both trees are read into the same taxon namespace and have their
    bipartitions encoded before comparison. Each value is computed at the
    point where it is printed.
    """
    shared_taxa = dendropy.TaxonNamespace()
    tree1 = read_tree(tree_path_1, shared_taxa)
    tree2 = read_tree(tree_path_2, shared_taxa)
    for loaded in (tree1, tree2):
        loaded.encode_bipartitions()

    leaf_count_1 = len(tree1.leaf_nodes())
    print("Number of leaves in tree 1: ", leaf_count_1)
    leaf_count_2 = len(tree2.leaf_nodes())
    print("Number of leaves in tree 2: ", leaf_count_2)

    unweighted = treecompare.symmetric_difference(tree1, tree2)
    print("Unweighted Robinson-Fould distance: ", unweighted)
    weighted = treecompare.weighted_robinson_foulds_distance(tree1, tree2)
    print("Weighted Robinson-Fould distance: ", weighted)
    euclid = treecompare.euclidean_distance(tree1, tree2)
    print("Euclidean distance: ", euclid)
def calcDistance(self):
    """Load both selected trees, compute their distances, and update the UI.

    Returns immediately when either ``self.path1`` or ``self.path2`` is the
    empty string. Otherwise the trees are parsed using each file's extension
    as the schema, and four comparison values are stored on ``self``. The
    Euclidean and Robinson-Foulds values are shown in ``res1`` and ``res2``.
    """
    both_paths_set = self.path1 != '' and self.path2 != ''
    if not both_paths_set:
        return

    # Schema name is the file extension with the leading dot stripped.
    extension1 = os.path.splitext(self.path1)[1]
    self.fileEx1 = extension1[1:]
    extension2 = os.path.splitext(self.path2)[1]
    self.fileEx2 = extension2[1:]

    namespace = dendropy.TaxonNamespace()
    self.tree1 = dendropy.Tree.get_from_path(self.path1,
                                             self.fileEx1,
                                             taxon_namespace=namespace)
    self.tree2 = dendropy.Tree.get_from_path(self.path2,
                                             self.fileEx2,
                                             taxon_namespace=namespace)
    self.tree1.encode_bipartitions()
    self.tree2.encode_bipartitions()

    pair = (self.tree1, self.tree2)
    print(treecompare.false_positives_and_negatives(*pair))

    # Compute the distances.
    self.symDist = treecompare.symmetric_difference(*pair)
    self.fpnDist = treecompare.false_positives_and_negatives(*pair)
    self.eucDist = treecompare.euclidean_distance(*pair)
    self.rfDist = treecompare.robinson_foulds_distance(*pair)

    # Show the results.
    euclidean_text = str(self.eucDist)
    self.res1.setText(euclidean_text)
    robinson_text = str(self.rfDist)
    self.res2.setText(robinson_text)
def calculateDistance(self):
    """Compute four tree distances for the selected files and display them.

    Nothing happens unless both ``self.path1`` and ``self.path2`` are
    non-empty. The trees are read with DendroPy, using each file's extension
    (dot removed) as the schema. The Euclidean, Robinson-Foulds, symmetric
    difference, and false positive/negative results are stored on ``self``
    and written to ``dist1Value`` .. ``dist4Value`` in that order.
    """
    if self.path1 == '' or self.path2 == '':
        return

    # Derive the schema names from the file extensions.
    self.fileExtension1 = os.path.splitext(self.path1)[1][1:]
    self.fileExtension2 = os.path.splitext(self.path2)[1][1:]

    # Parse both files into one shared taxon namespace.
    shared_taxa = dendropy.TaxonNamespace()
    self.tree1 = dendropy.Tree.get_from_path(
        self.path1, self.fileExtension1, taxon_namespace=shared_taxa)
    self.tree2 = dendropy.Tree.get_from_path(
        self.path2, self.fileExtension2, taxon_namespace=shared_taxa)
    self.tree1.encode_bipartitions()
    self.tree2.encode_bipartitions()
    print(treecompare.false_positives_and_negatives(self.tree1, self.tree2))

    # Calculate the distances.
    left, right = self.tree1, self.tree2
    self.symDist = treecompare.symmetric_difference(left, right)
    self.fpnDist = treecompare.false_positives_and_negatives(left, right)
    self.eucDist = treecompare.euclidean_distance(left, right)
    self.rfDist = treecompare.robinson_foulds_distance(left, right)

    # Show the distances.
    self.dist1Value.setText(str(self.eucDist))
    self.dist2Value.setText(str(self.rfDist))
    self.dist3Value.setText(str(self.symDist))
    self.dist4Value.setText(str(self.fpnDist))
import dendropy
from dendropy.calculate import treecompare

# Two Newick trees over the same six taxa. s2 differs from s1 only in the
# t5 edge length and in the length of the edge above the (t2, t1) clade.
s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

# Both trees are parsed into the same namespace so they can be compared.
tns = dendropy.TaxonNamespace()
tree1, tree2 = (
    dendropy.Tree.get(data=newick, schema='newick', taxon_namespace=tns)
    for newick in (s1, s2)
)

# Expected output: Euclidean distance = 2.22326363775
print(treecompare.euclidean_distance(tree1, tree2))
def euclidean(self, tree1, tree2, taxa_list):
    """Return the Euclidean distance between two trees.

    The inputs are first passed through ``self._read_trees`` together with
    ``taxa_list``; the two trees it returns are then compared.
    """
    loaded_first, loaded_second = self._read_trees(tree1, tree2, taxa_list)
    result = treecompare.euclidean_distance(loaded_first, loaded_second)
    return result
# Compare each inferred "mcc" tree with its true tree and write one
# tab-separated row per tree to ALL.euclidean.unwt.rfdist.
#
# usage: <script> <true tree directory> <inferred tree directory> <discrete or continuous>
if len(sys.argv) < 4:
    # Parenthesised single-argument form runs on both Python 2 and Python 3.
    print("usage: " + sys.argv[0] +
          " <true tree directory> <inferred tree directory> <discrete or continuous>")
    sys.exit(0)

ttdir = sys.argv[1] + "/"
itdir = sys.argv[2] + "/"
trait_cat = sys.argv[3]

# The context manager guarantees the table is flushed and closed, including
# when a tree fails to parse part-way through the directory.
with open("ALL.euclidean.unwt.rfdist", "w") as rfout:
    rfout.write("trait_type\tunweighted_rf\tweighted_rf\teuclidean_dist\n")
    for j in os.listdir(itdir):
        if "mcc" not in j:
            continue
        # The first dot-separated field of the inferred file name selects
        # the matching true tree "dated.<num>.tre".
        spls = j.split(".")
        num = spls[0]
        tns = dendropy.TaxonNamespace()
        tt = dendropy.Tree.get_from_path(ttdir + "dated." + str(num) + ".tre",
                                         "newick",
                                         taxon_namespace=tns)
        it = dendropy.Tree.get_from_path(itdir + "/" + j,
                                         "nexus",
                                         taxon_namespace=tns)
        tt.encode_bipartitions()
        it.encode_bipartitions()
        row = [
            trait_cat,
            str(treecompare.symmetric_difference(tt, it)),
            str(treecompare.weighted_robinson_foulds_distance(tt, it)),
            str(treecompare.euclidean_distance(tt, it)),
        ]
        rfout.write("\t".join(row) + "\n")
def get_figure(software1, software2, output_path):
    """
    Generate comparison heatmaps of trees produced by two pieces of software.

    For every pair of segments (9 x 9) the tree from ``software1`` is compared
    with the tree from ``software2`` using weighted Robinson-Foulds,
    unweighted Robinson-Foulds and Euclidean distance. One heatmap is drawn
    per metric.

    :param software1: name of the software (passed to ``get_file``)
    :param software2: name of the software (passed to ``get_file``)
    :param output_path: path to where the output figure will be saved
    :return: NA
    """
    # Renamed from ``map`` so the builtin is no longer shadowed.
    segment_names = {
        1: "PB2",
        2: "PB1",
        3: "PA",
        4: "HA",
        5: "NP",
        6: "NA",
        7: "MP",
        8: "NS",
        9: "concatenated"
    }
    wRF = []
    uwRF = []
    eD = []
    for num1 in range(1, 10):
        w = []
        u = []
        e = []
        for num2 in range(1, 10):
            groundTruthFile = get_file(software1, segment_names[num1])
            estimationFile = get_file(software2, segment_names[num2])
            tns = dendropy.TaxonNamespace()
            # Context managers close each handle as soon as the tree is
            # parsed; the original left 162 files open.
            with open(groundTruthFile, 'r') as gt_handle:
                gtTree = dendropy.Tree.get(file=gt_handle,
                                           schema='newick',
                                           taxon_namespace=tns)
            with open(estimationFile, 'r') as est_handle:
                estimateTree = dendropy.Tree.get(file=est_handle,
                                                 schema='newick',
                                                 taxon_namespace=tns)
            w.append(
                treecompare.weighted_robinson_foulds_distance(
                    gtTree, estimateTree))
            u.append(
                treecompare.unweighted_robinson_foulds_distance(
                    gtTree, estimateTree))
            e.append(treecompare.euclidean_distance(gtTree, estimateTree))
        wRF.append(w)
        uwRF.append(u)
        eD.append(e)

    metric_map = {
        "Weighted Robinson Foulds": np.array(wRF),
        "Unweighted Robinson Foulds": np.array(uwRF),
        "Euclidean Distances": np.array(eD)
    }
    for metric in [
            "Weighted Robinson Foulds", "Unweighted Robinson Foulds",
            "Euclidean Distances"
    ]:
        fig, ax = plt.subplots()
        im, _cbar = heatmap(metric_map[metric],
                            software1,
                            software2,
                            ax=ax,
                            cmap="YlGn",
                            cbarlabel="Distance")
        annotate_heatmap(im, valfmt="{x:.2f}")
        title = "%s on %s and %s Tree" % (metric, software1.capitalize(),
                                          software2.capitalize())
        ax.set_title(title, pad=-330)
        fig.tight_layout()
        # NOTE(review): all three metrics are saved to the same output_path,
        # so only the last one ("Euclidean Distances") survives on disk.
        # Left unchanged because callers may expect exactly this file --
        # confirm whether per-metric files are wanted.
        plt.savefig(output_path)
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
import dendropy
from dendropy.calculate import treecompare

# Reference tree, and a variant with two edge lengths changed.
s1 = "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);"
s2 = "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"

# One namespace for both trees, so the comparison sees the same taxa.
tns = dendropy.TaxonNamespace()
tree1 = dendropy.Tree.get(data=s1, schema='newick', taxon_namespace=tns)
tree2 = dendropy.Tree.get(data=s2, schema='newick', taxon_namespace=tns)

# Expected output: Euclidean distance = 2.22326363775
distance_between = treecompare.euclidean_distance(tree1, tree2)
print(distance_between)
def perform_analyses(dirstub, num):
    """Run RAxML in each simulation directory and collect summary statistics.

    For each run directory ``<dirstub>1`` .. ``<dirstub><num>`` the function:
      * runs ``raxmlHPC`` on ``snpma.fasta`` from inside that directory,
      * compares the first line of ``RAxML_bestTree.val`` (with ``sim_``
        removed) against ``scaledtree.tre`` using Euclidean and unweighted
        Robinson-Foulds distance,
      * greps base frequencies and substitution rates out of
        ``RAxML_info.val``,
      * records the first field of ``wc`` output for ``snplist.txt`` and
        ``mutsites.txt``.
    The mean of every collected series is printed at the end.

    Args:
        dirstub: Path prefix of the run directories; the 1-based run index is
            appended to it.
        num: Number of runs to process.

    Returns:
        dict mapping each statistic name to its list of per-run values.

    Raises:
        subprocess.CalledProcessError: if a ``grep`` or ``wc`` call fails.
        ZeroDivisionError: if ``num`` is 0, because the mean of an empty list
            is computed.
    """
    inf_dict = {
        'Euclidean': [],
        'RF': [],
        'MutsSim': [],
        'MutsCalled': [],
        'freqA': [],
        'freqC': [],
        'freqG': [],
        'freqT': [],
        'ac': [],
        'ag': [],
        'at': [],
        'cg': [],
        'ct': [],
        'gt': []
    }
    freq_keys = ('freqA', 'freqC', 'freqG', 'freqT')
    rate_keys = ('ac', 'ag', 'at', 'cg', 'ct', 'gt')

    for i in range(1, num + 1):
        diri = "{}{}".format(dirstub, i)
        #diri = "{}{}/altref".format(dirstub,i)
        info_path = "{}/RAxML_info.val".format(diri)

        cwd = os.getcwd()
        os.chdir(diri)
        try:
            os.system(
                "raxmlHPC -m ASC_GTRGAMMA --asc-corr=lewis -s snpma.fasta -p 1 -n val"
            )
            # os.system("raxmlHPC -m GTRGAMMA -s snpma.fasta -p 1 -n val_noasc")
        finally:
            # Always return to the starting directory so the relative paths
            # of later runs still resolve.
            os.chdir(cwd)
        # Function-call form runs on both Python 2 and Python 3.
        print(diri)

        with open("{}/RAxML_bestTree.val".format(diri)) as tree_file:
            trestr = tree_file.readline().replace('sim_', '')
        # trestr =open("{}/RAxML_bestTree.val_noasc".format(diri)).readline().replace('sim_','')

        tns = dendropy.TaxonNamespace()
        inputtree = dendropy.Tree.get_from_path(
            "{}/scaledtree.tre".format(diri),
            schema="newick",
            taxon_namespace=tns)
        inferred = dendropy.Tree.get_from_string(trestr,
                                                 schema="newick",
                                                 taxon_namespace=tns)
        inputtree.encode_bipartitions()
        inferred.encode_bipartitions()
        inf_dict['Euclidean'].append(
            treecompare.euclidean_distance(inputtree, inferred))
        inf_dict['RF'].append(
            treecompare.unweighted_robinson_foulds_distance(
                inputtree, inferred))

        # The first two whitespace-separated tokens of the matching line are
        # skipped; the rest are parsed as floats.
        freqs = subprocess.check_output(
            ["grep", "Base frequencies:", info_path]).split()
        # freqs = subprocess.check_output(["grep", "Base frequencies:","{}/RAxML_info.val_noasc".format(diri)]).split()
        print(freqs)
        freqs = [float(val) for val in freqs[2:]]
        for idx, key in enumerate(freq_keys):
            inf_dict[key].append(freqs[idx])

        # The first nine tokens of the matching line are skipped; the rest
        # are parsed as floats.
        trans = subprocess.check_output(
            ["grep", "ac ag at cg ct gt", info_path]).split()
        # trans = subprocess.check_output(["grep", "ac ag at cg ct gt", "{}/RAxML_info.val_noasc".format(diri)]).split()
        trans = [float(val) for val in trans[9:]]
        for idx, key in enumerate(rate_keys):
            inf_dict[key].append(trans[idx])

        # Only the first field of the ``wc`` output is kept.
        inf_dict['MutsCalled'].append(
            float(
                subprocess.check_output(
                    ["wc", "{}/snplist.txt".format(diri)]).split()[0]))
        inf_dict['MutsSim'].append(
            float(
                subprocess.check_output(
                    ["wc", "{}/mutsites.txt".format(diri)]).split()[0]))

    for key in inf_dict:
        mean = sum(inf_dict[key]) / len(inf_dict[key])
        print(key)
        print(mean)
    return (inf_dict)