def read_phyloxml(input_file):
    """
    Load the phylogenies stored in a PhyloXML file.

    :param str input_file: path to the PhyloXML file
    :return: the value of ``Phyloxml.get_phylogeny()`` for the parsed
        document, i.e. the collection of phylogenies read from the file
        (not a single tree)
    """
    # Imported lazily so that ete2 is only required when this is called.
    from ete2 import Phyloxml

    phyloxml_doc = Phyloxml()
    phyloxml_doc.build_from_file(input_file)
    phylogenies = phyloxml_doc.get_phylogeny()
    return phylogenies
def getRelevantEdges(adjGraph, t1, t2):
    """
    Restrict an adjacency graph to the non-lost leaves of two phylogenies.

    :param adjGraph: graph whose ``edges()`` yields ``(u, v)`` node pairs
    :param t1: path to the PhyloXML file holding the first tree
    :param t2: path to the PhyloXML file holding the second tree
    :return: ``(newGraph, crossValidationEdges)`` where ``newGraph`` is an
        ``nx.Graph`` containing every non-lost leaf of both trees plus the
        edges of ``adjGraph`` whose two endpoints are such leaves, and
        ``crossValidationEdges`` is the list of edges of ``adjGraph`` that
        join a leaf of one tree to a leaf of the other
    """
    pxml = Phyloxml()
    pxml.build_from_file(t1)
    pxml.build_from_file(t2)

    # Leaves whose name contains 'LOST' are dropped.
    la = [name for name in pxml.phylogeny[0].get_leaf_names()
          if 'LOST' not in name]
    lb = [name for name in pxml.phylogeny[1].get_leaf_names()
          if 'LOST' not in name]

    # Sets give constant-time membership tests for the edge filters below;
    # the lists are kept so nodes are added in leaf order.
    in_a = set(la)
    in_b = set(lb)
    in_either = in_a | in_b

    edges = list(adjGraph.edges())
    crossValidationEdges = [
        e for e in edges
        if (e[0] in in_a and e[1] in in_b) or (e[1] in in_a and e[0] in in_b)
    ]
    relevantEdges = [
        e for e in edges
        if e[0] in in_either and e[1] in in_either
    ]

    newGraph = nx.Graph()
    newGraph.add_nodes_from(la + lb)
    newGraph.add_edges_from(relevantEdges)
    return newGraph, crossValidationEdges
def getTreeFromPhyloxml(xml, saveToFile="default.xml", delFile=True):
    """
    Read phylogeny trees from a phyloxml string.

    The string is first written to ``saveToFile`` because the parser is
    driven through ``build_from_file``.

    :param xml: phyloxml document as a string
    :param saveToFile: path of the intermediate file (overwritten)
    :param delFile: remove the intermediate file afterwards; the file is
        removed even when parsing or conversion fails
    :return: a single TreeClass object when the document holds exactly one
        phylogeny, otherwise a list of TreeClass objects
    """
    project = Phyloxml()
    with open(saveToFile, "w+") as fo:
        fo.write(xml)

    try:
        project.build_from_file(saveToFile)
        treeList = [TreeClass.import_from_PhyloxmlTree(tree)
                    for tree in project.get_phylogeny()]
    finally:
        # Do not leave the scratch file behind when parsing raises.
        if delFile:
            os.remove(saveToFile)

    if len(treeList) == 1:
        return treeList[0]
    return treeList
def readScoreFile(fname, noself, randomize=False):
    """
    Rank the held-out target edge among the candidate edges scored in a file.

    The file name encodes the inputs: the base name starts with
    ``<orthoGroup1>@<orthoGroup2>@`` and the second-to-last ``@``-separated
    field of ``fname`` is ``<protein1>#<protein2>``, the target edge.

    Relies on the module-level names ``ExtantNetwork``, ``Phyloxml``,
    ``ScoredEdge`` and ``combinationsWithSelf``.

    :param fname: path of the score file; each line is whitespace separated
        with the two protein names in columns 0 and 1 and the score in
        column 3
    :param noself: when true, pairs with identical endpoints are never
        accepted as candidates
    :param randomize: when true, every accepted score is replaced by a
        uniform random draw from [0, 1]
    :return: ``(None, None)`` when either tree file is missing; otherwise
        ``(rank, n)`` where ``rank`` is the 0-based position of the target
        edge in descending score order and ``n`` is
        ``float(number of scored edges - 1)``
    :raises ValueError: if the target edge received no score
    """
    import random

    # The strings naming the proteins whose interaction was removed in
    # this input, upper-cased and turned into an edge.
    tstring = fname.split('@')[-2].split("#")
    tedge = (tstring[0].upper(), tstring[1].upper())

    # Locate the phylogenies of the two orthology groups.
    treeDir = "../../Parana2Data/HerpesPPIs/trees/rearranged"
    baseFile = fname.split('/')[-1]
    orthoGroup1 = baseFile.split('@')[0]
    orthoGroup2 = baseFile.split('@')[1]
    t1 = "{0}/{1}.xml.rooting.0.ntg.reconciled.0.ntg.rearrange.0.ntg".format(
        treeDir, orthoGroup1)
    t2 = "{0}/{1}.xml.rooting.0.ntg.reconciled.0.ntg.rearrange.0.ntg".format(
        treeDir, orthoGroup2)
    if not (os.path.exists(t1) and os.path.exists(t2)):
        return None, None

    # Upper-cased leaf names of both trees, without the 'LOST' leaves.
    pxml = Phyloxml()
    pxml.build_from_file(t1)
    pxml.build_from_file(t2)
    la = [name.upper() for name in pxml.phylogeny[0].get_leaf_names()
          if 'LOST' not in name.upper()]
    lb = [name.upper() for name in pxml.phylogeny[1].get_leaf_names()
          if 'LOST' not in name.upper()]

    # All endpoint pairs within and between the two groups.
    if orthoGroup1 == orthoGroup2:
        possibleEndpoints = combinationsWithSelf(la)
    else:
        possibleEndpoints = (combinationsWithSelf(la)
                             + list(itertools.product(la, lb))
                             + combinationsWithSelf(lb))

    # Only pairs whose names share the same last '_'-separated field (the
    # species, per the original comment) can be an edge.
    allPossibleEdges = [
        e for e in possibleEndpoints
        if e[0].split('_')[-1] == e[1].split('_')[-1]
    ]

    # Candidate edges: possible edges absent from the input network, plus
    # the target edge. Kept as a set because it is only used for membership.
    nonPresentEdges = set(
        e for e in allPossibleEdges
        if not ExtantNetwork.has_edge(e[0], e[1])
    )
    nonPresentEdges.add(tedge)

    def inPotentialEdges(u, v):
        # Is u,v (in either orientation) one of the candidate edges?
        contains = (u, v) in nonPresentEdges or (v, u) in nonPresentEdges
        if noself:
            return u != v and contains
        return contains

    def isEdge(se, p1, p2):
        # Does the scored edge join p1 and p2, in either orientation?
        return ((se.p1 == p1 and se.p2 == p2) or
                (se.p1 == p2 and se.p2 == p1))

    scoredEdges = []
    nonEdgesWithProb = set(nonPresentEdges)
    with open(fname, 'rb') as ifile:
        for l in ifile:
            toks = l.rstrip().split()
            p1 = toks[0].upper()
            p2 = toks[1].upper()
            s = float(toks[3])
            if inPotentialEdges(p1, p2):
                if randomize:
                    s = random.uniform(0.0, 1.0)
                scoredEdges.append(ScoredEdge(p1, p2, s))
                nonEdgesWithProb.discard((p1, p2))
                nonEdgesWithProb.discard((p2, p1))

    # NOTE(review): nonEdgesWithProb is always a subset of nonPresentEdges,
    # so this difference is empty and the loop never runs. It looks like the
    # intent was to give unscored candidates a default score; left as is
    # because changing it would alter every returned rank -- confirm.
    for u, v in (nonEdgesWithProb - nonPresentEdges):
        s = random.uniform(0.0, 1.0) if randomize else 0.0
        scoredEdges.append(ScoredEdge(u, v, s))

    # Shuffle before the (stable) sort so that ties are ordered at random.
    random.shuffle(scoredEdges)
    scoredEdges = list(enumerate(
        sorted(scoredEdges, key=lambda se: se.score, reverse=True)))

    res = [x for x in scoredEdges if isEdge(x[1], tedge[0], tedge[1])]
    if len(res) > 0:
        print(res)
        return (res[0][0], float(len(scoredEdges) - 1))

    # The original raised a bare string, which is itself a TypeError on
    # Python >= 2.6; raise a real exception that names the problem.
    raise ValueError(
        "target edge {0} has no score in {1}".format(tedge, fname))
def main (): global options, args if options.verbose: print time.asctime(), if options.verbose: print "load and parse newick file" # TODO: read newick file tree = Phylo.read(args[0],'newick') # TODO: convert newick to phyloxml treeXML = StringIO() Phylo.write(tree,treeXML,'phyloxml') # TODO: read phyloxml as ete object hPhylotree = Phyloxml() with tempinput(treeXML.getvalue()) as tempfilename: hPhylotree.build_from_file(tempfilename) # TODO: get the tree tree2 = hPhylotree.get_phylogeny()[0] if options.verbose: print time.asctime(), if options.verbose: print "load and parse taxonomy file" # TODO: read taxonomy file tax = get_taxonomy(args[1]) # TODO: refine taxonomy annotation of internal node tree2 = add_taxonomy_for_internal_branch(tree2,tax) # TODO: refine tree node label #tree2 = add_node_label(tree2,tax) for node in tree2.traverse(): if not node.is_leaf(): label = "null" for t in ['kingdom','phylum','class','order','family','genus','species']: if len(tax[node.id][t])>3: label = tax[node.id][t] node.add_feature("mylabel",label) # TODO: add node depth depth={} if options.depth: with open(options.depth) as f: for line in f: (id,dep) = line.split() depth[id] = float(dep) # TODO: add color attribute if options.depth: for node in tree2.iter_leaves(): if depth[node.id] >= 10 and depth[node.id] < 100: node.add_feature("color","#D8BFD8") elif depth[node.id] >= 100 and depth[node.id] < 1000: node.add_feature("color","#DDA0DD") elif depth[node.id] >= 1000 and depth[node.id] < 5000: node.add_feature("color","#EE82EE") elif depth[node.id] >= 5000: node.add_feature("color","#DA70D6") else: node.add_feature("color","#E6E6FA") # TODO: set tree style ts = TreeStyle() ts.show_leaf_name = False ts.layout_fn = tree_layout # TODO: show tree2 #tree2.show(tree_style=ts) tree2.render(args[0]+".png",dpi=2048,tree_style=ts)
import sys import re from StringIO import StringIO from ete2 import Phyloxml, phyloxml #Creates empty phyloxml document project = Phyloxml() # Loads newick tree phylo = phyloxml.PhyloxmlTree(newick=sys.argv[1]) # Set basic tree info as a phyloxml phylogeny object phylo.phyloxml_phylogeny.set_name("test_tree") if len(phylo.children) <= 2: phylo.phyloxml_phylogeny.set_rooted("true") else: phylo.phyloxml_phylogeny.set_rooted("false") # Add the tree to the phyloxml project project.add_phylogeny(phylo) # Export phyloxml document OUTPUT = StringIO() project.export(OUTPUT) # Some ad-hoc changes to the phyloxml formatted document to meet the schema definition text = OUTPUT.getvalue() text = text.replace("phy:", "") text = re.sub('branch_length_attr="[^"]+"', "", text) header = """
def main(argv):
    """
    Plot a neighbour-joining tree of the training organisms with bubbles for
    the reconstructed abundances, and export it as an image and as PhyloXML.

    :param argv: command-line arguments (without the program name). Options:
        ``-i`` file with one float per line (the x vector), ``-D`` directory
        holding ``Taxonomy.txt``, ``FileNames.txt`` and the two
        ``CommonKmerMatrix-*mers.h5`` files, ``-o`` output image, ``-x``
        output PhyloXML file, ``-w`` image width in mm, ``-r`` rectangular
        instead of circular layout, ``-t`` append taxonomy names to leaf
        labels, ``-l`` / ``-n`` accepted but not used in this function.

    Exits with status 2 on an unknown option or when ``-h`` is given.
    """
    usage = ('./PlotNJTree.py -i <InputCommonKmerXFile> -D <CommonKmerDataPath> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -r <RectangularPlotFlag> -t <TaxonomicNamesOnLeavesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>')
    input_file = ''
    title = 'Title'
    label_internal_nodes = False
    label_leaves = False
    out_file = ''
    width = 750
    out_file_xml = ''
    plot_rectangular = False
    common_kmer_data_path = ''
    taxonomic_names_on_leaves = False

    # NOTE(review): "h:" and the trailing "=" on every long option make
    # getopt demand an argument for -h and for the long flag options; left
    # unchanged so existing invocations keep working -- confirm intent.
    try:
        opts, args = getopt.getopt(
            argv, "h:i:lnrto:w:x:D:",
            ["Help=", "InputCommonKmerXFile=", "LabelLeaves=",
             "LabelInternalNodes=", "Rectangular=", "TaxonomicNamesOnLeaves=",
             "OutFile=", "Width=", "OutFileXML=", "CommonKmerDataPath="])
    except getopt.GetoptError:
        print('Unknown option, call using: ' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit(2)
        elif opt in ("-i", "--InputCommonKmerXFile"):
            input_file = arg
        elif opt in ("-l", "--LabelLeaves"):
            label_leaves = True
        elif opt in ("-n", "--LabelInternalNodes"):
            label_internal_nodes = True
        elif opt in ("-o", "--OutFile"):
            out_file = arg
        elif opt in ("-w", "--Width"):
            width = int(arg)
        elif opt in ("-x", "--OutFileXML"):
            out_file_xml = arg
        elif opt in ("-D", "--CommonKmerDataPath"):
            common_kmer_data_path = arg
        elif opt in ("-r", "--Rectangular"):
            plot_rectangular = True
        elif opt in ("-t", "--TaxonomicNamesOnLeaves"):
            taxonomic_names_on_leaves = True

    # Read in the x vector (one float per line).
    with open(input_file, 'r') as fid:
        x = [float(line) for line in fid]

    # Read in the taxonomy: first whitespace-separated token of each line
    # with its leading '_'-separated field (the taxID) removed.
    with open(os.path.join(common_kmer_data_path, "Taxonomy.txt"), 'r') as fid:
        taxonomy = ['_'.join(line.split()[0].split("_")[1:]) for line in fid]

    # Read in the basis for the ckm matrices (base names only).
    with open(os.path.join(common_kmer_data_path, "FileNames.txt"), 'r') as fid:
        x_file_names = [os.path.basename(line.strip()) for line in fid]

    # Read in the common kmer matrices.
    f = h5py.File(os.path.join(common_kmer_data_path, 'CommonKmerMatrix-30mers.h5'), 'r')
    ckm30 = np.array(f['common_kmers'], dtype=np.float64)
    f.close()
    f = h5py.File(os.path.join(common_kmer_data_path, 'CommonKmerMatrix-50mers.h5'), 'r')
    ckm50 = np.array(f['common_kmers'], dtype=np.float64)
    f.close()
    ckm30_norm = np.multiply(ckm30, 1 / np.diag(ckm30))
    ckm50_norm = np.multiply(ckm50, 1 / np.diag(ckm50))

    # Lower-triangular distance matrix: average over the two k-mer sizes of
    # one minus the symmetrised normalised similarity.
    num_rows = ckm30_norm.shape[0]
    names = x_file_names
    matrix = [
        [.5*(1-.5*ckm30_norm[i, j]-.5*ckm30_norm[j, i])
         + .5*(1-.5*ckm50_norm[i, j]-.5*ckm50_norm[j, i])
         for j in range(i+1)]
        for i in range(num_rows)
    ]

    # Construct the tree. Note I could use RapidNJ here, but a few tests have
    # shown that the trees that RapidNJ creates are rubbish.
    dm = _DistanceMatrix(names, matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm)
    t = Tree(tree.format('newick'), format=1)

    def insert_node(t, name_to_insert, insert_above, dist_along):
        # Split the branch above `insert_above`: the new node sits
        # `dist_along` below the parent and adopts the original node.
        insert_at_node = t.search_nodes(name=insert_above)[0]
        parent = (t & insert_above).up
        orig_branch_length = t.get_distance(insert_at_node, parent)
        if orig_branch_length < dist_along:
            raise ValueError("error: dist_along larger than orig_branch_length")
        removed_node = insert_at_node.detach()
        removed_node.dist = orig_branch_length - dist_along
        added_node = parent.add_child(name=name_to_insert, dist=dist_along)
        added_node.add_child(removed_node)

    def insert_hyp_node(t, leaf_name, percent):
        # Insert a node at `percent` of the root-to-leaf distance, on the
        # path from the root to `leaf_name`.
        total_dist = t.get_distance(t.name, leaf_name)
        percent_dist = percent*total_dist
        child_node = (t & leaf_name)
        ancestor_node = (t & child_node.name).up
        # Climb until the ancestor is no further from the root than the target.
        while t.get_distance(t.name, ancestor_node) > percent_dist:
            child_node = ancestor_node
            ancestor_node = (t & child_node.name).up
        insert_node(t, leaf_name+"_"+str(percent), child_node.name,
                    percent_dist-t.get_distance(t.name, ancestor_node))

    # Insert hypothetical nodes. Entry i + k*len(x_file_names) of x belongs
    # to organism i; k = 0 is the organism itself, k >= 1 is cutoff k-1.
    hyp_node_names = dict()
    cutoffs = [.9, .8, .7, .6, .5, .4, .3, .2, .1]
    cutoffs = [y**1.5 for y in cutoffs]
    for i, file_name in enumerate(x_file_names):
        xi = x[i:len(x):len(x_file_names)]
        for j, cutoff in enumerate(cutoffs, 1):
            if xi[j] > 0:
                insert_hyp_node(t, file_name, cutoff)
                # Keep the parts separately in case there are "_" in the file names.
                hyp_node_names[file_name+"_"+str(cutoff)] = [file_name, cutoff, j-1]

    def layout(node):
        # Put the bubbles (and leaf labels) on the nodes.
        if node.is_leaf():
            if node.name in x_file_names:
                # Reconstructed bubble for a training organism.
                size = x[x_file_names.index(node.name)]
                F = CircleFace(radius=500*math.sqrt(size), color="RoyalBlue", style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                if taxonomic_names_on_leaves:
                    nameFace = AttrFace("name", fsize=25, fgcolor='black',
                                        text_suffix="_"+taxonomy[x_file_names.index(node.name)])
                else:
                    nameFace = AttrFace("name", fsize=25, fgcolor='black')
                faces.add_face_to_node(nameFace, node, 0, position="branch-right")
        elif node.name in hyp_node_names:
            # Otherwise it's a hypothetical node, just use recon x.
            node_base_name = hyp_node_names[node.name][0]
            if node_base_name in x_file_names:
                idx = hyp_node_names[node.name][2]
                size = x[x_file_names.index(node_base_name)+(idx+1)*len(x_file_names)]
                F = CircleFace(radius=500*math.sqrt(size), color="RoyalBlue", style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")

    ts = TreeStyle()
    ts.layout_fn = layout
    if plot_rectangular:
        ts.mode = "r"
    else:
        ts.mode = "c"
    ts.show_leaf_name = False
    ts.min_leaf_separation = 50

    # Export the tree to a png image.
    t.render(out_file, w=width, units="mm", tree_style=ts)

    # Export the xml file; the handle is closed (and flushed) on exit.
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    phylo.phyloxml_phylogeny.set_name(title)
    project.add_phylogeny(phylo)
    with open(out_file_xml, 'w') as xml_out:
        project.export(xml_out)
def readScoreFile(fname, noself, randomize=False):
    """
    Rank the held-out target edge among the candidate edges scored in a file.

    The file name encodes the inputs: the base name starts with
    ``<orthoGroup1>@<orthoGroup2>@`` and the second-to-last ``@``-separated
    field of ``fname`` is ``<protein1>#<protein2>``, the target edge.

    Relies on the module-level names ``ExtantNetwork``, ``Phyloxml``,
    ``ScoredEdge`` and ``combinationsWithSelf``.

    :param fname: path of the score file; each line is whitespace separated
        with the two protein names in columns 0 and 1 and the score in
        column 3
    :param noself: when true, pairs with identical endpoints are never
        accepted as candidates
    :param randomize: when true, every accepted score is replaced by a
        uniform random draw from [0, 1]
    :return: ``(None, None)`` when either tree file is missing; otherwise
        ``(rank, n)`` where ``rank`` is the 0-based position of the target
        edge in descending score order and ``n`` is
        ``float(number of scored edges - 1)``
    :raises ValueError: if the target edge received no score
    """
    import random

    # The strings naming the proteins whose interaction was removed in
    # this input, upper-cased and turned into an edge.
    tstring = fname.split('@')[-2].split("#")
    tedge = (tstring[0].upper(), tstring[1].upper())

    # Locate the phylogenies of the two orthology groups.
    treeDir = "../../Parana2Data/HerpesPPIs/trees/rearranged"
    baseFile = fname.split('/')[-1]
    orthoGroup1 = baseFile.split('@')[0]
    orthoGroup2 = baseFile.split('@')[1]
    t1 = "{0}/{1}.xml.rooting.0.ntg.reconciled.0.ntg.rearrange.0.ntg".format(
        treeDir, orthoGroup1)
    t2 = "{0}/{1}.xml.rooting.0.ntg.reconciled.0.ntg.rearrange.0.ntg".format(
        treeDir, orthoGroup2)
    if not (os.path.exists(t1) and os.path.exists(t2)):
        return None, None

    # Upper-cased leaf names of both trees, without the 'LOST' leaves.
    pxml = Phyloxml()
    pxml.build_from_file(t1)
    pxml.build_from_file(t2)
    la = [name.upper() for name in pxml.phylogeny[0].get_leaf_names()
          if 'LOST' not in name.upper()]
    lb = [name.upper() for name in pxml.phylogeny[1].get_leaf_names()
          if 'LOST' not in name.upper()]

    # All endpoint pairs within and between the two groups.
    if orthoGroup1 == orthoGroup2:
        possibleEndpoints = combinationsWithSelf(la)
    else:
        possibleEndpoints = (combinationsWithSelf(la)
                             + list(itertools.product(la, lb))
                             + combinationsWithSelf(lb))

    # Only pairs whose names share the same last '_'-separated field (the
    # species, per the original comment) can be an edge.
    allPossibleEdges = [
        e for e in possibleEndpoints
        if e[0].split('_')[-1] == e[1].split('_')[-1]
    ]

    # Candidate edges: possible edges absent from the input network, plus
    # the target edge. Kept as a set because it is only used for membership.
    nonPresentEdges = set(
        e for e in allPossibleEdges
        if not ExtantNetwork.has_edge(e[0], e[1])
    )
    nonPresentEdges.add(tedge)

    def inPotentialEdges(u, v):
        # Is u,v (in either orientation) one of the candidate edges?
        contains = (u, v) in nonPresentEdges or (v, u) in nonPresentEdges
        if noself:
            return u != v and contains
        return contains

    def isEdge(se, p1, p2):
        # Does the scored edge join p1 and p2, in either orientation?
        return ((se.p1 == p1 and se.p2 == p2) or
                (se.p1 == p2 and se.p2 == p1))

    scoredEdges = []
    nonEdgesWithProb = set(nonPresentEdges)
    with open(fname, 'rb') as ifile:
        for l in ifile:
            toks = l.rstrip().split()
            p1 = toks[0].upper()
            p2 = toks[1].upper()
            s = float(toks[3])
            if inPotentialEdges(p1, p2):
                if randomize:
                    s = random.uniform(0.0, 1.0)
                scoredEdges.append(ScoredEdge(p1, p2, s))
                nonEdgesWithProb.discard((p1, p2))
                nonEdgesWithProb.discard((p2, p1))

    # NOTE(review): nonEdgesWithProb is always a subset of nonPresentEdges,
    # so this difference is empty and the loop never runs. It looks like the
    # intent was to give unscored candidates a default score; left as is
    # because changing it would alter every returned rank -- confirm.
    for u, v in (nonEdgesWithProb - nonPresentEdges):
        s = random.uniform(0.0, 1.0) if randomize else 0.0
        scoredEdges.append(ScoredEdge(u, v, s))

    # Shuffle before the (stable) sort so that ties are ordered at random.
    random.shuffle(scoredEdges)
    scoredEdges = list(enumerate(
        sorted(scoredEdges, key=lambda se: se.score, reverse=True)))

    res = [x for x in scoredEdges if isEdge(x[1], tedge[0], tedge[1])]
    if len(res) > 0:
        print(res)
        return (res[0][0], float(len(scoredEdges) - 1))

    # The original raised a bare string, which is itself a TypeError on
    # Python >= 2.6; raise a real exception that names the problem.
    raise ValueError(
        "target edge {0} has no score in {1}".format(tedge, fname))
from ete2 import Phyloxml project = Phyloxml() project.build_from_file("apaf.xml") # Each tree contains the same methods as a PhyloTree object for tree in project.get_phylogeny(): print tree # you can even use rendering options tree.show() # PhyloXML features are stored in the phyloxml_clade attribute for node in tree: print "Node name:", node.name for seq in node.phyloxml_clade.get_sequence(): for domain in seq.domain_architecture.get_domain(): domain_data = [domain.valueOf_, domain.get_from(), domain.get_to()] print " Domain:", '\t'.join(map(str, domain_data))
def MakePlot(x, org_names, ckm30, ckm50, outgroup, outfile, outfilexml, sum_x):
    """
    Draw a neighbour-joining tree of the organisms with abundance bubbles and
    write it to an image file and a PhyloXML file.

    :param x: flat sequence of abundances. Entry ``i + k*len(org_names)``
        belongs to organism ``i``; ``k = 0`` is the organism itself and
        ``k >= 1`` is the hypothetical node for cutoff ``k-1``. It is
        normalised to sum to one before use.
    :param org_names: list of organism names. Duplicated names are made
        unique IN PLACE (all but the last occurrence get ``_1``, ``_2``, ...
        appended), so the caller's list is modified.
    :param ckm30: square common-kmer matrix for 30-mers (indexed ``[i, j]``)
    :param ckm50: square common-kmer matrix for 50-mers
    :param outgroup: name of the organism to root the tree on; a warning is
        printed and no outgroup is set if it is not among the names
    :param outfile: path of the rendered image
    :param outfilexml: path of the PhyloXML export
    :param sum_x: value shown (first 8 characters of ``str``) in the legend
    """
    # NOTE: `names` is the same list object as `org_names`; the rest of the
    # function uses the two interchangeably and relies on that.
    names = org_names
    for name in names:
        if names.count(name) > 1:
            temp_name = name
            i = 1
            # Don't change the last one, just to make sure we don't conflict
            # with the outgroup.
            for dummy in range(0, names.count(name) - 1):
                names[names.index(temp_name)] = temp_name + "_" + str(i)
                i = i + 1

    # Normalize the x vector. The total is computed once; the original
    # re-summed x for every element and only worked with an eager map().
    total = sum(x)
    x = [y / total for y in x]

    ckm30_norm = np.multiply(ckm30, 1 / np.diag(ckm30))
    ckm50_norm = np.multiply(ckm50, 1 / np.diag(ckm50))

    # Lower-triangular distance matrix: average over the two k-mer sizes of
    # one minus the symmetrised normalised similarity.
    num_rows = ckm30_norm.shape[0]
    matrix = [
        [.5*(1-.5*ckm30_norm[i, j]-.5*ckm30_norm[j, i])
         + .5*(1-.5*ckm50_norm[i, j]-.5*ckm50_norm[j, i])
         for j in range(i+1)]
        for i in range(num_rows)
    ]

    # Make the list of distances (ave of the two ckm matrices).
    ckm_ave_train = .5*ckm30_norm+.5*ckm50_norm
    ckm_ave_train_dist = dict()
    for i in range(len(org_names)):
        ckm_ave_train_dist[org_names[i]] = [
            .5*ckm_ave_train[i, j]+.5*ckm_ave_train[j, i]
            for j in range(len(org_names))
        ]

    # Construct the tree. Note I could use RapidNJ here, but a few tests have
    # shown that the trees that RapidNJ creates are rubbish.
    dm = _DistanceMatrix(names, matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm)
    t = Tree(tree.format('newick'), format=1)

    def insert_node(t, name_to_insert, insert_above, dist_along):
        # Split the branch above `insert_above`: the new node sits
        # `dist_along` below the parent and adopts the original node.
        insert_at_node = t.search_nodes(name=insert_above)[0]
        parent = (t & insert_above).up
        orig_branch_length = t.get_distance(insert_at_node, parent)
        if orig_branch_length < dist_along:
            raise ValueError("error: dist_along larger than orig_branch_length in PlotPackage.py")
        removed_node = insert_at_node.detach()
        removed_node.dist = orig_branch_length - dist_along
        added_node = parent.add_child(name=name_to_insert, dist=dist_along)
        added_node.add_child(removed_node)

    def insert_hyp_node(t, leaf_name, percent, ckm_ave_train_dist, org_names):
        # Insert a node some % along the path to `leaf_name`, taking into
        # account the ckm distances and the nodes already in the NJ tree.
        dists = [abs(y-percent) for y in ckm_ave_train_dist[leaf_name]]
        # The "within 0.05 of percent" search is disabled, so this list is
        # currently always empty and the closest organism is used instead.
        nearby_indicies = list()
        nearby_names = list()
        if nearby_indicies == []:
            nearby_names.append(org_names[dists.index(min(dists))])
        else:
            # Look the organism up by the stored index (the original used
            # the loop counter, which picked the first organisms instead).
            for idx in nearby_indicies:
                nearby_names.append(org_names[idx])
        mean_dist = np.mean([ckm_ave_train_dist[leaf_name][org_names.index(y)]
                             for y in nearby_names])
        nearby_names.append(leaf_name)
        LCA = t.get_common_ancestor(nearby_names)
        LCA_to_leaf_dist = t.get_distance(LCA, leaf_name)
        # Divide the dist to the right/left of the LCA node by the number of
        # percentage points in there.
        if LCA.name == t.name:
            percent_dist = percent*LCA_to_leaf_dist
            if mean_dist <= percent:
                child_node = (t & leaf_name)
            else:
                # This means "go up from root" in the direction of the nearest guy.
                child_node = (t & nearby_names[0])
            ancestor_node = (t & child_node.name).up
        elif mean_dist <= percent:
            percent_dist = t.get_distance(LCA) + abs(percent-mean_dist)*(LCA_to_leaf_dist)/(1-mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        else:
            percent_dist = t.get_distance(LCA) - abs(percent-mean_dist)*(t.get_distance(LCA))/(mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        # Climb until the ancestor is no further from the root than the target.
        while t.get_distance(t.name, ancestor_node) > percent_dist:
            child_node = ancestor_node
            ancestor_node = (t & child_node.name).up
        insert_node(t, leaf_name+"_"+str(percent), child_node.name,
                    percent_dist-t.get_distance(t.name, ancestor_node))

    # Set outgroup.
    if outgroup in names:
        t.set_outgroup(t & outgroup)
    else:
        print("WARNING: the chosen outgroup " + outgroup + " is not in the given taxonomy: ")
        print(names)
        print("Proceeding without setting an outgroup. This may cause results to be uninterpretable.")

    # Insert hypothetical nodes.
    hyp_node_names = dict()
    cutoffs = [.9, .8, .7, .6, .5, .4, .3, .2, .1]
    cutoffs = [-.5141*(val**3)+1.0932*(val**2)+0.3824*val for val in cutoffs]
    for i in range(len(org_names)):
        xi = x[i:len(x):len(org_names)]
        for j in range(1, len(cutoffs)+1):
            if xi[j] > 0:
                insert_hyp_node(t, org_names[i], cutoffs[j-1], ckm_ave_train_dist, org_names)
                # Keep the parts separately in case there are "_" in the file names.
                hyp_node_names[org_names[i]+"_"+str(cutoffs[j-1])] = [org_names[i], cutoffs[j-1], j-1]

    size_factor = 250
    font_size = 55

    def layout(node):
        # Put the bubbles (and leaf labels) on the nodes.
        node_style = NodeStyle()
        node_style["hz_line_width"] = 10
        node_style["vt_line_width"] = 10
        node.set_style(node_style)
        if node.is_leaf():
            if node.name in org_names:
                # Reconstructed bubble for a training organism.
                size = x[org_names.index(node.name)]
                F = CircleFace(radius=size_factor*math.sqrt(size), color="RoyalBlue", style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                # Denote that this was a training organism.
                nameFace = AttrFace("name", fsize=font_size, fgcolor='black')
                faces.add_face_to_node(nameFace, node, 0, position="branch-right")
        elif node.name in hyp_node_names:
            # Otherwise it's a hypothetical node, just use recon x.
            node_base_name = hyp_node_names[node.name][0]
            if node_base_name in org_names:
                idx = hyp_node_names[node.name][2]
                size = x[org_names.index(node_base_name)+(idx+1)*len(org_names)]
                F = CircleFace(radius=size_factor*math.sqrt(size), color="RoyalBlue", style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "r"
    ts.scale = 2*1000
    ts.show_leaf_name = False
    ts.min_leaf_separation = 50
    F = CircleFace(radius=.87*size_factor, color="RoyalBlue", style="sphere")
    F.border.width = None
    F.opacity = 0.6
    ts.legend.add_face(F, 0)
    ts.legend.add_face(TextFace(" Inferred relative abundance", fsize=1.5*font_size, fgcolor="Blue"), 1)
    ts.legend.add_face(TextFace(" Total absolute abundance depicted " + str(sum_x)[0:8], fsize=1.5*font_size, fgcolor="Black"), 1)
    ts.legend_position = 4
    t.render(outfile, w=550, units="mm", tree_style=ts)

    # Render the XML file; the handle is closed (and flushed) on exit.
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    project.add_phylogeny(phylo)
    with open(outfilexml, 'w') as xml_out:
        project.export(xml_out)
from ete2 import Phyloxml project = Phyloxml() project.build_from_file("testTree.xml") # Each tree contains the same methods as a PhyloTree object for tree in project.get_phylogeny(): print tree # you can even use rendering options tree.show() # PhyloXML features are stored in the phyloxml_clade attribute for node in tree: print "Node name:", node.name for seq in node.phyloxml_clade.get_sequence(): for domain in seq.domain_architecture.get_domain(): domain_data = [domain.valueOf_, domain.get_from(), domain.get_to()] print " Domain:", '\t'.join(map(str, domain_data))
from ete2 import Phyloxml, phyloxml
import random

# Example script: build a phyloxml project around one randomly populated
# tree, print it, show it and export it.
project = Phyloxml()

# Creates a random tree
phylo = phyloxml.PhyloxmlTree()
phylo.populate(5, random_branches=True)
phylo.phyloxml_phylogeny.set_name("test_tree")

# Add the tree to the phyloxml project
project.add_phylogeny(phylo)

print project.get_phylogeny()[0]

# Sample output of the print above (presumably the leaf names differ from
# run to run, since the tree is populated at random):
#
#          /-iajom
#     /---|
#    |     \-wiszh
#----|
#    |     /-xrygw
#     \---|
#         |     /-gjlwx
#          \---|
#               \-ijvnk

# Trees can be operated as normal ETE trees
phylo.show()

# Export the project as phyloXML format
project.export()

# First line of the exported document:
# <phy:Phyloxml xmlns:phy="http://www.phyloxml.org/1.10/phyloxml.xsd">
def MakePlot(x, org_names, ckm30, ckm50, outgroup, outfile, outfilexml, sum_x):
    """
    Draw a neighbour-joining tree of the organisms with abundance bubbles and
    write it to an image file and a PhyloXML file.

    :param x: flat sequence of abundances. Entry ``i + k*len(org_names)``
        belongs to organism ``i``; ``k = 0`` is the organism itself and
        ``k >= 1`` is the hypothetical node for cutoff ``k-1``. It is
        normalised to sum to one before use.
    :param org_names: list of organism names. Duplicated names are made
        unique IN PLACE (all but the last occurrence get ``_1``, ``_2``, ...
        appended), so the caller's list is modified.
    :param ckm30: square common-kmer matrix for 30-mers (indexed ``[i, j]``)
    :param ckm50: square common-kmer matrix for 50-mers
    :param outgroup: name of the organism to root the tree on; a warning is
        printed and no outgroup is set if it is not among the names
    :param outfile: path of the rendered image
    :param outfilexml: path of the PhyloXML export
    :param sum_x: value shown (first 8 characters of ``str``) in the legend
    """
    # NOTE: `names` is the same list object as `org_names`; the rest of the
    # function uses the two interchangeably and relies on that.
    names = org_names
    for name in names:
        if names.count(name) > 1:
            temp_name = name
            i = 1
            # Don't change the last one, just to make sure we don't conflict
            # with the outgroup.
            for dummy in range(0, names.count(name) - 1):
                names[names.index(temp_name)] = temp_name + "_" + str(i)
                i = i + 1

    # Normalize the x vector. The total is computed once; the original
    # re-summed x for every element and only worked with an eager map().
    total = sum(x)
    x = [y / total for y in x]

    ckm30_norm = np.multiply(ckm30, 1 / np.diag(ckm30))
    ckm50_norm = np.multiply(ckm50, 1 / np.diag(ckm50))

    # Lower-triangular distance matrix: average over the two k-mer sizes of
    # one minus the symmetrised normalised similarity.
    num_rows = ckm30_norm.shape[0]
    matrix = [
        [.5 * (1 - .5 * ckm30_norm[i, j] - .5 * ckm30_norm[j, i])
         + .5 * (1 - .5 * ckm50_norm[i, j] - .5 * ckm50_norm[j, i])
         for j in range(i + 1)]
        for i in range(num_rows)
    ]

    # Make the list of distances (ave of the two ckm matrices).
    ckm_ave_train = .5 * ckm30_norm + .5 * ckm50_norm
    ckm_ave_train_dist = dict()
    for i in range(len(org_names)):
        ckm_ave_train_dist[org_names[i]] = [
            .5 * ckm_ave_train[i, j] + .5 * ckm_ave_train[j, i]
            for j in range(len(org_names))
        ]

    # Construct the tree. Note I could use RapidNJ here, but a few tests have
    # shown that the trees that RapidNJ creates are rubbish.
    dm = _DistanceMatrix(names, matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm)
    t = Tree(tree.format('newick'), format=1)

    def insert_node(t, name_to_insert, insert_above, dist_along):
        # Split the branch above `insert_above`: the new node sits
        # `dist_along` below the parent and adopts the original node.
        insert_at_node = t.search_nodes(name=insert_above)[0]
        parent = (t & insert_above).up
        orig_branch_length = t.get_distance(insert_at_node, parent)
        if orig_branch_length < dist_along:
            raise ValueError(
                "error: dist_along larger than orig_branch_length in PlotPackage.py"
            )
        removed_node = insert_at_node.detach()
        removed_node.dist = orig_branch_length - dist_along
        added_node = parent.add_child(name=name_to_insert, dist=dist_along)
        added_node.add_child(removed_node)

    def insert_hyp_node(t, leaf_name, percent, ckm_ave_train_dist, org_names):
        # Insert a node some % along the path to `leaf_name`, taking into
        # account the ckm distances and the nodes already in the NJ tree.
        dists = [abs(y - percent) for y in ckm_ave_train_dist[leaf_name]]
        # The "within 0.05 of percent" search is disabled, so this list is
        # currently always empty and the closest organism is used instead.
        nearby_indicies = list()
        nearby_names = list()
        if nearby_indicies == []:
            nearby_names.append(org_names[dists.index(min(dists))])
        else:
            # Look the organism up by the stored index (the original used
            # the loop counter, which picked the first organisms instead).
            for idx in nearby_indicies:
                nearby_names.append(org_names[idx])
        mean_dist = np.mean([
            ckm_ave_train_dist[leaf_name][org_names.index(y)]
            for y in nearby_names
        ])
        nearby_names.append(leaf_name)
        LCA = t.get_common_ancestor(nearby_names)
        LCA_to_leaf_dist = t.get_distance(LCA, leaf_name)
        # Divide the dist to the right/left of the LCA node by the number of
        # percentage points in there.
        if LCA.name == t.name:
            percent_dist = percent * LCA_to_leaf_dist
            if mean_dist <= percent:
                child_node = (t & leaf_name)
            else:
                # This means "go up from root" in the direction of the nearest guy.
                child_node = (t & nearby_names[0])
            ancestor_node = (t & child_node.name).up
        elif mean_dist <= percent:
            percent_dist = t.get_distance(LCA) + abs(percent - mean_dist) * (
                LCA_to_leaf_dist) / (1 - mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        else:
            percent_dist = t.get_distance(LCA) - abs(percent - mean_dist) * (
                t.get_distance(LCA)) / (mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        # Climb until the ancestor is no further from the root than the target.
        while t.get_distance(t.name, ancestor_node) > percent_dist:
            child_node = ancestor_node
            ancestor_node = (t & child_node.name).up
        insert_node(t, leaf_name + "_" + str(percent), child_node.name,
                    percent_dist - t.get_distance(t.name, ancestor_node))

    # Set outgroup.
    if outgroup in names:
        t.set_outgroup(t & outgroup)
    else:
        print("WARNING: the chosen outgroup " + outgroup +
              " is not in the given taxonomy: ")
        print(names)
        print(
            "Proceeding without setting an outgroup. This may cause results to be uninterpretable."
        )

    # Insert hypothetical nodes.
    hyp_node_names = dict()
    cutoffs = [.9, .8, .7, .6, .5, .4, .3, .2, .1]
    cutoffs = [
        -.5141 * (val**3) + 1.0932 * (val**2) + 0.3824 * val for val in cutoffs
    ]
    for i in range(len(org_names)):
        xi = x[i:len(x):len(org_names)]
        for j in range(1, len(cutoffs) + 1):
            if xi[j] > 0:
                insert_hyp_node(t, org_names[i], cutoffs[j - 1],
                                ckm_ave_train_dist, org_names)
                # Keep the parts separately in case there are "_" in the file names.
                hyp_node_names[org_names[i] + "_" + str(cutoffs[j - 1])] = [
                    org_names[i], cutoffs[j - 1], j - 1
                ]

    size_factor = 250
    font_size = 55

    def layout(node):
        # Put the bubbles (and leaf labels) on the nodes.
        node_style = NodeStyle()
        node_style["hz_line_width"] = 10
        node_style["vt_line_width"] = 10
        node.set_style(node_style)
        if node.is_leaf():
            if node.name in org_names:
                # Reconstructed bubble for a training organism.
                size = x[org_names.index(node.name)]
                F = CircleFace(radius=size_factor * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                # Denote that this was a training organism.
                nameFace = AttrFace("name", fsize=font_size, fgcolor='black')
                faces.add_face_to_node(nameFace,
                                       node,
                                       0,
                                       position="branch-right")
        elif node.name in hyp_node_names:
            # Otherwise it's a hypothetical node, just use recon x.
            node_base_name = hyp_node_names[node.name][0]
            if node_base_name in org_names:
                idx = hyp_node_names[node.name][2]
                size = x[org_names.index(node_base_name) +
                         (idx + 1) * len(org_names)]
                F = CircleFace(radius=size_factor * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "r"
    ts.scale = 2 * 1000
    ts.show_leaf_name = False
    ts.min_leaf_separation = 50
    F = CircleFace(radius=.87 * size_factor, color="RoyalBlue", style="sphere")
    F.border.width = None
    F.opacity = 0.6
    ts.legend.add_face(F, 0)
    ts.legend.add_face(
        TextFace(" Inferred relative abundance",
                 fsize=1.5 * font_size,
                 fgcolor="Blue"), 1)
    ts.legend.add_face(
        TextFace(" Total absolute abundance depicted " + str(sum_x)[0:8],
                 fsize=1.5 * font_size,
                 fgcolor="Black"), 1)
    ts.legend_position = 4
    t.render(outfile, w=550, units="mm", tree_style=ts)

    # Render the XML file; the handle is closed (and flushed) on exit.
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    project.add_phylogeny(phylo)
    with open(outfilexml, 'w') as xml_out:
        project.export(xml_out)
def main(argv):
    """Plot a CAMI-style taxonomic profile as a bubble tree and export it as PhyloXML.

    The input file is read line by line; lines starting with ``#`` or ``@``
    and blank lines are ignored.  For every remaining line the 4th
    whitespace-separated field is taken as the ``|``-delimited taxonomy path
    and the last field as the weight of the final taxon on that path.  A tree
    is built from the paths, every node gets a circle whose radius is
    ``3.14 * sqrt(weight)``, the tree is rendered to ``-o`` and written as
    PhyloXML to ``-x``.

    :param list argv: command line arguments without the program name
        (``-i`` input, ``-t`` title, ``-l`` label leaves, ``-n`` label
        internal nodes, ``-o`` image file, ``-w`` width in mm, ``-x`` xml file)
    :raises SystemExit: with status 2 on ``-h``/``--Help`` or an option
        that getopt rejects
    """
    input_file = ''
    title = 'Title'
    label_internal_nodes = False
    label_leaves = False
    out_file = ''
    width = 750
    out_file_xml = ''
    try:
        # "-h" must not demand an argument, otherwise a bare "-h" is reported
        # as an unknown option instead of printing the usage line.
        # NOTE(review): the long-form flags (--LabelLeaves, --LabelInternalNodes,
        # --Help) are still declared with "=", i.e. they require a value; left
        # untouched so that existing "--LabelLeaves=1" style calls keep working.
        opts, args = getopt.getopt(
            argv, "hi:t:lno:w:x:",
            ["Help=", "InputFile=", "Title=", "LabelLeaves=",
             "LabelInternalNodes=", "OutFile=", "Width=", "OutFileXML="])
    except getopt.GetoptError:
        print('Unknown option, call using: ./PlotTree.py -i <InputCAMIFile> -t <Title> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--Help"):
            print('./PlotTree.py -i <InputCAMIFile> -t <Title> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -o <OutFile> -x <OutFile.xml> -w <Width>')
            sys.exit(2)
        elif opt in ("-i", "--InputFile"):
            input_file = arg
        elif opt in ("-t", "--Title"):
            title = arg
        elif opt in ("-l", "--LabelLeaves"):
            label_leaves = True
        elif opt in ("-n", "--LabelInternalNodes"):
            label_internal_nodes = True
        elif opt in ("-o", "--OutFile"):
            out_file = arg
        elif opt in ("-w", "--Width"):
            width = int(arg)
        elif opt in ("-x", "--OutFileXML"):
            out_file_xml = arg

    # Read the profile; the context manager closes the handle even on errors.
    with open(input_file, 'r') as fid:
        raw_lines = fid.readlines()

    # Put a placeholder in for missing names: "||" -> "|NONAME|".
    # Replacing one occurrence at a time also handles runs such as "|||".
    profile_lines = []
    for line in raw_lines:
        while "||" in line:
            line = line.replace("||", "|NONAME|", 1)
        profile_lines.append(line)

    # Collect the taxonomy paths and the weight of the last taxon on each path.
    ckm_tax_paths = []
    ckm_name_to_perc = dict()
    for line in profile_lines:
        if line[0] not in ('#', '@', '\n'):  # skip comments and blank lines
            fields = line.split()
            tax_path = fields[3]
            ckm_tax_paths.append(tax_path)
            ckm_name_to_perc[tax_path.split("|")[-1]] = fields[-1]

    # Create the tree: every path contributes its last element as a new node
    # hanging below the closest ancestor that is not a NONAME placeholder.
    t = Tree()
    names_to_nodes = dict()
    for tax_path in ckm_tax_paths:
        split_tax_path = tax_path.split("|")
        node_name = split_tax_path[-1]
        if len(split_tax_path) == 1:
            # A path of length one is attached directly to the root.
            names_to_nodes[node_name] = t.add_child(name=node_name)
        elif split_tax_path[-2] in names_to_nodes:
            # The direct parent is already in the tree.
            parent = names_to_nodes[split_tax_path[-2]]
            names_to_nodes[node_name] = parent.add_child(name=node_name)
        else:
            # Otherwise walk up the path, skipping over the NONAME entries.
            j = 2
            while split_tax_path[-j] == "NONAME":
                j = j + 1
            parent = names_to_nodes[split_tax_path[-j]]
            names_to_nodes[node_name] = parent.add_child(name=node_name)

    def layout(node):
        # Nodes without a recorded weight get a zero-radius circle.
        if node.name in ckm_name_to_perc:
            ckm_perc = float(ckm_name_to_perc[node.name])
        else:
            ckm_perc = 0
        F = CircleFace(radius=3.14 * math.sqrt(ckm_perc), color="RoyalBlue", style="sphere")
        F.border.width = None
        F.opacity = 0.6
        faces.add_face_to_node(F, node, 0, position="branch-right")
        if label_internal_nodes:
            faces.add_face_to_node(TextFace(node.name, fsize=7), node, 0, position="branch-top")

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "r"
    ts.show_leaf_name = label_leaves
    ts.min_leaf_separation = 50
    ts.title.add_face(TextFace(title, fsize=20), column=0)

    # Export the tree to an image
    t.render(out_file, w=width, units="mm", tree_style=ts)

    # Export the xml file; close the handle explicitly so the output is
    # flushed to disk instead of relying on garbage collection.
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    phylo.phyloxml_phylogeny.set_name(title)
    project.add_phylogeny(phylo)
    with open(out_file_xml, 'w') as xml_out:
        project.export(xml_out)
def main(argv):
    """Plot a reconstructed abundance vector on a neighbor-joining tree.

    Reads the x vector (one float per line) from ``-i`` and, from the
    directory given with ``-D``, ``Taxonomy.txt``, ``FileNames.txt`` and the
    30-mer / 50-mer common k-mer matrices.  A distance matrix is derived from
    the two normalised matrices, a neighbor-joining tree is built from it,
    hypothetical nodes are inserted along the root-to-leaf paths for every
    positive entry of x beyond the first block, and the tree is rendered to
    ``-o`` with one bubble per node and exported as PhyloXML to ``-x``.

    :param list argv: command line arguments without the program name
        (``-i`` x file, ``-D`` data directory, ``-l``, ``-n``, ``-r``
        rectangular plot, ``-t`` taxonomic names on leaves, ``-o`` image
        file, ``-w`` width in mm, ``-x`` xml file)
    :raises SystemExit: with status 2 on ``-h``/``--Help`` or an option
        that getopt rejects
    :raises ValueError: from ``insert_node`` when a node would have to be
        inserted further along a branch than the branch is long
    """
    input_file = ''
    title = 'Title'  # no option changes this; it is only used as the phylogeny name
    # -l / -n are parsed for command line compatibility but are not read below.
    label_internal_nodes = False
    label_leaves = False
    out_file = ''
    width = 750
    out_file_xml = ''
    plot_rectangular = False
    common_kmer_data_path = ''
    taxonomic_names_on_leaves = False
    try:
        # "-h" must not demand an argument, otherwise a bare "-h" is reported
        # as an unknown option instead of printing the usage line.
        # NOTE(review): the long-form flags are still declared with "=", i.e.
        # they require a value; left untouched so that existing
        # "--Rectangular=1" style calls keep working.
        opts, args = getopt.getopt(argv, "hi:lnrto:w:x:D:", [
            "Help=", "InputCommonKmerXFile=", "LabelLeaves=",
            "LabelInternalNodes=", "Rectangular=", "TaxonomicNamesOnLeaves=",
            "OutFile=", "Width=", "OutFileXML=", "CommonKmerDataPath="
        ])
    except getopt.GetoptError:
        print('Unknown option, call using: ./PlotNJTree.py -i <InputCommonKmerXFile> -D <CommonKmerDataPath> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -r <RectangularPlotFlag> -t <TaxonomicNamesOnLeavesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--Help"):
            print('./PlotNJTree.py -i <InputCommonKmerXFile> -D <CommonKmerDataPath> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -r <RectangularPlotFlag> -t <TaxonomicNamesOnLeavesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>')
            sys.exit(2)
        elif opt in ("-i", "--InputCommonKmerXFile"):
            input_file = arg
        elif opt in ("-l", "--LabelLeaves"):
            label_leaves = True
        elif opt in ("-n", "--LabelInternalNodes"):
            label_internal_nodes = True
        elif opt in ("-o", "--OutFile"):
            out_file = arg
        elif opt in ("-w", "--Width"):
            width = int(arg)
        elif opt in ("-x", "--OutFileXML"):
            out_file_xml = arg
        elif opt in ("-D", "--CommonKmerDataPath"):
            common_kmer_data_path = arg
        elif opt in ("-r", "--Rectangular"):
            plot_rectangular = True
        elif opt in ("-t", "--TaxonomicNamesOnLeaves"):
            taxonomic_names_on_leaves = True

    # Read in the x vector.  A real list is required because it is sliced
    # and indexed below.
    with open(input_file, 'r') as fid:
        x = [float(line) for line in fid]

    # Read in the taxonomy: first whitespace-separated token of every line
    # with its leading "<taxID>_" part removed.
    with open(os.path.join(common_kmer_data_path, "Taxonomy.txt"), 'r') as fid:
        taxonomy = ['_'.join(line.split()[0].split("_")[1:]) for line in fid]

    # Read in the basis for the ckm matrices (base names of the listed files).
    with open(os.path.join(common_kmer_data_path, "FileNames.txt"), 'r') as fid:
        x_file_names = [os.path.basename(line.strip()) for line in fid]

    def load_common_kmers(file_name):
        # Load the 'common_kmers' dataset as float64; the array is copied out
        # while the HDF5 file is still open, and the file is always closed.
        with h5py.File(os.path.join(common_kmer_data_path, file_name), 'r') as f:
            return np.array(f['common_kmers'], dtype=np.float64)

    ckm30 = load_common_kmers('CommonKmerMatrix-30mers.h5')
    ckm50 = load_common_kmers('CommonKmerMatrix-50mers.h5')
    # Broadcasting divides every column j by the diagonal entry [j, j].
    ckm30_norm = np.multiply(ckm30, 1 / np.diag(ckm30))
    ckm50_norm = np.multiply(ckm50, 1 / np.diag(ckm50))
    num_rows = ckm30_norm.shape[0]
    names = x_file_names

    # Lower-triangular (diagonal included) distance matrix: the mean over
    # both k-mer sizes of one minus the symmetrised normalised similarity.
    matrix = [
        [
            .5 * (1 - .5 * ckm30_norm[i, j] - .5 * ckm30_norm[j, i]) +
            .5 * (1 - .5 * ckm50_norm[i, j] - .5 * ckm50_norm[j, i])
            for j in range(i + 1)
        ]
        for i in range(num_rows)
    ]

    # Construct the tree. Note I could use RapidNJ here, but a few tests have
    # shown that the trees that RapidNJ creates are rubbish.
    dm = _DistanceMatrix(names, matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm)
    t = Tree(tree.format('newick'), format=1)

    # Now put internal nodes at a certain phylogenetic distance between the
    # root and a given node.

    def insert_node(t, name_to_insert, insert_above, dist_along):
        # Split the branch above `insert_above`: the new node sits
        # `dist_along` below the parent and adopts the detached node.
        insert_at_node = t.search_nodes(name=insert_above)[0]
        parent = (t & insert_above).up
        orig_branch_length = t.get_distance(insert_at_node, parent)
        if orig_branch_length < dist_along:
            raise ValueError(
                "error: dist_along larger than orig_branch_length")
        removed_node = insert_at_node.detach()
        removed_node.dist = orig_branch_length - dist_along
        added_node = parent.add_child(name=name_to_insert, dist=dist_along)
        added_node.add_child(removed_node)

    def insert_hyp_node(t, leaf_name, percent):
        # Insert a node at `percent` of the root-to-leaf distance, named
        # "<leaf_name>_<percent>".
        total_dist = t.get_distance(t.name, leaf_name)
        percent_dist = percent * total_dist
        child_node = (t & leaf_name)
        ancestor_node = (t & child_node.name).up
        # Climb until the ancestor is no further from the root than the target.
        while t.get_distance(t.name, ancestor_node) > percent_dist:
            child_node = ancestor_node
            ancestor_node = (t & child_node.name).up
        insert_node(t, leaf_name + "_" + str(percent), child_node.name,
                    percent_dist - t.get_distance(t.name, ancestor_node))

    # Insert hypothetical nodes.  x is laid out in blocks of
    # len(x_file_names); block 0 belongs to the leaves themselves and block
    # k + 1 to cutoffs[k].
    hyp_node_names = dict()
    cutoffs = [y**1.5 for y in [.9, .8, .7, .6, .5, .4, .3, .2, .1]]
    num_names = len(x_file_names)
    for i, base_name in enumerate(x_file_names):
        xi = x[i::num_names]
        for k, cutoff in enumerate(cutoffs):
            if xi[k + 1] > 0:
                insert_hyp_node(t, base_name, cutoff)
                # Keep the parts separately in case there are "_" in the
                # file names.
                hyp_node_names[base_name + "_" + str(cutoff)] = [
                    base_name, cutoff, k
                ]

    # Now put the bubbles on the nodes
    def layout(node):
        if node.is_leaf():
            if node.name in x_file_names:
                # make reconstructed bubble
                size = x[x_file_names.index(node.name)]
                F = CircleFace(radius=500 * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                if taxonomic_names_on_leaves:
                    nameFace = AttrFace(
                        "name",
                        fsize=25,
                        fgcolor='black',
                        text_suffix="_" +
                        taxonomy[x_file_names.index(node.name)])
                else:
                    nameFace = AttrFace("name", fsize=25, fgcolor='black')
                faces.add_face_to_node(nameFace, node, 0,
                                       position="branch-right")
        elif node.name in hyp_node_names:
            # Hypothetical node (insert_node always gives it a child, so it
            # is never a leaf): use the matching entry of the recon x.
            node_base_name = hyp_node_names[node.name][0]
            if node_base_name in x_file_names:
                idx = hyp_node_names[node.name][2]
                size = x[x_file_names.index(node_base_name) +
                         (idx + 1) * len(x_file_names)]
                F = CircleFace(radius=500 * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")

    ts = TreeStyle()
    ts.layout_fn = layout
    if plot_rectangular:
        ts.mode = "r"
    else:
        ts.mode = "c"
    ts.show_leaf_name = False
    ts.min_leaf_separation = 50

    # Export the tree to an image
    t.render(out_file, w=width, units="mm", tree_style=ts)

    # Export the xml file; close the handle explicitly so the output is
    # flushed to disk instead of relying on garbage collection.
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    phylo.phyloxml_phylogeny.set_name(title)
    project.add_phylogeny(phylo)
    with open(out_file_xml, 'w') as xml_out:
        project.export(xml_out)