def draw_tree(recipe_inst):
    '''Build, print and return a chain-shaped ete3 tree for a recipe.

    Every entry of ``recipe_inst`` becomes a node named after the entry's
    ``'word'`` and tagged ``type='action'``.  Each item of the entry's
    ``'ingredient'`` list becomes a child of that node tagged
    ``type='ingredient'``.  Action nodes are chained: the first hangs below
    an unnamed root and every later one hangs below the previous action.
    The ASCII rendering of the whole tree is printed before the root is
    returned.

    Example input::

        from ete3 import Tree
        recipe_inst = [
            {'word': 'heated', 'ingredient': ['rice', 'banana', 'cookie', 'dishes']},
            {'word': 'boil', 'ingredient': ['apple', 'banana', 'cookie', 'dish']},
            {'word': 'rince', 'ingredient': ['apple', 'banana', 'cookie', 'dish']},
        ]

    Ingredients are deliberately left in their given order: sorting them was
    found not to improve the tree edit distance.
    '''
    root = Tree()
    attach_point = root
    for step in recipe_inst:
        action_node = Tree(name=step['word'])
        action_node.add_feature('type', 'action')
        ingredients = step['ingredient']
        if ingredients:
            for ingredient_name in ingredients:
                # The action node is still detached here, so it is its own root.
                leaf = action_node.add_child(name=ingredient_name)
                leaf.add_feature('type', 'ingredient')
        # add_child returns the attached node, which becomes the next parent.
        attach_point = attach_point.add_child(action_node)
    print(root.get_ascii(show_internal=True))
    return root
def species_tree(self, taxid):
    """Recursively build the taxonomy subtree rooted at ``taxid``.

    Args:
        taxid: identifier passed to ``self.taxa`` to look up the taxon record.
            The record is expected to expose ``taxid``, ``spname``, ``rank``
            and ``children`` (an iterable of child taxids).

    Returns:
        A ``Tree`` node named after the record's ``taxid`` and annotated with
        ``spname`` and ``rank``, with one recursively built child per entry
        of the record's ``children``.
    """
    sn = self.taxa(taxid)
    tree = Tree(name=sn.taxid)  # PhyloTree? Has annotate_ncbi_taxa() method.
    # add_feature() takes a single (name, value) pair; the keyword form that
    # sets several features at once is add_features().
    tree.add_features(spname=sn.spname, rank=sn.rank)
    for child_taxid in sn.children:
        # add_child() also sets the child's ``up`` link, which extending
        # ``tree.children`` directly would leave unset.
        tree.add_child(self.species_tree(child_taxid))
    return tree
def createNode():
    """Return a fresh domain node with all required fields pre-populated.

    The node is named 'placeholder', has zero branch length, and carries the
    features ``pos = 0`` and ``event = 'SPECIATION'``.
    """
    placeholder = Tree()
    placeholder.name = 'placeholder'
    for feature_name, feature_value in (('pos', 0), ('event', 'SPECIATION')):
        placeholder.add_feature(feature_name, feature_value)
    placeholder.dist = 0
    return placeholder
def ASR_parser(args):
    """Turn an IQ-TREE result into a collapsed tree/forest and write it out.

    Reads the newick tree at ``args.tree`` and the ``name,count`` lines of
    ``args.counts``, maps ancestral sequences onto the tree, reroots it on
    ``args.naive`` and recomputes branch lengths as hamming distances.
    Then writes:

    * ``<outbase>.svg``  - rendering of the collapsed tree (optionally colored)
    * ``<outbase>.tree`` - newick dump via ``write_random_tree``
    * ``<outbase>.p``    - pickled ``CollapsedForest``

    Args:
        args: namespace providing ``tree``, ``counts``, ``asr_seq``,
            ``leaf_seq``, ``naive``, ``name``, ``colormap``, ``idmap`` and
            ``outbase``.

    Raises:
        TreeFileParsingError: if the input tree cannot be parsed.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    from GCutils import CollapsedForest, CollapsedTree, hamming_distance

    try:
        tree = Tree(args.tree, format=1)
    except Exception as e:
        print(e)
        raise TreeFileParsingError('Could not read the input tree. Is this really newick format?')

    # Each line is "<name>,<count>[,...]"; close the file once it is parsed.
    with open(args.counts) as counts_fh:
        counts = {
            fields[0]: int(fields[1])
            for fields in (line.split(',') for line in counts_fh)
        }

    tree.add_feature('frequency', 0)       # Placeholder will be deleted when rerooting
    tree.add_feature('sequence', 'DUMMY')  # Placeholder will be deleted when rerooting
    tree = map_asr_to_tree(args.asr_seq, args.leaf_seq, tree, args.naive, counts)

    # Reroot to make the naive sequence the real root instead of just an outgroup:
    tree = reroot_tree(tree, pattern=args.naive)

    # Recompute branch lengths as hamming distances:
    tree.dist = 0  # No branch above root
    for node in tree.iter_descendants():
        node.dist = hamming_distance(node.sequence, node.up.sequence)

    iqtree_tree = CollapsedTree(tree=tree, name=args.name)

    # Add colors:
    if args.colormap is not None:
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only use colormap/idmap files from trusted sources.
        with open(args.colormap, 'rb') as fh:
            colormap = pickle.load(fh)
        with open(args.idmap, 'rb') as fh:
            id_map = pickle.load(fh)
        # Reverse the id_map:
        id_map = {cs: seq_id for seq_id, cell_ids in id_map.items() for cs in cell_ids}
        # Expand the colormap and map to sequence ids:
        colormap_seqid = dict()
        for key, color in colormap.items():
            # A key is either one cell id (str) or a collection of cell ids.
            # Wrap a str so it is never iterated character by character.
            cell_ids = (key,) if isinstance(key, str) else key
            for cell_id in cell_ids:
                if cell_id in id_map:
                    colormap_seqid[id_map[cell_id]] = color
        colormap = colormap_seqid
    else:
        colormap = None

    iqtree_tree.render(args.outbase + '.svg', colormap=colormap)
    iqtree_forest = CollapsedForest(forest=[iqtree_tree], name=args.name)
    # Dump tree as newick:
    iqtree_forest.write_random_tree(args.outbase + '.tree')
    print('number of trees with integer branch lengths:', iqtree_forest.n_trees)

    with open(args.outbase + '.p', 'wb') as f:
        pickle.dump(iqtree_forest, f)

    print('Done parsing IQ-TREE tree')
def ASR_parser(args):
    """Turn an IgPhyML result into a collapsed tree/forest and write it out.

    Reads the newick tree at ``args.tree`` and the ``name,count`` lines of
    ``args.counts``, maps ancestral sequences onto the tree, reroots it and
    recomputes branch lengths as hamming distances.  Then writes
    ``<outbase>.svg`` (rendering) and ``<outbase>.p`` (pickled
    ``CollapsedForest``), and warns on stdout about trees whose root has
    frequency 0 and exactly one child.

    Args:
        args: namespace providing ``tree``, ``counts``, ``asr_seq``,
            ``naive`` and ``outbase``.

    Raises:
        TreeFileParsingError: if the input tree cannot be parsed.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    from gctree import CollapsedForest, CollapsedTree, hamming_distance

    try:
        tree = Tree(args.tree)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit are not converted into a parsing error.
        raise TreeFileParsingError(
            'Could not read the input tree. Is this really newick format?')

    # Each line is "<name>,<count>[,...]"; close the file once it is parsed.
    with open(args.counts) as counts_fh:
        counts = {
            fields[0]: int(fields[1])
            for fields in (line.split(',') for line in counts_fh)
        }

    tree.add_feature('frequency', 0)       # Placeholder will be deleted when rerooting
    tree.add_feature('sequence', 'DUMMY')  # Placeholder will be deleted when rerooting
    tree = map_asr_to_tree(args.asr_seq, tree, args.naive, counts)

    # Reroot to make the naive sequence the real root instead of just an outgroup:
    tree = reroot_tree(tree)

    # Recompute branch lengths as hamming distances:
    tree.dist = 0  # No branch above root
    for node in tree.iter_descendants():
        node.dist = hamming_distance(node.sequence, node.up.sequence)

    igphyml_tree = CollapsedTree(tree=tree)
    igphyml_tree.render(args.outbase + '.svg')
    igphyml_forest = CollapsedForest(forest=[igphyml_tree])
    print('number of trees with integer branch lengths:', igphyml_forest.n_trees)

    # check for unifurcations at root
    unifurcations = sum(
        collapsed.tree.frequency == 0 and len(collapsed.tree.children) == 1
        for collapsed in igphyml_forest.forest)
    if unifurcations:
        print(
            'WARNING: {} trees exhibit unifurcation from root, which is not possible under current model. Such nodes will be ommitted from likelihood calculation'
            .format(unifurcations))

    with open(args.outbase + '.p', 'wb') as f:
        pickle.dump(igphyml_forest, f)

    print('Done parsing IgPhyML tree')
def generateIQTree():
    """Simulate a host/guest tree pair and infer a tree for the result.

    Builds a random host topology and a guest tree, evolves sequences along
    them, collects the domain/motif sequences of every host leaf, attaches
    an 'Outgroup' leaf to the pruned guest tree, writes the intermediate
    files ('testtree.nwk', 'hosttree.nwk', 'testfasta.fa'), runs the ML tree
    step and loads 'testfasta.fa.treefile' rooted on the outgroup.

    Returns:
        (hostTree, guestTree, iqtree)
    """

    def _domain_sequences(sequence):
        # HMMER selects which detector extracts the domain sequences.
        finder = findDomains if HMMER else findMotifs
        return finder(sequence, hmmfile)[2]

    sd = 1  # startingDomains
    hostTree = createRandomTopology(1, 1, lambda x: x)
    guestTree, nodeMap = buildGuestTree(hostTree, s2, expfunc, .2, gaussNoise, sd)
    rootSequence = grs(sd)
    evolveAlongTree(hostTree, guestTree, nodeMap, rootSequence, hmmfile,
                    emissionProbs, transmat)

    names = []
    seqs = []
    for host_leaf in hostTree:
        seqs.extend(_domain_sequences(host_leaf.sequence))
        # Surviving guest leaves mapped to this host leaf, ordered by position.
        ordered = sorted(
            (guest_leaf.position, guest_leaf.name)
            for guest_leaf in findLeaves(nodeMap[host_leaf])
            if guest_leaf.event != 'LOSS')
        names.extend(label for _, label in ordered)

    guestTree = prune(guestTree, names)

    # Attach the outgroup leaf to the pruned guest tree by hand.
    outgroup = Tree()
    outgroup.name = 'Outgroup'
    outgroup.up = guestTree
    guestTree.children.append(outgroup)

    evolved = evolveSequence(rootSequence, .1, 2, emissionProbs, hmmfile, transmat)
    outseq = _domain_sequences(evolved)[0]
    outgroup.add_feature('sequence', outseq)
    seqs.insert(0, outseq)
    names.insert(0, 'Outgroup')

    guestTree.write(outfile='testtree.nwk')
    hostTree.write(outfile='hosttree.nwk')
    addRandomTrees('testtree.nwk')
    writeFasta(names, seqs, 'testfasta.fa', False)
    mlTree('testfasta.fa', 'testtree.nwk', True)

    iqtree = Tree('testfasta.fa.treefile')
    outgroup_leaf = iqtree & 'Outgroup'
    iqtree.set_outgroup(outgroup_leaf)
    return hostTree, guestTree, iqtree
def build_tree(sequences, parents, counts=None, naive='naive'):
    """Assemble an ete tree from per-node sequences and a child->parent map.

    Every name in ``sequences`` becomes a node carrying ``nuc_seq``,
    ``aa_seq`` (via ``local_translate``) and ``frequency`` (taken from
    ``counts`` when present there, otherwise 0).  Nodes are linked using
    ``parents[name]`` as the parent of ``name``; a name absent from
    ``parents`` is taken as the root.  When ``naive`` is not None the tree is
    rerooted on the first node whose name contains ``naive``.  Ambiguous
    bases are then resolved with ``disambiguate`` and branch lengths are set
    to hamming distances between each node and its parent.

    Returns:
        The root node of the resulting tree.
    """
    # first a dictionary of disconnected nodes
    nodes = {}
    for name in sequences:
        node = Tree()
        node.name = name
        nuc_seq = sequences[name]
        node.add_feature('nuc_seq', nuc_seq)
        node.add_feature('aa_seq', local_translate(nuc_seq))
        observed = counts is not None and name in counts
        node.add_feature('frequency', counts[name] if observed else 0)
        nodes[name] = node

    # connect every node to its parent; a parentless node is the root
    for name in sequences:
        if name not in parents:
            tree = nodes[name]
            continue
        nodes[parents[name]].add_child(nodes[name])

    # Reroot on naive:
    if naive is not None:
        naive_matches = [candidate for candidate in nodes if naive in candidate]
        naive_node = nodes[naive_matches[0]]
        assert len(naive_node.children) == 0
        former_parent = naive_node.up
        former_parent.remove_child(naive_node)
        naive_node.add_child(former_parent)
        # remove possible unnecessary unifurcation after rerooting
        if len(former_parent.children) == 1:
            former_parent.delete(prevent_nondicotomic=False)
            promoted = former_parent.children[0]
            promoted.dist = hamming_distance(promoted.nuc_seq,
                                             naive_node.nuc_seq)
        tree = naive_node

    # make random choices for ambiguous bases
    tree = disambiguate(tree)

    # compute branch lengths
    tree.dist = 0  # no branch above root
    for descendant in tree.iter_descendants():
        descendant.dist = hamming_distance(descendant.nuc_seq,
                                           descendant.up.nuc_seq)

    return tree
def build_tree(sequences: Dict[str, str],
               parents: Dict[str, str],
               counts=None,
               root="root"):
    """Build an ete tree from sequences and parents dictionaries.

    Args:
        sequences: a dictionary mapping node names to sequences
        parents: a dictionary mapping each child node name to the name of
            its parent node. A name that is absent from this dictionary is
            treated as the root of the initial tree.
        counts: a dictionary mapping node names to observed abundances. This
            argument is no longer used in the main gctree inference pipeline
            (counts are assigned in DAG) but remains for compatibility. When
            it is None, nodes get no ``abundance`` feature at all.
        root: a string identifying the node to reroot on; the first node
            whose name contains it is used. Pass None to skip rerooting.

    Returns:
        The root node of the resulting tree.

    Raises:
        RuntimeError: if ``root`` is given but no node name contains it.
    """
    # first a dictionary of disconnected nodes
    nodes = {}
    for name in sequences:
        node = Tree()
        node.name = name
        node.add_feature("sequence", sequences[name])
        if counts is not None:
            node.add_feature("abundance", counts[name] if name in counts else 0)
        nodes[name] = node

    # connect every node to its parent; a parentless node is the root
    for name in sequences:
        if name in parents:
            nodes[parents[name]].add_child(nodes[name])
        else:
            tree = nodes[name]

    # reroot on root
    if root is not None:
        matches = [name for name in nodes if root in name]
        if not matches:
            raise RuntimeError(
                f"Provided root id '{root}' not found in dnapars tree.")
        root_node = nodes[matches[0]]
        assert len(root_node.children) == 0
        root_parent = root_node.up
        root_parent.remove_child(root_node)
        root_node.add_child(root_parent)
        # remove possible unnecessary unifurcation after rerooting
        if len(root_parent.children) == 1:
            root_parent.delete(prevent_nondicotomic=False)
            # delete() re-attaches the child to the new root but leaves
            # root_parent.children untouched, so the child is still reachable.
            promoted = root_parent.children[0]
            promoted.dist = gctree.utils.hamming_distance(
                promoted.sequence, root_node.sequence)
        tree = root_node

    return tree
def build_tree(sequences, parents, counts=None, naive='naive'):
    """Assemble an ete tree from per-node sequences and a child->parent map.

    Every name in ``sequences`` becomes a node carrying a ``sequence``
    feature and, only when ``counts`` is given, a ``frequency`` feature (the
    count for that name, or 0 when the name is absent from ``counts``).
    Frequencies come from the count mapping, not from the node name.  Nodes
    are linked using ``parents[name]`` as the parent of ``name``; a name
    absent from ``parents`` is taken as the root.  When ``naive`` is not None
    the first node whose name contains ``naive`` must be a childless direct
    child of the root and becomes the new root.  Ambiguous bases are then
    resolved with ``disambiguate`` and branch lengths are set to hamming
    distances between each node and its parent.

    Returns:
        The root node of the resulting tree.
    """
    # first a dictionary of disconnected nodes
    nodes = {}
    for name in sequences:
        node = Tree()
        node.name = name
        node.add_feature('sequence', sequences[name])
        if counts is not None:
            node.add_feature('frequency',
                             counts[name] if name in counts else 0)
        nodes[name] = node

    # connect every node to its parent; a parentless node is the root
    for name in sequences:
        if name not in parents:
            tree = nodes[name]
            continue
        nodes[parents[name]].add_child(nodes[name])

    # reroot on naive
    if naive is not None:
        naive_matches = [candidate for candidate in nodes if naive in candidate]
        naive_node = nodes[naive_matches[0]]
        assert len(naive_node.children) == 0
        assert naive_node in tree.children
        tree.remove_child(naive_node)
        naive_node.add_child(tree)
        tree = naive_node

    # make random choices for ambiguous bases
    tree = disambiguate(tree)

    # compute branch lengths
    tree.dist = 0  # no branch above root
    for descendant in tree.iter_descendants():
        descendant.dist = gctree.hamming_distance(descendant.sequence,
                                                  descendant.up.sequence)

    return tree
def parse_union_tree(history_1, history_2, base_tree_path, debug=False):
    """Merge two labelled histories of one base tree into a single tree.

    The base tree is read from ``base_tree_path`` (newick, format=1).  For
    every branch (parent, child) of the base tree, visited in pre-order, the
    intermediate nodes that each history inserted along that branch are
    added to the united tree in order of their distance from the parent,
    followed by the original child.  Every node of the united tree carries
    the features ``history_1_label`` and ``history_2_label``.

    Args:
        history_1: tree whose nodes expose ``label``; node names shared with
            the base tree are looked up with ``search_nodes(name=...)``.
        history_2: second tree, handled the same way.
        base_tree_path: passed to ``Tree(..., format=1)``.
        debug: when True, progress is printed to stdout.

    Returns:
        The root of the united tree.
    """
    base_tree = Tree(base_tree_path, format=1)
    # add for debugging
    # NOTE(review): this overwrites the base tree's root name with a fixed
    # value - confirm it is still wanted outside of debugging.
    base_tree.get_tree_root().name = "_baseInternal_30"
    united_tree = Tree()
    united_tree.dist = 0  # initialize distance to 0
    united_tree.get_tree_root().name = history_1.get_tree_root(
    ).name  # set the name of the root
    united_tree.add_feature("history_1_label",
                            history_1.get_tree_root().label)
    united_tree.add_feature("history_2_label",
                            history_2.get_tree_root().label)
    # counter used to name the inserted nodes "internal_<n>"
    union_nodes_number = 0
    for original_node in base_tree.traverse(
            "preorder"
    ):  # traverse the tree in pre-order to assure that for any visited node, its parent from the base branch is already in the united tree
        original_parent = original_node.up
        if original_parent != None:  # will be none only in the case the original node is the root
            if debug:
                print("handled branch: (", original_node.name, ",",
                      original_parent.name, ")")
            # the united-tree copy of the branch's parent; new nodes are
            # chained below it
            curr_union_parent = united_tree.search_nodes(
                name=original_parent.name)[0]
            # --- locate the first node of history 1 on this branch ---
            hist_1_done = True
            hist_1_curr_child = None
            hist_1_parent = history_1.search_nodes(name=original_parent.name)[
                0]  # need to check names consistency across the 3 trees
            for child in hist_1_parent.children:
                if len(base_tree.search_nodes(name=child.name)) == 0 and len(
                        child.search_nodes(name=original_node.name)
                ) > 0:  # if the child is a root in a tree that holds the original child node, then this child must be on the branch of interest
                    hist_1_curr_child = child
                    hist_1_done = False
                    break
            if hist_1_done:
                # no inserted node on this branch: go straight to the
                # original child
                hist_1_curr_child = history_1.search_nodes(
                    name=original_node.name)[0]
            # set once here; it is not updated inside the loop below
            hist_1_current_label = hist_1_curr_child.label
            # --- locate the first node of history 2 on this branch ---
            hist_2_done = True
            hist_2_curr_child = None
            hist_2_parent = history_2.search_nodes(name=original_parent.name)[
                0]  # need to check names consistency across the 3 trees
            for child in hist_2_parent.children:
                if len(base_tree.search_nodes(name=child.name)) == 0 and len(
                        child.search_nodes(name=original_node.name)
                ) > 0:  # if the child is a root in a tree that holds the original child node, then this child must be on the branch of interest
                    hist_2_curr_child = child
                    hist_2_done = False
                    break
            if hist_2_done:
                hist_2_curr_child = history_2.search_nodes(
                    name=original_node.name)[0]
            # set once here; it is not updated inside the loop below
            hist_2_current_label = hist_2_curr_child.label
            # --- interleave the inserted nodes of both histories ---
            while not hist_1_done or not hist_2_done:
                # a finished history keeps an infinite distance so the other
                # one is always chosen
                hist_1_dist = float("inf")
                hist_2_dist = float("inf")
                if not hist_1_done:  # if there is a node closer to the original node in history 1 -> add it to the united tree first
                    hist_1_dist = hist_1_curr_child.get_distance(
                        original_parent.name) - curr_union_parent.get_distance(
                            original_parent.name)
                if not hist_2_done:
                    hist_2_dist = hist_2_curr_child.get_distance(
                        original_parent.name) - curr_union_parent.get_distance(
                            original_parent.name)
                if debug:
                    if not hist_1_done:
                        print("history 1 has current child of ",
                              original_parent.name, ": ",
                              hist_1_curr_child.name, " with label: ",
                              hist_1_current_label,
                              " and distance from parent is: ", hist_1_dist)
                    if not hist_2_done:
                        print("history 2 has current child of ",
                              original_parent.name, ": ",
                              hist_2_curr_child.name, " with label: ",
                              hist_2_current_label,
                              " and distance from parent is: ", hist_2_dist)
                # first, check if now the two current children have the same name, and if this name is in the base tree - exit
                if hist_1_curr_child.name == hist_2_curr_child.name and len(
                        base_tree.search_nodes(
                            name=hist_1_curr_child.name)) > 0:
                    break
                # else, at least one of the histories has more than one step to go before reaching the bottom of the branch
                if hist_1_dist < hist_2_dist:  # add the node from history 1 and travel down to the next node in history 1
                    if debug:
                        print(
                            "adding child from history 1 which precedes to the one from history 2"
                        )
                        print("the label of the added node in history 1 is: ",
                              hist_1_curr_child.label)
                        print(
                            "the label of the added node in histroy 2 remains like papa: ",
                            hist_2_current_label)
                    curr_union_parent = curr_union_parent.add_child(
                        child=None,
                        name="internal_" + str(union_nodes_number),
                        dist=hist_1_dist,
                        support=None)
                    curr_union_parent.add_feature("history_1_label",
                                                  hist_1_curr_child.label)
                    curr_union_parent.add_feature("history_2_label",
                                                  hist_2_current_label)
                    # step down history 1: a single child continues along the
                    # branch, anything else ends this history's walk
                    hist_1_parent = hist_1_curr_child
                    if len(hist_1_parent.children) == 1:
                        hist_1_curr_child = hist_1_parent.children[0]
                    else:
                        hist_1_done = True
                    if debug:
                        print("united tree is now: \n", united_tree)
                        if hist_1_done:
                            print(
                                "history 1 on the handled branch is complete")
                        else:
                            print(
                                "history 1 on the handled branch isn't complete yet"
                            )
                else:  # add the node from history 2 and travel down to the next node in history 2
                    if debug:
                        print(
                            "adding child from history 2 which precedes to the one from history 1"
                        )
                        print("the label of the added node in history 2 is: ",
                              hist_2_curr_child.label)
                        print(
                            "the label of the added node in history 1 remains like papa: ",
                            hist_1_current_label)
                    curr_union_parent = curr_union_parent.add_child(
                        child=None,
                        name="internal_" + str(union_nodes_number),
                        dist=hist_2_dist)  # added as a new branch
                    curr_union_parent.add_feature("history_1_label",
                                                  hist_1_current_label)
                    curr_union_parent.add_feature("history_2_label",
                                                  hist_2_curr_child.label)
                    # step down history 2, same rule as for history 1
                    hist_2_parent = hist_2_curr_child
                    if len(hist_2_parent.children) == 1:
                        hist_2_curr_child = hist_2_parent.children[0]
                    else:
                        hist_2_done = True
                    if debug:
                        print("united tree is now: \n", united_tree)
                        if hist_2_done:
                            print(
                                "history 2 on the handled branch is complete")
                        else:
                            print(
                                "history 2 on the handled branch isn't complete yet"
                            )
                union_nodes_number += 1
            # now add the original node as the child of the current parent
            original_dist = original_node.dist
            # whatever part of the original branch length is not yet covered
            # by the inserted nodes
            residual = original_dist - curr_union_parent.get_distance(
                united_tree.search_nodes(name=original_parent.name)[0])
            curr_union_parent = curr_union_parent.add_child(
                child=None, name=original_node.name, dist=residual)
            curr_union_parent.add_feature(
                "history_1_label",
                history_1.search_nodes(name=original_node.name)[0].label)
            curr_union_parent.add_feature(
                "history_2_label",
                history_2.search_nodes(name=original_node.name)[0].label)
    return united_tree
''' Node Style End #######################''' for r in xrange(rows): cell_id = trackResult[r, 0] time_begin = trackResult[r, 1] time_end = trackResult[r, 2] parent_id = trackResult[r, 3] time_duration = np.abs(time_begin-time_end) # for root if parent_id == 0: # Add name to root for the first iteration root.add_feature("name", str(cell_id)) # change the branch length root.add_feature("dist", time_duration) #change node style root.set_style(ns_root) # set node name to face nameFace = TextFace(root.name) nameFace.fgcolor = "white" nameFace.fsize = 15 # nameFace.border.width = 1 nameFace.background.color = "green" node_cur.add_face(nameFace, column=1, position="branch-bottom") else: # for child #### search the parent node by parent_id
R = LG_matrix['R'] Q = LG_matrix['Q'] PI = LG_matrix['PI'] amino_acids = LG_matrix['amino_acids'] aa2idx = {} for i in range(len(amino_acids)): aa2idx[amino_acids[i]] = i ## sample sequence for the root node from the equilibrium ## distribution of amino acids len_protein = 100 root_seq = nrand.choice(amino_acids, size=len_protein, replace=True, p=PI.reshape(-1) / np.sum(PI)) t.add_feature('seq', root_seq) ## simulate sequences for each node ## the evolution process is modelled as a continous-time Markov chain. ## the following script is used for simulating the continous-time Markov chain. for node in t.traverse('preorder'): if node.is_root(): continue anc_node = node.up seq = np.copy(anc_node.seq) dist = node.dist while True: tot_rate = -np.sum([Q_dict[(aa, aa)] for aa in seq]) wait_time = nrand.exponential(scale=1 / tot_rate)
#ns["hz_line_width"] = 1.5 #ns["vt_line_width"] = 1.5 ''' Node Style End #######################''' for r in xrange(rows): cell_id = trackResult[r, 0] time_begin = trackResult[r, 1] time_end = trackResult[r, 2] parent_id = trackResult[r, 3] time_duration = np.abs(time_begin - time_end) # for root if parent_id == 0: # Add name to root for the first iteration root.add_feature("name", str(cell_id)) # change the branch length root.add_feature("dist", time_duration) #change node style root.set_style(ns_root) # set node name to face nameFace = TextFace(root.name) nameFace.fgcolor = "white" nameFace.fsize = 15 # nameFace.border.width = 1 nameFace.background.color = "green" node_cur.add_face(nameFace, column=1, position="branch-bottom") else: # for child #### search the parent node by parent_id
class um_tree:
    """Wrapper around an ete tree used to derive waiting times and a
    species partition for a chosen threshold node.

    On construction the tree is loaded, polytomies are resolved, every node
    gets an ``age`` feature (its distance from the root) and every internal
    node an ``id`` feature.  ``self.nodes`` holds the internal nodes plus
    the leaf farthest from the root, sorted by age.

    ``get_waiting_times`` must be called before ``get_species``, the
    ``print_species*`` methods or ``output_species`` return anything useful:
    it is what fills ``self.species_list`` and ``self.coa_roots``.
    """

    def __init__(self, tree):
        """Load ``tree`` (anything ``Tree(tree, format=1)`` accepts) and
        annotate its nodes with ``age`` and ``id``."""
        self.tree = Tree(tree, format=1)
        self.tree.resolve_polytomy(default_dist=0.000001, recursive=True)
        self.tree.dist = 0
        self.tree.add_feature("age", 0)
        self.nodes = self.tree.get_descendants()
        internal_node = []
        cnt = 0
        for n in self.nodes:
            node_age = n.get_distance(self.tree)
            n.add_feature("age", node_age)
            if not n.is_leaf():
                n.add_feature("id", cnt)
                cnt = cnt + 1
                internal_node.append(n)
        self.nodes = internal_node
        # The farthest node closes the timeline; it gets an id so the
        # id comparisons in get_waiting_times work for it as well.
        one_leaf = self.tree.get_farthest_node()[0]
        one_leaf.add_feature("id", cnt + 1)
        if one_leaf.is_leaf():
            self.nodes.append(one_leaf)
        self.nodes.sort(key=self.__compare_node)
        self.species_list = []
        self.coa_roots = None

    def __compare_node(self, node):
        """Sort key: the node's age."""
        return node.age

    @staticmethod
    def _make_style(fgcolor, vt_line_color, hz_line_color):
        """Return a size-0 NodeStyle with solid, width-2 lines in the given colors."""
        style = NodeStyle()
        style["fgcolor"] = fgcolor
        style["vt_line_color"] = vt_line_color
        style["hz_line_color"] = hz_line_color
        style["vt_line_width"] = 2
        style["hz_line_width"] = 2
        style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
        style["hz_line_type"] = 0
        style["size"] = 0
        return style

    def get_waiting_times(self, threshold_node=None, threshold_node_idx=0):
        """Walk ``self.nodes`` in age order and build the waiting intervals.

        Nodes before the threshold node each add one lineage.  From the
        threshold node on, a node either starts a new coalescent group or
        extends the group of the nearest ancestor that already roots one.
        As a side effect ``self.coa_roots`` and ``self.species_list`` are
        rebuilt: one species per coalescent root (its leaves) plus one
        singleton species per remaining leaf.

        Args:
            threshold_node: node at which the switch happens; when None,
                ``self.nodes[threshold_node_idx]`` is used.
            threshold_node_idx: index into ``self.nodes`` used when
                ``threshold_node`` is None.

        Returns:
            (wt_list, num_spe): the ``waiting_time`` objects whose length
            exceeds 1e-8, and the lineage count recorded when the threshold
            node was reached (-1 if it never was).
        """
        wt_list = []
        reach_t = False
        curr_age = 0.0
        curr_spe = 2
        curr_num_coa = 0
        coa_roots = []
        num_spe = -1
        if threshold_node is None:
            threshold_node = self.nodes[threshold_node_idx]
        last_coa_num = 0
        tcnt = 0
        for node in self.nodes:
            wt = None
            times = node.age - curr_age
            if times >= 0:
                curr_age = node.age
                assert curr_spe >= 0
                if reach_t:
                    if tcnt == 0:
                        last_coa_num = 2
                    # Climb towards the root looking for an ancestor that
                    # already roots a coalescent group.
                    fnode = node.up
                    coa_root = None
                    while not fnode.is_root():
                        for coa_r in coa_roots:
                            if coa_r.id == fnode.id:
                                coa_root = coa_r
                                break
                        if coa_root is not None:
                            break
                        fnode = fnode.up
                    wt = waiting_time(length=times,
                                      num_coas=curr_num_coa,
                                      num_lines=curr_spe)
                    for coa_r in coa_roots:
                        coa = coalescent(num_individual=coa_r.curr_n)
                        wt.coas.add_coalescent(coa)
                    wt.coas.coas_idx = last_coa_num
                    wt.num_curr_coa = last_coa_num
                    if coa_root is None:  # here can be modified to use multiple T
                        curr_spe = curr_spe - 1
                        curr_num_coa = curr_num_coa + 1
                        node.add_feature("curr_n", 2)
                        coa_roots.append(node)
                        last_coa_num = 2
                    else:
                        curr_n = coa_root.curr_n
                        coa_root.add_feature("curr_n", curr_n + 1)
                        last_coa_num = curr_n + 1
                    tcnt = tcnt + 1
                else:
                    if node.id == threshold_node.id:
                        reach_t = True
                        tcnt = 0
                        wt = waiting_time(length=times,
                                          num_coas=0,
                                          num_lines=curr_spe)
                        num_spe = curr_spe
                        curr_spe = curr_spe - 1
                        curr_num_coa = 2
                        node.add_feature("curr_n", 2)
                        coa_roots.append(node)
                    else:
                        wt = waiting_time(length=times,
                                          num_coas=0,
                                          num_lines=curr_spe)
                        curr_spe = curr_spe + 1
                # Intervals of (near-)zero length are dropped.
                if times > 0.00000001:
                    wt_list.append(wt)

        for wt in wt_list:
            wt.count_num_lines()

        self.species_list = []
        all_coa_leaves = []
        self.coa_roots = coa_roots
        for coa_r in coa_roots:
            leaves = coa_r.get_leaves()
            all_coa_leaves.extend(leaves)
            self.species_list.append(leaves)

        # Every leaf outside all coalescent groups is a species of its own.
        for leaf in self.tree.get_leaves():
            if leaf not in all_coa_leaves:
                self.species_list.append([leaf])

        return wt_list, num_spe

    def show(self, wt_list):
        """Print every waiting interval in ``wt_list`` with a 1-based index."""
        for cnt, wt in enumerate(wt_list, start=1):
            print("Waitting interval " + repr(cnt))
            print(wt)

    def get_species(self):
        """Return the taxa names and restyle the tree for rendering.

        All nodes get a blue line style; each coalescent root gets a style
        with a red vertical line and its descendants an all-red style.

        Returns:
            ([all_taxa_names], sp_list): the leaf names of the whole tree
            wrapped in a one-element list, and one list of leaf names per
            species in ``self.species_list``.
        """
        sp_list = [[taxa.name for taxa in sp] for sp in self.species_list]
        all_taxa_name = [leaf.name for leaf in self.tree.get_leaves()]

        style0 = self._make_style("#000000", "#0000aa", "#0000aa")
        for node in self.tree.get_descendants():
            node.set_style(style0)
            node.img_style["size"] = 0
        self.tree.set_style(style0)
        self.tree.img_style["size"] = 0

        style1 = self._make_style("#000000", "#ff0000", "#0000aa")
        style2 = self._make_style("#0f0f0f", "#ff0000", "#ff0000")
        for node in self.coa_roots:
            node.set_style(style1)
            node.img_style["size"] = 0
            for des in node.get_descendants():
                des.set_style(style2)
                des.img_style["size"] = 0

        return [all_taxa_name], sp_list

    def print_species(self, save_file):
        """Write the partition to ``<save_file>/partition.txt``.

        ``save_file`` is the output directory.  Each species is written as a
        "Species N:" line followed by a line listing its taxa.
        """
        with open(os.path.join(save_file, "partition.txt"), "w+") as out:
            for cnt, sp in enumerate(self.species_list, start=1):
                print("Species " + repr(cnt) + ":", file=out)
                taxas = "".join(taxa.name + ", " for taxa in sp)
                # NOTE: [:-1] drops only the trailing space, so the line ends
                # with a comma; kept so the file format stays unchanged.
                print("" + taxas[:-1], file=out)

    def print_species_spart(self, save_file):
        """Write the partition to ``<save_file>/partition.spart``.

        ``save_file`` is the output directory.  The species and taxa are
        also echoed to stdout via ``repr`` while the assignment block is
        written.
        """
        num_species = len(self.species_list)
        with open(os.path.join(save_file, "partition.spart"), "w+") as out:
            out.write("Filename=GMYC delimitation\n")
            out.write(f'{datetime.datetime.now().astimezone().isoformat()}\n\n')
            out.write(f"Npartition={1};GMYC\n")
            out.write(f'Nsamples={sum(len(sp) for sp in self.species_list)}\n')
            out.write(
                f'Nsubsets={num_species};{",".join(["?"] * num_species)}\n\n')
            out.write("#this is my first comment\n")
            out.write("#this is my second comment\n\n")
            out.write("Assignment\n")
            for cnt, sp in enumerate(self.species_list, start=1):
                print(repr(sp))
                for taxa in sp:
                    print(repr(taxa))
                    xx = taxa.name + "\t" + repr(cnt) + ";" + "?"
                    out.write(f"{xx}\n")
            out.write("\nPartition_score=\n")

    def output_species(self, taxa_order=None):
        """Return the partition as a species index per taxon.

        Args:
            taxa_order: taxon names in the desired output order; when None
                or empty, the tree's leaf names are used.

        Returns:
            (taxa_order, partition) where ``partition[i]`` is the 1-based
            species number of ``taxa_order[i]``, or ``(None, None)`` when
            the number of names does not match the number of taxa in
            ``self.species_list``.
        """
        if taxa_order is None or len(taxa_order) == 0:
            taxa_order = self.tree.get_leaf_names()

        num_taxa = sum(len(sp) for sp in self.species_list)
        if len(taxa_order) != num_taxa:
            print("error error, taxa_order != num_taxa!")
            return None, None

        partion = [-1] * num_taxa
        for cnt, sp in enumerate(self.species_list, start=1):
            for taxa in sp:
                partion[taxa_order.index(taxa.name)] = cnt
        return taxa_order, partion

    def num_lineages(self, wt_list, save_file):
        """Plot lineage count against time and save the figure to
        ``<save_file>/Time_Lines.png``.

        The time of each interval is the summed length of the intervals
        before it.
        """
        nl_list = []
        times = []
        last_time = 0.0
        for wt in wt_list:
            nl_list.append(wt.get_num_branches())
            times.append(last_time)
            last_time = wt.length + last_time
        plt.plot(times, nl_list)
        plt.ylabel('Number of lineages')
        plt.xlabel('Time')
        plt.savefig(os.path.join(save_file, "Time_Lines.png"))
def parse_union_tree(history_1, history_2, base_tree_path, debug=False):
    """Merge two labelled histories of one base tree into a single tree.

    The base tree is read from ``base_tree_path`` (newick, format=1) and its
    root is renamed "root".  For every branch (parent, child) of the base
    tree, visited in pre-order, the intermediate nodes that each history
    inserted along that branch are added to the united tree in order of
    their distance from the parent, followed by the original child.  Every
    node of the united tree carries the features ``history_1_label`` and
    ``history_2_label``.

    The process is terminated with ``exit(1)`` when a lookup in history 2
    fails, when both walks reach the original child inside the loop, or when
    the remaining branch length is negative.

    Args:
        history_1: tree whose nodes expose ``label``; node names shared with
            the base tree are looked up with ``search_nodes(name=...)``.
        history_2: second tree, handled the same way.
        base_tree_path: passed to ``Tree(..., format=1)``.
        debug: when True, progress and a final node listing are printed.

    Returns:
        The root of the united tree.
    """
    base_tree = Tree(base_tree_path, format=1)
    base_tree.get_tree_root().name = "root"
    united_tree = Tree()
    united_tree.dist = 0  # initialize distance to 0
    united_tree.get_tree_root().name = history_1.get_tree_root(
    ).name  # set the name of the root
    united_tree.add_feature("history_1_label",
                            history_1.get_tree_root().label)
    united_tree.add_feature("history_2_label",
                            history_2.get_tree_root().label)
    # counter used to name the inserted nodes "internal_<n>"
    union_nodes_number = 0
    for original_node in base_tree.traverse(
            "preorder"
    ):  # traverse the tree in pre-order to assure that for any visited node, its parent from the base branch is already in the united tree
        original_parent = original_node.up
        if original_parent != None:  # will be none only in the case the original node is the root
            if debug:
                print("handled branch: (", original_node.name, ",",
                      original_parent.name, ")")
            # the united-tree copy of the branch's parent; new nodes are
            # chained below it (names are right-stripped before lookup)
            curr_union_parent = united_tree.search_nodes(
                name=original_parent.name.rstrip())[0]
            # --- locate the first node of history 1 on this branch ---
            hist_1_done = True
            hist_1_curr_child = None
            hist_1_parent = history_1.search_nodes(
                name=original_parent.name.rstrip())[
                    0]  # need to check names consistency across the 3 trees
            for child in hist_1_parent.children:
                if len(
                        base_tree.search_nodes(name=child.name)
                ) == 0 and len(child.get_children()) == 1 and len(
                        child.search_nodes(name=original_node.name)
                ) > 0:  # if the child does not exist in the base tree, it represents a mapping node that was created out of breaking a branch in the original tree
                    hist_1_curr_child = child
                    hist_1_done = False
                    break
            if hist_1_done:
                # no inserted node on this branch: go straight to the
                # original child
                hist_1_curr_child = history_1.search_nodes(
                    name=original_node.name.rstrip())[0]
            # set once here; it is not updated inside the loop below
            hist_1_current_label = hist_1_curr_child.label
            # --- locate the first node of history 2 on this branch ---
            hist_2_done = True
            hist_2_curr_child = None
            hist_2_parent = history_2.search_nodes(
                name=original_parent.name.rstrip())[
                    0]  # need to check names consistency across the 3 trees
            for child in hist_2_parent.children:
                if len(
                        base_tree.search_nodes(name=child.name)
                ) == 0 and len(child.get_children()) == 1 and len(
                        child.search_nodes(name=original_node.name)
                ) > 0:  # if the child is a root in a tree that holds the original child node, then this child must be on the branch of interest
                    hist_2_curr_child = child
                    hist_2_done = False  # should be false for _baseInternal_52
                    break
            if hist_2_done:
                try:
                    hist_2_curr_child = history_2.search_nodes(
                        name=original_node.name.rstrip())[0]
                except:
                    # NOTE(review): bare except; the two locals below are
                    # never read (presumably left for a debugger) and the
                    # process exits without any message.
                    name = original_node.name.rstrip()
                    original_children = original_node.get_children()
                    exit(1)
            # set once here; it is not updated inside the loop below
            hist_2_current_label = hist_2_curr_child.label
            original_dist = original_node.dist
            # --- interleave the inserted nodes of both histories ---
            while not hist_1_done or not hist_2_done:
                if hist_1_curr_child.name == hist_2_curr_child.name and hist_1_curr_child.name == original_node.name:  # both have reached the original child
                    print(
                        "error! original child wasn't recognized in the end of the loop"
                    )
                    exit(1)
                # default for a finished history: the branch length of the
                # original child in that history
                hist_1_dist = history_1.search_nodes(
                    name=original_node.name.rstrip())[0].dist
                hist_2_dist = history_2.search_nodes(
                    name=original_node.name.rstrip())[0].dist
                if not hist_1_done:  # if there is a node closer to the original node in history 1 -> add it to the united tree first
                    hist_1_dist = hist_1_curr_child.get_distance(
                        original_parent.name) - curr_union_parent.get_distance(
                            original_parent.name)
                if not hist_2_done:
                    hist_2_dist = hist_2_curr_child.get_distance(
                        original_parent.name) - curr_union_parent.get_distance(
                            original_parent.name)
                if debug:
                    if not hist_1_done:
                        print("history 1 has current child of ",
                              original_parent.name, ": ",
                              hist_1_curr_child.name, " with label: ",
                              hist_1_current_label,
                              " and distance from parent is: ", hist_1_dist)
                    if not hist_2_done:
                        print("history 2 has current child of ",
                              original_parent.name, ": ",
                              hist_2_curr_child.name, " with label: ",
                              hist_2_current_label,
                              " and distance from parent is: ", hist_2_dist)
                # first, check if now the two current children have the same name, and if this name is in the base tree - exit
                if hist_1_curr_child.name == hist_2_curr_child.name and len(
                        base_tree.search_nodes(
                            name=hist_1_curr_child.name)) > 0:
                    break
                # else, at least one of the histories has more than one step to go before reaching the bottom of the branch
                if hist_1_dist < hist_2_dist:  # add the node from history 1 and travel down to the next node in history 1
                    if debug:
                        print(
                            "adding child from history 1 which precedes to the one from history 2"
                        )
                        print("the label of the added node in history 1 is: ",
                              hist_1_curr_child.label)
                        print(
                            "the label of the added node in history 2 remains like papa: ",
                            hist_2_current_label)
                    curr_union_parent = curr_union_parent.add_child(
                        child=None,
                        name="internal_" + str(union_nodes_number),
                        dist=hist_1_dist,
                        support=None)
                    curr_union_parent.add_feature("history_1_label",
                                                  hist_1_curr_child.label)
                    curr_union_parent.add_feature("history_2_label",
                                                  hist_2_current_label)
                    # step down history 1; reaching the original child or a
                    # multi-child node ends this history's walk
                    hist_1_parent = hist_1_curr_child
                    if len(hist_1_parent.children) == 1:
                        hist_1_curr_child = hist_1_parent.children[0]
                        if hist_1_curr_child.name == original_node.name:
                            hist_1_done = True
                    else:  # two children only occur when reaching a junction from the base tree
                        hist_1_done = True
                    if debug:
                        if hist_1_done:
                            print(
                                "history 1 on the handled branch is complete")
                            # NOTE(review): this continue runs only in debug
                            # mode and skips the counter increment below, so
                            # inserted-node names can differ between debug
                            # and non-debug runs - confirm intent.
                            continue
                        else:
                            print(
                                "history 1 on the handled branch isn't complete yet"
                            )
                else:  # add the node from history 2 and travel down to the next node in history 2
                    if debug:
                        print(
                            "adding child from history 2 which precedes to the one from history 1"
                        )
                        print("the label of the added node in history 2 is: ",
                              hist_2_curr_child.label)
                        print(
                            "the label of the added node in history 1 remains like papa: ",
                            hist_1_current_label)
                    curr_union_parent = curr_union_parent.add_child(
                        child=None,
                        name="internal_" + str(union_nodes_number),
                        dist=hist_2_dist)  # added as a new branch
                    curr_union_parent.add_feature("history_1_label",
                                                  hist_1_current_label)
                    curr_union_parent.add_feature("history_2_label",
                                                  hist_2_curr_child.label)
                    # step down history 2, same rule as for history 1
                    hist_2_parent = hist_2_curr_child
                    if len(hist_2_parent.children) == 1:
                        hist_2_curr_child = hist_2_parent.children[0]
                        if hist_2_curr_child.name == original_node.name:
                            hist_2_done = True
                    else:
                        hist_2_done = True
                    if debug:
                        if hist_2_done:
                            print(
                                "history 2 on the handled branch is complete")
                            # NOTE(review): same debug-only continue as in
                            # the history 1 branch above.
                            continue
                        else:
                            print(
                                "history 2 on the handled branch isn't complete yet"
                            )
                union_nodes_number += 1
            # now add the original node as the child of the current parent
            # residual: the part of the original branch length not yet
            # covered by the inserted nodes
            residual = original_dist - curr_union_parent.get_distance(
                united_tree.search_nodes(
                    name=original_parent.name.rstrip())[0])
            if residual < 0:
                print("error on residual computation for branch leading to ",
                      original_node.name)
                print("residual: ", residual)
                print("original_dist: ", original_dist)
                print(
                    "curr_union_parent.get_distance(united_tree.search_nodes(name=original_parent.name.rstrip())[0]): ",
                    curr_union_parent.get_distance(
                        united_tree.search_nodes(
                            name=original_parent.name.rstrip())[0]))
                exit(1)
            curr_union_parent = curr_union_parent.add_child(
                child=None, name=original_node.name, dist=residual)
            # NOTE(review): the labels stored on the original child are
            # looked up under the PARENT's name in both histories - confirm
            # this is intended rather than original_node.name.
            curr_union_parent.add_feature(
                "history_1_label",
                history_1.search_nodes(
                    name=original_parent.name.rstrip())[0].label)
            curr_union_parent.add_feature(
                "history_2_label",
                history_2.search_nodes(
                    name=original_parent.name.rstrip())[0].label)
    if debug:
        # dump every node of the finished united tree
        for node in united_tree.traverse("postorder"):
            print("node=", node.name)
            print("label in hist1=", node.history_1_label)
            print("label in hist2=", node.history_2_label)
            print("branch length=", node.dist)
    return united_tree
class um_tree:
    """Tree wrapper that assigns node ages and derives waiting times / species groups.

    The tree is loaded with ``Tree(tree, format=1)``, polytomies are resolved
    with near-zero branch lengths, and every descendant is annotated with an
    ``age`` feature equal to its distance from the root.  Internal nodes
    additionally get a sequential ``id`` feature.

    Attributes:
        tree: the loaded tree (root node).
        tree2: file object opened on ``tree``; never closed by this class.
        PATH: base directory used by ``print_species`` for its output file.
        nodes: internal (non-root) nodes, plus the farthest leaf when it is a
            leaf, sorted by ascending ``age``.
        species_list: list of lists of leaf nodes, filled by
            ``get_waiting_times``.
        coa_roots: nodes recorded as coalescent roots by
            ``get_waiting_times``; ``None`` until that method has run.
    """

    def __init__(self, tree, PATH):
        """Load ``tree`` and annotate its nodes with ``age`` and ``id`` features.

        Args:
            tree: passed both to ``Tree(tree, format=1)`` and to ``open(tree)``,
                so it has to be usable as a file path.
            PATH: base directory under which ``print_species`` writes
                ``GMYC/GMYC_MOTU.txt``.
        """
        self.tree = Tree(tree, format=1)
        # NOTE(review): this handle is never closed and nothing in this class
        # reads from it; it is kept only because external code may rely on the
        # attribute (e.g. ``self.tree2.name``) -- confirm and remove if unused.
        self.tree2 = open(tree)
        self.tree.resolve_polytomy(default_dist=0.000001, recursive=True)
        self.tree.dist = 0
        self.tree.add_feature("age", 0)
        self.PATH = PATH

        internal_nodes = []
        next_id = 0
        for n in self.tree.get_descendants():
            n.add_feature("age", n.get_distance(self.tree))
            if not n.is_leaf():
                n.add_feature("id", next_id)
                next_id += 1
                internal_nodes.append(n)
        self.nodes = internal_nodes

        # The node farthest from the root gets its own id and, when it is a
        # leaf, is added to the node list so it takes part in the age ordering.
        one_leaf = self.tree.get_farthest_node()[0]
        one_leaf.add_feature("id", next_id + 1)
        if one_leaf.is_leaf():
            self.nodes.append(one_leaf)

        self.nodes.sort(key=self.__compare_node)
        self.species_list = []
        self.coa_roots = None

    def __compare_node(self, node):
        """Sort key: the ``age`` feature of ``node``."""
        return node.age

    @staticmethod
    def _find_coa_root(node, coa_roots):
        """Return the first entry of ``coa_roots`` whose id matches an ancestor of ``node``.

        Ancestors are visited from ``node.up`` upwards, stopping before the
        tree root.  Returns ``None`` when no ancestor id matches.
        """
        ancestor = node.up
        while not ancestor.is_root():
            for coa_r in coa_roots:
                if coa_r.id == ancestor.id:
                    return coa_r
            ancestor = ancestor.up
        return None

    @staticmethod
    def _make_line_style(fgcolor, vt_color, hz_color):
        """Build a ``NodeStyle`` with size 0 and solid, width-2 lines in the given colors."""
        style = NodeStyle()
        style["fgcolor"] = fgcolor
        style["vt_line_color"] = vt_color
        style["hz_line_color"] = hz_color
        style["vt_line_width"] = 2
        style["hz_line_width"] = 2
        style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
        style["hz_line_type"] = 0
        style["size"] = 0
        return style

    def get_waiting_times(self, threshold_node=None, threshold_node_idx=0):
        """Walk ``self.nodes`` in age order and build the list of waiting times.

        Nodes visited before the threshold node each increase the running line
        count; the threshold node and every later node are handled as
        coalescent events.  As a side effect ``self.coa_roots`` and
        ``self.species_list`` are rebuilt: one leaf list per coalescent root,
        plus a singleton list for every leaf not under any coalescent root.

        Args:
            threshold_node: node whose ``id`` marks the switch point.  When
                ``None``, ``self.nodes[threshold_node_idx]`` is used.
            threshold_node_idx: index into ``self.nodes`` used only when
                ``threshold_node`` is ``None``.

        Returns:
            ``(wt_list, num_spe)``: the ``waiting_time`` objects whose length
            exceeds 1e-8, and the line count recorded when the threshold node
            was reached (``-1`` if it never was).
        """
        wt_list = []
        reach_t = False
        curr_age = 0.0
        curr_spe = 2
        curr_num_coa = 0
        coa_roots = []
        num_spe = -1
        if threshold_node is None:
            threshold_node = self.nodes[threshold_node_idx]
        last_coa_num = 0
        tcnt = 0

        for node in self.nodes:
            times = node.age - curr_age
            if not times >= 0:
                continue
            curr_age = node.age
            assert curr_spe >= 0

            if reach_t:
                if tcnt == 0:
                    last_coa_num = 2
                coa_root = self._find_coa_root(node, coa_roots)
                wt = waiting_time(length=times,
                                  num_coas=curr_num_coa,
                                  num_lines=curr_spe)
                for coa_r in coa_roots:
                    coa = coalescent(num_individual=coa_r.curr_n)
                    wt.coas.add_coalescent(coa)
                wt.coas.coas_idx = last_coa_num
                wt.num_curr_coa = last_coa_num
                if coa_root is None:  # here can be modified to use multiple T
                    # No recorded coalescent root above this node: it starts
                    # a new coalescent group of its own.
                    curr_spe = curr_spe - 1
                    curr_num_coa = curr_num_coa + 1
                    node.add_feature("curr_n", 2)
                    coa_roots.append(node)
                    last_coa_num = 2
                else:
                    # The node falls under an existing coalescent root, which
                    # therefore gains one more individual.
                    curr_n = coa_root.curr_n
                    coa_root.add_feature("curr_n", curr_n + 1)
                    last_coa_num = curr_n + 1
                tcnt = tcnt + 1
            elif node.id == threshold_node.id:
                reach_t = True
                tcnt = 0
                wt = waiting_time(length=times, num_coas=0, num_lines=curr_spe)
                num_spe = curr_spe
                curr_spe = curr_spe - 1
                curr_num_coa = 2
                node.add_feature("curr_n", 2)
                coa_roots.append(node)
            else:
                wt = waiting_time(length=times, num_coas=0, num_lines=curr_spe)
                curr_spe = curr_spe + 1

            # Intervals of (near-)zero length are not reported.
            if times > 0.00000001:
                wt_list.append(wt)

        for wt in wt_list:
            wt.count_num_lines()

        self.species_list = []
        all_coa_leaves = []
        self.coa_roots = coa_roots
        for coa_r in coa_roots:
            leaves = coa_r.get_leaves()
            all_coa_leaves.extend(leaves)
            self.species_list.append(leaves)
        for leaf in self.tree.get_leaves():
            if leaf not in all_coa_leaves:
                self.species_list.append([leaf])
        return wt_list, num_spe

    def show(self, wt_list):
        """Print every waiting time in ``wt_list`` preceded by a numbered header."""
        for cnt, wt in enumerate(wt_list, 1):
            print("Waitting interval " + repr(cnt))
            print(wt)

    def get_species(self):
        """Return taxon names per species and restyle the tree for drawing.

        Every node first receives a common base style; each node in
        ``self.coa_roots`` then gets a second style and all of its descendants
        a third one.  ``self.coa_roots`` is ``None`` until
        ``get_waiting_times`` has been called, so call that method first.

        Returns:
            ``([all_taxa_name], sp_list)``: a one-element list holding the
            names of all leaves of the tree, and one list of taxon names per
            entry of ``self.species_list``.
        """
        sp_list = [[taxa.name for taxa in sp] for sp in self.species_list]
        all_taxa_name = [leaf.name for leaf in self.tree.get_leaves()]

        style0 = self._make_line_style("#000000", "#0000aa", "#0000aa")
        for node in self.tree.get_descendants():
            node.set_style(style0)
            node.img_style["size"] = 0
        self.tree.set_style(style0)
        self.tree.img_style["size"] = 0

        style1 = self._make_line_style("#000000", "#ff0000", "#0000aa")
        style2 = self._make_line_style("#0f0f0f", "#ff0000", "#ff0000")
        for node in self.coa_roots:
            node.set_style(style1)
            node.img_style["size"] = 0
            for des in node.get_descendants():
                des.set_style(style2)
                des.img_style["size"] = 0
        return [all_taxa_name], sp_list

    def print_species(self):
        """Write the species partition to ``<PATH>/GMYC/GMYC_MOTU.txt``.

        For each species a ``Species <n>`` header line is written, followed by
        one line listing its taxon names.  The file is closed when the method
        returns.
        """
        out_path = os.path.join(self.PATH, "GMYC/GMYC_MOTU.txt")
        with open(out_path, "w+") as sp_out:
            for cnt, sp in enumerate(self.species_list, 1):
                sp_out.write("Species " + repr(cnt) + "\n")
                taxas = "".join(taxa.name + ", " for taxa in sp)
                # Only the final space is stripped, so the line still ends
                # with a comma; kept as-is because the file format may be
                # parsed elsewhere.
                sp_out.write(" " + taxas[:-1] + "\n")

    def output_species(self, taxa_order=None):
        """Return the species partition as a vector aligned with ``taxa_order``.

        Args:
            taxa_order: list of taxon names; the partition is output in the
                same order.  When omitted or empty, the leaf names of the
                tree are used.

        Returns:
            ``(taxa_order, partition)`` where ``partition[i]`` is the 1-based
            species number of ``taxa_order[i]``, or ``(None, None)`` when the
            number of names does not match the number of taxa in
            ``self.species_list``.

        Raises:
            ValueError: if a taxon name is missing from ``taxa_order``.
        """
        if taxa_order is None or len(taxa_order) == 0:
            taxa_order = self.tree.get_leaf_names()
        num_taxa = 0
        for sp in self.species_list:
            for _ in sp:
                num_taxa = num_taxa + 1
        if len(taxa_order) != num_taxa:
            print("error error, taxa_order != num_taxa!")
            return None, None
        partion = [-1] * num_taxa
        for cnt, sp in enumerate(self.species_list, 1):
            for taxa in sp:
                partion[taxa_order.index(taxa.name)] = cnt
        return taxa_order, partion

    def num_lineages(self, wt_list):
        """Plot the number of lineages against cumulative waiting time.

        Each waiting time contributes one point: x is the sum of the lengths
        of the preceding waiting times, y is ``wt.get_num_branches()``.  The
        figure is saved as ``Time_Lines`` and then shown.
        """
        nl_list = []
        times = []
        last_time = 0.0
        for wt in wt_list:
            nl_list.append(wt.get_num_branches())
            times.append(last_time)
            last_time = wt.length + last_time
        plt.plot(times, nl_list)
        plt.ylabel("Number of lineages")
        plt.xlabel("Time")
        plt.savefig("Time_Lines")
        plt.show()