def next_proposal(self):
    """Propose a new locus tree and return its LCA reconciliations.

    Advances ``self.locus_search`` by one proposal, copies the proposed
    tree, passes the copy through ``phylo.recon_root`` (``newCopy=False``)
    and reconciles it against the species tree held by ``self.reconer``.

    Returns a dict with the keys "coal_recon", "locus_tree", "locus_recon",
    "locus_events" and "daughters" (currently always an empty set).
    """
    search = self.locus_search
    search.propose()

    # locus tree: only the LCA reconciliation is proposed
    # (TODO: propose other reconciliations)
    proposed = search.get_tree().copy()
    reconer = self.reconer
    phylo.recon_root(proposed, reconer.stree, reconer.gene2species,
                     newCopy=False)
    lrecon = phylo.reconcile(proposed, reconer.stree, reconer.gene2species)
    levents = phylo.label_events(proposed, lrecon)

    # coal tree: likewise LCA only (TODO: propose others)
    crecon = phylo.reconcile(reconer.coal_tree, proposed, lambda x: x)

    return {
        "coal_recon": crecon,
        "locus_tree": proposed,
        "locus_recon": lrecon,
        "locus_events": levents,
        # daughters are not proposed yet (TODO)
        "daughters": set(),
    }
def compute_cost(self, gtree):
    """Return the rf cost of ``gtree`` against ``self.stree``.

    The gene tree is reconciled to the species tree with
    ``phylo.reconcile``; the cost is the number of (gene node, species node)
    pairs in that reconciliation whose names differ.

    gtree -- gene tree to score

    Returns an int.
    """
    recon = phylo.reconcile(gtree, self.stree, self.gene2species)

    # Names are compared by value.  The previous identity test (``is``)
    # only worked when both names happened to be the same object (interned
    # strings, small cached ints) and miscounted equal names otherwise.
    #
    # TODO: an earlier draft intended to subtract 2 for every pair of
    # entries that are inverses of each other; that was never enabled.
    return sum(1 for gnode, snode in recon.items()
               if gnode.name != snode.name)
def count_dup_loss_coal_tree(coal_tree, extra, stree, gene2species,
                             implied=True, locus_mpr=True):
    """Count dups, losses, extra coalescent lineages and appearances.

    coal_tree    -- coalescent tree
    extra        -- dict providing "locus_tree" and "coal_recon"
    stree        -- species tree
    gene2species -- mapping function from gene name to species name
    implied      -- if true, implied speciation nodes are added to the locus
                    tree while lineages are counted and removed afterwards
    locus_mpr    -- must be true; anything else raises
                    Exception("not implemented")

    Side effect: the per-branch extra-lineage count is added to the
    ``data['coal']`` entry of the species node each locus node maps to.

    Returns (ndup, nloss, ncoal, nappear).
    """
    if not locus_mpr:
        raise Exception("not implemented")

    # TODO: use the stored locus_recon and locus_events rather than MPR
    # (currently, phylo.py reconciliation functions fail for non-MPR)
    ltree = extra["locus_tree"]
    lrecon = phylo.reconcile(ltree, stree, gene2species)
    levents = phylo.label_events(ltree, lrecon)
    crecon = extra["coal_recon"]

    ndup, nloss, nappear = phylo.count_dup_loss_tree(
        ltree, stree, gene2species, lrecon, levents)

    # implied speciation nodes go in only AFTER dups and losses are counted,
    # since they affect loss inference
    added = None
    if implied:
        added = phylo.add_implied_spec_nodes(ltree, stree, lrecon, levents)

    # every lineage beyond the first at the top of a branch is one extra coal
    ncoal = 0
    per_branch = coal.count_lineages_per_branch(coal_tree, crecon, ltree)
    for lnode, (_bottom, top) in per_branch.iteritems():
        surplus = max(top - 1, 0)
        lrecon[lnode].data['coal'] += surplus
        ncoal += surplus

    if implied:
        phylo.remove_implied_spec_nodes(ltree, added, lrecon, levents)

    return ndup, nloss, ncoal, nappear
def optimize_model(self, gtree, stree, gene2species):
    """Validate the inputs and, once per instance, print a debug dump.

    Delegates to ``CostModel.optimize_model`` first, then checks that both
    trees are rooted and binary and that the gene tree can be reconciled to
    the species tree.  The first time this runs with ``self.printed`` false
    it prints the gene tree, ``self.stree`` and the reconciliation, then
    sets ``self.printed``.

    gtree        -- gene tree
    stree        -- species tree
    gene2species -- mapping function from gene name to species name

    Raises Exception if a tree is not rooted and binary, or if the
    reconciliation fails.
    """
    CostModel.optimize_model(self, gtree, stree, gene2species)

    # ensure gtree and stree are both rooted and binary
    if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
        raise Exception("gene tree must be rooted and binary")
    if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
        raise Exception("species tree must be rooted and binary")

    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not rewritten as mapping errors
    try:
        recon = phylo.reconcile(gtree, stree, gene2species)
    except Exception:
        raise Exception("problem mapping gene tree to species tree")

    if not self.printed:
        import pprint

        # one buffer per tree: a shared buffer made the "spec tree" section
        # print the gene tree a second time
        gene_out = StringIO.StringIO()
        treelib.draw_tree(gtree, out=gene_out, minlen=5, maxlen=5)
        print("gene tree:\n")
        print(gene_out.getvalue())

        # NOTE(review): draws self.stree, presumably set from ``stree`` by
        # CostModel.optimize_model -- confirm
        spec_out = StringIO.StringIO()
        treelib.draw_tree(self.stree, out=spec_out, minlen=5, maxlen=5)
        print("spec tree:\n")
        print(spec_out.getvalue())

        pprint.pprint(recon)
        self.printed = True
def _recon_lca(self, locus_tree):
    """Build the LCA (MPR) reconciliation bundle for ``locus_tree``.

    Reconciles the locus tree to the species tree and the coal tree to the
    locus tree, proposes daughters, and wraps everything in a
    ``phyloDLC.Recon``.
    """
    # locus tree -> species tree
    lrecon = phylo.reconcile(locus_tree, self._stree, self._gene2species)
    levents = phylo.label_events(locus_tree, lrecon)

    # coal tree -> locus tree (nodes are mapped by the identity function)
    crecon = phylo.reconcile(self._coal_tree, locus_tree, lambda x: x)

    daughters = self._propose_daughters(
        self._coal_tree, crecon, locus_tree, lrecon, levents)

    return phyloDLC.Recon(crecon, locus_tree, lrecon, levents, daughters)
def _recon_lca(self, locus_tree):
    """Return a ``phyloDLC.Recon`` made of LCA (MPR) reconciliations.

    locus_tree -- locus tree to reconcile against ``self._stree`` and to
                  which ``self._coal_tree`` is reconciled
    """
    # LCA (MPR) reconciliation of the locus tree within the species tree
    recon_to_stree = phylo.reconcile(
        locus_tree, self._stree, self._gene2species)
    events_in_stree = phylo.label_events(locus_tree, recon_to_stree)

    # LCA (MPR) reconciliation of the coal tree within the locus tree
    recon_to_locus = phylo.reconcile(
        self._coal_tree, locus_tree, lambda x: x)

    # daughters are proposed from all of the above
    proposed_daughters = self._propose_daughters(
        self._coal_tree, recon_to_locus,
        locus_tree, recon_to_stree, events_in_stree)

    return phyloDLC.Recon(recon_to_locus, locus_tree, recon_to_stree,
                          events_in_stree, proposed_daughters)
def prescreen(self, tree):
    """Score a coal tree against ``self.locus_tree``.

    ``tree`` is the coal tree.  Its LCA reconciliation to the locus tree is
    computed and the pair (topology + reconciliation) is passed to
    ``reconprob.prob_locus_coal_recon_topology`` together with
    ``self.popsizes`` and ``self.daughters``; that result is returned.
    """
    lca_recon = phylo.reconcile(tree, self.locus_tree)

    # log probability of the reconciled coalescent tree against the locus
    # tree under the coalescent model
    logp = reconprob.prob_locus_coal_recon_topology(
        tree, lca_recon, self.locus_tree, self.popsizes, self.daughters)
    return logp
def compute_cost(self, gtree):
    """Return the weighted duplication-loss cost of ``gtree``.

    The gene tree is reconciled to ``self.stree``; duplications are weighted
    by ``self.dupcost`` and losses by ``self.losscost``.  A count is skipped
    entirely when its weight is 0.
    """
    recon = phylo.reconcile(gtree, self.stree, self.gene2species)
    events = phylo.label_events(gtree, recon)

    if self.dupcost != 0:
        dup_term = phylo.count_dup(gtree, events) * self.dupcost
    else:
        dup_term = 0

    if self.losscost != 0:
        loss_term = phylo.count_loss(gtree, self.stree, recon) * self.losscost
    else:
        loss_term = 0

    return dup_term + loss_term
def _compute_duplosscost(self, ltree): """Returns dup/loss cost from locus tree to species tree""" cost = 0 if self.dupcost > 0 or self.losscost > 0: recon = phylo.reconcile(ltree, self.stree, self.gene2species) events = phylo.label_events(ltree, recon) if self.dupcost != 0: cost += phylo.count_dup(ltree, events) * self.dupcost if self.losscost != 0: cost += phylo.count_loss(ltree, self.stree, recon) * self.losscost return cost
def _compute_coalcost(self, gtree, ltree): """Returns deep coalescent cost from coalescent tree (gene tree) to locus tree Note: uses Zhang (RECOMB 2000) result that C = L - 2*D """ cost = 0 if self.coalcost > 0: recon = phylo.reconcile(gtree, ltree) events = phylo.label_events(gtree, recon) cost = (phylo.count_loss(gtree, ltree, recon) - 2*phylo.count_dup(gtree, events)) * self.coalcost return cost
def prescreen(self, tree):
    """Return ``duploss.prob_dup_loss`` for ``tree`` under its LCA recon.

    The tree is reconciled to ``self.stree``, events are labeled, and the
    result is evaluated with ``self.duprate`` and ``self.lossrate``.
    """
    lca_recon = phylo.reconcile(tree, self.stree, self.gene2species)
    lca_events = phylo.label_events(tree, lca_recon)

    score = duploss.prob_dup_loss(
        tree, self.stree, lca_recon, lca_events,
        self.duprate, self.lossrate)
    return score
def _recon_lca(self, coal_tree):
    """Return a ``Recon`` pairing ``coal_tree`` with its LCA coal recon.

    Also (re)creates ``self._coal_recon_enum`` from ``phylo.enum_recon``,
    seeded with the LCA reconciliation and limited to
    ``self._coal_recon_depth``.  The locus-side pieces (tree, recon, events,
    daughters) are taken unchanged from the instance.
    """
    lca_recon = phylo.reconcile(coal_tree, self._locus_tree, lambda x: x)

    # the enumerator is only set up here; the reconciliation space itself
    # is not explored at this point
    self._coal_recon_enum = phylo.enum_recon(
        coal_tree, self._locus_tree,
        recon=lca_recon, depth=self._coal_recon_depth)

    return Recon(coal_tree, lca_recon, self._locus_tree,
                 self._locus_recon, self._locus_events, self._daughters)
def optimize_model(self, gtree, stree, gene2species):
    """Validate that the gene tree can be scored against the species tree.

    Delegates to ``CostModel.optimize_model`` first, then checks that both
    trees are rooted and binary and that the gene tree can be reconciled to
    the species tree.

    gtree        -- gene tree
    stree        -- species tree
    gene2species -- mapping function from gene name to species name

    Raises Exception if a tree is not rooted and binary, or if the
    reconciliation fails.
    """
    CostModel.optimize_model(self, gtree, stree, gene2species)

    # ensure gtree and stree are both rooted and binary
    if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
        raise Exception("gene tree must be rooted and binary")
    if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
        raise Exception("species tree must be rooted and binary")

    # Trial reconciliation; the result itself is not needed.
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not rewritten as mapping errors.
    try:
        phylo.reconcile(gtree, stree, gene2species)
    except Exception:
        raise Exception("problem mapping gene tree to species tree")
def setup_recon(self, recon=None):
    """Set ``self.recon``, ``self.events`` and ``self.losses``.

    recon -- reconciliation to use.  When it is None and both ``self.stree``
             and ``self.gene2species`` are set, a default reconciliation is
             computed with ``phylo.reconcile``; otherwise ``recon`` is
             stored as given.

    ``self.events`` and ``self.losses`` are derived from the stored
    reconciliation, or set to None when it is empty or None.
    """
    # construct default reconciliation
    # (identity test: None is a singleton, and ``==`` would invoke any
    # custom __eq__ on the reconciliation object)
    if recon is None and self.stree and self.gene2species:
        self.recon = phylo.reconcile(self.tree, self.stree,
                                     self.gene2species)
    else:
        self.recon = recon

    # construct events
    if self.recon:
        self.events = phylo.label_events(self.tree, self.recon)
        self.losses = phylo.find_loss(self.tree, self.stree, self.recon)
    else:
        self.events = None
        self.losses = None
def test_birthDeathPrior_large(self): """test birth death prior for large trees""" l = 0.000732 u = 0.000859 maxdoom = 20 stree = treelib.read_tree("test/data/fungi.stree") gene2species = phylo.read_gene2species("test/data/fungi.smap") tree = treelib.read_tree("test/data/fungi/10169/10169.tree") recon = phylo.reconcile(tree, stree, gene2species) p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom) print p self.assert_(p != -INF)
def _recon_lca(self, locus_tree):
    """Return a ``Recon`` of LCA reconciliations for ``locus_tree``.

    Besides building the result, this (re)creates ``self._coal_recon_enum``
    from ``phylo.enum_recon``, seeded with the LCA coal reconciliation and
    limited to ``self._coal_recon_depth``.
    """
    # locus tree -> species tree (LCA)
    lrecon = phylo.reconcile(locus_tree, self._stree, self._gene2species)
    levents = phylo.label_events(locus_tree, lrecon)

    # coal tree -> locus tree (LCA)
    crecon = phylo.reconcile(self._coal_tree, locus_tree, lambda x: x)

    # daughters (TODO)
    daughters = self._propose_daughters(
        self._coal_tree, crecon, locus_tree, lrecon, levents)

    # enumerator over coal reconciliations for this locus tree
    self._coal_recon_enum = phylo.enum_recon(
        self._coal_tree, locus_tree,
        recon=crecon, depth=self._coal_recon_depth)

    return Recon(crecon, locus_tree, lrecon, levents, daughters)
def prescreen(self, tree):
    """Return the weighted dup/loss cost of ``tree`` under its LCA recon.

    Duplications are weighted by ``self.dupcost`` and losses by
    ``self.losscost``; a count is skipped when its weight is 0.
    """
    recon = phylo.reconcile(tree, self.stree, self.gene2species)
    events = phylo.label_events(tree, recon)

    total = 0
    if self.dupcost != 0:
        total += phylo.count_dup(tree, events) * self.dupcost
    if self.losscost != 0:
        total += phylo.count_loss(tree, self.stree, recon) * self.losscost
    return total
def compute_cost(self, gtree):
    """
    Returns -log [P(topology) + P(branch)],
    min cost = min neg log prob = max log prob = max prob

    The reconciliation and events are computed first, then branch lengths
    of ``gtree`` are fitted by ``spidir.find_ml_branch_lengths_hky`` before
    the two priors are evaluated.
    """
    stree = self.stree
    recon = phylo.reconcile(gtree, stree, self.gene2species)
    events = phylo.label_events(gtree, recon)

    # optimize branch lengths (must precede the branch prior)
    spidir.find_ml_branch_lengths_hky(
        gtree, self.align, self.bgfreq, self.kappa,
        maxiter=10, parsinit=False)

    branch_logp = spidir.branch_prior(
        gtree, stree, recon, events, self.params,
        self.duprate, self.lossrate, self.pretime)
    topology_logp = spidir.calc_birth_death_prior(
        gtree, stree, recon, self.duprate, self.lossrate, events)

    return -(topology_logp + branch_logp)
def optimize_model(self, gtree, stree, gene2species):
    """Validate the cost settings and the input trees.

    Delegates to ``CostModel.optimize_model`` first.  Negative
    ``self.dupcost`` / ``self.losscost`` are reported through
    ``self.parser.error``.  Both trees must be rooted and binary, and the
    gene tree must reconcile to the species tree.

    gtree        -- gene tree
    stree        -- species tree
    gene2species -- mapping function from gene name to species name

    Raises Exception if a tree is not rooted and binary, or if the
    reconciliation fails.
    """
    CostModel.optimize_model(self, gtree, stree, gene2species)

    if self.dupcost < 0:
        self.parser.error("-D/--dupcost must be >= 0")
    if self.losscost < 0:
        self.parser.error("-L/--losscost must be >= 0")

    # ensure gtree and stree are both rooted and binary
    if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
        raise Exception("gene tree must be rooted and binary")
    if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
        raise Exception("species tree must be rooted and binary")

    # Trial reconciliation; the result itself is not needed.
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not rewritten as mapping errors.
    try:
        phylo.reconcile(gtree, stree, gene2species)
    except Exception:
        raise Exception("problem mapping gene tree to species tree")
def test_birthDeathPriorFull(self): """test birth death prior with implied speciation nodes""" l = 2 u = .5 maxdoom = 10 def gene2species(gene): return gene[:1].upper() stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);") tree = treelib.parse_newick("((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));") # test gene reconciling within species tree recon = phylo.reconcile(tree, stree, gene2species) p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom) p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom) print "prior", p, p2 fequal(p, p2)
def test_birthDeathPriorFull(self): """test birth death prior with implied speciation nodes""" l = 2 u = .5 maxdoom = 10 def gene2species(gene): return gene[:1].upper() stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);") tree = treelib.parse_newick( "((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));") # test gene reconciling within species tree recon = phylo.reconcile(tree, stree, gene2species) p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom) p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom) print "prior", p, p2 fequal(p, p2)
def count_dup_loss_coal_tree(coal_tree, extra, stree, gene2species,
                             implied=True, locus_mpr=True):
    """Return (ndup, nloss, ncoal, nappear) for a reconciled coal tree.

    ``extra`` supplies the locus tree ("locus_tree") and the coal
    reconciliation ("coal_recon").  The locus tree is always reconciled to
    ``stree`` by MPR; ``locus_mpr=False`` raises
    Exception("not implemented").

    When ``implied`` is true, implied speciation nodes are added to the
    locus tree for the lineage count and removed again before returning.

    Side effect: ``data['coal']`` of the species node under each locus node
    is incremented by that branch's extra-lineage count.
    """
    if not locus_mpr:
        raise Exception("not implemented")

    # TODO: use locus_recon and locus_events rather than MPR
    # (currently, phylo.py reconciliation functions fail for non-MPR)
    locus = extra["locus_tree"]
    mpr = phylo.reconcile(locus, stree, gene2species)
    mpr_events = phylo.label_events(locus, mpr)
    coal_map = extra["coal_recon"]

    counts = phylo.count_dup_loss_tree(
        locus, stree, gene2species, mpr, mpr_events)
    ndup, nloss, nappear = counts

    # this must happen AFTER counting dups and losses since it affects
    # loss inference
    if implied:
        added_nodes = phylo.add_implied_spec_nodes(
            locus, stree, mpr, mpr_events)

    # count coals: lineages beyond the first at the top of each branch
    ncoal = 0
    lineages = coal.count_lineages_per_branch(coal_tree, coal_map, locus)
    for branch, pair in lineages.iteritems():
        n_top = pair[1]
        n_extra = max(n_top - 1, 0)
        mpr[branch].data['coal'] += n_extra
        ncoal += n_extra

    if implied:
        phylo.remove_implied_spec_nodes(locus, added_nodes, mpr, mpr_events)

    return ndup, nloss, ncoal, nappear
def __init__(self, stree, locus_tree, daughters, gene2species,
             search=phylo.TreeSearchNni, num_coal_recons=1):
    """Set up a coal-tree search against a fixed locus tree.

    stree           -- species tree
    locus_tree      -- locus tree (kept fixed)
    daughters       -- daughters, stored as given
    gene2species    -- mapping function, used only for the locus recon here
    search          -- tree search class, instantiated with ``None``
    num_coal_recons -- stored as ``self._num_coal_recons``
    """
    self._stree = stree
    self._locus_tree = locus_tree
    self._daughters = daughters
    self._coal_search = search(None)

    # locus recon (static) -- the LCA reconciliation is computed once
    lca_recon = phylo.reconcile(locus_tree, stree, gene2species)
    self._locus_recon = lca_recon
    self._locus_events = phylo.label_events(locus_tree, lca_recon)

    # coal recon search state
    self._num_coal_recons = num_coal_recons
    self._i_coal_recons = 1
    self._coal_recon_enum = None
    self._coal_recon_depth = 2
    self._accept_coal = False

    self._recon = None
def sample_locus_tree_hem(stree, popsize, duprate, lossrate,
                          freq=1.0, freqdup=.05, freqloss=.05,
                          steptime=1e6, keep_extinct=False):
    """
    Sample a locus tree with birth-death and hemiplasy

    Runs a relaxed fixation assumption simulation on a species tree.
    Some simplifying assumptions are made for this version of the simulator:
      1) All branches of the species tree have the same population size
      2) All branches of the species tree have the same duplication rate
      3) All branches of the species tree have the same loss rate
      4) All branches of the species tree have the same duplication effect
      5) All branches of the species tree have the same loss effect
      6) All branches of the species tree have the same time between forced
         frequency changes
      7) There is a single allele at the root of the species tree.

    A duplication/loss effect is the change in frequency for either event.
    Appropriate default values for these effects may need to be determined.

    Future iterations should remove these assumptions by incorporating
    dictionaries to allow values for each branch.

    parameters:
      stree        -- the initial species tree; it may be mutated by the
                      simulator
      popsize      -- the population size (assmpt. 1)
      duprate      -- the duplication rate (in events/myr/indiv(?);
                      assmpt. 2)
      lossrate     -- the loss rate (in events/myr/indiv(?); assmpt. 3)
      freq         -- the allele frequency (assmpt. 7)
      freqdup      -- the duplication effect (assmpt. 4)
      freqloss     -- the loss effect (assmpt. 5)
      steptime     -- the maximum time between frequency changes (assmpt. 6)
      keep_extinct -- if true, a copy of the full locus tree (taken before
                      extinct lineages are pruned) is returned in the extras
                      under "full_locus_tree"

    Returns the locus tree, as well as extra information.  When duprate and
    lossrate are both 0 the extras are built here with the keys "recon",
    "events" and "daughters"; otherwise they come from generate_extras().

    Note: the effective rates divide by freqdup and freqloss, so a zero
    effect (which the sanity checks allow) raises ZeroDivisionError once the
    simulation runs.
    """

    ## sanity checks before running the simulator; may be removed or relaxed
    treelib.assert_tree(stree)
    assert popsize > 0
    assert 0.0 <= freq and freq <= 1.0
    assert duprate >= 0.0
    assert lossrate >= 0.0
    assert 0.0 <= freqdup and freqdup <= 1.0
    assert 0.0 <= freqloss and freqloss <= 1.0
    assert steptime > 0.0

    # special case: no duplications or losses
    if duprate == 0.0 and lossrate == 0.0:
        locus_tree = stree.copy()
        recon = phylo.reconcile(locus_tree, stree, lambda x: x)
        events = phylo.label_events(locus_tree, recon)

        return locus_tree, {"recon": recon,
                            "events": events,
                            "daughters": set()}

    def event_is_dup(duprate, fullrate):
        """Decide whether a sampled dup/loss event is a duplication."""
        return random.random() <= duprate / fullrate

    def sim_walk(gtree, snode, gparent, p,
                 s_walk_time=0.0, remaining_steptime=steptime,
                 daughter=False):
        """
        Grow one branch of gtree below gparent inside species branch snode,
        then recurse at the speciation or duplication that ends it.

        eventlog is a log of events along the gtree branch.
        Each entry has the form
          (time_on_branch, event_type, frequency, species_node),
        where
          0.0 <= time_on_branch <= branch_node.dist
          event_type is one of
            {'extinction', 'frequency', 'speciation', 'duplication',
             'loss', 'root', 'gene', 'daughter'},
            where 'root' is a unique event not added during the sim_walk
            process
          frequency is the branch frequency at the event time
          species_node is the name of the node of the species tree branch
            in which the event occurs
        """

        # create new node
        gnode = treelib.TreeNode(gtree.new_name())
        gtree.add_child(gparent, gnode)
        gnode.data = {"freq": p, "log": []}
        eventlog = gnode.data["log"]
        g_walk_time = 0.0
        if daughter:
            eventlog.append((0.0, 'daughter', freqdup, snode.name))

        # grow this branch, determine next event
        while True:
            # Forget the previous step's event.  Without this reset a
            # single "loss" stayed in `event`, and every later plain time
            # step was processed as a further loss.
            event = None

            if p <= 0.0:
                event = "extinct"
                break

            # determine remaining time
            remaining_s_dist = snode.dist - s_walk_time
            remaining_time = min(remaining_steptime, remaining_s_dist)

            # sample next dup/loss event
            eff_duprate = duprate * p / freqdup
            eff_lossrate = lossrate * p / freqloss
            eff_bothrate = eff_duprate + eff_lossrate
            event_time = stats.exponentialvariate(eff_bothrate)

            # advance times
            time_delta = min(event_time, remaining_time)
            s_walk_time += time_delta
            g_walk_time += time_delta

            # sample new frequency
            p = coal.sample_freq_CDF(p, popsize, time_delta)

            # determine event
            if event_time < remaining_time:
                # dup/loss occurs
                if event_is_dup(eff_duprate, eff_bothrate):
                    # dup, stop growing
                    event = "dup"
                    break
                else:
                    # loss, continue growing
                    event = "loss"
            else:
                if remaining_s_dist < remaining_steptime:
                    # we are at a speciation, stop growing
                    event = "spec"
                    break

            # process step
            if event == "loss":
                # LOSS EVENT
                p = max(p - freqloss, 0.0)
                remaining_steptime -= time_delta
                eventlog.append((g_walk_time, 'loss', p, snode.name))
            else:
                # NEXT TIME STEP
                remaining_steptime = steptime
                eventlog.append((g_walk_time, 'frequency', p, snode.name))

        # process event
        if event == "extinct":
            # EXTINCTION EVENT (p <= 0)
            gnode.dist = g_walk_time
            gnode.data['freq'] = 0.0
            eventlog.append((g_walk_time, 'extinction', 0.0, snode.name))

        elif event == "spec":
            # SPECIATION EVENT
            gnode.dist = g_walk_time
            gnode.data['freq'] = p

            # add speciation event to event log and recurse into the
            # child species branches
            if snode.is_leaf():
                eventlog.append((g_walk_time, 'gene', p, snode.name))
            else:
                eventlog.append((g_walk_time, 'speciation', p, snode.name))
                for schild in snode.children:
                    sim_walk(gtree, schild, gnode, p)

        elif event == "dup":
            # DUPLICATION EVENT
            gnode.dist = g_walk_time
            gnode.data['freq'] = p
            eventlog.append((g_walk_time, 'duplication', p, snode.name))

            # recurse on mother
            sim_walk(gtree, snode, gnode, p,
                     s_walk_time=s_walk_time,
                     remaining_steptime=remaining_steptime)

            # recurse on daughter (starts at the duplication effect)
            sim_walk(gtree, snode, gnode, freqdup,
                     s_walk_time=s_walk_time,
                     remaining_steptime=remaining_steptime,
                     daughter=True)

        else:
            raise Exception("unknown event '%s'" % event)

    # create new gene tree and simulate its evolution
    gtree = treelib.Tree()
    gtree.make_root()
    gtree.root.dist = 0.0
    gtree.root.data['freq'] = freq
    gtree.root.data['log'] = [(0.0, 'speciation', freq, stree.root.name)]

    # simulate locus tree
    sim_walk(gtree, stree.root.children[0], gtree.root, freq)
    sim_walk(gtree, stree.root.children[1], gtree.root, freq)

    # remove dead branches and single children
    extant_leaves = [leaf.name for leaf in gtree.leaves()
                     if leaf.data['freq'] > 0.0]

    if keep_extinct:
        full_gtree = gtree.copy()
        # do deep copy of data
        for node in full_gtree:
            node2 = gtree.nodes[node.name]
            for key, val in node2.data.items():
                node.data[key] = copy.copy(val)

    treelib.subtree_by_leaf_names(gtree, extant_leaves, keep_single=True)
    remove_single_children(gtree)

    # determine extra information (recon, events, daughters)
    extras = generate_extras(stree, gtree)

    if keep_extinct:
        extras["full_locus_tree"] = full_gtree

    return gtree, extras
#============================================================================= # parse options conf, args = o.parse_args() #gene2species = phylo.read_gene2species(conf.smap) stree = treelib1.read_tree(conf.stree) tree = treelib1.read_tree(conf.tree) if conf.names: snames = dict(util.read_delim(conf.names)) else: snames = None if conf.brecon: brecon = phylo.read_brecon(conf.brecon, tree, stree) elif conf.recon: recon, events = phylo.read_recon_events(conf.recon, tree, stree) brecon = phylo.recon_events2brecon(recon, events) else: gene2species = phylo.read_gene2species(conf.smap) recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) brecon = phylo.recon_events2brecon(recon, events) phylo.add_implied_spec_nodes_brecon(tree, brecon) transsvg.draw_tree(tree, brecon, stree, filename=conf.output, snames=snames)
def recon_to_labeledrecon(coal_tree, recon, stree, gene2species, name_internal="n", locus_mpr=True):
    """Convert from DLCoal to DLCpar reconciliation model

    If locus_mpr is set (default), use MPR from locus_tree to stree.

    coal_tree     -- coalescent tree; it is copied, the copy becomes the
                     returned gene tree
    recon         -- DLCoal reconciliation providing coal_recon, locus_tree,
                     daughters and (when locus_mpr is false) locus_recon
    stree         -- species tree
    gene2species  -- mapping function, used only for the MPR
    name_internal -- passed to common.rename_nodes for the internal nodes of
                     the gene tree

    Returns (gene_tree, LabeledRecon(species_map, locus_map, order)).
    """

    gene_tree = coal_tree.copy()
    coal_recon = recon.coal_recon
    locus_tree = recon.locus_tree
    if not locus_mpr:
        locus_recon = recon.locus_recon
        daughters = recon.daughters
    else:
        # recompute the locus reconciliation and keep only the daughters
        # whose parent is labeled as a duplication under it
        locus_recon = phylo.reconcile(locus_tree, stree, gene2species)
        locus_events = phylo.label_events(locus_tree, locus_recon)
        daughters = filter(lambda node: locus_events[node.parent] == "dup", recon.daughters)

    #========================================
    # find species map

    # find species tree subtree
    # (rooted at the species node that the coal tree root maps to)
    substree = treelib.subtree(stree, locus_recon[coal_recon[coal_tree.root]])

    # find species map
    # (gene node -> coal node of the same name -> locus node -> species node)
    species_map = {}
    for node in gene_tree:
        cnode = coal_tree.nodes[node.name]
        lnode = coal_recon[cnode]
        snode = locus_recon[lnode]
        species_map[node] = substree[snode.name]

    # add implied speciation and delay nodes to gene tree
    events = phylo.label_events(gene_tree, species_map)
    added_spec, added_dup, added_delay = add_implied_nodes(gene_tree, substree, species_map, events)

    # rename internal nodes
    common.rename_nodes(gene_tree, name_internal)

    #========================================
    # helper functions

    def walk_up(node):
        # nearest node at or above `node` whose name exists in the coal tree
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        return walk_up(node.parent)

    def walk_down(node):
        # nearest node at or below `node` whose name exists in the coal tree;
        # nodes missing from the coal tree must have exactly one child
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        assert len(node.children) == 1, (node.name, node.children)
        return walk_down(node.children[0])

    #========================================
    # find locus map

    # label loci in locus tree
    loci = {}
    next = 1
    # keep track of duplication ages (measured as dist from leaf since root dist may differ in coal and locus trees)
    locus_times = treelib.get_tree_ages(locus_tree)
    dup_times = {}
    dup_snodes = {}
    for lnode in locus_tree.preorder():
        if not lnode.parent:            # root
            loci[lnode] = next
        elif lnode in daughters:        # duplication
            # a daughter starts a new locus; remember the age and species
            # of its parent, keyed by the new locus number
            next += 1
            loci[lnode] = next
            dup_times[next] = locus_times[lnode.parent]
            dup_snodes[next] = locus_recon[lnode.parent]
        else:                           # regular node
            loci[lnode] = loci[lnode.parent]

    # label loci in gene tree
    locus_map = {}
    for node in gene_tree:
        if node.name in coal_tree.nodes:
            # node in coal tree
            cnode = coal_tree.nodes[node.name]
            lnode = coal_recon[cnode]
            locus_map[node] = loci[lnode]
        else:
            # node not in coal tree, so use either parent or child locus
            cnode_up = walk_up(node)
            lnode_up = coal_recon[cnode_up]
            loci_up = loci[lnode_up]

            cnode_down = walk_down(node)
            lnode_down = coal_recon[cnode_down]
            loci_down = loci[lnode_down]

            if loci_up == loci_down:
                # parent and child locus match
                locus_map[node] = loci_up
            else:
                # determine whether to use parent or child locus:
                # the child locus is used when this node's species is the
                # duplication's species or one of its descendants
                snode = species_map[node]
                dup_snode = dup_snodes[loci_down]
                if (snode.name == dup_snode.name) or (snode.name in dup_snode.descendant_names()):
                    locus_map[node] = loci_down
                else:
                    locus_map[node] = loci_up

    #========================================
    # find order

    # find loci that give rise to new loci in each sbranch
    parent_loci = set()
    for node in gene_tree:
        if node.parent:
            locus = locus_map[node]
            plocus = locus_map[node.parent]

            if locus != plocus:
                snode = species_map[node]
                parent_loci.add((snode, plocus))

    # find order (locus tree and coal tree must use same timescale)
    # order[snode][plocus] lists the nodes in species branch snode whose
    # parent lies in locus plocus, for the (snode, plocus) pairs found above
    order = {}
    for node in gene_tree:
        if node.parent:
            snode = species_map[node]
            plocus = locus_map[node.parent]

            if (snode, plocus) in parent_loci:
                order.setdefault(snode, {})
                order[snode].setdefault(plocus, [])
                order[snode][plocus].append(node)

    # find coalescent/duplication times (= negative age) and depths
    coal_times = treelib.get_tree_ages(coal_tree)
    depths = get_tree_depths(gene_tree, distfunc=lambda node: 1)

    def get_time(node):
        # sort key: (negative age, depth in the gene tree)
        if locus_map[node.parent] != locus_map[node]:
            # duplication
            return -dup_times[locus_map[node]], depths[node]
        else:
            # walk up to the nearest node in the coal tree
            # if the node was added (due to spec or dup), it has a single child
            # so it can be placed directly after its parent without affecting the extra lineage count
            if node.name in coal_tree.nodes:
                cnode = coal_tree.nodes[node.name]
            else:
                cnode = walk_up(node)
            return -coal_times[cnode], depths[node]

    # sort by node times
    # 1) larger age (smaller dist from root) are earlier in sort
    # 2) if equal dist, then smaller depths are earlier in sort
    for snode, d in order.iteritems():
        for plocus, lst in d.iteritems():
            lst.sort(key=get_time)

    #========================================
    # put everything together

    return gene_tree, LabeledRecon(species_map, locus_map, order)
#gene2species = phylo.read_gene2species(conf.smap) stree = treelib1.read_tree(conf.stree) tree = treelib1.read_tree(conf.tree) if conf.names: snames = dict(util.read_delim(conf.names)) else: snames = None if conf.brecon: brecon = phylo.read_brecon(conf.brecon, tree, stree) elif conf.recon: recon, events = phylo.read_recon_events(conf.recon, tree, stree) brecon = phylo.recon_events2brecon(recon, events) else: gene2species = phylo.read_gene2species(conf.smap) recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) brecon = phylo.recon_events2brecon(recon, events) phylo.add_implied_spec_nodes_brecon(tree, brecon) transsvg.draw_tree(tree, brecon, stree, filename=conf.output, snames=snames)
def draw_tree(tree, labels={}, xscale=100, yscale=20, canvas=None,
              leafPadding=10, leafFunc=lambda x: str(x.name),
              labelOffset=None, fontSize=10, labelSize=None,
              minlen=1, maxlen=util.INF, filename=sys.stdout,
              rmargin=150, lmargin=10, tmargin=0, bmargin=None,
              colormap=None,
              stree=None,
              layout=None,
              gene2species=None,
              lossColor=(0, 0, 1),
              dupColor=(1, 0, 0),
              eventSize=4,
              legendScale=False, autoclose=None,
              extendRoot=True, labelLeaves=True, drawHoriz=True, nodeSize=0):
    """Draw `tree` onto an SVG canvas and return the canvas.

    labels        -- dict from node name to branch label text (may contain
                     newlines); labels are drawn along the branch
    canvas        -- canvas to draw on; when None a new svg.Svg is opened
                     on `filename`, and `autoclose` then defaults to True
                     (otherwise it defaults to False)
    layout        -- optional precomputed coordinates, node -> (x, y); when
                     None, treelib.layout_tree is used
    colormap      -- optional callable applied to `tree`; when None every
                     node gets color (0, 0, 0)
    stree, gene2species
                  -- when both are given, the tree is reconciled and its
                     events and losses are drawn with draw_events
    legendScale   -- when true a scale is drawn; forced off when the tree
                     has zero total branch length
    labelOffset, labelSize, bmargin
                  -- default to -1, .7 * fontSize and yscale respectively

    Side effect: node.color is set on every node when colormap is None.
    """

    # set defaults
    fontRatio = 8. / 11.

    if labelSize == None:
        labelSize = .7 * fontSize

    if labelOffset == None:
        labelOffset = -1

    if bmargin == None:
        bmargin = yscale

    # a tree with no branch lengths gets no scale and fixed-length branches
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legendScale = False
        minlen = xscale

    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # labels and events are only drawn on horizontal branches
    if len(labels) > 0 or (stree and gene2species):
        drawHoriz = True

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout

    xcoords, ycoords = zip(* coords.values())
    maxwidth = max(xcoords)
    maxheight = max(ycoords) + labelOffset

    # initialize canvas
    if canvas == None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)

        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    # draw tree
    def walk(node):
        # draws the branch leading to `node`, then its label, node marker,
        # leaf text or children
        x, y = coords[node]
        if node.parent:
            parentx, parenty = coords[node.parent]
        else:
            if extendRoot:
                parentx, parenty = 0, y
            else:
                parentx, parenty = x, y     # e.g. no branch

        # draw branch
        if drawHoriz:
            canvas.line(parentx, y, x, y, color=node.color)
        else:
            canvas.line(parentx, parenty, x, y, color=node.color)

        # draw branch labels
        if node.name in labels:
            branchlen = x - parentx
            lines = str(labels[node.name]).split("\n")
            labelwidth = max(map(len, lines))
            # label width is capped at the branch length
            labellen = min(labelwidth * fontRatio * fontSize,
                           max(int(branchlen-1), 0))

            for i, line in enumerate(lines):
                canvas.text(line,
                            parentx + (branchlen - labellen)/2.,
                            y + labelOffset
                            +(-len(lines)+1+i)*(labelSize+1),
                            labelSize)

        # draw nodes
        if nodeSize > 0:
            canvas.circle(x, y, nodeSize, strokeColor=svg.null,
                          fillColor=node.color)

        # draw leaf labels or recur
        if node.is_leaf():
            if labelLeaves:
                canvas.text(leafFunc(node),
                            x + leafPadding, y+fontSize/2., fontSize,
                            fillColor=node.color)
        else:
            if drawHoriz:
                # draw vertical part of branch
                top = coords[node.children[0]][1]
                bot = coords[node.children[-1]][1]
                canvas.line(x, top, x, bot, color=node.color)

            # draw children
            for child in node.children:
                walk(child)

    # everything inside the transform is shifted by the left/top margins
    canvas.beginTransform(("translate", lmargin, tmargin))
    walk(tree.root)

    if stree and gene2species:
        draw_events(canvas, tree, coords, events, losses,
                    lossColor=lossColor, dupColor=dupColor, size=eventSize)
    canvas.endTransform()

    # draw legend
    if legendScale:
        if legendScale == True:
            # automatically choose a scale
            length = maxwidth / float(xscale)
            order = math.floor(math.log10(length))
            length = 10 ** order

        # NOTE(review): `length` is only assigned when legendScale is
        # exactly True; confirm what other truthy values should do
        drawScale(lmargin, tmargin + maxheight + bmargin - fontSize,
                  length, xscale, fontSize, canvas=canvas)

    if autoclose:
        canvas.endSvg()

    return canvas
def draw_tree(tree, labels={}, xscale=100, yscale=20, canvas=None,
              leafPadding=10, leafFunc=lambda x: str(x.name),
              labelOffset=None, fontSize=10, labelSize=None,
              minlen=1, maxlen=util.INF, filename=sys.stdout,
              rmargin=150, lmargin=10, tmargin=0, bmargin=None,
              colormap=None,
              stree=None,
              layout=None,
              gene2species=None,
              lossColor=(0, 0, 1),
              dupColor=(1, 0, 0),
              eventSize=4,
              legendScale=False, autoclose=None,
              extendRoot=True, labelLeaves=True, drawHoriz=True, nodeSize=0):
    """Draw `tree` onto an SVG canvas and return the canvas.

    labels        -- dict from node name to branch label text (may contain
                     newlines); labels are drawn along the branch
    canvas        -- canvas to draw on; when None a new svg.Svg is opened
                     on `filename`, and `autoclose` then defaults to True
                     (otherwise it defaults to False)
    layout        -- optional precomputed coordinates, node -> (x, y); when
                     None, treelib.layout_tree is used
    colormap      -- optional callable applied to `tree`; when None every
                     node gets color (0, 0, 0)
    stree, gene2species
                  -- when both are given, the tree is reconciled and its
                     events and losses are drawn with draw_events
    legendScale   -- when true a scale is drawn; forced off when the tree
                     has zero total branch length
    labelOffset, labelSize, bmargin
                  -- default to -1, .7 * fontSize and yscale respectively

    Side effect: node.color is set on every node when colormap is None.
    """

    # set defaults
    fontRatio = 8. / 11.

    if labelSize == None:
        labelSize = .7 * fontSize

    if labelOffset == None:
        labelOffset = -1

    if bmargin == None:
        bmargin = yscale

    # a tree with no branch lengths gets no scale and fixed-length branches
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legendScale = False
        minlen = xscale

    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # labels and events are only drawn on horizontal branches
    if len(labels) > 0 or (stree and gene2species):
        drawHoriz = True

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout

    xcoords, ycoords = zip(*coords.values())
    maxwidth = max(xcoords)
    maxheight = max(ycoords) + labelOffset

    # initialize canvas
    if canvas == None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)

        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    # draw tree
    def walk(node):
        # draws the branch leading to `node`, then its label, node marker,
        # leaf text or children
        x, y = coords[node]
        if node.parent:
            parentx, parenty = coords[node.parent]
        else:
            if extendRoot:
                parentx, parenty = 0, y
            else:
                parentx, parenty = x, y     # e.g. no branch

        # draw branch
        if drawHoriz:
            canvas.line(parentx, y, x, y, color=node.color)
        else:
            canvas.line(parentx, parenty, x, y, color=node.color)

        # draw branch labels
        if node.name in labels:
            branchlen = x - parentx
            lines = str(labels[node.name]).split("\n")
            labelwidth = max(map(len, lines))
            # label width is capped at the branch length
            labellen = min(labelwidth * fontRatio * fontSize,
                           max(int(branchlen - 1), 0))

            for i, line in enumerate(lines):
                canvas.text(
                    line,
                    parentx + (branchlen - labellen) / 2.,
                    y + labelOffset + (-len(lines) + 1 + i) * (labelSize + 1),
                    labelSize)

        # draw nodes
        if nodeSize > 0:
            canvas.circle(x, y, nodeSize, strokeColor=svg.null,
                          fillColor=node.color)

        # draw leaf labels or recur
        if node.is_leaf():
            if labelLeaves:
                canvas.text(leafFunc(node),
                            x + leafPadding, y + fontSize / 2., fontSize,
                            fillColor=node.color)
        else:
            if drawHoriz:
                # draw vertical part of branch
                top = coords[node.children[0]][1]
                bot = coords[node.children[-1]][1]
                canvas.line(x, top, x, bot, color=node.color)

            # draw children
            for child in node.children:
                walk(child)

    # everything inside the transform is shifted by the left/top margins
    canvas.beginTransform(("translate", lmargin, tmargin))
    walk(tree.root)

    if stree and gene2species:
        draw_events(canvas, tree, coords, events, losses,
                    lossColor=lossColor, dupColor=dupColor, size=eventSize)
    canvas.endTransform()

    # draw legend
    if legendScale:
        if legendScale == True:
            # automatically choose a scale
            length = maxwidth / float(xscale)
            order = math.floor(math.log10(length))
            length = 10**order

        # NOTE(review): `length` is only assigned when legendScale is
        # exactly True; confirm what other truthy values should do
        drawScale(lmargin, tmargin + maxheight + bmargin - fontSize,
                  length, xscale, fontSize, canvas=canvas)

    if autoclose:
        canvas.endSvg()

    return canvas
def dlcoal_recon_old(tree, stree, gene2species, n, duprate, lossrate,
                     pretime=None, premean=None, nsearch=1000,
                     maxdoom=20, nsamples=100, search=phylo.TreeSearchNni):
    """
    Search for the best reconciliation under the DLCoal model

    Runs `nsearch` iterations.  Each iteration scores a copy of the current
    locus tree with prob_dlcoal_recon_topology(); a strictly better score
    is accepted, otherwise the search object is reverted.  A new locus tree
    rearrangement is proposed at the end of every iteration.

    Returns (maxp, maxrecon) where 'maxp' is the probability of the
    MAP reconciliation 'maxrecon', a dict of the form

    {'coal_recon': coal_recon,
     'locus_tree': locus_tree,
     'locus_recon': locus_recon,
     'locus_events': locus_events,
     'daughters': daughters}

    ('maxrecon' stays None if no iteration ever scores above -util.INF.)
    """

    def identity(x):
        return x

    # the coal tree is the input tree itself (not a copy)
    coal_tree = tree

    # the initial locus tree is congruent to the coal tree,
    # which is equivalent to assuming no ILS
    current_tree = coal_tree.copy()
    searcher = search(current_tree)

    best_p = -util.INF
    best_recon = None

    for _ in xrange(nsearch):
        # TODO: propose other reconciliations beside LCA
        candidate = current_tree.copy()
        phylo.recon_root(candidate, stree, gene2species, newCopy=False)
        cand_recon = phylo.reconcile(candidate, stree, gene2species)
        cand_events = phylo.label_events(candidate, cand_recon)

        # propose daughters (TODO)
        cand_daughters = set()

        # propose coal recon (TODO: propose others beside LCA)
        cand_coal_recon = phylo.reconcile(coal_tree, candidate, identity)

        # score the candidate; implied speciation nodes are added by hand
        # (hence add_spec=False) and stripped again right after scoring
        phylo.add_implied_spec_nodes(candidate, stree,
                                     cand_recon, cand_events)
        score = prob_dlcoal_recon_topology(
            coal_tree, cand_coal_recon,
            candidate, cand_recon, cand_events, cand_daughters,
            stree, n, duprate, lossrate,
            pretime, premean,
            maxdoom=maxdoom, nsamples=nsamples,
            add_spec=False)
        treelib.remove_single_children(candidate)

        improved = score > best_p
        if not improved:
            searcher.revert()
        else:
            best_p = score
            best_recon = {"coal_recon": cand_coal_recon,
                          "locus_tree": candidate,
                          "locus_recon": cand_recon,
                          "locus_events": cand_events,
                          "daughters": cand_daughters}
            # continue the search from a fresh copy of the accepted tree
            current_tree = candidate.copy()
            searcher.set_tree(current_tree)

        # perform local rearrangement to locus tree
        searcher.propose()

    return best_p, best_recon
def test_birthDeathPrior(self):
    """
    Test that c_calcBirthDeathPrior and calcBirthDeathPrior agree

    For a series of gene tree / species tree pairs, both functions are
    called with identical arguments and their results are compared with
    fequal().  Each pair of values is also printed.
    """

    # the two rate arguments given to the prior functions (presumably the
    # birth and death rates, in that order -- confirm against
    # calcBirthDeathPrior's signature)
    lam = 2
    mu = .5
    maxdoom = 10

    def gene2species(gene):
        # species name is the upper-cased first character of the gene name
        return gene[:1].upper()

    def both_priors(tree, stree, recon, rate1, rate2, **kwargs):
        """Return the results of both prior functions for the same input."""
        p = c_calcBirthDeathPrior(tree, stree, recon, rate1, rate2,
                                  maxdoom, **kwargs)
        p2 = calcBirthDeathPrior(tree, stree, recon, rate1, rate2,
                                 maxdoom, **kwargs)
        return p, p2

    def check_reconciled(tree, stree, rate1, rate2):
        """Reconcile with phylo.reconcile(), then compare both priors."""
        recon = phylo.reconcile(tree, stree, gene2species)
        p, p2 = both_priors(tree, stree, recon, rate1, rate2)
        # %-formatting prints the same text as `print "prior", p, p2`
        print("prior %s %s" % (p, p2))
        fequal(p, p2)

    stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);")
    tree = treelib.parse_newick(
        "((((a1,a2),(a3,a4)),(b1,b2)),(((c1,d1),(c2,d2)),e1));")
    check_reconciled(tree, stree, lam, mu)

    # test gene reconciling within species tree
    tree = treelib.parse_newick(
        "((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));")
    check_reconciled(tree, stree, lam, mu)

    # test gene reconciling within species tree
    # (was treelib.parseNewick; every other call here uses parse_newick)
    # NOTE: both rate arguments are `lam` in this case, i.e. equal rates
    tree = treelib.parse_newick("((a1,b1),c1);")
    check_reconciled(tree, stree, lam, lam)

    # test case that occurred during simulation
    # non parsimonious reconciliation
    stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
    tree = treelib.parse_newick("((a1,a2));")
    recon = {
        tree.nodes["a1"]: stree.nodes["A"],
        tree.nodes["a2"]: stree.nodes["A"],
        tree.nodes["a1"].parent: stree.nodes["A"].parent,
        tree.root: stree.root,
    }
    events = {
        tree.nodes["a1"]: "gene",
        tree.nodes["a2"]: "gene",
        tree.nodes["a1"].parent: "dup",
        tree.root: "spec",
    }
    p, p2 = both_priors(tree, stree, recon, lam, mu, events=events)
    tree.write_newick(oneline=True)
    print("\nprior %s %s" % (p, p2))
    fequal(p, p2)

    # complicated case
    stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
    tree = treelib.parse_newick(
        "((((B2:1.072961,B8:1.072961):0.106756,((((A1:0.427377,(((A3:0.150067,A11:0.150067):0.038521,A2:0.188588):0.121082,A5:0.309671):0.117706):0.352590,A9:0.779967):0.113269,(A8:0.266488,A7:0.266488):0.626747):0.236597,(((B9:0.160640,B7:0.160640):0.098506,B4:0.259146):0.429865,B5:0.689011):0.440822):0.049885):0.714463,(B13:1.086980,((A10:1.000000,((B10:0.408524,(((B3:0.143778,B1:0.143778):0.023788,B6:0.167566):0.058639,B12:0.226204):0.182319):0.232105,B11:0.640629):0.359371):0.082149,(A6:0.277757,A4:0.277757):0.804392):0.004830):0.807201):0.105819,(C3:1.213803,(((C6:0.190132,C4:0.190132):0.011461,C5:0.201593):0.745740,(C1:0.017299,C2:0.017299):0.930034):0.266470):0.786197);"
    )
    check_reconciled(tree, stree, lam, mu)

    stree = treelib.parse_newick(
        "(((A:1,B:1):1,(C:1.5,D:1.5):0.5):.5,((E:.2,F:.2):.6):1.9);")
    tree = treelib.parse_newick(
        "(((A1:1.000000,B1:1.000000):1.000000,(((C2:0.718949,C1:0.718949):0.168784,C3:0.887733):0.612267,D1:1.500000):0.500000):0.500000,((F8:0.122975,F5:0.122975):6.518970,(((E4:0.200000,F6:0.200000):5.257236,((E3:0.200000,F7:0.200000):4.029009,(E2:0.200000,F1:0.200000):4.029009):1.228227):0.306982,(((E5:0.200000,F3:0.200000):1.068443,(E6:0.200000,F2:0.200000):1.068443):1.094596,(E1:0.200000,F4:0.200000):2.163039):3.401179):0.877727):1.458055);"
    )
    check_reconciled(tree, stree, lam, mu)

    # test for overflow
    stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
    tree = treelib.parse_newick(
        "((((C24:0.940136,C6:0.940136):0.140529,((((C37:0.374306,(C26:0.054540,C10:0.054540):0.319766):0.046428,(C15:0.009875,C29:0.009875):0.410860):0.112550,(C3:0.213709,C28:0.213709):0.319576):0.034152,C13:0.567437):0.513228):0.545124,((((C36:0.036428,C30:0.036428):1.402769,(((C33:0.038848,C19:0.038848):0.352795,(C9:0.282410,(C1:0.000411,C21:0.000411):0.281998):0.109233):0.452052,((C34:0.108366,C12:0.108366):0.332454,C35:0.440820):0.402875):0.595502):0.039525,((((((C40:0.082790,C23:0.082790):0.003327,(C11:0.021474,C14:0.021474):0.064643):0.031631,C31:0.117748):0.019433,C17:0.137181):0.619636,C39:0.756818):0.139581,(C4:0.160113,(C41:0.116482,C32:0.116482):0.043631):0.736286):0.582323):0.000255,(C5:0.389128,((C25:0.112569,C27:0.112569):0.127253,(C22:0.139232,C18:0.139232):0.100590):0.149306):1.089849):0.146811):0.299534,(C2:1.197153,(C7:0.690311,(C16:0.070431,((C20:0.000466,C8:0.000466):0.060700,C38:0.061165):0.009265):0.619881):0.506842):0.728170);"
    )
    print("leaves %s" % len(tree.leaves()))
    check_reconciled(tree, stree, lam, mu)
def test_birthDeathPrior(self):
    """
    Compare c_calcBirthDeathPrior against calcBirthDeathPrior

    Every case below builds a species tree, a gene tree and a
    reconciliation, calls both functions with the same arguments, prints
    the two values and checks them with fequal().
    """

    # rate arguments handed to both prior functions (presumably birth and
    # death rate respectively -- confirm against calcBirthDeathPrior)
    l = 2
    u = .5
    maxdoom = 10

    def gene2species(gene):
        # first character of the gene name, upper-cased, names the species
        return gene[:1].upper()

    # NOTE: print("... %s" % ...) emits the same text as the Python 2
    # statement `print "...", a, b` and is also valid Python 3

    stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);")
    tree = treelib.parse_newick(
        "((((a1,a2),(a3,a4)),(b1,b2)),(((c1,d1),(c2,d2)),e1));")
    recon = phylo.reconcile(tree, stree, gene2species)
    p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    print("prior %s %s" % (p, p2))
    fequal(p, p2)

    # test gene reconciling within species tree
    tree = treelib.parse_newick(
        "((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));")
    recon = phylo.reconcile(tree, stree, gene2species)
    p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    print("prior %s %s" % (p, p2))
    fequal(p, p2)

    # test gene reconciling within species tree
    # parse_newick replaces the lone parseNewick call so that the whole
    # test goes through a single treelib entry point
    tree = treelib.parse_newick("((a1,b1),c1);")
    recon = phylo.reconcile(tree, stree, gene2species)
    # both rate arguments are `l` here (equal rates)
    p = c_calcBirthDeathPrior(tree, stree, recon, l, l, maxdoom)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, l, maxdoom)
    print("prior %s %s" % (p, p2))
    fequal(p, p2)

    # test case that occurred during simulation
    # non parsimonious reconciliation
    stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
    tree = treelib.parse_newick("((a1,a2));")
    recon = {tree.nodes["a1"]: stree.nodes["A"],
             tree.nodes["a2"]: stree.nodes["A"],
             tree.nodes["a1"].parent: stree.nodes["A"].parent,
             tree.root: stree.root}
    events = {tree.nodes["a1"]: "gene",
              tree.nodes["a2"]: "gene",
              tree.nodes["a1"].parent: "dup",
              tree.root: "spec"}
    p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom,
                              events=events)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom,
                             events=events)
    tree.write_newick(oneline=True)
    print("\nprior %s %s" % (p, p2))
    fequal(p, p2)

    # complicated case
    stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
    tree = treelib.parse_newick("((((B2:1.072961,B8:1.072961):0.106756,((((A1:0.427377,(((A3:0.150067,A11:0.150067):0.038521,A2:0.188588):0.121082,A5:0.309671):0.117706):0.352590,A9:0.779967):0.113269,(A8:0.266488,A7:0.266488):0.626747):0.236597,(((B9:0.160640,B7:0.160640):0.098506,B4:0.259146):0.429865,B5:0.689011):0.440822):0.049885):0.714463,(B13:1.086980,((A10:1.000000,((B10:0.408524,(((B3:0.143778,B1:0.143778):0.023788,B6:0.167566):0.058639,B12:0.226204):0.182319):0.232105,B11:0.640629):0.359371):0.082149,(A6:0.277757,A4:0.277757):0.804392):0.004830):0.807201):0.105819,(C3:1.213803,(((C6:0.190132,C4:0.190132):0.011461,C5:0.201593):0.745740,(C1:0.017299,C2:0.017299):0.930034):0.266470):0.786197);")
    recon = phylo.reconcile(tree, stree, gene2species)
    p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    print("prior %s %s" % (p, p2))
    fequal(p, p2)

    stree = treelib.parse_newick(
        "(((A:1,B:1):1,(C:1.5,D:1.5):0.5):.5,((E:.2,F:.2):.6):1.9);")
    tree = treelib.parse_newick("(((A1:1.000000,B1:1.000000):1.000000,(((C2:0.718949,C1:0.718949):0.168784,C3:0.887733):0.612267,D1:1.500000):0.500000):0.500000,((F8:0.122975,F5:0.122975):6.518970,(((E4:0.200000,F6:0.200000):5.257236,((E3:0.200000,F7:0.200000):4.029009,(E2:0.200000,F1:0.200000):4.029009):1.228227):0.306982,(((E5:0.200000,F3:0.200000):1.068443,(E6:0.200000,F2:0.200000):1.068443):1.094596,(E1:0.200000,F4:0.200000):2.163039):3.401179):0.877727):1.458055);")
    recon = phylo.reconcile(tree, stree, gene2species)
    p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    print("prior %s %s" % (p, p2))
    fequal(p, p2)

    # test for overflow
    stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
    tree = treelib.parse_newick("((((C24:0.940136,C6:0.940136):0.140529,((((C37:0.374306,(C26:0.054540,C10:0.054540):0.319766):0.046428,(C15:0.009875,C29:0.009875):0.410860):0.112550,(C3:0.213709,C28:0.213709):0.319576):0.034152,C13:0.567437):0.513228):0.545124,((((C36:0.036428,C30:0.036428):1.402769,(((C33:0.038848,C19:0.038848):0.352795,(C9:0.282410,(C1:0.000411,C21:0.000411):0.281998):0.109233):0.452052,((C34:0.108366,C12:0.108366):0.332454,C35:0.440820):0.402875):0.595502):0.039525,((((((C40:0.082790,C23:0.082790):0.003327,(C11:0.021474,C14:0.021474):0.064643):0.031631,C31:0.117748):0.019433,C17:0.137181):0.619636,C39:0.756818):0.139581,(C4:0.160113,(C41:0.116482,C32:0.116482):0.043631):0.736286):0.582323):0.000255,(C5:0.389128,((C25:0.112569,C27:0.112569):0.127253,(C22:0.139232,C18:0.139232):0.100590):0.149306):1.089849):0.146811):0.299534,(C2:1.197153,(C7:0.690311,(C16:0.070431,((C20:0.000466,C8:0.000466):0.060700,C38:0.061165):0.009265):0.619881):0.506842):0.728170);")
    print("leaves %s" % len(tree.leaves()))
    recon = phylo.reconcile(tree, stree, gene2species)
    p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
    print("prior %s %s" % (p, p2))
    fequal(p, p2)