示例#1
0
    def next_proposal(self):
        """Propose a new locus tree and bundle its LCA reconciliations.

        Advances ``self.locus_search``, copies the proposed tree, hands the
        copy to ``phylo.recon_root`` (called with ``newCopy=False``) and
        reconciles it against the species tree.  The coalescent tree is then
        reconciled against that copy using an identity name mapping.

        Returns a dict with the keys "coal_recon", "locus_tree",
        "locus_recon", "locus_events" and "daughters"; "daughters" is
        currently always an empty set.
        """
        self.locus_search.propose()

        # TODO: propose other reconciliations beside LCA
        proposed = self.locus_search.get_tree().copy()
        reconer = self.reconer
        phylo.recon_root(proposed, reconer.stree, reconer.gene2species,
                         newCopy=False)
        lca_recon = phylo.reconcile(proposed, reconer.stree,
                                    reconer.gene2species)
        lca_events = phylo.label_events(proposed, lca_recon)

        # propose coal recon (TODO: propose others beside LCA)
        coal_lca = phylo.reconcile(reconer.coal_tree, proposed, lambda x: x)

        return {
            "coal_recon": coal_lca,
            "locus_tree": proposed,
            "locus_recon": lca_recon,
            "locus_events": lca_events,
            # propose daughters (TODO)
            "daughters": set(),
        }
示例#2
0
    def compute_cost(self, gtree):
        """Returns the rf cost

        Reconciles ``gtree`` against ``self.stree`` (using
        ``self.gene2species``) and counts the entries of the reconciliation
        whose gene-tree node name differs from the name of the species-tree
        node it is mapped to.

        Names are compared by value.  An identity test (``is``) only works
        when both names happen to be the same object, which is not
        guaranteed for equal strings or integers.
        """
        recon = phylo.reconcile(gtree, self.stree, self.gene2species)

        # TODO (from the original notes): for every pair of entries that are
        # inverses of each other (a maps to b and b maps to a, by name),
        # subtract 2 from the cost.

        return sum(1 for node, snode in recon.items()
                   if node.name != snode.name)
     
#cherry yum diddly dip
示例#3
0
def count_dup_loss_coal_tree(coal_tree,
                             extra,
                             stree,
                             gene2species,
                             implied=True,
                             locus_mpr=True):
    """Count duplication, loss, coalescence and appearance events.

    The locus tree is read from ``extra["locus_tree"]`` and reconciled to
    ``stree`` afresh; the coalescent reconciliation is read from
    ``extra["coal_recon"]``.  For every locus-tree branch the number of
    extra lineages ``max(count_top - 1, 0)`` is added to the ``'coal'``
    entry of the mapped species node's ``data`` and to the running total.

    Returns the tuple ``(ndup, nloss, ncoal, nappear)``.

    Raises Exception("not implemented") when ``locus_mpr`` is false.
    """
    if not locus_mpr:
        raise Exception("not implemented")

    # TODO: use locus_recon and locus_events rather than MPR
    #       (currently, phylo.py reconciliation functions fail for non-MPR)
    ltree = extra["locus_tree"]
    lrecon = phylo.reconcile(ltree, stree, gene2species)
    levents = phylo.label_events(ltree, lrecon)
    crecon = extra["coal_recon"]

    dup_total, loss_total, appear_total = phylo.count_dup_loss_tree(
        ltree, stree, gene2species, lrecon, levents)

    # implied speciation nodes are only inserted AFTER dups and losses have
    # been counted, because they affect loss inference
    implied_nodes = None
    if implied:
        implied_nodes = phylo.add_implied_spec_nodes(
            ltree, stree, lrecon, levents)

    # tally extra lineages at the top of every locus branch
    coal_total = 0
    per_branch = coal.count_lineages_per_branch(coal_tree, crecon, ltree)
    for lnode, (_bottom, top) in per_branch.iteritems():
        extra_lineages = max(top - 1, 0)
        lrecon[lnode].data['coal'] += extra_lineages
        coal_total += extra_lineages

    if implied:
        phylo.remove_implied_spec_nodes(ltree, implied_nodes, lrecon, levents)

    return dup_total, loss_total, coal_total, appear_total
示例#4
0
    def optimize_model(self, gtree, stree, gene2species):
        """Optimizes the model

        Delegates to ``CostModel.optimize_model`` and then validates the
        inputs.  While ``self.printed`` is false, it also prints the gene
        tree, ``self.stree`` and the reconciliation, then sets
        ``self.printed`` to True.

        Raises:
            Exception: if either tree is not rooted and binary, or if the
                gene tree cannot be reconciled to the species tree.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        # ensure gtree and stree are both rooted and binary
        if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
            raise Exception("gene tree must be rooted and binary")
        if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
            raise Exception("species tree must be rooted and binary")
        try:
            recon = phylo.reconcile(gtree, stree, gene2species)
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are not converted into a mapping error
            raise Exception("problem mapping gene tree to species tree")

        if not self.printed:
            import pprint

            # one buffer per tree: a shared buffer accumulates, so the
            # species-tree dump would start with the gene tree again
            gene_out = StringIO.StringIO()
            treelib.draw_tree(gtree, out=gene_out, minlen=5, maxlen=5)
            print("gene tree:\n")
            print(gene_out.getvalue())

            # NOTE(review): draws self.stree rather than the stree argument;
            # presumably CostModel.optimize_model stores it -- confirm
            spec_out = StringIO.StringIO()
            treelib.draw_tree(self.stree, out=spec_out, minlen=5, maxlen=5)
            print("spec tree:\n")
            print(spec_out.getvalue())
            pprint.pprint(recon)

            self.printed = True
示例#5
0
    def _recon_lca(self, locus_tree):
        """Build the LCA (MPR) reconciliation bundle for ``locus_tree``.

        Reconciles ``locus_tree`` to ``self._stree``, labels its events,
        reconciles ``self._coal_tree`` to ``locus_tree`` with an identity
        name mapping, proposes daughters, and returns everything wrapped in
        a ``phyloDLC.Recon``.
        """
        def same_name(name):
            return name

        # locus tree -> species tree (LCA / MPR)
        lrecon = phylo.reconcile(locus_tree, self._stree, self._gene2species)
        levents = phylo.label_events(locus_tree, lrecon)

        # coal tree -> locus tree (LCA / MPR)
        crecon = phylo.reconcile(self._coal_tree, locus_tree, same_name)

        # propose daughters
        proposed_daughters = self._propose_daughters(
            self._coal_tree, crecon, locus_tree, lrecon, levents)

        return phyloDLC.Recon(crecon, locus_tree, lrecon, levents,
                              proposed_daughters)
示例#6
0
    def _recon_lca(self, locus_tree):
        """Return a ``phyloDLC.Recon`` holding LCA (MPR) reconciliations.

        The locus tree is reconciled to the species tree, the coalescent
        tree is reconciled to the locus tree through an identity mapping,
        and the daughters come from ``self._propose_daughters``.
        """
        coal_tree = self._coal_tree

        # LCA (MPR) reconciliation of the locus tree and its event labels
        mpr_locus = phylo.reconcile(
            locus_tree, self._stree, self._gene2species)
        mpr_events = phylo.label_events(locus_tree, mpr_locus)

        # LCA (MPR) reconciliation of the coalescent tree
        mpr_coal = phylo.reconcile(coal_tree, locus_tree, lambda x: x)

        # daughters are proposed while building the result
        return phyloDLC.Recon(
            mpr_coal, locus_tree, mpr_locus, mpr_events,
            self._propose_daughters(coal_tree, mpr_coal, locus_tree,
                                    mpr_locus, mpr_events))
示例#7
0
    def prescreen(self, tree):
        """Score a coalescent tree against ``self.locus_tree``.

        ``tree`` is the coal tree.  Its LCA reconciliation to the locus tree
        is computed first, and the pair is then scored with
        ``reconprob.prob_locus_coal_recon_topology`` (per the original
        comment: the log probability of the reconciled coalescent topology
        under the coalescent model).
        """
        locus_tree = self.locus_tree
        lca_recon = phylo.reconcile(tree, locus_tree)
        return reconprob.prob_locus_coal_recon_topology(
            tree, lca_recon, locus_tree, self.popsizes, self.daughters)
 def compute_cost(self, gtree):
     """Returns the duplication-loss cost

     The gene tree is reconciled to ``self.stree``; the number of
     duplications times ``self.dupcost`` plus the number of losses times
     ``self.losscost`` is returned.  A term whose unit cost is zero is
     skipped without counting.
     """
     recon = phylo.reconcile(gtree, self.stree, self.gene2species)
     events = phylo.label_events(gtree, recon)

     dup_term = 0
     if self.dupcost != 0:
         dup_term = phylo.count_dup(gtree, events) * self.dupcost

     loss_term = 0
     if self.losscost != 0:
         loss_term = phylo.count_loss(gtree, self.stree, recon) * self.losscost

     return dup_term + loss_term
示例#9
0
 def _compute_duplosscost(self, ltree):
     """Returns dup/loss cost from locus tree to species tree

     Returns 0 without reconciling unless ``self.dupcost`` or
     ``self.losscost`` is positive.
     """
     if not (self.dupcost > 0 or self.losscost > 0):
         return 0

     recon = phylo.reconcile(ltree, self.stree, self.gene2species)
     events = phylo.label_events(ltree, recon)

     total = 0
     if self.dupcost != 0:
         ndup = phylo.count_dup(ltree, events)
         total += ndup * self.dupcost
     if self.losscost != 0:
         nloss = phylo.count_loss(ltree, self.stree, recon)
         total += nloss * self.losscost
     return total
示例#10
0
    def _compute_coalcost(self, gtree, ltree):
        """Returns deep coalescent cost from coalescent tree (gene tree) to locus tree

        Note: uses Zhang (RECOMB 2000) result that C = L - 2*D

        Returns 0 without reconciling unless ``self.coalcost`` is positive.
        """
        if not (self.coalcost > 0):
            return 0

        recon = phylo.reconcile(gtree, ltree)
        events = phylo.label_events(gtree, recon)

        # C = L - 2*D, scaled by the unit cost
        nloss = phylo.count_loss(gtree, ltree, recon)
        ndup = phylo.count_dup(gtree, events)
        return (nloss - 2*ndup) * self.coalcost
示例#11
0
    def prescreen(self, tree):
        """Score ``tree`` with ``duploss.prob_dup_loss``.

        The tree is reconciled to ``self.stree`` and its events are labeled;
        both are passed on together with ``self.duprate`` and
        ``self.lossrate``.
        """
        stree = self.stree
        lca_recon = phylo.reconcile(tree, stree, self.gene2species)
        lca_events = phylo.label_events(tree, lca_recon)

        return duploss.prob_dup_loss(tree, stree, lca_recon, lca_events,
                                     self.duprate, self.lossrate)
示例#12
0
    def _recon_lca(self, coal_tree):
        """Return a ``Recon`` using the LCA reconciliation of ``coal_tree``.

        Also (re)creates ``self._coal_recon_enum`` via ``phylo.enum_recon``,
        seeded with the LCA reconciliation and limited by
        ``self._coal_recon_depth``.
        """
        locus_tree = self._locus_tree

        # LCA reconciliation of the coal tree onto the locus tree
        lca = phylo.reconcile(coal_tree, locus_tree, lambda x: x)

        # we do not explore the reconciliation space now
        self._coal_recon_enum = phylo.enum_recon(
            coal_tree, locus_tree, recon=lca, depth=self._coal_recon_depth)

        return Recon(coal_tree, lca, locus_tree, self._locus_recon,
                     self._locus_events, self._daughters)
 def optimize_model(self, gtree, stree, gene2species):
     """Optimizes the model

     Delegates to ``CostModel.optimize_model`` and then validates the
     inputs.

     Raises:
         Exception: if either tree is not rooted and binary, or if the
             gene tree cannot be reconciled to the species tree.
     """
     CostModel.optimize_model(self, gtree, stree, gene2species)

     # ensure gtree and stree are both rooted and binary
     if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
         raise Exception("gene tree must be rooted and binary")
     if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
         raise Exception("species tree must be rooted and binary")
     try:
         # only the success of the mapping matters; the result is discarded
         phylo.reconcile(gtree, stree, gene2species)
     except Exception:
         # narrowed from a bare except so KeyboardInterrupt/SystemExit
         # are not converted into a mapping error
         raise Exception("problem mapping gene tree to species tree")
示例#14
0
 def setup_recon(self, recon=None):
     """Set ``self.recon``, ``self.events`` and ``self.losses``.

     When ``recon`` is None and both ``self.stree`` and
     ``self.gene2species`` are truthy, a default reconciliation of
     ``self.tree`` is computed; otherwise ``recon`` is stored as given.
     Events and losses are derived only when the stored reconciliation is
     truthy; otherwise both are set to None.
     """
     # construct default reconciliation (identity test: None is a singleton)
     if recon is None and self.stree and self.gene2species:
         recon = phylo.reconcile(self.tree, self.stree, self.gene2species)
     self.recon = recon

     # construct events
     if self.recon:
         self.events = phylo.label_events(self.tree, self.recon)
         self.losses = phylo.find_loss(self.tree, self.stree, self.recon)
     else:
         self.events = None
         self.losses = None
示例#15
0
    def test_birthDeathPrior_large(self):
        """test birth death prior for large trees"""

        l = 0.000732
        u = 0.000859
        maxdoom = 20

        stree = treelib.read_tree("test/data/fungi.stree")
        gene2species = phylo.read_gene2species("test/data/fungi.smap")
        tree = treelib.read_tree("test/data/fungi/10169/10169.tree")
        recon = phylo.reconcile(tree, stree, gene2species)

        p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
        print p
        self.assert_(p != -INF)
示例#16
0
    def test_birthDeathPrior_large(self):
        """test birth death prior for large trees"""
        
        l = 0.000732 
        u = 0.000859
        maxdoom = 20
        
        stree = treelib.read_tree("test/data/fungi.stree")
        gene2species = phylo.read_gene2species("test/data/fungi.smap")
        tree = treelib.read_tree("test/data/fungi/10169/10169.tree")
        recon = phylo.reconcile(tree, stree, gene2species)

        p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
        print p
        self.assert_(p != -INF)
示例#17
0
    def _recon_lca(self, locus_tree):
        """Return a ``Recon`` built from LCA reconciliations of ``locus_tree``.

        The locus tree is reconciled to the species tree, the coalescent
        tree is reconciled to the locus tree through an identity mapping,
        and daughters are proposed.  ``self._coal_recon_enum`` is reset to a
        ``phylo.enum_recon`` enumeration seeded with the LCA coalescent
        reconciliation and limited by ``self._coal_recon_depth``.
        """
        coal_tree = self._coal_tree

        # LCA locus_recon and its events
        lrecon = phylo.reconcile(locus_tree, self._stree, self._gene2species)
        levents = phylo.label_events(locus_tree, lrecon)

        # LCA coal_recon
        crecon = phylo.reconcile(coal_tree, locus_tree, lambda x: x)

        # propose daughters (TODO)
        proposed = self._propose_daughters(coal_tree, crecon, locus_tree,
                                           lrecon, levents)

        # enumeration of coalescent reconciliations, starting from the LCA
        self._coal_recon_enum = phylo.enum_recon(
            coal_tree, locus_tree, recon=crecon,
            depth=self._coal_recon_depth)

        return Recon(crecon, locus_tree, lrecon, levents, proposed)
示例#18
0
    def prescreen(self, tree):
        """Return the duplication/loss cost of ``tree``.

        The tree is reconciled to ``self.stree``; each event count is only
        computed when its unit cost is non-zero.
        """
        recon = phylo.reconcile(tree, self.stree, self.gene2species)
        events = phylo.label_events(tree, recon)

        dup_term = (0 if self.dupcost == 0
                    else phylo.count_dup(tree, events) * self.dupcost)
        loss_term = (0 if self.losscost == 0
                     else phylo.count_loss(tree, self.stree, recon)
                     * self.losscost)

        return dup_term + loss_term
示例#19
0
    def compute_cost(self, gtree):
        """
        Returns -(topology term + branch term), where the topology term comes
        from spidir.calc_birth_death_prior and the branch term from
        spidir.branch_prior (presumably both are log probabilities --
        confirm against spidir).
        min cost = min neg log prob = max log prob = max prob
        """
        stree = self.stree
        recon = phylo.reconcile(gtree, stree, self.gene2species)
        events = phylo.label_events(gtree, recon)

        # optimize branch lengths before evaluating the branch prior
        spidir.find_ml_branch_lengths_hky(
            gtree, self.align, self.bgfreq, self.kappa,
            maxiter=10, parsinit=False)

        branch_term = spidir.branch_prior(
            gtree, stree, recon, events,
            self.params, self.duprate, self.lossrate, self.pretime)
        topology_term = spidir.calc_birth_death_prior(
            gtree, stree, recon, self.duprate, self.lossrate, events)

        return -(topology_term + branch_term)
示例#20
0
    def prescreen(self, tree):
        """Return the weighted duplication plus loss cost of ``tree``.

        Duplications and losses are counted on the reconciliation of
        ``tree`` to ``self.stree``; a count is skipped when the matching
        unit cost equals zero.
        """
        stree = self.stree
        lca_recon = phylo.reconcile(tree, stree, self.gene2species)
        lca_events = phylo.label_events(tree, lca_recon)

        if self.dupcost != 0:
            weighted_dups = phylo.count_dup(tree, lca_events) * self.dupcost
        else:
            weighted_dups = 0

        if self.losscost != 0:
            weighted_losses = (phylo.count_loss(tree, stree, lca_recon)
                               * self.losscost)
        else:
            weighted_losses = 0

        return weighted_dups + weighted_losses
示例#21
0
    def optimize_model(self, gtree, stree, gene2species):
        """Optimizes the model

        Delegates to ``CostModel.optimize_model``, reports negative
        ``dupcost``/``losscost`` through ``self.parser.error`` and validates
        the input trees.

        Raises:
            Exception: if either tree is not rooted and binary, or if the
                gene tree cannot be reconciled to the species tree.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        if self.dupcost < 0:
            self.parser.error("-D/--dupcost must be >= 0")
        if self.losscost < 0:
            self.parser.error("-L/--losscost must be >= 0")

        # ensure gtree and stree are both rooted and binary
        if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
            raise Exception("gene tree must be rooted and binary")
        if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
            raise Exception("species tree must be rooted and binary")
        try:
            # only the success of the mapping matters; the result is discarded
            phylo.reconcile(gtree, stree, gene2species)
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are not converted into a mapping error
            raise Exception("problem mapping gene tree to species tree")
示例#22
0
    def test_birthDeathPriorFull(self):
        """test birth death prior with implied speciation nodes"""
        
        l = 2
        u = .5
        maxdoom = 10

        def gene2species(gene):
            return gene[:1].upper()

        stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);")
        tree = treelib.parse_newick("((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));")
        
        # test gene reconciling within species tree
        recon = phylo.reconcile(tree, stree, gene2species)
        p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
        p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
        print "prior", p, p2
        fequal(p, p2)
示例#23
0
    def test_birthDeathPriorFull(self):
        """test birth death prior with implied speciation nodes"""

        l = 2
        u = .5
        maxdoom = 10

        def gene2species(gene):
            return gene[:1].upper()

        stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);")
        tree = treelib.parse_newick(
            "((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));")

        # test gene reconciling within species tree
        recon = phylo.reconcile(tree, stree, gene2species)
        p = c_calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
        p2 = calcBirthDeathPrior(tree, stree, recon, l, u, maxdoom)
        print "prior", p, p2
        fequal(p, p2)
示例#24
0
def count_dup_loss_coal_tree(coal_tree, extra, stree, gene2species,
                             implied=True, locus_mpr=True):
    """Return ``(ndup, nloss, ncoal, nappear)`` for a coalescent tree.

    ``extra`` must provide "locus_tree" and "coal_recon".  The locus tree is
    reconciled to ``stree`` again here.  ``ncoal`` sums
    ``max(count_top - 1, 0)`` over the branches reported by
    ``coal.count_lineages_per_branch``; the same amount is added to the
    ``'coal'`` entry of the mapped species node's ``data``.

    Only ``locus_mpr=True`` is supported; otherwise
    Exception("not implemented") is raised.
    """
    if not locus_mpr:
        raise Exception("not implemented")

    # TODO: use locus_recon and locus_events rather than MPR
    #       (currently, phylo.py reconciliation functions fail for non-MPR)
    locus = extra["locus_tree"]
    mpr = phylo.reconcile(locus, stree, gene2species)
    mpr_events = phylo.label_events(locus, mpr)
    coal_map = extra["coal_recon"]

    counted = phylo.count_dup_loss_tree(locus, stree, gene2species,
                                        mpr, mpr_events)
    ndup, nloss, nappear = counted

    # add implied speciation nodes if desired
    # this must happen AFTER counting dups and losses since it affects
    # loss inference
    if implied:
        added = phylo.add_implied_spec_nodes(locus, stree, mpr, mpr_events)

    # count coals
    ncoal = 0
    branch_counts = coal.count_lineages_per_branch(coal_tree, coal_map, locus)
    for lnode, pair in branch_counts.iteritems():
        count_bot, count_top = pair
        surplus = max(count_top - 1, 0)
        mpr[lnode].data['coal'] += surplus
        ncoal += surplus

    if implied:
        phylo.remove_implied_spec_nodes(locus, added, mpr, mpr_events)

    return ndup, nloss, ncoal, nappear
示例#25
0
    def __init__(self, stree, locus_tree, daughters, gene2species,
                 search=phylo.TreeSearchNni, num_coal_recons=1):
        """Set up the proposer state.

        Stores the species tree, locus tree and daughters, creates the
        coalescent tree search by calling ``search(None)``, and fixes the
        locus reconciliation to the LCA reconciliation of ``locus_tree``
        against ``stree``.
        """
        self._stree = stree
        self._locus_tree = locus_tree
        self._daughters = daughters
        self._coal_search = search(None)

        # locus recon (static) -- propose LCA reconciliation
        lca_recon = phylo.reconcile(locus_tree, stree, gene2species)
        self._locus_recon = lca_recon
        self._locus_events = phylo.label_events(locus_tree, lca_recon)

        # coal recon search bookkeeping
        self._num_coal_recons = num_coal_recons
        self._i_coal_recons = 1
        self._coal_recon_enum = None
        self._coal_recon_depth = 2
        self._accept_coal = False

        # no reconciliation yet
        self._recon = None
示例#26
0
def sample_locus_tree_hem(stree, popsize, duprate, lossrate,
                          freq=1.0, freqdup=.05, freqloss=.05,
                          steptime=1e6, keep_extinct=False):

    """
    Sample a locus tree with birth-death and hemiplasy


    Runs a relaxed fixation assumption simulation on a species tree.
    Some simplifying assumptions are made for this version of the simulator:
      1) All branches of the species tree have the same population size
      2) All branches of the species tree have the same duplication rate
      3) All branches of the species tree have the same loss rate
      4) All branches of the species tree have the same duplication effect
      5) All branches of the species tree have the same loss effect
      6) All branches of the species tree have the same time between forced
           frequency changes
      7) There is a single allele at the root of the species tree.

    A duplication/loss effect is the change in frequency for either event.
    Appropriate default values for these effects may need to be determined.
    Future iterations should remove these assumptions by incorporating
    dictionaries to allow values for each branch.

    parameters:
    stree is the initial species tree; it may be mutated by the simulator
    popsize is the population size (assmpt. 1)
    freq is the allele frequency (assmpt. 7)
    duprate is the duplication rate (in events/myr/indiv(?); assmpt. 2)
    lossrate is the loss rate (in events/myr/indiv(?); assmpt. 3)
    freqdup is the duplication effect (assmpt. 4)
    freqloss is the loss effect (assmpt. 5)
    steptime is the maximum time between frequency changes (assmpt. 6)
    keep_extinct, when true, adds a copy of the unpruned tree to the
      extra information under the key "full_locus_tree"

    Returns the locus tree, as well as extra information
    including a reconciliation dictionary and an events dictionary.

    NOTE(review): freqdup and freqloss are used as divisors when sampling
    events, but the sanity checks below accept 0.0 for both.
    """

    ## sanity checks before running the simulator; may be removed or relaxed
    treelib.assert_tree(stree)
    assert popsize > 0
    assert 0.0 <= freq and freq <= 1.0
    assert duprate >= 0.0
    assert lossrate >= 0.0
    assert 0.0 <= freqdup and freqdup <= 1.0
    assert 0.0 <= freqloss and freqloss <= 1.0
    assert steptime > 0.0


    # special case: no duplications or losses
    if duprate == 0.0 and lossrate == 0.0:
        locus_tree = stree.copy()
        recon = phylo.reconcile(locus_tree, stree, lambda x: x)
        events = phylo.label_events(locus_tree, recon)

        return locus_tree, {"recon": recon,
                            "events": events,
                            "daughters": set()}


    def event_is_dup(duprate, fullrate):
        # choose duplication with probability duprate / fullrate
        return random.random() <= duprate / fullrate


    def sim_walk(gtree, snode, gparent, p,
                 s_walk_time=0.0, remaining_steptime=steptime,
                 daughter=False):
        """
        Grow one branch of gtree below gparent inside species branch snode,
        then recurse according to the event that ends the branch.

        eventlog is a log of events along the gtree branch.
        Each entry has the form
          (time_on_branch, event_type, frequency, species_node),

        where
           0.0 <= time_on_branch <= branch_node.dist

        event_type is one of
           {'extinction', 'frequency', 'speciation', 'duplication',
            'loss', 'root', 'gene', 'daughter'},

        where 'root' is a unique event not added during the sim_walk process
        and 'daughter' is logged at time 0.0 on a branch started with
        daughter=True

        frequency is the branch frequency at the event time

        species_node is the name of the node of the species tree branch in
        which the event occurs
        """

        # create new node
        gnode = treelib.TreeNode(gtree.new_name())
        gtree.add_child(gparent, gnode)
        gnode.data = {"freq": p,
                      "log": []}
        eventlog = gnode.data["log"]
        g_walk_time = 0.0
        if daughter:
            eventlog.append((0.0, 'daughter', freqdup, snode.name))


        # grow this branch, determine next event
        event = None
        while True:
            # forget the previous step's event; otherwise, once a loss has
            # happened, every later plain time step would be processed as
            # another loss
            event = None

            if p <= 0.0:
                event = "extinct"
                break

            # determine remaining time
            remaining_s_dist = snode.dist - s_walk_time
            remaining_time = min(remaining_steptime, remaining_s_dist)

            # sample next dup/loss event
            eff_duprate = duprate * p / freqdup
            eff_lossrate = lossrate * p / freqloss
            eff_bothrate = eff_duprate + eff_lossrate
            event_time = stats.exponentialvariate(eff_bothrate)

            # advance times
            time_delta = min(event_time, remaining_time)
            s_walk_time += time_delta
            g_walk_time += time_delta

            # sample new frequency
            p = coal.sample_freq_CDF(p, popsize, time_delta)

            # determine event
            if event_time < remaining_time:
                # dup/loss occurs
                if event_is_dup(eff_duprate, eff_bothrate):
                    # dup, stop growing
                    event = "dup"
                    break
                else:
                    # loss, continue growing
                    event = "loss"

            else:
                if remaining_s_dist < remaining_steptime:
                    # we are at a speciation, stop growing
                    event = "spec"
                    break

            # process step
            if event == "loss":
                # LOSS EVENT
                p = max(p - freqloss, 0.0)
                remaining_steptime -= time_delta
                eventlog.append((g_walk_time, 'loss', p, snode.name))
            else:
                # NEXT TIME STEP
                remaining_steptime = steptime
                eventlog.append((g_walk_time, 'frequency', p, snode.name))


        # process event
        if event == "extinct":
            # EXTINCTION EVENT (p <= 0)
            gnode.dist = g_walk_time
            gnode.data['freq'] = 0.0
            eventlog.append((g_walk_time, 'extinction', 0.0, snode.name))


        elif event == "spec":
            # SPECIATION EVENT
            gnode.dist = g_walk_time
            gnode.data['freq'] = p

            # add the event to the log; at a species leaf the walk ends
            # ('gene'), otherwise recurse into every child species branch
            if snode.is_leaf():
                eventlog.append((g_walk_time, 'gene', p, snode.name))
            else:
                eventlog.append((g_walk_time, 'speciation', p, snode.name))
                for schild in snode.children:
                    sim_walk(gtree, schild, gnode, p)


        elif event == "dup":
            # DUPLICATION EVENT
            gnode.dist = g_walk_time
            gnode.data['freq'] = p
            eventlog.append((g_walk_time, 'duplication', p, snode.name))

            # recurse on mother
            sim_walk(gtree, snode, gnode, p,
                     s_walk_time=s_walk_time,
                     remaining_steptime=remaining_steptime)

            # recurse on daughter (starts at frequency freqdup)
            sim_walk(gtree, snode, gnode, freqdup,
                     s_walk_time=s_walk_time,
                     remaining_steptime=remaining_steptime,
                     daughter=True)

        else:
            raise Exception("unknown event '%s'" % event)


    # create new gene tree and simulate its evolution
    gtree = treelib.Tree()
    gtree.make_root()
    gtree.root.dist = 0.0
    gtree.root.data['freq'] = freq
    gtree.root.data['log'] = [(0.0, 'speciation', freq, stree.root.name)]

    # simulate locus tree
    # NOTE(review): only the first two children of the species root are
    # walked -- assumes a binary root
    sim_walk(gtree, stree.root.children[0], gtree.root, freq)
    sim_walk(gtree, stree.root.children[1], gtree.root, freq)


    # remove dead branches and single children
    extant_leaves = [leaf.name for leaf in gtree.leaves()
                     if leaf.data['freq'] > 0.0]
    extinctions = [leaf for leaf in gtree.leaves()
                   if leaf.data['freq'] == 0.0]

    if keep_extinct:
        full_gtree = gtree.copy()
        # do deep copy of data
        for node in full_gtree:
            node2 = gtree.nodes[node.name]
            for key, val in node2.data.items():
                node.data[key] = copy.copy(val)

    treelib.subtree_by_leaf_names(gtree, extant_leaves, keep_single=True)
    remove_single_children(gtree)

    # determine extra information (recon, events, daughters)
    extras = generate_extras(stree, gtree)

    if keep_extinct:
        extras["full_locus_tree"] = full_gtree

    return gtree, extras
示例#27
0
#=============================================================================
# parse options

conf, args = o.parse_args()

#gene2species = phylo.read_gene2species(conf.smap)
stree = treelib1.read_tree(conf.stree)
tree = treelib1.read_tree(conf.tree)

# optional table of species names
snames = dict(util.read_delim(conf.names)) if conf.names else None

# the branch reconciliation is read directly if given; otherwise it is
# derived from a reconciliation/events pair that is either read from file
# or computed from the gene-to-species map
if conf.brecon:
    brecon = phylo.read_brecon(conf.brecon, tree, stree)
else:
    if conf.recon:
        recon, events = phylo.read_recon_events(conf.recon, tree, stree)
    else:
        gene2species = phylo.read_gene2species(conf.smap)
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
    brecon = phylo.recon_events2brecon(recon, events)

phylo.add_implied_spec_nodes_brecon(tree, brecon)

transsvg.draw_tree(tree, brecon, stree,
                   filename=conf.output, snames=snames)
示例#28
0
def recon_to_labeledrecon(coal_tree, recon, stree, gene2species,
                          name_internal="n", locus_mpr=True):
    """Convert from DLCoal to DLCpar reconciliation model

    If locus_mpr is set (default), use MPR from locus_tree to stree.
    Otherwise the locus reconciliation and daughters stored on ``recon``
    are used as they are.

    Returns a tuple ``(gene_tree, LabeledRecon(species_map, locus_map,
    order))`` where ``gene_tree`` is a copy of ``coal_tree`` with implied
    nodes added and internal nodes renamed using ``name_internal``.
    """

    gene_tree = coal_tree.copy()
    coal_recon = recon.coal_recon
    locus_tree = recon.locus_tree
    if not locus_mpr:
        locus_recon = recon.locus_recon
        daughters = recon.daughters
    else:
        locus_recon = phylo.reconcile(locus_tree, stree, gene2species)
        locus_events = phylo.label_events(locus_tree, locus_recon)
        # keep only daughters whose parent is a duplication under the MPR.
        # Built as a set: membership is tested once per locus-tree node
        # below, and a lazy filter object (Python 3) would be exhausted
        # after the first test.
        daughters = set(node for node in recon.daughters
                        if locus_events[node.parent] == "dup")

    #========================================
    # find species map

    # find species tree subtree
    substree = treelib.subtree(stree, locus_recon[coal_recon[coal_tree.root]])

    # find species map
    species_map = {}
    for node in gene_tree:
        cnode = coal_tree.nodes[node.name]
        lnode = coal_recon[cnode]
        snode = locus_recon[lnode]
        species_map[node] = substree[snode.name]

    # add implied speciation and delay nodes to gene tree
    events = phylo.label_events(gene_tree, species_map)
    added_spec, added_dup, added_delay = add_implied_nodes(gene_tree, substree, species_map, events)

    # rename internal nodes
    common.rename_nodes(gene_tree, name_internal)

    #========================================
    # helper functions

    def walk_up(node):
        # nearest ancestor (or the node itself) that exists in the coal tree
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        return walk_up(node.parent)

    def walk_down(node):
        # nearest descendant (or the node itself) that exists in the coal
        # tree; added nodes must have exactly one child
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        assert len(node.children) == 1, (node.name, node.children)
        return walk_down(node.children[0])

    #========================================
    # find locus map

    # label loci in locus tree
    loci = {}
    next_locus = 1
    # keep track of duplication ages (measured as dist from leaf since root dist may differ in coal and locus trees)
    locus_times = treelib.get_tree_ages(locus_tree)
    dup_times = {}
    dup_snodes = {}
    for lnode in locus_tree.preorder():
        if not lnode.parent:            # root
            loci[lnode] = next_locus
        elif lnode in daughters:        # duplication
            next_locus += 1
            loci[lnode] = next_locus
            dup_times[next_locus] = locus_times[lnode.parent]
            dup_snodes[next_locus] = locus_recon[lnode.parent]
        else:                           # regular node
            loci[lnode] = loci[lnode.parent]

    # label loci in gene tree
    locus_map = {}
    for node in gene_tree:
        if node.name in coal_tree.nodes:
            # node in coal tree
            cnode = coal_tree.nodes[node.name]
            lnode = coal_recon[cnode]
            locus_map[node] = loci[lnode]
        else:
            # node not in coal tree, so use either parent or child locus
            cnode_up = walk_up(node)
            lnode_up = coal_recon[cnode_up]
            loci_up = loci[lnode_up]

            cnode_down = walk_down(node)
            lnode_down = coal_recon[cnode_down]
            loci_down = loci[lnode_down]

            if loci_up == loci_down:
                # parent and child locus match
                locus_map[node] = loci_up
            else:
                # determine whether to use parent or child locus
                snode = species_map[node]
                dup_snode = dup_snodes[loci_down]
                if (snode.name == dup_snode.name) or (snode.name in dup_snode.descendant_names()):
                    locus_map[node] = loci_down
                else:
                    locus_map[node] = loci_up

    #========================================
    # find order

    # find loci that give rise to new loci in each sbranch
    parent_loci = set()
    for node in gene_tree:
        if node.parent:
            locus = locus_map[node]
            plocus = locus_map[node.parent]

            if locus != plocus:
                snode = species_map[node]
                parent_loci.add((snode, plocus))

    # find order (locus tree and coal tree must use same timescale)
    order = {}
    for node in gene_tree:
        if node.parent:
            snode = species_map[node]
            plocus = locus_map[node.parent]

            if (snode, plocus) in parent_loci:
                order.setdefault(snode, {}).setdefault(plocus, []).append(node)

    # find coalescent/duplication times (= negative age) and depths
    coal_times = treelib.get_tree_ages(coal_tree)
    depths = get_tree_depths(gene_tree, distfunc=lambda node: 1)
    def get_time(node):
        if locus_map[node.parent] != locus_map[node]:
            # duplication
            return -dup_times[locus_map[node]], depths[node]
        else:
            # walk up to the nearest node in the coal tree
            # if the node was added (due to spec or dup), it has a single child
            # so it can be placed directly after its parent without affecting the extra lineage count
            if node.name in coal_tree.nodes:
                cnode = coal_tree.nodes[node.name]
            else:
                cnode = walk_up(node)
            return -coal_times[cnode], depths[node]

    # sort by node times
    # 1) larger age (smaller dist from root) are earlier in sort
    # 2) if equal dist, then smaller depths are earlier in sort
    for by_locus in order.values():
        for lst in by_locus.values():
            lst.sort(key=get_time)

    #========================================
    # put everything together

    return gene_tree, LabeledRecon(species_map, locus_map, order)
# Driver script: load the species tree and gene tree named in `conf`,
# obtain a branch reconciliation (brecon) for them, and draw the result.
stree = treelib1.read_tree(conf.stree)
tree = treelib1.read_tree(conf.tree)

# optional species display names, read as a two-column delimited file
snames = dict(util.read_delim(conf.names)) if conf.names else None

if conf.brecon:
    # a branch reconciliation was supplied directly
    brecon = phylo.read_brecon(conf.brecon, tree, stree)
else:
    if conf.recon:
        # a reconciliation plus event labels was supplied
        recon, events = phylo.read_recon_events(conf.recon, tree, stree)
    else:
        # nothing supplied: reconcile using the gene-to-species map
        gene2species = phylo.read_gene2species(conf.smap)
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)

    # both of the above cases are converted to a brecon the same way
    brecon = phylo.recon_events2brecon(recon, events)

phylo.add_implied_spec_nodes_brecon(tree, brecon)

transsvg.draw_tree(tree, brecon, stree,
                   filename=conf.output, snames=snames)


示例#30
0
def draw_tree(tree, labels=None, xscale=100, yscale=20, canvas=None,
              leafPadding=10, leafFunc=lambda x: str(x.name),
              labelOffset=None, fontSize=10, labelSize=None,
              minlen=1, maxlen=util.INF, filename=sys.stdout,
              rmargin=150, lmargin=10, tmargin=0, bmargin=None,
              colormap=None,
              stree=None,
              layout=None,
              gene2species=None,
              lossColor=(0, 0, 1),
              dupColor=(1, 0, 0),
              eventSize=4,
              legendScale=False, autoclose=None,
              extendRoot=True, labelLeaves=True, drawHoriz=True, nodeSize=0):
    """
    Draw a tree onto an SVG canvas and return the canvas.

    tree         -- tree to draw; every node gets a `color` attribute set
    labels       -- dict of node name -> branch label (str() is applied and
                    the text is split on newlines); None means no labels
    xscale, yscale, minlen, maxlen
                 -- passed to treelib.layout_tree when `layout` is None;
                    xscale also scales the legend and yscale is the default
                    bottom margin
    canvas       -- existing canvas to draw on; if None a new svg.Svg is
                    opened on `filename` and sized from the layout + margins
    leafPadding  -- horizontal gap between a leaf node and its label
    leafFunc     -- function mapping a leaf node to its label text
    labelOffset  -- vertical offset of branch labels (default -1)
    fontSize     -- size of leaf labels
    labelSize    -- size of branch labels (default .7 * fontSize)
    filename     -- output stream/filename used only when canvas is None
    rmargin, lmargin, tmargin, bmargin
                 -- margins; bmargin defaults to yscale
    colormap     -- callable applied to the tree (presumably it assigns
                    node.color -- confirm); if None every node is colored
                    (0, 0, 0)
    stree, gene2species
                 -- when both are given the tree is reconciled with
                    phylo.reconcile and events/losses are drawn
    layout       -- precomputed dict of node -> (x, y); skips layout_tree
    lossColor, dupColor, eventSize
                 -- passed to draw_events
    legendScale  -- False: no scale bar; True: pick a power-of-ten length
                    automatically; any other truthy value is used as the
                    scale length itself.  NOTE: 1 compares equal to True
                    and therefore selects the automatic length.
    autoclose    -- whether to call canvas.endSvg(); defaults to True when
                    the canvas is created here and False otherwise
    extendRoot   -- draw the root branch starting from x = 0
    labelLeaves  -- draw leaf labels
    drawHoriz    -- draw rectangular (horizontal + vertical) branches;
                    forced to True when labels or a reconciliation is drawn
    nodeSize     -- radius of node circles; 0 draws none
    """

    # set defaults
    # factor used to estimate a label's drawn width from its character count
    fontRatio = 8. / 11.

    if labels is None:
        labels = {}

    if labelSize is None:
        labelSize = .7 * fontSize

    if labelOffset is None:
        labelOffset = -1

    if bmargin is None:
        bmargin = yscale

    # branch lengths sum to zero: disable the scale bar and raise minlen
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legendScale = False
        minlen = xscale

    if colormap is None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    show_events = bool(stree and gene2species)
    if show_events:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    if len(labels) > 0 or show_events:
        drawHoriz = True

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout

    xcoords, ycoords = zip(* coords.values())
    maxwidth = max(xcoords)
    maxheight = max(ycoords) + labelOffset

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)

        # we opened the document, so by default we also close it
        if autoclose is None:
            autoclose = True
    else:
        # caller owns the canvas; leave it open unless told otherwise
        if autoclose is None:
            autoclose = False

    # draw tree
    def walk(node):
        x, y = coords[node]
        if node.parent:
            parentx, parenty = coords[node.parent]
        else:
            if extendRoot:
                parentx, parenty = 0, y
            else:
                parentx, parenty = x, y     # e.g. no branch

        # draw branch
        if drawHoriz:
            canvas.line(parentx, y, x, y, color=node.color)
        else:
            canvas.line(parentx, parenty, x, y, color=node.color)

        # draw branch labels
        if node.name in labels:
            branchlen = x - parentx
            lines = str(labels[node.name]).split("\n")
            labelwidth = max(map(len, lines))
            # estimated label width, capped by the branch length
            labellen = min(labelwidth * fontRatio * fontSize,
                           max(int(branchlen-1), 0))

            # lines are stacked upwards so the last one sits at labelOffset
            for i, line in enumerate(lines):
                canvas.text(line,
                            parentx + (branchlen - labellen)/2.,
                            y + labelOffset
                            +(-len(lines)+1+i)*(labelSize+1),
                            labelSize)

        # draw nodes
        if nodeSize > 0:
            canvas.circle(x, y, nodeSize, strokeColor=svg.null,
                          fillColor=node.color)

        # draw leaf labels or recur
        if node.is_leaf():
            if labelLeaves:
                canvas.text(leafFunc(node),
                            x + leafPadding, y+fontSize/2., fontSize,
                            fillColor=node.color)
        else:
            if drawHoriz:
                # draw vertical part of branch
                top = coords[node.children[0]][1]
                bot = coords[node.children[-1]][1]
                canvas.line(x, top, x, bot, color=node.color)

            # draw children
            for child in node.children:
                walk(child)

    canvas.beginTransform(("translate", lmargin, tmargin))
    walk(tree.root)

    if show_events:
        draw_events(canvas, tree, coords, events, losses,
                    lossColor=lossColor,
                    dupColor=dupColor,
                    size=eventSize)
    canvas.endTransform()

    # draw legend
    if legendScale:
        # `==` (not `is`) is kept on purpose so that existing callers
        # passing 1 still get the automatic scale
        if legendScale == True:
            # automatically choose a scale
            length = maxwidth / float(xscale)
            order = math.floor(math.log10(length))
            length = 10 ** order
        else:
            # explicit scale length; previously `length` was left unbound
            # here and drawScale raised UnboundLocalError
            length = legendScale

        drawScale(lmargin, tmargin + maxheight + bmargin - fontSize,
                  length, xscale, fontSize, canvas=canvas)

    if autoclose:
        canvas.endSvg()

    return canvas
示例#31
0
def draw_tree(tree,
              labels=None,
              xscale=100,
              yscale=20,
              canvas=None,
              leafPadding=10,
              leafFunc=lambda x: str(x.name),
              labelOffset=None,
              fontSize=10,
              labelSize=None,
              minlen=1,
              maxlen=util.INF,
              filename=sys.stdout,
              rmargin=150,
              lmargin=10,
              tmargin=0,
              bmargin=None,
              colormap=None,
              stree=None,
              layout=None,
              gene2species=None,
              lossColor=(0, 0, 1),
              dupColor=(1, 0, 0),
              eventSize=4,
              legendScale=False,
              autoclose=None,
              extendRoot=True,
              labelLeaves=True,
              drawHoriz=True,
              nodeSize=0):
    """
    Render `tree` on an SVG canvas and return that canvas.

    Arguments:
    tree        -- tree to draw; a `color` attribute is set on every node
    labels      -- dict keyed by node name giving the text to draw on that
                   node's branch (converted with str() and split on
                   newlines); None is treated as an empty dict
    xscale, yscale, minlen, maxlen
                -- handed to treelib.layout_tree unless `layout` is given;
                   xscale is also used for the legend and yscale is the
                   default for bmargin
    canvas      -- canvas to draw on; when None a new svg.Svg is opened on
                   `filename` with a size derived from the layout and margins
    leafPadding -- horizontal distance from a leaf node to its label
    leafFunc    -- function returning the label text for a leaf node
    labelOffset -- vertical offset applied to branch labels (default -1)
    fontSize    -- leaf label size
    labelSize   -- branch label size (default .7 * fontSize)
    filename    -- destination used only when a canvas has to be created
    rmargin, lmargin, tmargin, bmargin
                -- margins around the drawing
    colormap    -- callable invoked on the tree (presumably to set
                   node.color -- confirm); when None all nodes are
                   colored (0, 0, 0)
    stree, gene2species
                -- if both are given, the tree is reconciled against stree
                   and events and losses are drawn via draw_events
    layout      -- precomputed mapping of node -> (x, y)
    lossColor, dupColor, eventSize
                -- forwarded to draw_events
    legendScale -- False for no scale bar, True for an automatically chosen
                   power-of-ten length, or any other truthy value to use
                   that value as the length.  NOTE: 1 compares equal to
                   True, so it selects the automatic length.
    autoclose   -- call canvas.endSvg() before returning; defaults to True
                   if the canvas was created here, False otherwise
    extendRoot  -- start the root branch at x = 0
    labelLeaves -- draw leaf labels
    drawHoriz   -- draw rectangular branches; forced to True whenever
                   labels or a reconciliation are drawn
    nodeSize    -- radius of the circle drawn at each node (0 for none)
    """

    # set defaults
    # converts a character count into an estimated drawn width
    fontRatio = 8. / 11.

    if labels is None:
        labels = {}

    if labelSize is None:
        labelSize = .7 * fontSize

    if labelOffset is None:
        labelOffset = -1

    if bmargin is None:
        bmargin = yscale

    # total branch length of zero: no scale bar, and minlen becomes xscale
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legendScale = False
        minlen = xscale

    if colormap is None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    reconciling = bool(stree and gene2species)
    if reconciling:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    if len(labels) > 0 or reconciling:
        drawHoriz = True

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout

    xcoords, ycoords = zip(*coords.values())
    maxwidth = max(xcoords)
    maxheight = max(ycoords) + labelOffset

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)

        # the document was opened here, so close it here by default
        if autoclose is None:
            autoclose = True
    else:
        # a caller-supplied canvas stays open unless requested otherwise
        if autoclose is None:
            autoclose = False

    # draw tree
    def walk(node):
        x, y = coords[node]
        if node.parent:
            parentx, parenty = coords[node.parent]
        else:
            if extendRoot:
                parentx, parenty = 0, y
            else:
                parentx, parenty = x, y  # e.g. no branch

        # draw branch
        if drawHoriz:
            canvas.line(parentx, y, x, y, color=node.color)
        else:
            canvas.line(parentx, parenty, x, y, color=node.color)

        # draw branch labels
        if node.name in labels:
            branchlen = x - parentx
            lines = str(labels[node.name]).split("\n")
            labelwidth = max(map(len, lines))
            # estimated label width, never wider than the branch
            labellen = min(labelwidth * fontRatio * fontSize,
                           max(int(branchlen - 1), 0))

            # stack lines upwards so the last line sits at labelOffset
            for i, line in enumerate(lines):
                canvas.text(
                    line, parentx + (branchlen - labellen) / 2.,
                    y + labelOffset + (-len(lines) + 1 + i) * (labelSize + 1),
                    labelSize)

        # draw nodes
        if nodeSize > 0:
            canvas.circle(x,
                          y,
                          nodeSize,
                          strokeColor=svg.null,
                          fillColor=node.color)

        # draw leaf labels or recur
        if node.is_leaf():
            if labelLeaves:
                canvas.text(leafFunc(node),
                            x + leafPadding,
                            y + fontSize / 2.,
                            fontSize,
                            fillColor=node.color)
        else:
            if drawHoriz:
                # draw vertical part of branch
                top = coords[node.children[0]][1]
                bot = coords[node.children[-1]][1]
                canvas.line(x, top, x, bot, color=node.color)

            # draw children
            for child in node.children:
                walk(child)

    canvas.beginTransform(("translate", lmargin, tmargin))
    walk(tree.root)

    if reconciling:
        draw_events(canvas,
                    tree,
                    coords,
                    events,
                    losses,
                    lossColor=lossColor,
                    dupColor=dupColor,
                    size=eventSize)
    canvas.endTransform()

    # draw legend
    if legendScale:
        # equality (not identity) is intentional: callers that pass 1
        # keep getting the automatic scale as before
        if legendScale == True:
            # automatically choose a scale
            length = maxwidth / float(xscale)
            order = math.floor(math.log10(length))
            length = 10**order
        else:
            # caller supplied the length; without this branch `length`
            # was unbound and drawScale raised UnboundLocalError
            length = legendScale

        drawScale(lmargin,
                  tmargin + maxheight + bmargin - fontSize,
                  length,
                  xscale,
                  fontSize,
                  canvas=canvas)

    if autoclose:
        canvas.endSvg()

    return canvas
示例#32
0
def dlcoal_recon_old(tree, stree, gene2species,
                 n, duprate, lossrate,
                 pretime=None, premean=None,
                 nsearch=1000,
                 maxdoom=20, nsamples=100,
                 search=phylo.TreeSearchNni):
    """
    Search for the best reconciliation under the DLCoal model.

    The coalescent tree is fixed to 'tree'.  Starting from a locus tree
    that is a copy of it, 'nsearch' locus-tree proposals are scored with
    prob_dlcoal_recon_topology and the highest scoring one is kept.

    Returns the pair (maxp, maxrecon): 'maxp' is the best score found and
    'maxrecon' is the matching reconciliation, a dict with the keys

        'coal_recon', 'locus_tree', 'locus_recon',
        'locus_events', 'daughters'

    'maxrecon' is None if no proposal scored above -util.INF.
    """

    # the coalescent tree is the input tree itself
    coal_tree = tree

    # start with a locus tree congruent to the coal tree (i.e. no ILS)
    locus_tree = coal_tree.copy()
    locus_search = search(locus_tree)

    maxp, maxrecon = -util.INF, None

    for _ in xrange(nsearch):
        # score a rerooted copy so the searcher's tree is left untouched
        # TODO: propose other reconciliations beside LCA
        candidate = locus_tree.copy()
        phylo.recon_root(candidate, stree, gene2species, newCopy=False)
        locus_recon = phylo.reconcile(candidate, stree, gene2species)
        locus_events = phylo.label_events(candidate, locus_recon)

        # propose daughters (TODO)
        daughters = set()

        # propose coal recon (TODO: propose others beside LCA)
        coal_recon = phylo.reconcile(coal_tree, candidate, lambda x: x)

        # implied speciation nodes are added here (hence add_spec=False
        # below) and stripped again once the probability is known
        phylo.add_implied_spec_nodes(candidate, stree,
                                     locus_recon, locus_events)
        p = prob_dlcoal_recon_topology(
            coal_tree, coal_recon,
            candidate, locus_recon, locus_events,
            daughters,
            stree, n, duprate, lossrate,
            pretime, premean,
            maxdoom=maxdoom, nsamples=nsamples,
            add_spec=False)
        treelib.remove_single_children(candidate)

        if p > maxp:
            # new best: record it and continue the search from this tree
            maxp = p
            maxrecon = {
                "coal_recon": coal_recon,
                "locus_tree": candidate,
                "locus_recon": locus_recon,
                "locus_events": locus_events,
                "daughters": daughters,
            }
            locus_tree = candidate.copy()
            locus_search.set_tree(locus_tree)
        else:
            # not an improvement: undo the last proposal
            locus_search.revert()

        # perform local rearrangement to locus tree
        locus_search.propose()

    return maxp, maxrecon
示例#33
0
    def test_birthDeathPrior(self):
        """test birth death prior (simple)

        For several gene tree / species tree pairs, checks with fequal
        that c_calcBirthDeathPrior and calcBirthDeathPrior return the
        same value.
        """

        # rate parameters handed to both implementations
        # (presumably birth and death rates -- confirm against the
        # calcBirthDeathPrior signature)
        lam = 2
        mu = .5
        maxdoom = 10

        def gene2species(gene):
            # species name is the upper-cased first letter of the gene name
            return gene[:1].upper()

        def check_reconciled(tree, stree, rate1, rate2):
            # reconcile with phylo.reconcile, then compare both priors
            recon = phylo.reconcile(tree, stree, gene2species)
            p = c_calcBirthDeathPrior(tree, stree, recon,
                                      rate1, rate2, maxdoom)
            p2 = calcBirthDeathPrior(tree, stree, recon,
                                     rate1, rate2, maxdoom)
            print("prior %s %s" % (p, p2))
            fequal(p, p2)

        stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);")

        tree = treelib.parse_newick(
            "((((a1,a2),(a3,a4)),(b1,b2)),(((c1,d1),(c2,d2)),e1));")
        check_reconciled(tree, stree, lam, mu)

        # test gene reconciling within species tree
        tree = treelib.parse_newick(
            "((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));")
        check_reconciled(tree, stree, lam, mu)

        # test gene reconciling within species tree
        # NOTE: the same rate is passed for both parameters here, as in
        # the original (presumably to cover the equal-rate case -- confirm)
        tree = treelib.parse_newick("((a1,b1),c1);")
        check_reconciled(tree, stree, lam, lam)

        # test case that occurred during simulation
        # non parsimonious reconciliation
        stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
        tree = treelib.parse_newick("((a1,a2));")
        recon = {
            tree.nodes["a1"]: stree.nodes["A"],
            tree.nodes["a2"]: stree.nodes["A"],
            tree.nodes["a1"].parent: stree.nodes["A"].parent,
            tree.root: stree.root
        }
        events = {
            tree.nodes["a1"]: "gene",
            tree.nodes["a2"]: "gene",
            tree.nodes["a1"].parent: "dup",
            tree.root: "spec"
        }
        p = c_calcBirthDeathPrior(tree,
                                  stree,
                                  recon,
                                  lam,
                                  mu,
                                  maxdoom,
                                  events=events)
        p2 = calcBirthDeathPrior(tree,
                                 stree,
                                 recon,
                                 lam,
                                 mu,
                                 maxdoom,
                                 events=events)
        tree.write_newick(oneline=True)
        print("\nprior %s %s" % (p, p2))
        fequal(p, p2)

        # complicated case
        stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
        tree = treelib.parse_newick(
            "((((B2:1.072961,B8:1.072961):0.106756,((((A1:0.427377,(((A3:0.150067,A11:0.150067):0.038521,A2:0.188588):0.121082,A5:0.309671):0.117706):0.352590,A9:0.779967):0.113269,(A8:0.266488,A7:0.266488):0.626747):0.236597,(((B9:0.160640,B7:0.160640):0.098506,B4:0.259146):0.429865,B5:0.689011):0.440822):0.049885):0.714463,(B13:1.086980,((A10:1.000000,((B10:0.408524,(((B3:0.143778,B1:0.143778):0.023788,B6:0.167566):0.058639,B12:0.226204):0.182319):0.232105,B11:0.640629):0.359371):0.082149,(A6:0.277757,A4:0.277757):0.804392):0.004830):0.807201):0.105819,(C3:1.213803,(((C6:0.190132,C4:0.190132):0.011461,C5:0.201593):0.745740,(C1:0.017299,C2:0.017299):0.930034):0.266470):0.786197);"
        )
        check_reconciled(tree, stree, lam, mu)

        stree = treelib.parse_newick(
            "(((A:1,B:1):1,(C:1.5,D:1.5):0.5):.5,((E:.2,F:.2):.6):1.9);")
        tree = treelib.parse_newick(
            "(((A1:1.000000,B1:1.000000):1.000000,(((C2:0.718949,C1:0.718949):0.168784,C3:0.887733):0.612267,D1:1.500000):0.500000):0.500000,((F8:0.122975,F5:0.122975):6.518970,(((E4:0.200000,F6:0.200000):5.257236,((E3:0.200000,F7:0.200000):4.029009,(E2:0.200000,F1:0.200000):4.029009):1.228227):0.306982,(((E5:0.200000,F3:0.200000):1.068443,(E6:0.200000,F2:0.200000):1.068443):1.094596,(E1:0.200000,F4:0.200000):2.163039):3.401179):0.877727):1.458055);"
        )
        check_reconciled(tree, stree, lam, mu)

        # test for overflow
        stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
        tree = treelib.parse_newick(
            "((((C24:0.940136,C6:0.940136):0.140529,((((C37:0.374306,(C26:0.054540,C10:0.054540):0.319766):0.046428,(C15:0.009875,C29:0.009875):0.410860):0.112550,(C3:0.213709,C28:0.213709):0.319576):0.034152,C13:0.567437):0.513228):0.545124,((((C36:0.036428,C30:0.036428):1.402769,(((C33:0.038848,C19:0.038848):0.352795,(C9:0.282410,(C1:0.000411,C21:0.000411):0.281998):0.109233):0.452052,((C34:0.108366,C12:0.108366):0.332454,C35:0.440820):0.402875):0.595502):0.039525,((((((C40:0.082790,C23:0.082790):0.003327,(C11:0.021474,C14:0.021474):0.064643):0.031631,C31:0.117748):0.019433,C17:0.137181):0.619636,C39:0.756818):0.139581,(C4:0.160113,(C41:0.116482,C32:0.116482):0.043631):0.736286):0.582323):0.000255,(C5:0.389128,((C25:0.112569,C27:0.112569):0.127253,(C22:0.139232,C18:0.139232):0.100590):0.149306):1.089849):0.146811):0.299534,(C2:1.197153,(C7:0.690311,(C16:0.070431,((C20:0.000466,C8:0.000466):0.060700,C38:0.061165):0.009265):0.619881):0.506842):0.728170);"
        )
        print("leaves %s" % len(tree.leaves()))
        check_reconciled(tree, stree, lam, mu)
示例#34
0
    def test_birthDeathPrior(self):
        """test birth death prior (simple)

        Compares c_calcBirthDeathPrior against calcBirthDeathPrior on a
        series of gene tree / species tree pairs; fequal is used to
        require that the two results agree.
        """

        # rate parameters given to both implementations (presumably the
        # birth and death rates -- confirm against calcBirthDeathPrior)
        lam = 2
        mu = .5
        maxdoom = 10

        def gene2species(gene):
            # the species is the upper-cased first character of the gene
            return gene[:1].upper()

        def compare_priors(tree, stree, rate1, rate2):
            # reconcile via phylo.reconcile and compare the two priors
            recon = phylo.reconcile(tree, stree, gene2species)
            p = c_calcBirthDeathPrior(tree, stree, recon, rate1, rate2, maxdoom)
            p2 = calcBirthDeathPrior(tree, stree, recon, rate1, rate2, maxdoom)
            print("prior %s %s" % (p, p2))
            fequal(p, p2)

        stree = treelib.parse_newick("((A:1,B:1):1,((C:1,D:1):2,E:3):1);")

        tree = treelib.parse_newick("((((a1,a2),(a3,a4)),(b1,b2)),(((c1,d1),(c2,d2)),e1));")
        compare_priors(tree, stree, lam, mu)

        # test gene reconciling within species tree
        tree = treelib.parse_newick("((((a1,a2),(a3,a4)),(b1,b2)),((c1,d1),(c2,c3)));")
        compare_priors(tree, stree, lam, mu)

        # test gene reconciling within species tree
        # NOTE: both rate arguments are the same value here, exactly as in
        # the original (presumably the equal-rate case -- confirm)
        tree = treelib.parse_newick("((a1,b1),c1);")
        compare_priors(tree, stree, lam, lam)

        # test case that occurred during simulation
        # non parsimonious reconciliation
        stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
        tree = treelib.parse_newick("((a1,a2));")
        recon = {tree.nodes["a1"]: stree.nodes["A"],
                 tree.nodes["a2"]: stree.nodes["A"],
                 tree.nodes["a1"].parent: stree.nodes["A"].parent,
                 tree.root: stree.root}
        events = {tree.nodes["a1"]: "gene",
                  tree.nodes["a2"]: "gene",
                  tree.nodes["a1"].parent: "dup",
                  tree.root: "spec"}
        p = c_calcBirthDeathPrior(tree, stree, recon, lam, mu, maxdoom,
                                  events=events)
        p2 = calcBirthDeathPrior(tree, stree, recon, lam, mu, maxdoom,
                                 events=events)
        tree.write_newick(oneline=True)
        print("\nprior %s %s" % (p, p2))
        fequal(p, p2)

        # complicated case
        stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
        tree = treelib.parse_newick("((((B2:1.072961,B8:1.072961):0.106756,((((A1:0.427377,(((A3:0.150067,A11:0.150067):0.038521,A2:0.188588):0.121082,A5:0.309671):0.117706):0.352590,A9:0.779967):0.113269,(A8:0.266488,A7:0.266488):0.626747):0.236597,(((B9:0.160640,B7:0.160640):0.098506,B4:0.259146):0.429865,B5:0.689011):0.440822):0.049885):0.714463,(B13:1.086980,((A10:1.000000,((B10:0.408524,(((B3:0.143778,B1:0.143778):0.023788,B6:0.167566):0.058639,B12:0.226204):0.182319):0.232105,B11:0.640629):0.359371):0.082149,(A6:0.277757,A4:0.277757):0.804392):0.004830):0.807201):0.105819,(C3:1.213803,(((C6:0.190132,C4:0.190132):0.011461,C5:0.201593):0.745740,(C1:0.017299,C2:0.017299):0.930034):0.266470):0.786197);")
        compare_priors(tree, stree, lam, mu)

        stree = treelib.parse_newick(
                "(((A:1,B:1):1,(C:1.5,D:1.5):0.5):.5,((E:.2,F:.2):.6):1.9);")
        tree = treelib.parse_newick("(((A1:1.000000,B1:1.000000):1.000000,(((C2:0.718949,C1:0.718949):0.168784,C3:0.887733):0.612267,D1:1.500000):0.500000):0.500000,((F8:0.122975,F5:0.122975):6.518970,(((E4:0.200000,F6:0.200000):5.257236,((E3:0.200000,F7:0.200000):4.029009,(E2:0.200000,F1:0.200000):4.029009):1.228227):0.306982,(((E5:0.200000,F3:0.200000):1.068443,(E6:0.200000,F2:0.200000):1.068443):1.094596,(E1:0.200000,F4:0.200000):2.163039):3.401179):0.877727):1.458055);")
        compare_priors(tree, stree, lam, mu)

        # test for overflow
        stree = treelib.parse_newick("((A:1,B:1):1,C:2);")
        tree = treelib.parse_newick("((((C24:0.940136,C6:0.940136):0.140529,((((C37:0.374306,(C26:0.054540,C10:0.054540):0.319766):0.046428,(C15:0.009875,C29:0.009875):0.410860):0.112550,(C3:0.213709,C28:0.213709):0.319576):0.034152,C13:0.567437):0.513228):0.545124,((((C36:0.036428,C30:0.036428):1.402769,(((C33:0.038848,C19:0.038848):0.352795,(C9:0.282410,(C1:0.000411,C21:0.000411):0.281998):0.109233):0.452052,((C34:0.108366,C12:0.108366):0.332454,C35:0.440820):0.402875):0.595502):0.039525,((((((C40:0.082790,C23:0.082790):0.003327,(C11:0.021474,C14:0.021474):0.064643):0.031631,C31:0.117748):0.019433,C17:0.137181):0.619636,C39:0.756818):0.139581,(C4:0.160113,(C41:0.116482,C32:0.116482):0.043631):0.736286):0.582323):0.000255,(C5:0.389128,((C25:0.112569,C27:0.112569):0.127253,(C22:0.139232,C18:0.139232):0.100590):0.149306):1.089849):0.146811):0.299534,(C2:1.197153,(C7:0.690311,(C16:0.070431,((C20:0.000466,C8:0.000466):0.060700,C38:0.061165):0.009265):0.619881):0.506842):0.728170);")
        print("leaves %s" % len(tree.leaves()))
        compare_priors(tree, stree, lam, mu)