Пример #1
0
    def do_test_coal_sim(self, stree, gene2species, n,
                         ntrees=10000, tabsize=30):
        """Simulate gene trees and score each distinct topology.

        Draws `ntrees` samples with coal.sample_multicoal_tree(stree, n),
        tallies their phylo.hash_tree() keys with histtab(), and calls
        coal.prob_coal_recon_topology() once per histogram row.

        `gene2species` and `tabsize` are accepted but not used in this body.

        Returns a pair (hist, probs): the table produced by histtab() and a
        list holding exp() of the value computed for each row, in row order.
        """
        topologies = []
        sample_by_topology = {}

        util.tic("simulating %d trees" % ntrees)
        for _ in xrange(ntrees):
            gene_tree, gene_recon = coal.sample_multicoal_tree(
                stree, n, namefunc=lambda x: x)
            key = phylo.hash_tree(gene_tree)
            topologies.append(key)
            # A later sample with the same hash replaces the earlier one.
            sample_by_topology[key] = (gene_tree, gene_recon)
        util.toc()

        hist = histtab(topologies)

        probs = []
        for row in hist:
            gene_tree, gene_recon = sample_by_topology[row["item"]]
            try:
                # The tree is simplified in place, so the reconciliation has
                # to be pruned to the nodes that are still present.
                treelib.remove_single_children(gene_tree)
                kept = set(gene_tree.postorder())
                stale = [node for node in gene_recon if node not in kept]
                for node in stale:
                    del gene_recon[node]
                score = coal.prob_coal_recon_topology(
                    gene_tree, gene_recon, stree, n)
            except:
                # Show the offending tree, then let the failure propagate.
                draw_tree(gene_tree, maxlen=5, minlen=5)
                raise
            probs.append(exp(score))

        return hist, probs
Пример #2
0
def _test_multicoal_tree(stree, n, nsamples):
    """Sample gene trees and tabulate frequency and probability per topology.

    Draws `nsamples` trees with coal.sample_multicoal_tree(stree, n) and
    groups them by phylo.hash_tree().  For every distinct topology a row is
    added to a Table with the topology hash, the hash of a copy with single
    children removed, the observed fraction of samples, and exp() of
    coal.prob_multicoal_recon_topology() for the first sample seen with
    that topology.

    Returns (tab, tops): the Table sorted by "prob" in descending order and
    the dict mapping each topology hash to [count, tree, recon].
    """
    tops = {}

    for _ in xrange(nsamples):
        tree, recon = coal.sample_multicoal_tree(stree, n,
                                                 namefunc=lambda x: x)
        top = phylo.hash_tree(tree)
        # Keep the first (tree, recon) seen for a topology; only count later.
        if top not in tops:
            tops[top] = [0, tree, recon]
        tops[top][0] += 1

    tab = Table(headers=["top", "simple_top", "percent", "prob"])
    for top, entry in tops.items():
        count, tree, recon = entry

        # Simplify a copy so the stored tree keeps its single-child nodes.
        simplified = tree.copy()
        treelib.remove_single_children(simplified)

        print(phylo.hash_tree(simplified))
        print(phylo.hash_tree(stree))

        simple_top = phylo.hash_tree(simplified)
        fraction = count / float(nsamples)
        score = coal.prob_multicoal_recon_topology(tree, recon, stree, n)
        tab.add(top=top,
                simple_top=simple_top,
                percent=fraction,
                prob=exp(score))
    tab.sort(col="prob", reverse=True)

    return tab, tops
Пример #3
0
    def test_cdf_bmc(self):
        """Check cdf_mrca_bounded_multicoal against a simulated estimate.

        Computes exp() of coal.cdf_mrca_bounded_multicoal() for one gene per
        species leaf at T = 2000, then samples 5000 trees with
        coal.sample_multicoal_tree() and measures the fraction whose root
        timestamp is below T.  The two values are compared with
        fequal(..., .05).
        """
        stree = treelib.parse_newick(
            "((A:1000, B:1000):500, (C:700, D:700):800);")
        n = 1000
        T = 2000
        gene_counts = dict((name, 1) for name in stree.leaf_names())

        expected = exp(
            coal.cdf_mrca_bounded_multicoal(gene_counts, T, stree, n))

        nsamples = 5000
        hits = 0
        for _ in xrange(nsamples):
            tree, _recon = coal.sample_multicoal_tree(stree, n)
            root_time = treelib.get_tree_timestamps(tree)[tree.root]
            if root_time < T:
                hits += 1
        observed = hits / float(nsamples)

        fequal(expected, observed, .05)
Пример #4
0
    def do_test_coal_sim(self,
                         stree,
                         gene2species,
                         n,
                         ntrees=10000,
                         tabsize=30):
        """Run a coalescent gene-tree simulation and score its topologies.

        Samples `ntrees` trees via coal.sample_multicoal_tree(stree, n),
        builds a histogram of their phylo.hash_tree() values with histtab(),
        and for each histogram row evaluates
        coal.prob_coal_recon_topology() on one stored sample of that
        topology.

        `gene2species` and `tabsize` are not referenced in this body.

        Returns (hist, probs), where probs[i] is exp() of the value computed
        for the i-th row of hist.
        """
        hashes = []
        samples = {}

        util.tic("simulating %d trees" % ntrees)
        for _ in xrange(ntrees):
            sampled_tree, sampled_recon = coal.sample_multicoal_tree(
                stree, n, namefunc=lambda x: x)
            tree_hash = phylo.hash_tree(sampled_tree)
            hashes.append(tree_hash)
            # Only the most recent sample per topology hash is retained.
            samples[tree_hash] = (sampled_tree, sampled_recon)
        util.toc()

        hist = histtab(hashes)

        probs = []
        for row in hist:
            sampled_tree, sampled_recon = samples[row["item"]]
            try:
                treelib.remove_single_children(sampled_tree)
                live_nodes = set(sampled_tree.postorder())
                # Iterate over a snapshot of the keys because entries are
                # deleted from the mapping inside the loop.
                for node in list(sampled_recon):
                    if node in live_nodes:
                        continue
                    del sampled_recon[node]
                value = coal.prob_coal_recon_topology(
                    sampled_tree, sampled_recon, stree, n)
            except:
                # Draw the tree that triggered the failure, then re-raise.
                draw_tree(sampled_tree, maxlen=5, minlen=5)
                raise
            probs.append(exp(value))

        return hist, probs