Пример #1
0
    def test_prob_coal2(self):
        """Check samples from coal.sample_coal against coal.prob_coal.

        Plots the density for k = 2, n = 1000 together with a histogram of
        200 sampled values, saves the figure, and then compares the sample
        with the density via eq_sample_pdf.
        """
        plot_dir = 'test/tmp/test_coal/Coal_test_prob_coal2/'
        make_clean_dir(plot_dir)

        k = 2
        n = 1000

        def density(t):
            # density evaluated at t for the fixed k and n above
            return coal.prob_coal(t, k, n)

        # keep gnuplot quiet while the figure is assembled
        p = Gnuplot()
        p.enableOutput(False)
        p.plotfunc(density, 0, 4000, 10, ymin=0)

        # draw single coal samples
        samples = []
        for _ in xrange(200):
            samples.append(coal.sample_coal(k, n))
        plotdistrib(samples, 40, plot=p)
        p.enableOutput(True)
        p.save(plot_dir + 'plot.png')

        eq_sample_pdf(samples, density, 40)
Пример #2
0
    def test_prob_coal_cond_counts_simple(self):
        """Check prob_coal_cond_counts_simple against prob_bounded_coal.

        When we condition on b=1, prob_coal_cond_counts_simple is expected to
        be the same as the bounded coal PDF (it is a more general version of
        prob_bounded_coal).  The two are compared on a grid of times in
        [0, t) with step 10 using assertAlmostEqual.
        """
        # NOTE(review): 'counys' in the directory name looks like a typo for
        # 'counts'; the path is left unchanged so existing tooling that may
        # rely on it keeps working.
        outdir = 'test/tmp/test_coal/Coal_test_prob_coal_counys_simple/'
        make_clean_dir(outdir)

        a = 5
        b = 1
        t = 2000
        n = 1000

        # No plot is produced by this test, so no Gnuplot object is created.
        # The grid is bounded by t itself so it cannot drift from the bound.
        for x in frange(0, t, 10):
            y = coal.prob_coal_cond_counts_simple(x, a, b, t, n)
            y2 = coal.prob_bounded_coal(x, a, n, t)
            self.assertAlmostEqual(y, y2)
Пример #3
0
    def test_cdf_mrca(self):
        """Plot coal.cdf_mrca next to a running sum of coal.prob_mrca.

        Both curves are evaluated on the grid 0, 10, ... below 5000 for
        k = 6, n = 1000 and saved to a PNG.
        """
        plot_dir = 'test/tmp/test_coal/Coal_test_cdf_mrca/'
        make_clean_dir(plot_dir)

        n = 1000
        k = 6
        step = 10
        grid = list(frange(0, 5000, step))

        # density times step width, accumulated into an approximate CDF
        mass = []
        for time in grid:
            mass.append(coal.prob_mrca(time, k, n) * step)
        summed = cumsum(mass)

        def cdf(t):
            return coal.cdf_mrca(t, k, n)

        exact = [cdf(time) for time in grid]

        p = Gnuplot()
        p.enableOutput(False)
        for curve in (summed, exact):
            p.plot(grid, curve, style="lines")
        p.enableOutput(True)
        p.save(plot_dir + 'plot.png')

        # NOTE(review): this hands the evaluation grid and the CDF to
        # eq_sample_pdf; confirm this is the intended check rather than a
        # direct comparison of the two curves.
        eq_sample_pdf(grid, cdf, 40)
Пример #4
0
    def test_plot_prob_bounded_coal(self):
        """Compare rejection-sampled first waiting times to prob_bounded_coal.

        Coalescent time sequences are drawn with coal.sample_coal and kept
        only when the last time stays below t.  The distribution of the
        first waiting time is then plotted and compared, via fequals, with
        coal.prob_bounded_coal evaluated on a grid.
        """
        n = 1000
        k = 4
        t = 800

        # sample times: keep drawing until 5000 sequences have been accepted
        alltimes = []
        while len(alltimes) < 5000:
            times = [0]
            for j in xrange(k, 1, -1):
                times.append(times[-1] + coal.sample_coal(j, n))
                if times[-1] >= t:
                    # bound exceeded; this draw will be rejected below
                    break
            if times[-1] < t:
                alltimes.append(times)

        p = Gnuplot()

        # empirical distribution of the first waiting time
        first_waits = [q[1] - q[0] for q in alltimes]
        hist_x, y = distrib(first_waits, width=20)
        p.plot(hist_x, y, style="lines", xmax=500)

        # density evaluated on a regular grid
        grid = list(frange(0, 500, 10))
        y2 = []
        for point in grid:
            y2.append(coal.prob_bounded_coal(point, k, n, t))
        p.plot(grid, y2, style="lines", xmax=500)

        fequals(y, y2, rel=.05, eabs=.01)
Пример #5
0
    def test_coin_sample_post(self):
        """Test sampling from the posterior distribution of the coin model.

        Samples hidden states and data from the model, then asserts that
        each of eight state paths drawn with hmm.sample_posterior has a
        correlation above 0.5 with the true states.  All traces are plotted
        and saved to a PNG.
        """
        plot_dir = 'test/tmp/test_hmm/test_coin_sample_post/'
        make_clean_dir(plot_dir)
        model = make_coin_model()

        # sample states and data
        ndata = 100
        states = list(islice(hmm.sample_hmm_states(model), ndata))
        data = list(hmm.sample_hmm_data(model, states))

        def emission(pos, state):
            # emission probability bound to the sampled data
            return model.prob_emission_data(state, data[pos])

        model.prob_emission = emission

        p = Gnuplot()
        p.enableOutput(False)
        p.plot(states, style="lines")

        # exp() of entry 1 of each posterior row (presumably log-probs)
        probs = hmm.get_posterior_probs(model, len(data))
        posterior = []
        for i in xrange(len(data)):
            posterior.append(exp(probs[i][1]))
        p.plot(util.vadds(posterior, 1.5), style="lines", miny=-1, maxy=12)

        # each sampled path is shifted upward so the traces do not overlap
        for row in range(2, 10):
            sampled = hmm.sample_posterior(model, ndata)
            self.assertTrue(stats.corr(states, sampled) > .5)

            shifted = util.vadds(sampled, 1.5 * row)
            p.plot(shifted, style="lines", miny=-1, maxy=12)
        p.enableOutput(True)
        p.save(plot_dir + 'plot.png')
Пример #6
0
    def test_coin(self):
        """Test that viterbi and posterior decoding work well.

        Samples states and data from the coin model, decodes them with
        hmm.viterbi and hmm.get_posterior_probs, and asserts that both
        decodings correlate with the true states above 0.5.  The traces are
        plotted and saved to a PNG.
        """
        plot_dir = 'test/tmp/test_hmm/test_coin/'
        make_clean_dir(plot_dir)

        model = make_coin_model()

        # sample states
        ndata = 100
        states = list(islice(hmm.sample_hmm_states(model), ndata))
        p = Gnuplot()
        p.enableOutput(False)
        p.plot(states, style="lines")

        # sample data
        data = list(hmm.sample_hmm_data(model, states))
        npos = len(data)

        def emission(pos, state):
            # emission probability bound to the sampled data
            return model.prob_emission_data(state, data[pos])

        # viterbi
        model.prob_emission = emission
        viterbi_path = hmm.viterbi(model, npos)

        # posterior: exp() of entry 1 of each row (presumably log-probs)
        probs = hmm.get_posterior_probs(model, npos)
        posterior = []
        for i in xrange(npos):
            posterior.append(exp(probs[i][1]))

        # assert that inferences correlate with the true states
        self.assertTrue(stats.corr(states, viterbi_path) > .5)
        self.assertTrue(stats.corr(states, posterior) > .5)

        # plot inference, each trace shifted upward by its own offset
        for trace, offset in ((viterbi_path, 1.5), (posterior, 2.5)):
            p.plot(util.vadds(trace, offset), style="lines", miny=-1, maxy=4)
        p.enableOutput(True)
        p.save(plot_dir + 'plot.png')
    def test_coin_sample_post(self):
        """Test sampling from the posterior distribution of the coin model.

        True states and data are sampled from the model; eight posterior
        state paths are then drawn and each must correlate with the true
        states above 0.5.  The traces are stacked in one plot and saved.
        """
        target_dir = 'test/tmp/test_hmm/test_coin_sample_post/'
        make_clean_dir(target_dir)
        model = make_coin_model()

        # sample states and data
        ndata = 100
        state_iter = hmm.sample_hmm_states(model)
        states = list(islice(state_iter, ndata))
        data = list(hmm.sample_hmm_data(model, states))

        def prob_emission(pos, state):
            # look up the observation at pos in the sampled data
            return model.prob_emission_data(state, data[pos])

        model.prob_emission = prob_emission

        p = Gnuplot()
        p.enableOutput(False)
        p.plot(states, style="lines")

        probs = hmm.get_posterior_probs(model, len(data))
        states2 = []
        for pos in xrange(len(data)):
            # entry 1 of each row is exponentiated (presumably a log-prob)
            states2.append(exp(probs[pos][1]))
        p.plot(util.vadds(states2, 1.5), style="lines", miny=-1, maxy=12)

        plot_opts = dict(style="lines", miny=-1, maxy=12)
        for i in range(2, 10):
            states2 = hmm.sample_posterior(model, ndata)
            self.assertTrue(stats.corr(states, states2) > .5)

            # vertical offset grows with i so the traces stay separated
            p.plot(util.vadds(states2, 1.5*i), **plot_opts)
        p.enableOutput(True)
        p.save(target_dir + 'plot.png')
    def test_coin(self):
        """Test that viterbi and posterior decoding work well.

        Both the Viterbi path and the exponentiated posterior values must
        correlate with the sampled true states above 0.5; all three traces
        are written to a single saved plot.
        """
        target_dir = 'test/tmp/test_hmm/test_coin/'
        make_clean_dir(target_dir)

        model = make_coin_model()

        # sample states
        ndata = 100
        state_iter = hmm.sample_hmm_states(model)
        states = list(islice(state_iter, ndata))
        p = Gnuplot()
        p.enableOutput(False)
        p.plot(states, style="lines")

        # sample data
        data = list(hmm.sample_hmm_data(model, states))

        def prob_emission(pos, state):
            # look up the observation at pos in the sampled data
            return model.prob_emission_data(state, data[pos])

        # viterbi
        model.prob_emission = prob_emission
        states2 = hmm.viterbi(model, len(data))

        # posterior
        probs = hmm.get_posterior_probs(model, len(data))
        states3 = []
        for pos in xrange(len(data)):
            # entry 1 of each row is exponentiated (presumably a log-prob)
            states3.append(exp(probs[pos][1]))

        # assert that inferences correlate with the true states
        viterbi_corr = stats.corr(states, states2)
        self.assertTrue(viterbi_corr > .5)
        posterior_corr = stats.corr(states, states3)
        self.assertTrue(posterior_corr > .5)

        # plot inference
        plot_opts = dict(style="lines", miny=-1, maxy=4)
        p.plot(util.vadds(states2, 1.5), **plot_opts)
        p.plot(util.vadds(states3, 2.5), **plot_opts)
        p.enableOutput(True)
        p.save(target_dir + 'plot.png')
Пример #9
0
    def test_top(self):
        """Compare topology counts from two bounded multicoal samplers.

        For each of 4000 rounds one tree is drawn with
        coal.sample_bounded_multicoal_tree_reject and one with
        coal.sample_bounded_multicoal_tree.  Trees are keyed by
        phylo.hash_tree, and the per-topology counts (passed through
        safelog) of the two samplers must correlate above 0.9.  A scatter
        plot of the two count vectors is saved.
        """
        plot_dir = 'test/tmp/test_coal/BMC_test_top/'
        make_clean_dir(plot_dir)

        stree = treelib.parse_newick(
            "(((A:200, E:200):800, B:1000):500, (C:700, D:700):800);")
        n = 500
        T = 2000
        nsamples = 4000

        def identity(x):
            return x

        # compare top hist with simpler rejection sampling
        reject_counts = {}
        direct_counts = {}

        for _ in xrange(nsamples):
            # use rejection sampling
            tree, recon = coal.sample_bounded_multicoal_tree_reject(
                stree, n, T, namefunc=identity)

            # sample tree
            tree2, recon2 = coal.sample_bounded_multicoal_tree(
                stree, n, T, namefunc=identity)

            top = phylo.hash_tree(tree)
            top2 = phylo.hash_tree(tree2)

            # count each topology in its own table and register it with a
            # zero count in the other, so both tables share one key set
            entry = reject_counts.setdefault(top, [0, tree, recon])
            entry[0] += 1
            reject_counts.setdefault(top2, [0, tree2, recon2])

            entry2 = direct_counts.setdefault(top2, [0, tree2, recon2])
            entry2[0] += 1
            direct_counts.setdefault(top, [0, tree, recon])

        keys = reject_counts.keys()
        x = [safelog(reject_counts[key][0], default=0) for key in keys]
        y = [safelog(direct_counts[key][0], default=0) for key in keys]

        self.assertTrue(stats.corr(x, y) > .9)

        # scatter of the two count vectors plus a diagonal reference line
        lo, hi = min(x), max(x)
        p = Gnuplot()
        p.enableOutput(False)
        p.plot(x, y)
        p.plot([lo, hi], [lo, hi], style="lines")
        p.enableOutput(True)
        p.save(plot_dir + 'plot.png')
Пример #10
0
    def test_fast_sample_bounded_coal(self):
        """Plot waiting times from rejection sampling and the direct sampler.

        The first set of time sequences is built from coal.sample_coal and
        kept only when the final time stays below t; the second set is built
        from coal.sample_bounded_coal.  The distribution of every waiting
        time is drawn on the same Gnuplot object for visual comparison.
        This test makes no assertions.
        """
        # sample bounded coal times efficiently
        n = 1000
        k = 5
        t = 500

        # sample times by rejection: draw until 5000 sequences are accepted
        alltimes = []
        while len(alltimes) < 5000:
            times = [0]
            for j in xrange(k, 1, -1):
                times.append(times[-1] + coal.sample_coal(j, n))
                if times[-1] >= t:
                    # bound exceeded; this draw will be rejected below
                    break
            if times[-1] < t:
                alltimes.append(times)

        p = Gnuplot()
        for i in range(1, k):
            waits = [q[i] - q[i-1] for q in alltimes]
            x, y = distrib(waits, width=30)
            p.plot(x, y, style="lines", xmax=500)
        p.enableOutput(True)
        p.replot()

        # sample times efficiently: each draw is bounded by the time left
        alltimes2 = []
        for _ in xrange(5000):
            times = [0]
            for j in xrange(k, 1, -1):
                remaining = t - times[-1]
                times.append(times[-1] +
                             coal.sample_bounded_coal(j, n, remaining))
            alltimes2.append(times)

        # drawn on the same plot object as the rejection-sampled curves
        for i in range(1, k):
            waits = [q[i] - q[i-1] for q in alltimes2]
            x, y = distrib(waits, width=30)
            p.plot(x, y, style="lines", xmax=500)
        p.enableOutput(True)
        p.replot()
Пример #11
0
    def test_cdf_coal_cond_counts(self):
        """Test the coalescent CDF conditioned on future lineage counts.

        For a = 5 and every b in 2..a-1, coal.cdf_coal_cond_counts is
        plotted together with the empirical CDF of rejection-sampled first
        coalescent times, one PNG per b.  The sample is then compared with
        coal.prob_coal_cond_counts via eq_sample_pdf.
        """
        plot_dir = 'test/tmp/test_coal/Coal_test_cdf_coal_cond_counts/'
        make_clean_dir(plot_dir)

        a = 5
        t = 500
        n = 1000

        for b in xrange(2, a):

            def cdf(x):
                return coal.cdf_coal_cond_counts(x, a, b, t, n)

            def pdf(x):
                return coal.prob_coal_cond_counts(x, a, b, t, n)

            p = Gnuplot()
            p.enableOutput(False)
            p.plotfunc(cdf, 0, t, 10)

            # draw single coal samples using rejection sampling: a draw is
            # kept when its times satisfy the condition below for this b
            s = []
            while len(s) < 1000:
                times = coal.sample_coal_times(a, n)
                if times[a-b-1] < t and (b == 1 or times[a-b] > t):
                    s.append(times[0])

            x2, y2 = stats.cdf(s)
            p.plot(x2, y2, style='lines')
            p.enableOutput(True)
            p.save(plot_dir + 'plot-%d.png' % b)

            eq_sample_pdf(x2, pdf, 40)