示例#1
0
    def test_prior(self):
        """
        Calculate state priors
        """

        k = 10
        n = 1e4
        rho = 1.5e-8
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points()
        arghmm.discretize_arg(arg, times)
        new_name = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        prior = [model.prob_prior(0, j)
                 for j in xrange(model.get_num_states(0))]
        print prior
        print sum(map(exp, prior))
        fequal(sum(map(exp, prior)), 1.0, rel=.01)
示例#2
0
    def test_trans_single(self):
        """
        Calculate transition probabilities

        Only calculate a single matrix
        """

        k = 4
        n = 1e4
        rho = 1.5e-8
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(10)
        arghmm.discretize_arg(arg, times)
        print "recomb", arglib.get_recomb_pos(arg)

        new_name = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, new_name)
        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        pos = 10
        tree = arg.get_marginal_tree(pos)
        mat = arghmm.calc_transition_probs(tree, model.states[pos],
                                           model.nlineages, model.times,
                                           model.time_steps, model.popsizes,
                                           rho)
        print model.states[pos]
        pc(mat)

        for row in mat:
            print sum(map(exp, row))
示例#3
0
    def test_emit(self):
        """
        Check emission probabilities

        Samples an ARG with arghmm.sample_arg_dsmc, removes the leaf named
        "n<k-1>", converts the trees and sequences with arg2ctrees and
        seqs2cseqs, and asserts that arghmm.arghmm_assert_emit returns a
        true value.
        """

        # simulation settings
        nleaves = 10
        popsize = 1e4
        recomb_rate = 1.5e-8 * 20
        mut_rate = 2.5e-8 * 20
        seq_end = int(1e3) / 20
        time_points = arghmm.get_time_points(ntimes=20, maxtime=200000)

        arg = arghmm.sample_arg_dsmc(
            nleaves, 2 * popsize, recomb_rate,
            start=0, end=seq_end, times=time_points)

        mutations = arghmm.sample_arg_mutations(arg, mut_rate, time_points)
        alignment = arghmm.make_alignment(arg, mutations)

        thread_name = "n%d" % (nleaves - 1)
        arg = arghmm.remove_arg_thread(arg, thread_name)

        # the removed leaf's name is appended after the names returned by
        # arg2ctrees
        ctrees, tree_names = arghmm.arg2ctrees(arg, time_points)
        seq_names = tree_names + [thread_name]
        cseqs, nseqs, ncols = arghmm.seqs2cseqs(alignment, seq_names)

        emit_ok = arghmm.arghmm_assert_emit(
            ctrees, len(time_points), time_points, mut_rate,
            cseqs, nseqs, ncols)
        assert emit_ok
示例#4
0
    def test_emit(self):
        """
        Check emission probabilities

        Samples an ARG with arghmm.sample_arg_dsmc, removes the leaf named
        "n<k-1>", converts the trees and sequences with arg2ctrees and
        seqs2cseqs, and asserts that arghmm.arghmm_assert_emit returns a
        true value.
        """

        # simulation settings
        nleaves = 10
        popsize = 1e4
        recomb_rate = 1.5e-8 * 20
        mut_rate = 2.5e-8 * 20
        seq_end = int(1e3) / 20
        time_points = arghmm.get_time_points(ntimes=20, maxtime=200000)

        arg = arghmm.sample_arg_dsmc(
            nleaves, 2 * popsize, recomb_rate,
            start=0, end=seq_end, times=time_points)

        mutations = arghmm.sample_arg_mutations(arg, mut_rate, time_points)
        alignment = arghmm.make_alignment(arg, mutations)

        thread_name = "n%d" % (nleaves - 1)
        arg = arghmm.remove_arg_thread(arg, thread_name)

        # the removed leaf's name is appended after the names returned by
        # arg2ctrees
        ctrees, tree_names = arghmm.arg2ctrees(arg, time_points)
        seq_names = tree_names + [thread_name]
        cseqs, nseqs, ncols = arghmm.seqs2cseqs(alignment, seq_names)

        emit_ok = arghmm.arghmm_assert_emit(
            ctrees, len(time_points), time_points, mut_rate,
            cseqs, nseqs, ncols)
        assert emit_ok
示例#5
0
    def test_trans_single(self):
        """
        Calculate transition probabilities

        Only calculate a single matrix
        """

        k = 4
        n = 1e4
        rho = 1.5e-8
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(10)
        arghmm.discretize_arg(arg, times)
        print "recomb", arglib.get_recomb_pos(arg)

        new_name = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, new_name)
        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        pos = 10
        tree = arg.get_marginal_tree(pos)
        mat = arghmm.calc_transition_probs(
            tree, model.states[pos], model.nlineages,
            model.times, model.time_steps, model.popsizes, rho)
        print model.states[pos]
        pc(mat)

        for row in mat:
            print sum(map(exp, row))
示例#6
0
    def test_prior(self):
        """
        Calculate state priors
        """

        k = 10
        n = 1e4
        rho = 1.5e-8
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points()
        arghmm.discretize_arg(arg, times)
        new_name = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        prior = [
            model.prob_prior(0, j) for j in xrange(model.get_num_states(0))
        ]
        print prior
        print sum(map(exp, prior))
        fequal(sum(map(exp, prior)), 1.0, rel=.01)
示例#7
0
    def test_post_plot(self):

        k = 6
        n = 1e4
        rho = 1.5e-8 * 50
        mu = 2.5e-8 * 50
        length = 10000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(ntimes=30)
        arghmm.discretize_arg(arg, times)

        pause()

        # save
        #arglib.write_arg("test/data/k4.arg", arg)
        #fasta.write_fasta("test/data/k4.fa", seqs)

        new_name = "n%d" % (k - 1)
        thread = list(
            arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False))
        p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10)

        # remove chrom
        new_name = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg,
                              seqs,
                              new_name=new_name,
                              times=times,
                              rho=rho,
                              mu=mu)
        print "states", len(model.states[0])
        print "muts", len(muts)
        print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1]

        p.plot(model.recomb_pos, [10000] * len(model.recomb_pos),
               style="points")

        probs = arghmm.get_posterior_probs(model, length, verbose=True)
        print "done"

        high = list(arghmm.iter_posterior_times(model, probs, .95))
        low = list(arghmm.iter_posterior_times(model, probs, .05))
        p.gnuplot("set linestyle 2")
        p.plot(high, style="lines")
        p.gnuplot("set linestyle 2")
        p.plot(low, style="lines")

        #write_list("test/data/post_real.txt", cget(thread, 1))
        #write_list("test/data/post_high.txt", high)
        #write_list("test/data/post_low.txt", low)

        pause()
示例#8
0
    def test_post_plot(self):

        k = 6
        n = 1e4
        rho = 1.5e-8 * 50
        mu = 2.5e-8 * 50
        length = 10000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(ntimes=30)
        arghmm.discretize_arg(arg, times)

        pause()

        # save
        #arglib.write_arg("test/data/k4.arg", arg)
        #fasta.write_fasta("test/data/k4.fa", seqs)

        new_name = "n%d" % (k-1)
        thread = list(arghmm.iter_chrom_thread(arg, arg[new_name],
                                               by_block=False))
        p = plot(cget(thread, 1), style="lines", ymin=times[1],
                 ylog=10)

        # remove chrom
        new_name = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                              rho=rho, mu=mu)
        print "states", len(model.states[0])
        print "muts", len(muts)
        print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1]

        p.plot(model.recomb_pos, [10000] * len(model.recomb_pos),
               style="points")

        probs = arghmm.get_posterior_probs(model, length, verbose=True)
        print "done"

        high = list(arghmm.iter_posterior_times(model, probs, .95))
        low = list(arghmm.iter_posterior_times(model, probs, .05))
        p.gnuplot("set linestyle 2")
        p.plot(high, style="lines")
        p.gnuplot("set linestyle 2")
        p.plot(low, style="lines")


        #write_list("test/data/post_real.txt", cget(thread, 1))
        #write_list("test/data/post_high.txt", high)
        #write_list("test/data/post_low.txt", low)

        pause()
示例#9
0
    def test_trans_switch_single(self):
        """
        Calculate transitions probabilities for switching between blocks

        Only calculate a single matrix
        """

        k = 5
        n = 1e4
        rho = 1.5e-8 * 100
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        #arglib.write_arg("tmp/a.arg", arg)
        #arg = arglib.read_arg("tmp/a.arg")
        #arg.set_ancestral()

        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(5)
        arghmm.discretize_arg(arg, times)

        new_name = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        # get recombs
        recombs = list(x.pos for x in arghmm.iter_visible_recombs(arg))
        print "recomb", recombs

        pos = recombs[0] + 1
        tree = arg.get_marginal_tree(pos - .5)
        last_tree = arg.get_marginal_tree(pos - 1 - .5)

        print "states1>>", model.states[pos - 1]
        print "states2>>", model.states[pos]

        treelib.draw_tree_names(last_tree.get_tree(), minlen=5, maxlen=5)
        treelib.draw_tree_names(tree.get_tree(), minlen=5, maxlen=5)

        print "pos>>", pos
        recomb = [x for x in tree
                  if x.event == "recomb" and x.pos + 1 == pos][0]
        mat = arghmm.calc_transition_probs_switch(tree, last_tree, recomb.name,
                                                  model.states[pos - 1],
                                                  model.states[pos],
                                                  model.nlineages, model.times,
                                                  model.time_steps,
                                                  model.popsizes, rho)
        pc(mat)
示例#10
0
    def test_trans_switch_single(self):
        """
        Calculate transitions probabilities for switching between blocks

        Only calculate a single matrix
        """

        k = 5
        n = 1e4
        rho = 1.5e-8 * 100
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        #arglib.write_arg("tmp/a.arg", arg)
        #arg = arglib.read_arg("tmp/a.arg")
        #arg.set_ancestral()


        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(5)
        arghmm.discretize_arg(arg, times)

        new_name = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        # get recombs
        recombs = list(x.pos for x in arghmm.iter_visible_recombs(arg))
        print "recomb", recombs

        pos = recombs[0] + 1
        tree = arg.get_marginal_tree(pos-.5)
        last_tree = arg.get_marginal_tree(pos-1-.5)

        print "states1>>", model.states[pos-1]
        print "states2>>", model.states[pos]

        treelib.draw_tree_names(last_tree.get_tree(), minlen=5, maxlen=5)
        treelib.draw_tree_names(tree.get_tree(), minlen=5, maxlen=5)

        print "pos>>", pos
        recomb = [x for x in tree
                  if x.event == "recomb" and x.pos+1 == pos][0]
        mat = arghmm.calc_transition_probs_switch(
            tree, last_tree, recomb.name,
            model.states[pos-1], model.states[pos],
            model.nlineages, model.times,
            model.time_steps, model.popsizes, rho)
        pc(mat)
示例#11
0
    def test_state_corr(self):

        k = 12
        n = 1e4
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        length = int(1e3) / 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

        arg = arghmm.sample_arg_dsmc(k,
                                     2 * n,
                                     rho,
                                     start=0,
                                     end=length,
                                     times=times)
        muts = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arglib.make_alignment(arg, muts)

        # remove chrom
        new_name = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg,
                              seqs,
                              new_name=new_name,
                              times=times,
                              rho=rho,
                              mu=mu)
        print "states", len(model.states[0])

        nstates = len(model.states[0])
        prior = [-util.INF] * nstates
        prior[random.randint(0, nstates)] = 0.0

        probs1 = list(arghmm.forward_algorithm(model, length, verbose=True))
        probs2 = list(
            arghmm.forward_algorithm(model, length, prior=prior, verbose=True))

        model.rho *= 1e-9
        probs3 = list(
            arghmm.forward_algorithm(model, length, prior=prior, verbose=True))

        p = plot(vsubs(probs1[length - 1], mean(probs1[length - 1])))
        p.plot(vsubs(probs2[length - 1], mean(probs2[length - 1])))
        p.plot(vsubs(probs3[length - 1], mean(probs3[length - 1])))

        pause()
示例#12
0
    def test_backward(self):
        """
        Run backward algorithm
        """

        k = 3
        n = 1e4
        rho = 1.5e-8 * 100
        mu = 2.5e-8 * 100
        length = 10000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(ntimes=10)
        arghmm.discretize_arg(arg, times)

        tree = arg.get_marginal_tree(0)
        print tree.root.age
        treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4)

        # remove chrom
        new_name = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg,
                              seqs,
                              new_name=new_name,
                              times=times,
                              rho=rho,
                              mu=mu)
        print "states", len(model.states[0])
        print "recomb", model.recomb_pos
        print "muts", len(muts)

        probs = hmm.backward_algorithm(model, length, verbose=True)

        for pcol in probs:
            p = sum(map(exp, pcol))
            print p, " ".join("%.3f" % f for f in map(exp, pcol))
示例#13
0
    def test_state_corr(self):

        k = 12
        n = 1e4
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        length = int(1e3) / 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

        arg = arghmm.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                     times=times)
        muts = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arglib.make_alignment(arg, muts)

        # remove chrom
        new_name = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                              rho=rho, mu=mu)
        print "states", len(model.states[0])


        nstates = len(model.states[0])
        prior = [-util.INF] * nstates
        prior[random.randint(0, nstates)] = 0.0

        probs1 = list(arghmm.forward_algorithm(model, length, verbose=True))
        probs2 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                               verbose=True))

        model.rho *= 1e-9
        probs3 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                               verbose=True))

        p = plot(vsubs(probs1[length-1], mean(probs1[length-1])))
        p.plot(vsubs(probs2[length-1], mean(probs2[length-1])))
        p.plot(vsubs(probs3[length-1], mean(probs3[length-1])))

        pause()
示例#14
0
    def test_emit_argmax(self):
        """
        Calculate emission probabilities
        """

        k = 10
        n = 1e4
        rho = 0.0
        mu = 2.5e-8 * 100
        length = 10000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(10)
        arghmm.discretize_arg(arg, times)

        new_name = "n%d" % (k - 1)
        thread = list(arghmm.iter_chrom_thread(arg, arg[new_name]))
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        nstates = model.get_num_states(1)
        probs = [0.0 for j in xrange(nstates)]
        for i in xrange(1, length):
            if i % 100 == 0:
                print i
            for j in xrange(nstates):
                probs[j] += model.prob_emission(i, j)
        print

        # is the maximum likelihood emission matching truth
        data = sorted(zip(probs, model.states[0]), reverse=True)
        pc(data[:20])

        state = (thread[0][0], times.index(thread[0][1]))

        print data[0][1], state
        assert data[0][1] == state
示例#15
0
    def test_emit_argmax(self):
        """
        Calculate emission probabilities
        """

        k = 10
        n = 1e4
        rho = 0.0
        mu = 2.5e-8 * 100
        length = 10000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(10)
        arghmm.discretize_arg(arg, times)

        new_name = "n%d" % (k-1)
        thread = list(arghmm.iter_chrom_thread(arg, arg[new_name]))
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

        nstates = model.get_num_states(1)
        probs = [0.0 for j in xrange(nstates)]
        for i in xrange(1, length):
            if i % 100 == 0:
                print i
            for j in xrange(nstates):
                probs[j] += model.prob_emission(i, j)
        print

        # is the maximum likelihood emission matching truth
        data = sorted(zip(probs, model.states[0]), reverse=True)
        pc(data[:20])

        state = (thread[0][0], times.index(thread[0][1]))

        print data[0][1], state
        assert data[0][1] == state
示例#16
0
    def test_remove_thread(self):
        """
        Remove a leaf from an ARG
        """

        k = 3
        n = 1e4
        rho = 1.5e-8 * 20
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)

        times = arghmm.get_time_points(10)
        arghmm.discretize_arg(arg, times)
        chrom = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, chrom)

        recomb = arglib.get_recomb_pos(arg)
        print "recomb", recomb

        tree = arg.get_marginal_tree(-.5)

        draw_tree_names(tree.get_tree(), minlen=5, maxlen=5)
        print sorted([(x.pos, x.event) for x in tree])
示例#17
0
    def test_remove_thread(self):
        """
        Remove a leaf from an ARG
        """

        k = 3
        n = 1e4
        rho = 1.5e-8 * 20
        mu = 2.5e-8
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)

        times = arghmm.get_time_points(10)
        arghmm.discretize_arg(arg, times)
        chrom = "n%d" % (k - 1)
        arg = arghmm.remove_arg_thread(arg, chrom)

        recomb = arglib.get_recomb_pos(arg)
        print "recomb", recomb

        tree = arg.get_marginal_tree(-.5)

        draw_tree_names(tree.get_tree(), minlen=5, maxlen=5)
        print sorted([(x.pos, x.event) for x in tree])
示例#18
0
    def test_backward(self):
        """
        Run backward algorithm
        """

        k = 3
        n = 1e4
        rho = 1.5e-8 * 100
        mu = 2.5e-8 * 100
        length = 10000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        times = arghmm.get_time_points(ntimes=10)
        arghmm.discretize_arg(arg, times)

        tree = arg.get_marginal_tree(0)
        print tree.root.age
        treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4)

        # remove chrom
        new_name = "n%d" % (k-1)
        arg = arghmm.remove_arg_thread(arg, new_name)

        model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                              rho=rho, mu=mu)
        print "states", len(model.states[0])
        print "recomb", model.recomb_pos
        print "muts", len(muts)

        probs = hmm.backward_algorithm(model, length, verbose=True)

        for pcol in probs:
            p = sum(map(exp, pcol))
            print p, " ".join("%.3f" % f for f in map(exp, pcol))