Exemplo n.º 1
0
    def test_emit_internal(self):
        """
        Check the "internal" emission calculation on a simulated ARG

        Simulates an ARG with sample_arg_dsmc(), simulates mutations and an
        alignment on it, converts both with arg2ctrees()/seqs2cseqs() and
        asserts on the value returned by arghmm_assert_emit_internal().
        """

        # rates are multiplied, and the region length divided, by this factor
        scale = 20

        nsamples = 10
        popsize = 1e4
        recomb_rate = 1.5e-8 * scale
        mut_rate = 2.5e-8 * scale
        seq_end = int(10e3) / scale
        time_pts = arghmm.get_time_points(ntimes=20, maxtime=200000)

        # simulate the ARG, then mutations and the alignment they produce
        sim_arg = arghmm.sample_arg_dsmc(
            nsamples, 2 * popsize, recomb_rate,
            start=0, end=seq_end, times=time_pts)
        mutations = arghmm.sample_arg_mutations(sim_arg, mut_rate, time_pts)
        alignment = arghmm.make_alignment(sim_arg, mutations)

        # convert trees and sequences to the forms the checker takes
        ctrees, leaf_names = arghmm.arg2ctrees(sim_arg, time_pts)
        cseqs, nseqs, seqlen = arghmm.seqs2cseqs(alignment, leaf_names)

        ok = arghmm.arghmm_assert_emit_internal(
            ctrees, len(time_pts), time_pts, mut_rate,
            cseqs, nseqs, seqlen)
        assert ok
Exemplo n.º 2
0
    def test_ld_block(self):
        """
        Draw a heatmap of the LD matrix for sites of a simulated alignment

        Nothing is asserted; the heatmap is meant to be inspected by eye.
        """

        nsamples = 30
        popsize = 1e4
        recomb_rate = 1.5e-8
        mut_rate = 2.5e-8
        seq_end = 200e3
        time_pts = arghmm.get_time_points(ntimes=20, maxtime=200e3)
        compress = 20  # NOTE(review): not referenced anywhere in this test

        # simulate an ARG, mutations on it, and the resulting alignment
        sim_arg = arghmm.sample_arg_dsmc(
            nsamples, 2 * popsize, recomb_rate,
            start=0, end=seq_end, times=time_pts)
        mutations = arghmm.sample_arg_mutations(sim_arg, mut_rate, time_pts)
        alignment = arghmm.make_alignment(sim_arg, mutations)
        sites = arghmm.seqs2sites(alignment)

        # earlier alternative, kept for reference:
        #   cols = transpose(seqs.values())[::10000]
        # presumably fetches the column stored at each listed position;
        # only the first 1000 are used
        site_cols = mget(sites, sites.positions)[:1000]

        ld_matrix = arghmm.calc_ld_matrix(site_cols, arghmm.calc_ld_Dp)

        heatmap(ld_matrix, width=2, height=2)
        pause()
Exemplo n.º 3
0
    def test_emit_parsimony(self):
        """
        Compare calc_likelihood() with calc_likelihood_parsimony()

        For 20 independently simulated ARGs and alignments, both likelihoods
        are computed and plotted against each other together with a diagonal
        reference line.  Nothing is asserted; the plot is inspected by eye.
        """

        k = 10
        n = 1e4
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        length = int(100e3) / 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)

        x = []  # values from calc_likelihood()
        y = []  # values from calc_likelihood_parsimony()
        for i in range(20):
            # progress output; the call form with a single argument prints
            # the same text under Python 2 and Python 3
            print(i)
            arg = arghmm.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                         times=times)
            muts = arghmm.sample_arg_mutations(arg, mu, times)
            seqs = arghmm.make_alignment(arg, muts)

            x.append(arghmm.calc_likelihood(
                arg, seqs, mu=mu, times=times, delete_arg=False))
            y.append(arghmm.calc_likelihood_parsimony(
                arg, seqs, mu=mu, times=times, delete_arg=False))

        p = plot(x, y, xlab="true likelihood", ylab="parsimony likelihood")
        # diagonal spanning the observed range of x, for reference
        p.plot([min(x), max(x)], [min(x), max(x)], style="lines")
        pause()
Exemplo n.º 4
0
    def test_emit(self):
        """
        Check the emission calculation after removing one thread from the ARG

        Simulates an ARG and an alignment, removes the thread of the
        last-numbered sequence ("n<k-1>") from the ARG, and asserts on the
        value returned by arghmm_assert_emit() for the remaining trees and
        the full set of sequences.
        """

        # rates are multiplied, and the region length divided, by this factor
        scale = 20

        nsamples = 10
        popsize = 1e4
        recomb_rate = 1.5e-8 * scale
        mut_rate = 2.5e-8 * scale
        seq_end = int(1e3) / scale
        time_pts = arghmm.get_time_points(ntimes=20, maxtime=200000)

        sim_arg = arghmm.sample_arg_dsmc(
            nsamples, 2 * popsize, recomb_rate,
            start=0, end=seq_end, times=time_pts)

        # the alignment is built before the thread is removed, so it still
        # contains the removed sequence
        mutations = arghmm.sample_arg_mutations(sim_arg, mut_rate, time_pts)
        alignment = arghmm.make_alignment(sim_arg, mutations)

        removed = "n%d" % (nsamples - 1)
        sim_arg = arghmm.remove_arg_thread(sim_arg, removed)

        ctrees, leaf_names = arghmm.arg2ctrees(sim_arg, time_pts)
        # the removed sequence is appended after the names from the trees
        seq_order = leaf_names + [removed]
        cseqs, nseqs, seqlen = arghmm.seqs2cseqs(alignment, seq_order)

        ok = arghmm.arghmm_assert_emit(
            ctrees, len(time_pts), time_pts, mut_rate,
            cseqs, nseqs, seqlen)
        assert ok
Exemplo n.º 5
0
    def test_emit_parsimony(self):
        """
        Compare calc_likelihood() with calc_likelihood_parsimony()

        For 20 independently simulated ARGs and alignments, both likelihoods
        are computed and plotted against each other together with a diagonal
        reference line.  Nothing is asserted; the plot is inspected by eye.
        """

        k = 10
        n = 1e4
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        length = int(100e3) / 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)

        x = []  # values from calc_likelihood()
        y = []  # values from calc_likelihood_parsimony()
        for i in range(20):
            # progress output; the call form with a single argument prints
            # the same text under Python 2 and Python 3
            print(i)
            arg = arghmm.sample_arg_dsmc(k,
                                         2 * n,
                                         rho,
                                         start=0,
                                         end=length,
                                         times=times)
            muts = arghmm.sample_arg_mutations(arg, mu, times)
            seqs = arghmm.make_alignment(arg, muts)

            x.append(
                arghmm.calc_likelihood(arg,
                                       seqs,
                                       mu=mu,
                                       times=times,
                                       delete_arg=False))
            y.append(
                arghmm.calc_likelihood_parsimony(arg,
                                                 seqs,
                                                 mu=mu,
                                                 times=times,
                                                 delete_arg=False))

        p = plot(x, y, xlab="true likelihood", ylab="parsimony likelihood")
        # diagonal spanning the observed range of x, for reference
        p.plot([min(x), max(x)], [min(x), max(x)], style="lines")
        pause()
Exemplo n.º 6
0
    def test_ld_block(self):
        """
        Draw a heatmap of the LD matrix for sites of a simulated alignment

        Nothing is asserted; the heatmap is meant to be inspected by eye.
        """

        nsamples = 30
        popsize = 1e4
        recomb_rate = 1.5e-8
        mut_rate = 2.5e-8
        seq_end = 200e3
        time_pts = arghmm.get_time_points(ntimes=20, maxtime=200e3)
        compress = 20  # NOTE(review): not referenced anywhere in this test

        # simulate an ARG, mutations on it, and the resulting alignment
        sim_arg = arghmm.sample_arg_dsmc(
            nsamples, 2 * popsize, recomb_rate,
            start=0, end=seq_end, times=time_pts)
        mutations = arghmm.sample_arg_mutations(sim_arg, mut_rate, time_pts)
        alignment = arghmm.make_alignment(sim_arg, mutations)
        sites = arghmm.seqs2sites(alignment)

        # earlier alternative, kept for reference:
        #   cols = transpose(seqs.values())[::10000]
        # presumably fetches the column stored at each listed position;
        # only the first 1000 are used
        site_cols = mget(sites, sites.positions)[:1000]

        ld_matrix = arghmm.calc_ld_matrix(site_cols, arghmm.calc_ld_Dp)

        heatmap(ld_matrix, width=2, height=2)
        pause()
Exemplo n.º 7
0
    def test_est_arg_popsize(self):
        """
        Plot per-tree population-size estimates along a sampled ARG

        Simulates an ARG in three consecutive segments, where the middle
        segment [a, b) uses half the population size of the outer two,
        simulates sequences on it, then samples and resamples an ARG from
        those sequences.  The mle_popsize_tree() estimate of every local
        tree is plotted (raw and smoothed) together with the simulated
        population-size step function.  Nothing is asserted.
        """

        k = 20
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        # floor division keeps `length` an integer whatever `/` means in the
        # running interpreter; range() below rejects a float
        length = int(2e6) // 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)

        # sim seq
        mut = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mut)
        util.toc()

        # sample arg (a constant population size of 1e4 is passed here)
        util.tic("sample arg")
        arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                 popsizes=1e4, carg=True)
        arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                         rho=rho, mu=mu, times=times,
                                         refine=200)
        arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                       rho=rho, mu=mu, times=times,
                                       refine=200)
        util.toc()

        # one (start position, estimate) pair per local tree
        x = []
        y = []
        for (start, _end), tree in arglib.iter_tree_tracks(arg2):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        # thin popsizes: sample the estimates on a regular grid of 5000
        # positions, taking for each grid point the first tree whose start
        # is not before it (or the last tree once the trees run out)
        x2 = list(range(0, length, length // 5000))
        y2 = []
        j = 0
        for pos in x2:
            while j < len(x) and x[j] < pos:
                j += 1
            y2.append(y[min(j, len(y) - 1)])

        x3, y3 = stats.smooth2(x2, y2, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x3, y3, style='lines')
        # simulated population size as a step function over [0, length]
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()
Exemplo n.º 8
0
    def test_est_arg_popsize(self):
        """
        Plot per-tree population-size estimates along a sampled ARG

        Simulates an ARG in three consecutive segments, where the middle
        segment [a, b) uses half the population size of the outer two,
        simulates sequences on it, then samples and resamples an ARG from
        those sequences.  The mle_popsize_tree() estimate of every local
        tree is plotted (raw and smoothed) together with the simulated
        population-size step function.  Nothing is asserted.
        """

        k = 20
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        # floor division keeps `length` an integer whatever `/` means in the
        # running interpreter; range() below rejects a float
        length = int(2e6) // 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)

        # sim seq
        mut = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mut)
        util.toc()

        # sample arg (a constant population size of 1e4 is passed here)
        util.tic("sample arg")
        arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                 popsizes=1e4, carg=True)
        arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                         rho=rho, mu=mu, times=times,
                                         refine=200)
        arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                       rho=rho, mu=mu, times=times,
                                       refine=200)
        util.toc()

        # one (start position, estimate) pair per local tree
        x = []
        y = []
        for (start, _end), tree in arglib.iter_tree_tracks(arg2):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        # thin popsizes: sample the estimates on a regular grid of 5000
        # positions, taking for each grid point the first tree whose start
        # is not before it (or the last tree once the trees run out).
        # list() gives the smoother and plotter a real list even where
        # range() is lazy.
        x2 = list(range(0, length, length // 5000))
        y2 = []
        j = 0
        for pos in x2:
            while j < len(x) and x[j] < pos:
                j += 1
            y2.append(y[min(j, len(y) - 1)])

        x3, y3 = stats.smooth2(x2, y2, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x3, y3, style='lines')
        # simulated population size as a step function over [0, length]
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()