def test_ld_block(self):
    """
    Draw an LD heatmap for the sites of a simulated alignment.

    An ARG is sampled with arghmm.sample_arg_dsmc, mutations are placed on
    it, and the resulting alignment is converted to sites.  The matrix
    returned by arghmm.calc_ld_matrix (with arghmm.calc_ld_Dp as the
    pairwise measure) over at most the first 1000 site columns is shown
    with heatmap().  Nothing is asserted; the test ends by calling pause().
    """
    k = 30
    n = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, muts)
    sites = arghmm.seqs2sites(seqs)

    # Disabled alternative kept from the original:
    #cols = transpose(seqs.values())[::10000]
    # Look up the columns at every site position, then cap at 1000 of them.
    cols = mget(sites, sites.positions)[:1000]

    ld = arghmm.calc_ld_matrix(cols, arghmm.calc_ld_Dp)
    heatmap(ld, width=2, height=2)
    pause()
def test_emit_internal(self):
    """
    Check emission probabilities via arghmm_assert_emit_internal.

    Simulates an ARG and an alignment, converts the ARG to ctrees and the
    sequences to cseqs (ordered by the names returned from arg2ctrees),
    and asserts that arghmm.arghmm_assert_emit_internal returns a true
    value for them.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(10e3) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mutations)

    trees, names = arghmm.arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = arghmm.seqs2cseqs(seqs, names)

    ok = arghmm.arghmm_assert_emit_internal(
        trees, len(times), times, mu, cseqs, nseqs, seqlen)
    assert ok
def test_emit_parsimony(self):
    """
    Compare arghmm.calc_likelihood with arghmm.calc_likelihood_parsimony.

    For 20 independently simulated ARG/alignment pairs, both likelihood
    functions are evaluated on the same ARG and sequences.  The two values
    are scatter-plotted against each other with a y = x reference line
    spanning the range of the first set.  Nothing is asserted; the test
    ends by calling pause().
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    nreps = 20

    # Keyword arguments shared by both likelihood calls.
    lk_opts = dict(mu=mu, times=times, delete_arg=False)

    true_lks = []
    pars_lks = []
    for rep in range(nreps):
        print(rep)
        arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                     start=0, end=length, times=times)
        mutations = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mutations)

        true_lks.append(arghmm.calc_likelihood(arg, seqs, **lk_opts))
        pars_lks.append(
            arghmm.calc_likelihood_parsimony(arg, seqs, **lk_opts))

    p = plot(true_lks, pars_lks,
             xlab="true likelihood", ylab="parsimony likelihood")

    # Diagonal reference line over the range of the first likelihood.
    lo, hi = min(true_lks), max(true_lks)
    p.plot([lo, hi], [lo, hi], style="lines")

    pause()
def test_compress_align(self):
    """
    Exercise arghmm.compress_align on a simulated alignment.

    An ARG is sampled, mutations are placed on it, and the alignment built
    by arglib.make_alignment is passed to compress_align with factor
    `compress`.  The test prints the compressed alignment length next to
    length / compress, plots the second return value (`cols`) and a
    histogram of the differences between its consecutive entries, and
    prints histtab() tallies of arghmm.is_variant over every alignment
    column and over the entries selected by `cols`.  Nothing is asserted;
    the test ends by calling pause().
    """
    nchroms = 12
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    compress = 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, mutations)

    seqs2, cols = arghmm.compress_align(seqs, compress)
    print("%s %s" % (seqs2.alignlen(), length / compress))

    # Differences between consecutive entries of cols.
    gaps = [nxt - prev for prev, nxt in zip(cols, cols[1:])]

    plot(cols)
    plothist(gaps, width=1)

    variant = [arghmm.is_variant(seqs, col)
               for col in range(seqs.alignlen())]
    print(histtab(variant))
    print(histtab(mget(variant, cols)))

    pause()
def test_arg_joint(self):
    """
    Compute joint probability of an ARG

    Simulates an ARG for two sequences plus an alignment, evaluates
    arghmm.calc_joint_prob on them with the same mu, rho and times used
    for the simulation, and prints the result.  Nothing is asserted.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = 10000
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, muts)

    lk = arghmm.calc_joint_prob(arg, seqs, mu=mu, rho=rho, times=times)
    print(lk)
def test_emit(self):
    """
    Check emission probabilities via arghmm_assert_emit.

    Simulates an ARG and an alignment, removes the thread named
    "n<k-1>" from the ARG with arghmm.remove_arg_thread, converts the
    remaining ARG to ctrees, and packs the sequences with the removed name
    appended after the names returned by arg2ctrees.  Asserts that
    arghmm.arghmm_assert_emit returns a true value.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(1e3) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mutations)

    # Take the last-numbered sequence out of the ARG.
    removed = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, removed)

    trees, names = arghmm.arg2ctrees(arg, times)
    seq_order = names + [removed]
    cseqs, nseqs, seqlen = arghmm.seqs2cseqs(seqs, seq_order)

    ok = arghmm.arghmm_assert_emit(
        trees, len(times), times, mu, cseqs, nseqs, seqlen)
    assert ok
def test_compress_align(self):
    """
    Exercise arghmm.compress_align on a simulated alignment.

    An ARG is sampled, mutations are placed on it, and the alignment built
    by arglib.make_alignment is passed to compress_align with factor
    `compress`.  The test prints the compressed alignment length next to
    length / compress, plots the second return value (`cols`) and a
    histogram of the differences between its consecutive entries, and
    prints histtab() tallies of arghmm.is_variant over every alignment
    column and over the entries selected by `cols`.  Nothing is asserted;
    the test ends by calling pause().
    """
    nchroms = 12
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    compress = 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, mutations)

    seqs2, cols = arghmm.compress_align(seqs, compress)
    print("%s %s" % (seqs2.alignlen(), length / compress))

    # Differences between consecutive entries of cols.
    gaps = [nxt - prev for prev, nxt in zip(cols, cols[1:])]

    plot(cols)
    plothist(gaps, width=1)

    variant = [arghmm.is_variant(seqs, col)
               for col in range(seqs.alignlen())]
    print(histtab(variant))
    print(histtab(mget(variant, cols)))

    pause()
def test_state_corr(self):
    """
    Plot final forward-algorithm columns under different priors and rho.

    Simulates an ARG and alignment, removes the thread "n<k-1>" from the
    ARG, and builds an arghmm.ArgHmm for re-threading that sequence.  The
    forward algorithm is then run three times:

      1. with the default prior,
      2. with a prior that puts log-probability 0 on one randomly chosen
         state and -util.INF on all others,
      3. with that same prior after scaling model.rho by 1e-9.

    The last column of each run, shifted by its own mean, is plotted on a
    shared figure.  Nothing is asserted; the test ends by calling pause().
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division: length is used as a list index below and must be an
    # int.
    length = int(1e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, muts)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)
    nstates = len(model.states[0])
    print("states %d" % nstates)

    # Prior concentrated on a single random state.  random.randint includes
    # its upper bound, so the bound must be nstates - 1 to stay in range.
    prior = [-util.INF] * nstates
    prior[random.randint(0, nstates - 1)] = 0.0

    probs1 = list(arghmm.forward_algorithm(model, length, verbose=True))
    probs2 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                           verbose=True))

    # Shrink rho before the third run.
    model.rho *= 1e-9
    probs3 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                           verbose=True))

    last = length - 1
    p = plot(vsubs(probs1[last], mean(probs1[last])))
    for probs in (probs2, probs3):
        p.plot(vsubs(probs[last], mean(probs[last])))

    pause()
def test_emit_parsimony(self):
    """
    Compare arghmm.calc_likelihood with arghmm.calc_likelihood_parsimony.

    For 20 independently simulated ARG/alignment pairs, both likelihood
    functions are evaluated on the same ARG and sequences.  The two values
    are scatter-plotted against each other with a y = x reference line
    spanning the range of the first set.  Nothing is asserted; the test
    ends by calling pause().
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    nreps = 20

    # Keyword arguments shared by both likelihood calls.
    lk_opts = dict(mu=mu, times=times, delete_arg=False)

    true_lks = []
    pars_lks = []
    for rep in range(nreps):
        print(rep)
        arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                     start=0, end=length, times=times)
        mutations = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mutations)

        true_lks.append(arghmm.calc_likelihood(arg, seqs, **lk_opts))
        pars_lks.append(
            arghmm.calc_likelihood_parsimony(arg, seqs, **lk_opts))

    p = plot(true_lks, pars_lks,
             xlab="true likelihood", ylab="parsimony likelihood")

    # Diagonal reference line over the range of the first likelihood.
    lo, hi = min(true_lks), max(true_lks)
    p.plot([lo, hi], [lo, hi], style="lines")

    pause()
def test_arg_joint(self):
    """
    Compute joint probability of an ARG

    Simulates an ARG for two sequences plus an alignment, evaluates
    arghmm.calc_joint_prob on them with the same mu, rho and times used
    for the simulation, and prints the result.  Nothing is asserted.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = 10000
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, muts)

    lk = arghmm.calc_joint_prob(arg, seqs, mu=mu, rho=rho, times=times)
    print(lk)
def test_sample_arg_popsizes_trees_infer(self):
    """
    Sample an ARG from sequences and estimate population sizes from it.

    An ARG is simulated with one population size per time point, and an
    alignment is generated from it.  For each of `nsamples` rounds an ARG
    is sampled from the sequences with arghmm.sample_arg and passed to
    arghmm.est_popsizes_trees; the estimates are printed, summed, and
    finally divided by nsamples.  The simulated sizes and the averaged
    estimates are plotted against the time points.  Nothing is asserted;
    the test ends by calling pause().
    """
    nchroms = 6
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(10e6) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=160000)
    # One simulated population size per time point.
    popsizes = [1e4 * (61. - i) / 60. for i in range(len(times))]
    refine = 5

    util.tic("sim ARG")
    # Disabled alternative kept from the original:
    #arg = arglib.sample_arg_smc(k, 2 * popsizes[0],
    #                            rho, start=0, end=length)
    doubled = [2 * size for size in popsizes]
    arg = arghmm.sample_arg_dsmc(nchroms, doubled, rho,
                                 start=0, end=length, times=times)
    util.toc()

    mutations = arghmm.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, mutations)

    # Running sum of the estimates; one entry fewer than time points.
    total = [0] * (len(times) - 1)
    nsamples = 1
    for _ in range(nsamples):
        sampled = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                    popsizes=popsizes, refine=refine,
                                    verbose=True, carg=True)
        estimate = arghmm.est_popsizes_trees(sampled, times, length / 1000,
                                             verbose=True)
        print(estimate)
        total = vadd(total, estimate)

    averaged = vdivs(total, float(nsamples))
    print(averaged)

    p = plot(times, popsizes, xlog=10, xmin=10)
    p.plot(times[1:], averaged)

    pause()
def test_sample_arg_popsizes_trees_infer(self):
    """
    Sample an ARG from sequences and estimate population sizes from it.

    An ARG is simulated with one population size per time point, and an
    alignment is generated from it.  For each of `nsamples` rounds an ARG
    is sampled from the sequences with arghmm.sample_arg and passed to
    arghmm.est_popsizes_trees; the estimates are printed, summed, and
    finally divided by nsamples.  The simulated sizes and the averaged
    estimates are plotted against the time points.  Nothing is asserted;
    the test ends by calling pause().
    """
    nchroms = 6
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(10e6) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=160000)
    # One simulated population size per time point.
    popsizes = [1e4 * (61. - i) / 60. for i in range(len(times))]
    refine = 5

    util.tic("sim ARG")
    # Disabled alternative kept from the original:
    #arg = arglib.sample_arg_smc(k, 2 * popsizes[0],
    #                            rho, start=0, end=length)
    doubled = [2 * size for size in popsizes]
    arg = arghmm.sample_arg_dsmc(nchroms, doubled, rho,
                                 start=0, end=length, times=times)
    util.toc()

    mutations = arghmm.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, mutations)

    # Running sum of the estimates; one entry fewer than time points.
    total = [0] * (len(times) - 1)
    nsamples = 1
    for _ in range(nsamples):
        sampled = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                    popsizes=popsizes, refine=refine,
                                    verbose=True, carg=True)
        estimate = arghmm.est_popsizes_trees(sampled, times, length / 1000,
                                             verbose=True)
        print(estimate)
        total = vadd(total, estimate)

    averaged = vdivs(total, float(nsamples))
    print(averaged)

    p = plot(times, popsizes, xlog=10, xmin=10)
    p.plot(times[1:], averaged)

    pause()
def test_state_corr(self):
    """
    Plot final forward-algorithm columns under different priors and rho.

    Simulates an ARG and alignment, removes the thread "n<k-1>" from the
    ARG, and builds an arghmm.ArgHmm for re-threading that sequence.  The
    forward algorithm is then run three times:

      1. with the default prior,
      2. with a prior that puts log-probability 0 on one randomly chosen
         state and -util.INF on all others,
      3. with that same prior after scaling model.rho by 1e-9.

    The last column of each run, shifted by its own mean, is plotted on a
    shared figure.  Nothing is asserted; the test ends by calling pause().
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division: length is used as a list index below and must be an
    # int.
    length = int(1e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, muts)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)
    nstates = len(model.states[0])
    print("states %d" % nstates)

    # Prior concentrated on a single random state.  random.randint includes
    # its upper bound, so the bound must be nstates - 1 to stay in range.
    prior = [-util.INF] * nstates
    prior[random.randint(0, nstates - 1)] = 0.0

    probs1 = list(arghmm.forward_algorithm(model, length, verbose=True))
    probs2 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                           verbose=True))

    # Shrink rho before the third run.
    model.rho *= 1e-9
    probs3 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                           verbose=True))

    last = length - 1
    p = plot(vsubs(probs1[last], mean(probs1[last])))
    for probs in (probs2, probs3):
        p.plot(vsubs(probs[last], mean(probs[last])))

    pause()
def test_ld_block(self):
    """
    Draw an LD heatmap for the sites of a simulated alignment.

    An ARG is sampled with arghmm.sample_arg_dsmc, mutations are placed on
    it, and the resulting alignment is converted to sites.  The matrix
    returned by arghmm.calc_ld_matrix (with arghmm.calc_ld_Dp as the
    pairwise measure) over at most the first 1000 site columns is shown
    with heatmap().  Nothing is asserted; the test ends by calling pause().
    """
    k = 30
    n = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, muts)
    sites = arghmm.seqs2sites(seqs)

    # Disabled alternative kept from the original:
    #cols = transpose(seqs.values())[::10000]
    # Look up the columns at every site position, then cap at 1000 of them.
    cols = mget(sites, sites.positions)[:1000]

    ld = arghmm.calc_ld_matrix(cols, arghmm.calc_ld_Dp)
    heatmap(ld, width=2, height=2)
    pause()
def test_est_arg_popsize(self):
    """
    Estimate population size along the sequence from a sampled ARG.

    The ARG is simulated in three consecutive segments with
    arglib.sample_arg_smc: `popsize` on [0, a), `popsize2` on [a, b) and
    `popsize` again on [b, length), each segment continuing from the
    previous one via init_tree.  An alignment is generated from it, and a
    new ARG is sampled from the sequences and then resampled with
    resample_climb_arg and resample_all_arg, all with popsizes=1e4.

    For every tree track of the sampled ARG, mle_popsize_tree is evaluated
    and plotted against the track start.  A thinned and smoothed version
    of that curve and the simulated step profile are overlaid.  Nothing is
    asserted; the test ends by calling pause().
    """
    k = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps length an int; it is used as a range() bound.
    length = int(2e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsize = 1e4
    popsize2 = 1e4 * .5
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize,
                                rho, start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                rho, start=a, end=b,
                                init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize,
                                rho, start=b, end=length,
                                init_tree=arg)

    # sim seq
    mut = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mut)
    util.toc()

    # sample arg
    util.tic("sample arg")
    arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                             popsizes=1e4, carg=True)
    arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                     rho=rho, mu=mu, times=times,
                                     refine=200)
    arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                   rho=rho, mu=mu, times=times,
                                   refine=200)
    util.toc()

    # One estimate per tree track, keyed by the track's start position.
    x = []
    y = []
    for (start, _end), tree in arglib.iter_tree_tracks(arg2):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    # thin popsizes: at each grid position take the estimate of the first
    # track whose start is >= that position (or the last track if none).
    x2 = list(range(0, length, length // 5000))
    y2 = []
    j = 0
    for pos in x2:
        while j < len(x) and x[j] < pos:
            j += 1
        y2.append(y[min(j, len(y) - 1)])

    x3, y3 = stats.smooth2(x2, y2, 100e3)
    p = plot(x, y, ymin=0)
    p.plot(x3, y3, style='lines')
    # Simulated population-size step profile.
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()
def test_est_arg_popsize(self):
    """
    Estimate population size along the sequence from a sampled ARG.

    The ARG is simulated in three consecutive segments with
    arglib.sample_arg_smc: `popsize` on [0, a), `popsize2` on [a, b) and
    `popsize` again on [b, length), each segment continuing from the
    previous one via init_tree.  An alignment is generated from it, and a
    new ARG is sampled from the sequences and then resampled with
    resample_climb_arg and resample_all_arg, all with popsizes=1e4.

    For every tree track of the sampled ARG, mle_popsize_tree is evaluated
    and plotted against the track start.  A thinned and smoothed version
    of that curve and the simulated step profile are overlaid.  Nothing is
    asserted; the test ends by calling pause().
    """
    k = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps length an int; it is used as a range() bound.
    length = int(2e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsize = 1e4
    popsize2 = 1e4 * .5
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize,
                                rho, start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                rho, start=a, end=b,
                                init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize,
                                rho, start=b, end=length,
                                init_tree=arg)

    # sim seq
    mut = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mut)
    util.toc()

    # sample arg
    util.tic("sample arg")
    arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                             popsizes=1e4, carg=True)
    arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                     rho=rho, mu=mu, times=times,
                                     refine=200)
    arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                   rho=rho, mu=mu, times=times,
                                   refine=200)
    util.toc()

    # One estimate per tree track, keyed by the track's start position.
    x = []
    y = []
    for (start, _end), tree in arglib.iter_tree_tracks(arg2):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    # thin popsizes: at each grid position take the estimate of the first
    # track whose start is >= that position (or the last track if none).
    x2 = list(range(0, length, length // 5000))
    y2 = []
    j = 0
    for pos in x2:
        while j < len(x) and x[j] < pos:
            j += 1
        y2.append(y[min(j, len(y) - 1)])

    x3, y3 = stats.smooth2(x2, y2, 100e3)
    p = plot(x, y, ymin=0)
    p.plot(x3, y3, style='lines')
    # Simulated population-size step profile.
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()