def test_sample_arg_smc(self):
    """Draw a single ARG from the SMC sampler and validate it."""
    num_lineages = 5
    eff_popsize = 1e4       # effective population size
    recomb_rate = 1.5e-8    # recombinations per site per generation
    locus_length = 10000

    # The locus spans positions 0..locus_length (passed positionally).
    sampled = arglib.sample_arg_smc(
        num_lineages, eff_popsize, recomb_rate, 0, locus_length)
    arglib.assert_arg(sampled)
def test_est_popsize2(self):
    """
    Plot per-tree population size estimates along a simulated ARG.

    The ARG is simulated with sample_arg_smc in three consecutive
    segments, [0, a), [a, b) and [b, length), using popsize, popsize2
    and popsize respectively; each later segment is seeded with the
    previous result through init_tree.  For every local tree the value
    of mle_popsize_tree is recorded at the tree's start coordinate, and
    the raw values, a smoothed curve (stats.smooth2) and the simulated
    population size step function are plotted.

    This is a visual check: it makes no assertions and ends with
    pause().
    """
    k = 20
    rho = 1.5e-8
    length = int(4e6)
    popsize = 1e4
    popsize2 = 1e4 * .5

    # Segment boundaries where the simulated population size changes.
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2, rho, start=a, end=b,
                                init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=b, end=length,
                                init_tree=arg)
    util.toc()

    # One (start position, estimate) pair per local tree.
    x = []
    y = []
    for (start, _end), tree in arglib.iter_tree_tracks(arg):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    x2, y2 = stats.smooth2(x, y, 100e3)

    p = plot(x, y, ymin=0)
    p.plot(x2, y2, style='lines')
    # Simulated population sizes drawn as a step function for reference.
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()
def test_forward(): k = 4 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(100e3 / 20) times = argweaver.get_time_points(ntimes=100) arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) print "recomb", len(arglib.get_recomb_pos(arg)) argweaver.discretize_arg(arg, times) # remove chrom new_name = "n%d" % (k - 1) arg = argweaver.remove_arg_thread(arg, new_name) carg = argweaverc.arg2ctrees(arg, times) util.tic("C fast") probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times) util.toc() util.tic("C slow") probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times, slow=True) util.toc() for i, (col1, col2) in enumerate(izip(probs1, probs2)): for a, b in izip(col1, col2): fequal(a, b, rel=.0001)
def test_forward(): k = 4 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(100e3 / 20) times = argweaver.get_time_points(ntimes=100) arg = arglib.sample_arg_smc(k, 2*n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) print "recomb", len(arglib.get_recombs(arg)) argweaver.discretize_arg(arg, times) # remove chrom new_name = "n%d" % (k - 1) arg = argweaver.remove_arg_thread(arg, new_name) carg = argweaverc.arg2ctrees(arg, times) util.tic("C fast") probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times) util.toc() util.tic("C slow") probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times, slow=True) util.toc() for i, (col1, col2) in enumerate(izip(probs1, probs2)): for a, b in izip(col1, col2): fequal(a, b, rel=.0001)
smc_coals_list = [] for i in range(20): k = 10 n = 10e3 length = 500e3 rho = 1.5e-8 # simulate an ARG from the CwR and convert it into SMC-style tic("simulate %d" % i) cwr_arg = arglib.sample_arg(k, n, rho, start=0, end=length) cwr_arg_converted = arglib.smcify_arg(cwr_arg) toc() # simulate an ARG directly from SMC process smc_arg = arglib.sample_arg_smc(k, n, rho, start=0, end=length) # gather all coalescence times cwr_coals = [node.age for node in cwr_arg_converted if node.event == 'coal'] smc_coals = [node.age for node in smc_arg if node.event == 'coal'] print len(cwr_coals), len(smc_coals) cwr_coals_list.append(cwr_coals) smc_coals_list.append(smc_coals) rplot_start('figures/cwr-smc-coals.pdf') rp.plot([], main='Comparison of CwR and SMC coalescence times', xlab='generations', ylab='', xlim=[50, 100e3], ylim=[0, 1],
def test_est_arg_popsize(self):
    """
    Plot population size estimates from an ARG re-sampled from sequences.

    Steps:
      1. Simulate an ARG with sample_arg_smc in three consecutive
         segments, [0, a), [a, b) and [b, length), using popsize,
         popsize2 and popsize respectively (later segments are seeded
         through init_tree).
      2. Generate mutations and an alignment from that ARG.
      3. Sample a new ARG from the alignment with arghmm.sample_arg and
         refine it with resample_climb_arg and resample_all_arg, all
         run with popsizes=1e4.
      4. Record mle_popsize_tree for each local tree of the new ARG,
         thin the values onto a regular grid, smooth them with
         stats.smooth2 and plot them next to the simulated population
         size step function.

    This is a visual check: it makes no assertions and ends with
    pause().
    """
    k = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps `length` an int even under true division
    # (Python 3 or `from __future__ import division`); range() below
    # rejects float arguments.
    length = int(2e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsize = 1e4
    popsize2 = 1e4 * .5

    # Segment boundaries where the simulated population size changes.
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2, rho, start=a, end=b,
                                init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=b, end=length,
                                init_tree=arg)

    # sim seq
    mut = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mut)
    util.toc()

    # sample arg
    util.tic("sample arg")
    arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                             popsizes=1e4, carg=True)
    arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                     rho=rho, mu=mu, times=times,
                                     refine=200)
    arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                   rho=rho, mu=mu, times=times,
                                   refine=200)
    util.toc()

    # One (start position, estimate) pair per local tree.
    x = []
    y = []
    for (start, _end), tree in arglib.iter_tree_tracks(arg2):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    # thin popsizes: for each grid position use the estimate of the
    # first tree starting at or after it, clamped to the last tree.
    # Assumes x is in ascending order (single forward scan with j).
    x2 = list(range(0, length, length // 5000))
    y2 = []
    j = 0
    for pos in x2:
        while j < len(x) and x[j] < pos:
            j += 1
        y2.append(y[min(j, len(y) - 1)])

    x3, y3 = stats.smooth2(x2, y2, 100e3)

    p = plot(x, y, ymin=0)
    p.plot(x3, y3, style='lines')
    # Simulated population sizes drawn as a step function for reference.
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()
def test_est_arg_popsize(self):
    """
    Plot population size estimates from an ARG re-sampled from sequences.

    Steps:
      1. Simulate an ARG with sample_arg_smc in three consecutive
         segments, [0, a), [a, b) and [b, length), using popsize,
         popsize2 and popsize respectively (later segments are seeded
         through init_tree).
      2. Generate mutations and an alignment from that ARG.
      3. Sample a new ARG from the alignment with arghmm.sample_arg and
         refine it with resample_climb_arg and resample_all_arg, all
         run with popsizes=1e4.
      4. Record mle_popsize_tree for each local tree of the new ARG,
         thin the values onto a regular grid, smooth them with
         stats.smooth2 and plot them next to the simulated population
         size step function.

    This is a visual check: it makes no assertions and ends with
    pause().
    """
    k = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps `length` an int even under true division
    # (Python 3 or `from __future__ import division`); range() below
    # rejects float arguments.
    length = int(2e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsize = 1e4
    popsize2 = 1e4 * .5

    # Segment boundaries where the simulated population size changes.
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2, rho, start=a, end=b,
                                init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=b, end=length,
                                init_tree=arg)

    # sim seq
    mut = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mut)
    util.toc()

    # sample arg
    util.tic("sample arg")
    arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                             popsizes=1e4, carg=True)
    arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                     rho=rho, mu=mu, times=times,
                                     refine=200)
    arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                   rho=rho, mu=mu, times=times,
                                   refine=200)
    util.toc()

    # One (start position, estimate) pair per local tree.
    x = []
    y = []
    for (start, _end), tree in arglib.iter_tree_tracks(arg2):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    # thin popsizes: for each grid position use the estimate of the
    # first tree starting at or after it, clamped to the last tree.
    # Assumes x is in ascending order (single forward scan with j).
    # list() makes x2 a real list on both Python 2 and Python 3.
    x2 = list(range(0, length, length // 5000))
    y2 = []
    j = 0
    for pos in x2:
        while j < len(x) and x[j] < pos:
            j += 1
        y2.append(y[min(j, len(y) - 1)])

    x3, y3 = stats.smooth2(x2, y2, 100e3)

    p = plot(x, y, ymin=0)
    p.plot(x3, y3, style='lines')
    # Simulated population sizes drawn as a step function for reference.
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()