def test_prior(self): """ Calculate state priors """ k = 10 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points() arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) prior = [model.prob_prior(0, j) for j in xrange(model.get_num_states(0))] print prior print sum(map(exp, prior)) fequal(sum(map(exp, prior)), 1.0, rel=.01)
def test_trans_single(self): """ Calculate transition probabilities Only calculate a single matrix """ k = 4 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) print "recomb", arglib.get_recomb_pos(arg) new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) pos = 10 tree = arg.get_marginal_tree(pos) mat = arghmm.calc_transition_probs(tree, model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) print model.states[pos] pc(mat) for row in mat: print sum(map(exp, row))
def test_emit(self):
    """
    Calculate emission probabilities

    Simulates an ARG and mutations on a fixed set of time points,
    removes the last chromosome, converts the trees and sequences with
    arg2ctrees / seqs2cseqs, and asserts the result of
    arghmm_assert_emit.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(1e3) / 20

    # simulate directly on the chosen time points
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    arg = arghmm.sample_arg_dsmc(
        k, 2*n, rho, start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    alignment = arghmm.make_alignment(arg, mutations)

    # remove the last chromosome from the ARG
    chrom = "n%d" % (k-1)
    arg = arghmm.remove_arg_thread(arg, chrom)

    # the removed chromosome is listed last among the sequence names
    trees, names = arghmm.arg2ctrees(arg, times)
    all_names = names + [chrom]
    seqs2, nseqs, seqlen = arghmm.seqs2cseqs(alignment, all_names)

    ok = arghmm.arghmm_assert_emit(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
    assert ok
def test_emit(self):
    """
    Calculate emission probabilities

    Builds a simulated ARG with one chromosome removed and asserts
    that arghmm_assert_emit accepts the converted trees and sequences.
    """
    # simulation parameters
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(1e3) / 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    # simulated ARG, mutations and alignment
    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho,
                                 start=0, end=length, times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, muts)

    # drop the thread of the last chromosome
    removed = "n%d" % (k - 1)
    arg = arghmm.remove_arg_thread(arg, removed)

    # convert trees and sequences; removed chromosome goes last
    trees, names = arghmm.arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = arghmm.seqs2cseqs(seqs, names + [removed])
    ntimes = len(times)

    assert arghmm.arghmm_assert_emit(
        trees, ntimes, times, mu, cseqs, nseqs, seqlen)
def test_trans_single(self): """ Calculate transition probabilities Only calculate a single matrix """ k = 4 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) print "recomb", arglib.get_recomb_pos(arg) new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) pos = 10 tree = arg.get_marginal_tree(pos) mat = arghmm.calc_transition_probs( tree, model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) print model.states[pos] pc(mat) for row in mat: print sum(map(exp, row))
def test_prior(self): """ Calculate state priors """ k = 10 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points() arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) prior = [ model.prob_prior(0, j) for j in xrange(model.get_num_states(0)) ] print prior print sum(map(exp, prior)) fequal(sum(map(exp, prior)), 1.0, rel=.01)
def test_post_plot(self): k = 6 n = 1e4 rho = 1.5e-8 * 50 mu = 2.5e-8 * 50 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=30) arghmm.discretize_arg(arg, times) pause() # save #arglib.write_arg("test/data/k4.arg", arg) #fasta.write_fasta("test/data/k4.fa", seqs) new_name = "n%d" % (k - 1) thread = list( arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [10000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.gnuplot("set linestyle 2") p.plot(high, style="lines") p.gnuplot("set linestyle 2") p.plot(low, style="lines") #write_list("test/data/post_real.txt", cget(thread, 1)) #write_list("test/data/post_high.txt", high) #write_list("test/data/post_low.txt", low) pause()
def test_post_plot(self): k = 6 n = 1e4 rho = 1.5e-8 * 50 mu = 2.5e-8 * 50 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=30) arghmm.discretize_arg(arg, times) pause() # save #arglib.write_arg("test/data/k4.arg", arg) #fasta.write_fasta("test/data/k4.fa", seqs) new_name = "n%d" % (k-1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [10000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.gnuplot("set linestyle 2") p.plot(high, style="lines") p.gnuplot("set linestyle 2") p.plot(low, style="lines") #write_list("test/data/post_real.txt", cget(thread, 1)) #write_list("test/data/post_high.txt", high) #write_list("test/data/post_low.txt", low) pause()
def test_trans_switch_single(self): """ Calculate transitions probabilities for switching between blocks Only calculate a single matrix """ k = 5 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) #arglib.write_arg("tmp/a.arg", arg) #arg = arglib.read_arg("tmp/a.arg") #arg.set_ancestral() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(5) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) # get recombs recombs = list(x.pos for x in arghmm.iter_visible_recombs(arg)) print "recomb", recombs pos = recombs[0] + 1 tree = arg.get_marginal_tree(pos - .5) last_tree = arg.get_marginal_tree(pos - 1 - .5) print "states1>>", model.states[pos - 1] print "states2>>", model.states[pos] treelib.draw_tree_names(last_tree.get_tree(), minlen=5, maxlen=5) treelib.draw_tree_names(tree.get_tree(), minlen=5, maxlen=5) print "pos>>", pos recomb = [x for x in tree if x.event == "recomb" and x.pos + 1 == pos][0] mat = arghmm.calc_transition_probs_switch(tree, last_tree, recomb.name, model.states[pos - 1], model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) pc(mat)
def test_trans_switch_single(self): """ Calculate transitions probabilities for switching between blocks Only calculate a single matrix """ k = 5 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) #arglib.write_arg("tmp/a.arg", arg) #arg = arglib.read_arg("tmp/a.arg") #arg.set_ancestral() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(5) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) # get recombs recombs = list(x.pos for x in arghmm.iter_visible_recombs(arg)) print "recomb", recombs pos = recombs[0] + 1 tree = arg.get_marginal_tree(pos-.5) last_tree = arg.get_marginal_tree(pos-1-.5) print "states1>>", model.states[pos-1] print "states2>>", model.states[pos] treelib.draw_tree_names(last_tree.get_tree(), minlen=5, maxlen=5) treelib.draw_tree_names(tree.get_tree(), minlen=5, maxlen=5) print "pos>>", pos recomb = [x for x in tree if x.event == "recomb" and x.pos+1 == pos][0] mat = arghmm.calc_transition_probs_switch( tree, last_tree, recomb.name, model.states[pos-1], model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) pc(mat)
def test_state_corr(self): k = 12 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(1e3) / 20 times = arghmm.get_time_points(ntimes=20, maxtime=200e3) arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length, times=times) muts = arghmm.sample_arg_mutations(arg, mu, times) seqs = arglib.make_alignment(arg, muts) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) nstates = len(model.states[0]) prior = [-util.INF] * nstates prior[random.randint(0, nstates)] = 0.0 probs1 = list(arghmm.forward_algorithm(model, length, verbose=True)) probs2 = list( arghmm.forward_algorithm(model, length, prior=prior, verbose=True)) model.rho *= 1e-9 probs3 = list( arghmm.forward_algorithm(model, length, prior=prior, verbose=True)) p = plot(vsubs(probs1[length - 1], mean(probs1[length - 1]))) p.plot(vsubs(probs2[length - 1], mean(probs2[length - 1]))) p.plot(vsubs(probs3[length - 1], mean(probs3[length - 1]))) pause()
def test_backward(self): """ Run backward algorithm """ k = 3 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "recomb", model.recomb_pos print "muts", len(muts) probs = hmm.backward_algorithm(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol))
def test_state_corr(self): k = 12 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(1e3) / 20 times = arghmm.get_time_points(ntimes=20, maxtime=200e3) arg = arghmm.sample_arg_dsmc(k, 2*n, rho, start=0, end=length, times=times) muts = arghmm.sample_arg_mutations(arg, mu, times) seqs = arglib.make_alignment(arg, muts) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) nstates = len(model.states[0]) prior = [-util.INF] * nstates prior[random.randint(0, nstates)] = 0.0 probs1 = list(arghmm.forward_algorithm(model, length, verbose=True)) probs2 = list(arghmm.forward_algorithm(model, length, prior=prior, verbose=True)) model.rho *= 1e-9 probs3 = list(arghmm.forward_algorithm(model, length, prior=prior, verbose=True)) p = plot(vsubs(probs1[length-1], mean(probs1[length-1]))) p.plot(vsubs(probs2[length-1], mean(probs2[length-1]))) p.plot(vsubs(probs3[length-1], mean(probs3[length-1]))) pause()
def test_emit_argmax(self): """ Calculate emission probabilities """ k = 10 n = 1e4 rho = 0.0 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name])) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) nstates = model.get_num_states(1) probs = [0.0 for j in xrange(nstates)] for i in xrange(1, length): if i % 100 == 0: print i for j in xrange(nstates): probs[j] += model.prob_emission(i, j) print # is the maximum likelihood emission matching truth data = sorted(zip(probs, model.states[0]), reverse=True) pc(data[:20]) state = (thread[0][0], times.index(thread[0][1])) print data[0][1], state assert data[0][1] == state
def test_emit_argmax(self): """ Calculate emission probabilities """ k = 10 n = 1e4 rho = 0.0 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name])) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) nstates = model.get_num_states(1) probs = [0.0 for j in xrange(nstates)] for i in xrange(1, length): if i % 100 == 0: print i for j in xrange(nstates): probs[j] += model.prob_emission(i, j) print # is the maximum likelihood emission matching truth data = sorted(zip(probs, model.states[0]), reverse=True) pc(data[:20]) state = (thread[0][0], times.index(thread[0][1])) print data[0][1], state assert data[0][1] == state
def test_remove_thread(self): """ Remove a leaf from an ARG """ k = 3 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) chrom = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, chrom) recomb = arglib.get_recomb_pos(arg) print "recomb", recomb tree = arg.get_marginal_tree(-.5) draw_tree_names(tree.get_tree(), minlen=5, maxlen=5) print sorted([(x.pos, x.event) for x in tree])
def test_remove_thread(self): """ Remove a leaf from an ARG """ k = 3 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) chrom = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, chrom) recomb = arglib.get_recomb_pos(arg) print "recomb", recomb tree = arg.get_marginal_tree(-.5) draw_tree_names(tree.get_tree(), minlen=5, maxlen=5) print sorted([(x.pos, x.event) for x in tree])
def test_backward(self): """ Run backward algorithm """ k = 3 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "recomb", model.recomb_pos print "muts", len(muts) probs = hmm.backward_algorithm(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol))