def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    Mutations are represented as (node, parent, site, time).

    arg   -- ARG on which to simulate mutations
    mu    -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points

    Returns a list of (node, parent, site, time) tuples, one per simulated
    mutation.  Branches whose mutation rate (branch length * mu) is not
    positive receive no mutations.
    """
    mutations = []

    # When discretized times are given, every branch is treated as being at
    # least one tenth of times[1] long; otherwise no minimum is imposed.
    minlen = times[1] * .1 if times else 0.0

    for (start, end), tree in arglib.iter_local_trees(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            # nodes without parents have no branch above them to mutate
            if not node.parents:
                continue

            blen = max(node.get_dist(), minlen)
            rate = blen * mu

            # random.expovariate() divides by its argument, so a zero rate
            # (mu == 0, or a zero-length branch when no minimum length is
            # in effect) would raise ZeroDivisionError, and a negative rate
            # yields negative steps that never reach `end`.  A branch with
            # no positive rate simply carries no mutations.
            if rate <= 0:
                continue

            # walk along the block, jumping by exponentially distributed
            # gaps; each landing point inside the block is a mutation
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    # place the mutation uniformly along the branch
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))
    return mutations
def test_local_trees(self):
    """
    Compare local-tree blocks with recombination blocks.

    Samples an ARG, then checks that the first component of every item
    produced by arglib.iter_local_trees(arg, 200, 1200) (extracted with
    util.cget(..., 0)) equals the list produced by
    arglib.iter_recomb_blocks(arg, 200, 1200).
    """
    num_lineages = 10       # number of lineages
    popsize = 2*1e4         # effective popsize
    recomb_rate = 1.5e-8    # recomb/site/gen
    locus_len = 10000       # length of locus

    arg = arglib.sample_arg(
        num_lineages, popsize, recomb_rate, 0, locus_len)

    # both iterators are queried with the same pair of bounds
    lower, upper = 200, 1200
    tree_blocks = util.cget(arglib.iter_local_trees(arg, lower, upper), 0)
    recomb_blocks = list(arglib.iter_recomb_blocks(arg, lower, upper))

    self.assertEqual(tree_blocks, recomb_blocks)
def test_marginal_leaves(self):
    """
    Compare arglib.get_marginal_leaves with the leaves of each local tree.

    For every local tree of a sampled ARG (after single lineages are
    removed) and every node in that tree, the set of leaves reported by the
    tree must equal the set returned by get_marginal_leaves when queried at
    the midpoint of the tree's block.
    """
    num_lineages = 10       # number of lineages
    popsize = 2*10000       # effective popsize
    recomb_rate = 1.5e-8    # recomb/site/gen
    locus_len = 10000       # length of locus

    arg = arglib.sample_arg(
        num_lineages, popsize, recomb_rate, 0, locus_len)

    for (left, right), local_tree in arglib.iter_local_trees(arg):
        arglib.remove_single_lineages(local_tree)
        midpoint = (left + right) / 2.0

        for node in local_tree:
            tree_leaves = set(local_tree.leaves(node))
            marginal_leaves = set(
                arglib.get_marginal_leaves(arg, node, midpoint))
            self.assertEqual(tree_leaves, marginal_leaves)
def _test_prog_infsites(): make_clean_dir("test/data/test_prog_infsites") run_cmd("""bin/arg-sim \ -k 40 -L 200000 \ -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \ --ntimes 20 --maxtime 400e3 \ -o test/data/test_prog_infsites/0""") make_clean_dir("test/data/test_prog_infsites/0.sample") run_cmd("""bin/arg-sample \ -s test/data/test_prog_infsites/0.sites \ -N 1e4 -r 1.5e-8 -m 2.5e-8 \ --ntimes 5 --maxtime 100e3 -c 1 \ --climb 0 -n 20 --infsites \ -x 1 \ -o test/data/test_prog_infsites/0.sample/out""") arg = argweaver.read_arg( "test/data/test_prog_infsites/0.sample/out.0.smc.gz") sites = argweaver.read_sites("test/data/test_prog_infsites/0.sites") print "names", sites.names print noncompats = [] for block, tree in arglib.iter_local_trees(arg): tree = tree.get_tree() treelib.remove_single_children(tree) phylo.hash_order_tree(tree) for pos, col in sites.iter_region(block[0]+1, block[1]+1): assert block[0]+1 <= pos <= block[1]+1, (block, pos) split = sites_split(sites.names, col) node = arglib.split_to_tree_branch(tree, split) if node is None: noncompats.append(pos) print "noncompat", block, pos, col print phylo.hash_tree(tree) print tree.leaf_names() print "".join(col[sites.names.index(name)] for name in tree.leaf_names()) print split print print "num noncompats", len(noncompats)
def _test_prog_infsites(): make_clean_dir("test/tmp/test_prog_infsites") run_cmd("""bin/arg-sim \ -k 40 -L 200000 \ -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \ --ntimes 20 --maxtime 400e3 \ -o test/tmp/test_prog_infsites/0""") make_clean_dir("test/tmp/test_prog_infsites/0.sample") run_cmd("""bin/arg-sample \ -s test/tmp/test_prog_infsites/0.sites \ -N 1e4 -r 1.5e-8 -m 2.5e-8 \ --ntimes 5 --maxtime 100e3 -c 1 \ --climb 0 -n 20 --infsites \ -x 1 \ -o test/tmp/test_prog_infsites/0.sample/out""") arg = argweaver.read_arg( "test/tmp/test_prog_infsites/0.sample/out.0.smc.gz") sites = argweaver.read_sites("test/tmp/test_prog_infsites/0.sites") print "names", sites.names print noncompats = [] for block, tree in arglib.iter_local_trees(arg): tree = tree.get_tree() treelib.remove_single_children(tree) phylo.hash_order_tree(tree) for pos, col in sites.iter_region(block[0] + 1, block[1] + 1): assert block[0] + 1 <= pos <= block[1] + 1, (block, pos) split = sites_split(sites.names, col) node = arglib.split_to_tree_branch(tree, split) if node is None: noncompats.append(pos) print "noncompat", block, pos, col print phylo.hash_tree(tree) print tree.leaf_names() print "".join(col[sites.names.index(name)] for name in tree.leaf_names()) print split print print "num noncompats", len(noncompats)
def arg_equal(arg, arg2):
    """
    Assert that two ARGs are equal.

    Three comparisons are made with nose.tools.assert_equal, in order:

    1. the sorted positions of all nodes whose event is "recomb";
    2. for each local tree of `arg`, the hash of its topology against the
       hash of the marginal tree of `arg2` at the block midpoint (single
       lineages are removed from both first);
    3. the SPRs of both ARGs, pairwise: position, recombination and
       coalescence.  Pairing uses zip(), so it stops at the end of the
       shorter SPR sequence.

    An AssertionError is raised at the first mismatch.
    """
    check_equal = nose.tools.assert_equal

    def recomb_positions(some_arg):
        # sorted positions of the recombination nodes of an ARG
        positions = [x.pos for x in some_arg if x.event == "recomb"]
        positions.sort()
        return positions

    def canonical(event):
        # sort the first component so that its ordering does not matter
        return (sorted(event[0]), event[1])

    # test recomb points
    positions1 = recomb_positions(arg)
    positions2 = recomb_positions(arg2)
    check_equal(positions1, positions2)

    # check local tree topologies
    for (start, end), local1 in arglib.iter_local_trees(arg):
        midpoint = (start + end) / 2.0

        arglib.remove_single_lineages(local1)
        plain1 = local1.get_tree()

        local2 = arg2.get_marginal_tree(midpoint)
        arglib.remove_single_lineages(local2)
        plain2 = local2.get_tree()

        check_equal(phylo.hash_tree(plain1), phylo.hash_tree(plain2))

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)
    for spr1, spr2 in zip(sprs1, sprs2):
        pos1, recomb1, coal1 = spr1
        pos2, recomb2, coal2 = spr2

        recomb1 = canonical(recomb1)
        recomb2 = canonical(recomb2)
        coal1 = canonical(coal1)
        coal2 = canonical(coal2)

        # check pos, leaves, time
        check_equal(pos1, pos2)
        check_equal(recomb1, recomb2)
        check_equal(coal1, coal2)
def arg_equal(arg, arg2):
    """
    Assert that two ARGs are equal.

    Three comparisons are made with nose.tools.assert_equal, in order:

    1. the sorted positions of all nodes whose event is "recomb";
    2. for each local tree of `arg`, the hash of its topology against the
       hash of the marginal tree of `arg2` at the block midpoint (single
       lineages are removed from both first);
    3. the SPRs of both ARGs, pairwise: position, recombination and
       coalescence.  Pairing uses izip(), so it stops at the end of the
       shorter SPR sequence.

    An AssertionError is raised at the first mismatch.
    """
    check_equal = nose.tools.assert_equal

    def recomb_positions(some_arg):
        # sorted positions of the recombination nodes of an ARG
        positions = [x.pos for x in some_arg if x.event == "recomb"]
        positions.sort()
        return positions

    def canonical(event):
        # sort the first component so that its ordering does not matter
        return (sorted(event[0]), event[1])

    # test recomb points
    positions1 = recomb_positions(arg)
    positions2 = recomb_positions(arg2)
    check_equal(positions1, positions2)

    # check local tree topologies
    for (start, end), local1 in arglib.iter_local_trees(arg):
        midpoint = (start + end) / 2.0

        arglib.remove_single_lineages(local1)
        plain1 = local1.get_tree()

        local2 = arg2.get_marginal_tree(midpoint)
        arglib.remove_single_lineages(local2)
        plain2 = local2.get_tree()

        check_equal(phylo.hash_tree(plain1), phylo.hash_tree(plain2))

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)
    for spr1, spr2 in izip(sprs1, sprs2):
        pos1, recomb1, coal1 = spr1
        pos2, recomb2, coal2 = spr2

        recomb1 = canonical(recomb1)
        recomb2 = canonical(recomb2)
        coal1 = canonical(coal1)
        coal2 = canonical(coal2)

        # check pos, leaves, time
        check_equal(pos1, pos2)
        check_equal(recomb1, recomb2)
        check_equal(coal1, coal2)