Exemplo n.º 1
0
def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    For every local tree yielded by arglib.iter_local_trees, and every node
    of that tree that has a parent, positions are drawn along the block
    starting at `start` with exponentially distributed gaps of rate
    (branch length * mu); draws that land at or beyond `end` are discarded.
    Each kept mutation gets a time drawn uniformly between the node's age
    and the node's age plus the branch length.

    Mutations are represented as (node, parent, site, time).

    arg -- ARG on which to simulate mutations
    mu -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points; when given, branch
             lengths are floored at 10% of times[1]

    Returns the list of mutation tuples.  Branches whose rate is not
    positive (e.g. mu == 0, or a zero-length branch when no `times` is
    given) receive no mutations.
    """
    mutations = []
    # Floor for branch lengths so that zero-length branches in a
    # discretized ARG can still carry mutations.
    minlen = times[1] * .1 if times else 0.0

    for (start, end), tree in arglib.iter_local_trees(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            if not node.parents:
                # no branch above this node
                continue
            blen = max(node.get_dist(), minlen)
            rate = blen * mu
            if rate <= 0:
                # random.expovariate() divides by its argument, so a zero
                # rate raises ZeroDivisionError, and a negative rate yields
                # negative steps that would keep the loop below running
                # forever.  Such a branch simply gets no mutations.
                continue
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))
    return mutations
Exemplo n.º 2
0
def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    For every local tree yielded by arglib.iter_local_trees, and every node
    of that tree that has a parent, positions are drawn along the block
    starting at `start` with exponentially distributed gaps of rate
    (branch length * mu); draws that land at or beyond `end` are discarded.
    Each kept mutation gets a time drawn uniformly between the node's age
    and the node's age plus the branch length.

    Mutations are represented as (node, parent, site, time).

    arg -- ARG on which to simulate mutations
    mu -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points; when given, branch
             lengths are floored at 10% of times[1]

    Returns the list of mutation tuples.  Branches whose rate is not
    positive (e.g. mu == 0, or a zero-length branch when no `times` is
    given) receive no mutations.
    """
    mutations = []
    # Floor for branch lengths so that zero-length branches in a
    # discretized ARG can still carry mutations.
    minlen = times[1] * .1 if times else 0.0

    for (start, end), tree in arglib.iter_local_trees(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            if not node.parents:
                # no branch above this node
                continue
            blen = max(node.get_dist(), minlen)
            rate = blen * mu
            if rate <= 0:
                # random.expovariate() divides by its argument, so a zero
                # rate raises ZeroDivisionError, and a negative rate yields
                # negative steps that would keep the loop below running
                # forever.  Such a branch simply gets no mutations.
                continue
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))
    return mutations
Exemplo n.º 3
0
    def test_local_trees(self):
        """
        Check that iter_local_trees and iter_recomb_blocks agree on blocks.

        Samples a random ARG, then compares item 0 of what
        arglib.iter_local_trees yields (pulled out with util.cget) against
        the list produced by arglib.iter_recomb_blocks, both called with the
        same 200, 1200 arguments.
        """
        num_lineages = 10        # number of lineages
        locus_len = 10000        # length of locus
        popsize = 2*1e4          # effective popsize
        recomb_rate = 1.5e-8     # recomb/site/gen

        # presumably the sub-region of the locus to iterate over -- confirm
        region = (200, 1200)

        arg = arglib.sample_arg(
            num_lineages, popsize, recomb_rate, 0, locus_len)

        tree_blocks = util.cget(arglib.iter_local_trees(arg, *region), 0)
        recomb_blocks = list(arglib.iter_recomb_blocks(arg, *region))
        self.assertEqual(tree_blocks, recomb_blocks)
Exemplo n.º 4
0
    def test_marginal_leaves(self):
        """
        Check get_marginal_leaves against the leaves of each local tree.

        Samples a random ARG and, for every node of every local tree (after
        arglib.remove_single_lineages), asserts that the set of leaves the
        tree reports below the node equals the set returned by
        arglib.get_marginal_leaves at the midpoint of the tree's block.
        """
        num_lineages = 10        # number of lineages
        locus_len = 10000        # length of locus
        popsize = 2*10000        # effective popsize
        recomb_rate = 1.5e-8     # recomb/site/gen

        arg = arglib.sample_arg(
            num_lineages, popsize, recomb_rate, 0, locus_len)

        for (left, right), local_tree in arglib.iter_local_trees(arg):
            arglib.remove_single_lineages(local_tree)
            midpoint = (left + right) / 2.0
            for node in local_tree:
                from_tree = set(local_tree.leaves(node))
                from_arg = set(
                    arglib.get_marginal_leaves(arg, node, midpoint))
                self.assertEqual(from_tree, from_arg)
Exemplo n.º 5
0
def _test_prog_infsites():

    make_clean_dir("test/data/test_prog_infsites")

    run_cmd("""bin/arg-sim \
        -k 40 -L 200000 \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \
        --ntimes 20 --maxtime 400e3 \
        -o test/data/test_prog_infsites/0""")

    make_clean_dir("test/data/test_prog_infsites/0.sample")
    run_cmd("""bin/arg-sample \
        -s test/data/test_prog_infsites/0.sites \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 \
        --ntimes 5 --maxtime 100e3 -c 1 \
        --climb 0 -n 20 --infsites \
        -x 1 \
        -o test/data/test_prog_infsites/0.sample/out""")

    arg = argweaver.read_arg(
        "test/data/test_prog_infsites/0.sample/out.0.smc.gz")
    sites = argweaver.read_sites("test/data/test_prog_infsites/0.sites")
    print "names", sites.names
    print

    noncompats = []
    for block, tree in arglib.iter_local_trees(arg):
        tree = tree.get_tree()
        treelib.remove_single_children(tree)
        phylo.hash_order_tree(tree)
        for pos, col in sites.iter_region(block[0]+1, block[1]+1):
            assert block[0]+1 <= pos <= block[1]+1, (block, pos)
            split = sites_split(sites.names, col)
            node = arglib.split_to_tree_branch(tree, split)
            if node is None:
                noncompats.append(pos)
                print "noncompat", block, pos, col
                print phylo.hash_tree(tree)
                print tree.leaf_names()
                print "".join(col[sites.names.index(name)]
                              for name in tree.leaf_names())
                print split
                print
    print "num noncompats", len(noncompats)
Exemplo n.º 6
0
def _test_prog_infsites():

    make_clean_dir("test/tmp/test_prog_infsites")

    run_cmd("""bin/arg-sim \
        -k 40 -L 200000 \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \
        --ntimes 20 --maxtime 400e3 \
        -o test/tmp/test_prog_infsites/0""")

    make_clean_dir("test/tmp/test_prog_infsites/0.sample")
    run_cmd("""bin/arg-sample \
        -s test/tmp/test_prog_infsites/0.sites \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 \
        --ntimes 5 --maxtime 100e3 -c 1 \
        --climb 0 -n 20 --infsites \
        -x 1 \
        -o test/tmp/test_prog_infsites/0.sample/out""")

    arg = argweaver.read_arg(
        "test/tmp/test_prog_infsites/0.sample/out.0.smc.gz")
    sites = argweaver.read_sites("test/tmp/test_prog_infsites/0.sites")
    print "names", sites.names
    print

    noncompats = []
    for block, tree in arglib.iter_local_trees(arg):
        tree = tree.get_tree()
        treelib.remove_single_children(tree)
        phylo.hash_order_tree(tree)
        for pos, col in sites.iter_region(block[0] + 1, block[1] + 1):
            assert block[0] + 1 <= pos <= block[1] + 1, (block, pos)
            split = sites_split(sites.names, col)
            node = arglib.split_to_tree_branch(tree, split)
            if node is None:
                noncompats.append(pos)
                print "noncompat", block, pos, col
                print phylo.hash_tree(tree)
                print tree.leaf_names()
                print "".join(col[sites.names.index(name)]
                              for name in tree.leaf_names())
                print split
                print
    print "num noncompats", len(noncompats)
Exemplo n.º 7
0
def arg_equal(arg, arg2):
    """
    Assert that two ARGs are equal.

    Three things are compared, in this order, with nose.tools.assert_equal:
    the sorted positions of the "recomb" nodes, the phylo.hash_tree value
    of every local tree of `arg` against the marginal tree of `arg2` at the
    block midpoint, and the SPRs yielded by arglib.iter_arg_sprs for both.

    arg -- first ARG
    arg2 -- second ARG

    Returns None; the first mismatch raises through assert_equal.
    """
    check = nose.tools.assert_equal

    def recomb_positions(nodes):
        # sorted positions of all recombination nodes
        return sorted([x.pos for x in nodes if x.event == "recomb"])

    def canonical(event):
        # sort the first item so its ordering cannot cause a mismatch
        # (judging by the use_leaves flag, item 0 is a leaf collection and
        # item 1 a time -- confirm)
        return (sorted(event[0]), event[1])

    # test recomb points
    check(recomb_positions(arg), recomb_positions(arg2))

    # check local tree topologies
    for (left, right), local in arglib.iter_local_trees(arg):
        midpoint = (left + right) / 2.0

        arglib.remove_single_lineages(local)
        mine = local.get_tree()

        marginal = arg2.get_marginal_tree(midpoint)
        arglib.remove_single_lineages(marginal)
        theirs = marginal.get_tree()

        check(phylo.hash_tree(mine), phylo.hash_tree(theirs))

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)

    for spr1, spr2 in zip(sprs1, sprs2):
        pos1, recomb1, coal1 = spr1
        pos2, recomb2, coal2 = spr2

        recomb_pair = (canonical(recomb1), canonical(recomb2))
        coal_pair = (canonical(coal1), canonical(coal2))

        # check pos, leaves, time
        check(pos1, pos2)
        check(*recomb_pair)
        check(*coal_pair)
Exemplo n.º 8
0
def arg_equal(arg, arg2):
    """
    Assert that two ARGs are equal.

    Three things are compared, in this order, with nose.tools.assert_equal:
    the sorted positions of the "recomb" nodes, the phylo.hash_tree value
    of every local tree of `arg` against the marginal tree of `arg2` at the
    block midpoint, and the SPRs yielded by arglib.iter_arg_sprs for both.

    arg -- first ARG
    arg2 -- second ARG

    Returns None; the first mismatch raises through assert_equal.
    """
    check = nose.tools.assert_equal

    def recomb_positions(nodes):
        # sorted positions of all recombination nodes
        return sorted([x.pos for x in nodes if x.event == "recomb"])

    def canonical(event):
        # sort the first item so its ordering cannot cause a mismatch
        # (judging by the use_leaves flag, item 0 is a leaf collection and
        # item 1 a time -- confirm)
        return (sorted(event[0]), event[1])

    # test recomb points
    check(recomb_positions(arg), recomb_positions(arg2))

    # check local tree topologies
    for (left, right), local in arglib.iter_local_trees(arg):
        midpoint = (left + right) / 2.0

        arglib.remove_single_lineages(local)
        mine = local.get_tree()

        marginal = arg2.get_marginal_tree(midpoint)
        arglib.remove_single_lineages(marginal)
        theirs = marginal.get_tree()

        check(phylo.hash_tree(mine), phylo.hash_tree(theirs))

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)

    # izip walks both SPR iterators in lockstep without building a list
    for spr1, spr2 in izip(sprs1, sprs2):
        pos1, recomb1, coal1 = spr1
        pos2, recomb2, coal2 = spr2

        recomb_pair = (canonical(recomb1), canonical(recomb2))
        coal_pair = (canonical(coal1), canonical(coal2))

        # check pos, leaves, time
        check(pos1, pos2)
        check(*recomb_pair)
        check(*coal_pair)