Пример #1
0
def run_simulate(args):
    """
    Run one simulation configured by ``args`` and process the resulting trees.

    The simulator output is written to in-memory text buffers, loaded back
    with ``msprime.load_text`` and handed to ``process_trees``.
    """
    n = args.sample_size
    m = args.num_loci
    rho = args.recombination_rate
    num_populations = args.num_populations

    # Symmetric migration: the configured rate off the diagonal, zero on it.
    migration_matrix = []
    for row in range(num_populations):
        migration_matrix.append([
            args.migration_rate * int(col != row)
            for col in range(num_populations)
        ])

    # Defaults: every sample in population 0, no growth, unit sizes.
    sample_configuration = [0] * num_populations
    population_growth_rates = [0] * num_populations
    population_sizes = [1] * num_populations
    sample_configuration[0] = n

    # Explicit command-line values replace the defaults wholesale.
    if args.sample_configuration is not None:
        sample_configuration = args.sample_configuration
    if args.population_growth_rates is not None:
        population_growth_rates = args.population_growth_rates
    if args.population_sizes is not None:
        population_sizes = args.population_sizes

    random.seed(args.random_seed)
    simulator = Simulator(
        n, m, rho, migration_matrix, sample_configuration,
        population_growth_rates, population_sizes,
        args.population_growth_rate_change,
        args.population_size_change,
        args.migration_matrix_element_change, args.bottleneck, 10000)
    simulator.simulate()

    # Round-trip through text tables held in memory.
    nodes_buffer = StringIO()
    edgesets_buffer = StringIO()
    simulator.write_text(nodes_buffer, edgesets_buffer)
    for buffer in (nodes_buffer, edgesets_buffer):
        buffer.seek(0)
    process_trees(msprime.load_text(nodes_buffer, edgesets_buffer))
Пример #2
0
 def test_node_times_stable(self):
     """Node times of the sampled individuals must survive simplify()."""
     # Initial tree sequence containing only a, b and c.
     node_text = six.StringIO("""\
     id      is_sample   population      time
     0       0           -1              1.00000000000000
     1       1           -1              0.00000000000000
     2       1           -1              0.00000000000000
     """)
     edge_text = six.StringIO("""\
     id      left            right           parent  child
     0       0.00000000      1.00000000      0       1
     1       0.00000000      1.00000000      0       2
     """)
     init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                 strict=False)
     founders = {
         self.ids[name]: node
         for name, node in (('a', 0), ('b', 1), ('c', 2))
     }
     arg = ftprime.ARGrecorder(ts=init_ts, node_ids=founders, time=1.0)

     events = (
         ('b', 'a', 0.9, 'd', 2.0),
         ('a', 'c', 0.1, 'e', 2.0),
         ('d', 'e', 0.7, 'f', 3.0),
         ('f', 'd', 0.8, 'g', 4.0),
         ('e', 'f', 0.2, 'h', 4.0),
         ('b', 'g', 0.6, 'i', 5.0),
         ('g', 'h', 0.5, 'j', 5.0),
         ('c', 'h', 0.4, 'k', 5.0),
     )
     for event in events:
         self.f(arg, *event)
     arg.update_times()

     def times_by_input_id():
         # Snapshot of the recorded time for every currently known input id.
         return {u: arg.nodes.time[arg.node_ids[u]] for u in arg.node_ids}

     node_times = times_by_input_id()
     print(arg)
     arg.simplify(self.sample_input_ids)
     print(arg)
     new_node_times = times_by_input_id()

     for sample in self.sample_input_ids:
         self.assertEqual(node_times[sample], new_node_times[sample])
Пример #3
0
    def test_intermediate_simplify(self):
        """Simplifying part-way through recording must not change the trees."""
        # Initial tree sequence containing only a, b and c.
        node_text = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edge_text = six.StringIO("""\
        id      left            right           parent  children
        0       0.00000000      1.00000000      0       1,2
        """)
        init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                    strict=False)

        founders = {
            self.ids[name]: node
            for name, node in (('a', 0), ('b', 1), ('c', 2))
        }
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=founders, time=1.0)

        before_simplify = (
            ('b', 'a', 0.9, 'd', 2.0),
            ('a', 'c', 0.1, 'e', 2.0),
            ('d', 'e', 0.7, 'f', 3.0),
            ('f', 'd', 0.8, 'g', 4.0),
        )
        after_simplify = (
            ('e', 'f', 0.2, 'h', 4.0),
            ('b', 'g', 0.6, 'i', 5.0),
            ('g', 'h', 0.5, 'j', 5.0),
            ('c', 'h', 0.4, 'k', 5.0),
        )

        for event in before_simplify:
            self.f(arg, *event)

        # Simplify half-way, keeping the individuals named below.
        print(arg)
        kept = [self.ids[name] for name in ('b', 'c', 'e', 'f', 'g')]
        arg.simplify(samples=kept)
        print(arg)

        for event in after_simplify:
            self.f(arg, *event)
        print(arg)

        self.check_trees(arg.tree_sequence(self.sample_input_ids),
                         self.true_tss)
Пример #4
0
 def ts_txts_to_trees(ts_nodes, ts_edges, trees_outname=None):
     """
     Load node and edge text tables, simplify them and optionally save the result.

     :param ts_nodes: file object holding the node table; its ``.name`` is used
         for logging and for the diagnostic copy.
     :param ts_edges: file object holding the edge table; its ``.name`` is used
         for logging and for the diagnostic copy.
     :param trees_outname: if truthy, the simplified tree sequence is dumped to
         this path.
     :return: the simplified tree sequence.

     If loading fails, the two input files are copied to ``bad.nodes`` and
     ``bad.edges``; if simplification fails, the unsimplified tree sequence is
     dumped to ``bad.trees``. In both cases the original exception is re-raised.
     """
     import shutil
     import msprime
     logging.info("== Converting new ts ARG to .trees ===")
     try:
         ts = msprime.load_text(nodes=ts_nodes, edges=ts_edges)
     except Exception:
         # Catch Exception rather than using a bare except, so that
         # KeyboardInterrupt / SystemExit are not intercepted for diagnostics.
         logging.warning(
             "Can't load the texts file properly. Saved copied to 'bad.nodes' & 'bad.edges' for inspection"
         )
         shutil.copyfile(ts_nodes.name, "bad.nodes")
         shutil.copyfile(ts_edges.name, "bad.edges")
         raise
     logging.info("== loaded {}, {}===".format(ts_nodes.name,
                                               ts_edges.name))
     try:
         simple_ts = ts.simplify()
     except Exception:
         # Keep the unsimplified data around for inspection, then propagate.
         ts.dump("bad.trees")
         logging.warning(
             "Can't simplify. .trees file dumped to 'bad.trees'")
         raise
     if trees_outname:
         simple_ts.dump(trees_outname)
     return simple_ts
Пример #5
0
 def simple_ex(self):
     """
     Build a RecombCollector on a tiny initial tree sequence.

     Returns the default-mode collector together with the node id mapping;
     a second, binary-mode collector is built only to check its mode.
     """
     # This begins with a single diploid individual.
     node_text = six.StringIO("""\
     id      is_sample   population      time
     0       0           0               1.00000000000000
     1       1           1               0.00000000000000
     2       1           2               0.00000000000000
     """)
     edge_text = six.StringIO("""\
     id      left            right           parent  child
     0       0.00000000      3.00000000      0       1
     1       0.00000000      3.00000000      0       2
     """)
     init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                 strict=False)
     # Diploid 0 maps initially to haploids 1 and 2 in init_ts.
     node_ids = {(0, 0): 1, (0, 1): 2}
     shared_kwargs = {
         'ts': init_ts,
         'node_ids': node_ids,
         'locus_position': [0.0, 1.0, 2.0, 3.0],
         'benchmark': True,
     }

     rc = ftprime.RecombCollector(**shared_kwargs)
     assert rc.mode == 'text'

     rc2 = ftprime.RecombCollector(mode='binary', **shared_kwargs)
     assert rc2.mode == 'binary'

     return rc, node_ids
Пример #6
0
def main(ts, fastARG_executable, fa_in, fa_out, nodes_fh, edges_fh, sites_fh,
         muts_fh):
    """
    This is just to test if fastarg produces the same haplotypes.

    :param ts: the original tree sequence; converted to fastARG input and
        used for the sequence length and the record-count comparison.
    :param fastARG_executable: the fastARG program to run (``build`` mode).
    :param fa_in: file object receiving the fastARG input; its ``.name`` is
        passed to fastARG on the command line.
    :param fa_out: file object receiving fastARG's standard output.
    :param nodes_fh: file object for the node text table.
    :param edges_fh: file object for the edge text table.
    :param sites_fh: file object for the site text table.
    :param muts_fh: file object for the mutation text table.

    A non-zero fastARG exit status is logged as a warning; processing still
    continues, as before.
    """
    import subprocess
    seq_len = ts.get_sequence_length()
    ts_to_fastARG_in(ts, fa_in)
    exit_status = subprocess.call(
        [fastARG_executable, 'build', fa_in.name], stdout=fa_out)
    if exit_status != 0:
        # Previously the status was dropped silently; surface it so that a
        # later parse failure can be traced back to the fastARG run.
        logging.warning("fastARG exited with non-zero status %s", exit_status)
    fastARG_out_to_ts_txts(fa_out,
                           variant_positions_from_fastARGin(fa_in),
                           nodes_fh,
                           edges_fh,
                           sites_fh,
                           muts_fh,
                           seq_len=seq_len)

    new_ts = msprime.load_text(nodes=nodes_fh,
                               edges=edges_fh,
                               sites=sites_fh,
                               mutations=muts_fh)
    simple_ts = new_ts.simplify()
    logging.debug(
        "Simplified num_records should always be < unsimplified num_records.\n"
        "For low mutationRate:recombinationRate ratio,"
        " the initial num records will probably be higher than the"
        " fastarg num_records, as the original simulation will have records"
        " which leave no mutational trace. As the mutation rate increases,"
        " we expect the fastarg num_records to equal, then exceed the original"
        " as fastarg starts inferring the wrong (less parsimonious) set of trees"
    )
    logging.debug(
        "Initial num records = {}, fastARG (simplified) = {}, fastARG (unsimplified) = {}"
        .format(ts.get_num_records(), simple_ts.get_num_records(),
                new_ts.get_num_records()))
Пример #7
0
    def test_build_ts(self):
        """
        Record every birth event on top of the initial tree sequence, then
        check node count, times, sample flags and the resulting trees.
        """
        # Initial tree sequence containing only a, b and c.
        node_text = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edge_text = six.StringIO("""\
        id      left            right           parent  child
        0       0.00000000      1.00000000      0       1
        1       0.00000000      1.00000000      0       2
        """)
        init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                    strict=False)

        founders = {
            self.ids[name]: node
            for name, node in (('a', 0), ('b', 1), ('c', 2))
        }
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=founders, time=1.0)

        # Steps 1 and 2 (individual `a` at `t=0`, then `b` and `c` at `t=1`)
        # are taken care of in init_ts.
        events = (
            # 3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
            ('b', 'a', 0.9, 'd', 2.0),
            ('a', 'c', 0.1, 'e', 2.0),
            # 4. `(d,e,0.7)->f` at `t=3`
            ('d', 'e', 0.7, 'f', 3.0),
            # 5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
            ('f', 'd', 0.8, 'g', 4.0),
            ('e', 'f', 0.2, 'h', 4.0),
            # 6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
            ('b', 'g', 0.6, 'i', 5.0),
            ('g', 'h', 0.5, 'j', 5.0),
            ('c', 'h', 0.4, 'k', 5.0),
        )
        for event in events:
            self.f(arg, *event)

        # 7. We sample `i`, `j` and `k`.
        arg.mark_samples(samples=self.sample_input_ids)
        arg.update_times()

        arg_ids = {name: arg.node_ids[self.ids[name]] for name in self.ids}
        self.assertEqual(arg.tables.nodes.num_rows, len(self.ids))
        self.assertEqual(arg.max_time, 5.0)
        for name in self.ids:
            node = arg_ids[name]
            self.assertEqual(arg.tables.nodes.time[node],
                             5.0 - self.true_times[self.ids[name]])
            if name not in self.sample_ids:
                self.assertEqual(arg.tables.nodes.flags[node], 0)
            else:
                self.assertEqual(arg.tables.nodes.flags[node],
                                 msprime.NODE_IS_SAMPLE)

        self.check_trees(arg.tree_sequence(self.sample_input_ids),
                         self.true_tss)
Пример #8
0
 def test_stick_tree(self):
     """A three-node chain must render as a vertical stick."""
     node_text = six.StringIO("""\
     id  is_sample   time
     0   1           0
     1   1           1
     2   1           2
     """)
     edge_text = six.StringIO("""\
     left    right   parent  child
     0       1       1       0
     0       1       2       1
     """)
     expected = (
         "2\n"
         "┃\n"
         "1\n"
         "┃\n"
         "0\n")
     ts = msprime.load_text(node_text, edge_text, strict=False)
     first_tree = next(ts.trees())
     self.verify_text_rendering(first_tree, expected)
Пример #9
0
 def test_trident_tree(self):
     """A root with three leaf children must render as a trident."""
     node_text = six.StringIO("""\
     id  is_sample   time
     0   1           0
     1   1           0
     2   1           0
     3   1           2
     """)
     edge_text = six.StringIO("""\
     left    right   parent  child
     0       1       3       0
     0       1       3       1
     0       1       3       2
     """)
     expected = (
         "  3  \n"
         "┏━╋━┓\n"
         "0 1 2\n")
     ts = msprime.load_text(node_text, edge_text, strict=False)
     first_tree = next(ts.trees())
     self.verify_text_rendering(first_tree, expected)
Пример #10
0
def fastARG_out_to_ts(fastARG_out_filehandle, variant_positions, seq_len=None):
    """
    The same as fastARG_out_to_ts_txts, but the text tables go to temporary
    files and the simplified tree sequence loaded from them is returned.
    """
    def scratch_file():
        # Each table gets its own read/write temporary text file.
        return tempfile.NamedTemporaryFile("w+")

    with scratch_file() as nodes_fh, \
            scratch_file() as edges_fh, \
            scratch_file() as sites_fh, \
            scratch_file() as mutations_fh:
        fastARG_out_to_ts_txts(
            fastARG_out_filehandle, variant_positions,
            nodes_fh, edges_fh, sites_fh, mutations_fh,
            seq_len=seq_len)
        loaded = msprime.load_text(
            nodes=nodes_fh, edges=edges_fh,
            sites=sites_fh, mutations=mutations_fh)
        return loaded.simplify()
Пример #11
0
 def test_simple_tree(self):
     """A root with two leaf children must render as a simple fork."""
     node_text = six.StringIO("""\
     id  is_sample   time
     0   1           0
     1   1           0
     2   1           2
     """)
     edge_text = six.StringIO("""\
     left    right   parent  child
     0       1       2       0
     0       1       2       1
     """)
     expected = " 2 \n" "┏┻┓\n" "0 1"
     ts = msprime.load_text(node_text, edge_text, strict=False)
     first_tree = next(ts.trees())
     self.verify_text_rendering(first_tree.draw(format="unicode"), expected)
Пример #12
0
 def test_pitchfork_tree(self):
     """A root with four leaf children must render as a pitchfork."""
     node_text = six.StringIO("""\
     id  is_sample   time
     0   1           0
     1   1           0
     2   1           0
     3   1           0
     4   1           2
     """)
     edge_text = six.StringIO("""\
     left    right   parent  child
     0       1       4       0
     0       1       4       1
     0       1       4       2
     0       1       4       3
     """)
     expected = (
         "   4   \n"
         "┏━┳┻┳━┓\n"
         "0 1 2 3\n")
     ts = msprime.load_text(node_text, edge_text, strict=False)
     first_tree = next(ts.trees())
     self.verify_text_rendering(first_tree, expected)
Пример #13
0
 def test_odd_num_children_tree(self):
     """A root with five children must draw in the configured format."""
     node_text = six.StringIO("""\
     id  is_sample   time
     0   1           0
     1   1           1
     2   1           2
     3   1           1
     4   1           4
     5   1           5
     """)
     edge_text = six.StringIO("""\
     left    right   parent  child
     0       1       5       0
     0       1       5       1
     0       1       5       2
     0       1       5       3
     0       1       5       4
     """)
     ts = msprime.load_text(node_text, edge_text, strict=False)
     first_tree = next(ts.trees())
     rendered = first_tree.draw(format=self.drawing_format)
     self.verify_basic_text(rendered)
Пример #14
0
 def test_pitchfork_tree(self):
     """
     Check unicode rendering of a four-child tree with custom node labels.

     Two cases are verified: an empty ``node_labels`` mapping, and a mapping
     that labels only nodes 0 and 3. The default-label rendering is not
     checked here.
     """
     nodes = six.StringIO("""\
     id  is_sample   time
     0   1           0
     1   1           0
     2   1           0
     3   1           0
     4   1           2
     """)
     edges = six.StringIO("""\
     left    right   parent  child
     0       1       4       0
     0       1       4       1
     0       1       4       2
     0       1       4       3
     """)
     ts = msprime.load_text(nodes, edges, strict=False)
     t = next(ts.trees())
     # No labels
     tree = (
         "   ┃   \n"
         "┏━┳┻┳━┓\n"
         "┃ ┃ ┃ ┃\n")
     drawn = t.draw(format="unicode", node_labels={})
     self.verify_text_rendering(drawn, tree)
     # Some labels
     tree = (
         "   ┃   \n"
         "┏━┳┻┳━┓\n"
         "0 ┃ ┃ 3\n")
     drawn = t.draw(format="unicode", node_labels={0: "0", 3: "3"})
     self.verify_text_rendering(drawn, tree)
Пример #15
0
    def test_case_2(self):
        """
        Compare branch-length statistics against hand-computed values.

        A tree sequence with three samples (nodes 0, 1, 2) and eight internal
        nodes is loaded from text tables. The generic checks are run first,
        then pairwise diversity, mean diversity between two sample sets and
        the Y statistic are each computed three ways (dedicated helper,
        ``ts.branch_stats`` and ``branch_stats_node_iter``) and compared with
        the expected values derived from the trees drawn below.
        """
        # Here are the trees:
        # t                  |              |              |             |
        #
        # 0       --3--      |     --3--    |     --3--    |    --3--    |    --3--
        #        /  |  \     |    /  |  \   |    /     \   |   /     \   |   /     \
        # 1     4   |   5    |   4   |   5  |   4       5  |  4       5  |  4       5
        #       |\ / \ /|    |   |\   \     |   |\     /   |  |\     /   |  |\     /|
        # 2     | 6   7 |    |   | 6   7    |   | 6   7    |  | 6   7    |  | 6   7 |
        #       | |\ /| |    |   |  \  |    |   |  \  |    |  |  \       |  |  \    | ...
        # 3     | | 8 | |    |   |   8 |    |   |   8 |    |  |   8      |  |   8   |
        #       | |/ \| |    |   |  /  |    |   |  /  |    |  |  / \     |  |  / \  |
        # 4     | 9  10 |    |   | 9  10    |   | 9  10    |  | 9  10    |  | 9  10 |
        #       |/ \ / \|    |   |  \   \   |   |  \   \   |  |  \   \   |  |  \    |
        # 5     0   1   2    |   0   1   2  |   0   1   2  |  0   1   2  |  0   1   2
        #
        #                    |   0.0 - 0.1  |   0.1 - 0.2  |  0.2 - 0.4  |  0.4 - 0.5
        # ... continued:
        # t                  |             |             |             |
        #
        # 0         --3--    |    --3--    |    --3--    |    --3--    |    --3--
        #          /     \   |   /     \   |   /     \   |   /     \   |   /  |  \
        # 1       4       5  |  4       5  |  4       5  |  4       5  |  4   |   5
        #         |\     /|  |   \     /|  |   \     /|  |   \     /|  |     /   /|
        # 2       | 6   7 |  |    6   7 |  |    6   7 |  |    6   7 |  |    6   7 |
        #         |  \    |  |     \    |  |       /  |  |    |  /  |  |    |  /  |
        # 3  ...  |   8   |  |      8   |  |      8   |  |    | 8   |  |    | 8   |
        #         |  / \  |  |     / \  |  |     / \  |  |    |  \  |  |    |  \  |
        # 4       | 9  10 |  |    9  10 |  |    9  10 |  |    9  10 |  |    9  10 |
        #         |    /  |  |   /   /  |  |   /   /  |  |   /   /  |  |   /   /  |
        # 5       0   1   2  |  0   1   2  |  0   1   2  |  0   1   2  |  0   1   2
        #
        #         0.5 - 0.6  |  0.6 - 0.7  |  0.7 - 0.8  |  0.8 - 0.9  |  0.9 - 1.0

        # Expected values, worked out by hand. Each term looks like
        # (interval width) * (branch length) -- TODO confirm against the
        # trees drawn above.
        # divergence between 0 and 1
        true_diversity_01 = 2 * (0.6 * 4 + 0.2 * 2 + 0.2 * 5)
        # divergence between 1 and 2
        true_diversity_12 = 2 * (0.2 * 5 + 0.2 * 2 + 0.3 * 5 + 0.3 * 4)
        # divergence between 0 and 2
        true_diversity_02 = 2 * (0.2 * 5 + 0.2 * 4 + 0.3 * 5 + 0.1 * 4 +
                                 0.2 * 5)
        # mean divergence between the sets [0, 1] and [0, 2]: the average over
        # the four pairs (0,0), (0,2), (1,0), (1,2); the (0,0) pair adds 0
        true_mean_diversity = (0 + true_diversity_02 + true_diversity_01 +
                               true_diversity_12) / 4
        # Y(0;1, 2)
        true_Y = 0.2 * 4 + 0.2 * (4 + 2) + 0.2 * 4 + 0.2 * 2 + 0.2 * (5 + 1)

        # Node table: rows 0-2 are the samples (time 0), rows 3-10 are
        # internal nodes.
        nodes = six.StringIO("""\
        is_sample       time    population
        1       0.000000        0
        1       0.000000        0
        1       0.000000        0
        0       5.000000        0
        0       4.000000        0
        0       4.000000        0
        0       3.000000        0
        0       3.000000        0
        0       2.000000        0
        0       1.000000        0
        0       1.000000        0
        """)
        # Edgeset table: one row per (interval, parent) with a comma-separated
        # list of children.
        edgesets = six.StringIO("""\
        left    right   parent  children
        0.500000        1.000000        10      1
        0.000000        0.400000        10      2
        0.600000        1.000000        9       0
        0.000000        0.500000        9       1
        0.800000        1.000000        8       10
        0.200000        0.800000        8       9,10
        0.000000        0.200000        8       9
        0.700000        1.000000        7       8
        0.000000        0.200000        7       10
        0.800000        1.000000        6       9
        0.000000        0.700000        6       8
        0.400000        1.000000        5       2,7
        0.100000        0.400000        5       7
        0.600000        0.900000        4       6
        0.000000        0.600000        4       0,6
        0.900000        1.000000        3       4,5,6
        0.100000        0.900000        3       4,5
        0.000000        0.100000        3       4,5,7
        """)
        ts = msprime.load_text(nodes=nodes, edgesets=edgesets)

        # Generic consistency checks provided by the test class.
        self.check_pairwise_diversity(ts)
        self.check_pairwise_diversity_mutations(ts)
        self.check_Y_stat(ts)
        self.check_vectorization(ts)

        # divergence between 0 and 1
        A = [[0], [1]]

        # True when exactly one of x[0], x[1] is positive.
        # NOTE(review): x presumably holds, per sample set in A, a count of
        # samples below a branch -- confirm against branch_stats.
        def f(x):
            return (x[0] > 0) != (x[1] > 0)

        # branch lengths:
        self.assertAlmostEqual(branch_length_diversity(ts, [0], [1]),
                               true_diversity_01)
        self.assertAlmostEqual(ts.branch_stats(A, f), true_diversity_01)
        self.assertAlmostEqual(branch_stats_node_iter(ts, A, f),
                               true_diversity_01)

        # mean divergence between 0, 1 and 0, 2
        A = [[0, 1], [0, 2]]
        # sizes of the two sample sets; read by the f defined just below
        n = [len(a) for a in A]

        # Rebinds f for this statistic; the 4.0 equals n[0] * n[1] here.
        def f(x):
            return float(x[0] * (n[1] - x[1]) + (n[0] - x[0]) * x[1]) / 4.0

        # branch lengths:
        self.assertAlmostEqual(branch_length_diversity(ts, A[0], A[1]),
                               true_mean_diversity)
        self.assertAlmostEqual(ts.branch_stats(A, f), true_mean_diversity)
        self.assertAlmostEqual(branch_stats_node_iter(ts, A, f),
                               true_mean_diversity)

        # Y-statistic for (0/12)
        A = [[0], [1, 2]]

        # Rebinds f again: true when x is (1, 0) or (0, 2).
        def f(x):
            return ((x[0] == 1) and (x[1] == 0)) or ((x[0] == 0) and
                                                     (x[1] == 2))

        # branch lengths:
        self.assertAlmostEqual(branch_length_Y(ts, 0, 1, 2), true_Y)
        self.assertAlmostEqual(ts.branch_stats(A, f), true_Y)
        self.assertAlmostEqual(branch_stats_node_iter(ts, A, f), true_Y)
Пример #16
0
class BasicTestCase(FtprimeTestCase):
    """
    Basic ARGrecorder operations, exercised on a three-node initial
    tree sequence shared by every test in the class.
    """
    nodes = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              1.00000000000000
    1       1           -1              0.20000000000000
    2       1           -1              0.00000000000000
    """)
    edges = six.StringIO("""\
    id      left            right           parent  child
    0       0.00000000      1.00000000      0       1
    1       0.00000000      1.00000000      0       2
    """)
    init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
    # Input ids 0 and 1 correspond to nodes 1 and 2 of init_ts.
    init_map = {0: 1, 1: 2}

    def test_init(self):
        """A fresh recorder mirrors the initial times, ids and edge count."""
        recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        for input_id, node_id in self.init_map.items():
            self.assertEqual(recorder.tables.nodes.time[node_id],
                             self.init_ts.node(node_id).time)
            self.assertEqual(recorder.node_ids[input_id], node_id)
            self.assertEqual(recorder.tables.edges.num_rows,
                             self.init_ts.num_edges)

    def test_add_individual(self):
        """Adding an individual appends one node; a known id is rejected."""
        recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        recorder.add_individual(5, 2.0, population=2)
        self.assertEqual(recorder.tables.nodes.num_rows,
                         self.init_ts.num_nodes + 1)
        self.assertEqual(recorder.tables.nodes.num_rows, 4)
        self.assertEqual(recorder.tables.nodes.time[recorder.node_ids[5]], 2.0)
        self.assertEqual(
            recorder.tables.nodes.population[recorder.node_ids[5]], 2)
        with self.assertRaises(ValueError):
            recorder.add_individual(1, 1.5)

    def test_add_record(self):
        """Adding records grows the edge table but not the node table."""
        recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        for new_id in (4, 5):
            recorder.add_individual(new_id, 2.0, population=2)
        expected_nodes = self.init_ts.num_nodes + 2
        # adding edges should not change number of nodes
        self.assertEqual(recorder.tables.nodes.num_rows, expected_nodes)
        recorder.add_record(0.0, 0.5, 0, (4, 5))
        recorder.add_record(0.5, 1.0, 0, (4, ))
        self.assertEqual(recorder.tables.nodes.num_rows, expected_nodes)
        print(recorder)
        # initial 2 + 3 added above
        self.assertEqual(recorder.tables.edges.num_rows, 5)
        self.assertEqual(recorder.tables.edges.parent[2],
                         recorder.node_ids[0])
        for row, child_id in ((2, 4), (3, 5), (4, 4)):
            self.assertEqual(recorder.tables.edges.child[row],
                             recorder.node_ids[child_id])
        # try adding record with parent who doesn't exist
        with self.assertRaises(ValueError):
            recorder.add_record(0.0, 0.5, 8, (0, 1))

    def test_update_times(self):
        """update_times gives the same, repeatable answer whenever called."""
        early = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        # check doing update_times along the way doesn't change things
        early.update_times()
        late = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        for recorder in (early, late):
            recorder.add_individual(4, 2.0, population=2)
            recorder.add_individual(5, 2.0, population=2)
            recorder.add_record(0.0, 0.5, 0, (4, 5))
            recorder.add_record(0.5, 1.0, 0, (4, ))
        early.update_times()
        late.update_times()
        self.assertArrayEqual(early.tables.nodes.time,
                              late.tables.nodes.time)
        # check update_times is idempotent
        late.update_times()
        self.assertArrayEqual(early.tables.nodes.time,
                              late.tables.nodes.time)
        # and check is right answer
        self.assertArrayEqual(early.tables.nodes.time, [3, 2.2, 2, 0, 0])

    def test_simplify(self):
        """tree_sequence and simplify -> tree_sequence must agree."""
        recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        recorder.add_individual(4, 2.0, population=2)
        recorder.add_individual(5, 2.0, population=2)
        recorder.add_record(0.0, 0.5, 0, (4, 5))
        recorder.add_record(0.5, 1.0, 0, (4, ))
        print(recorder)

        tsa = recorder.tree_sequence([4, 5])
        print("---------------- sequence a -----------")
        print(tsa.dump_tables())

        recorder.simplify([4, 5])
        tsb = recorder.tree_sequence([4, 5])
        print("---------------- sequence b -----------")
        print(tsb.dump_tables())

        self.check_trees(tsa, tsb)

    def test_simplify2(self):
        """A nonsensical sequence_length must be rejected at construction."""
        with self.assertRaises(ValueError):
            ftprime.ARGrecorder(ts=self.init_ts,
                                node_ids=self.init_map,
                                sequence_length=0.5)
Пример #17
0
# coding: utf-8
import msprime

# Load the full tables; the `with` block closes both files once they have
# been parsed (previously the handles were never closed).
with open('edges_full.txt') as edges_file, \
        open('nodes_full.txt') as nodes_file:
    ts = msprime.load_text(edges=edges_file, nodes=nodes_file)
# Letter labels for the eleven nodes of the full tree sequence.
full_node_map = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H',
                 8: 'I', 9: 'J', 10: 'K'}

print("Full trees")
for t in ts.trees():
    print(t.draw(format='unicode', node_label_text=full_node_map))


print("Simplifed trees with to J (9), K (10) with `ts.simplify()`)")
tss = ts.simplify(samples=[9, 10])
for t in tss.trees():
    print(t.draw(format='unicode'))


print("Simplifed trees from tables in fig 5C",
      "with J (0) and K(1) marked as samples.")
# Same resource handling for the already-simplified tables.
with open('edges.txt') as edges_file, open('nodes.txt') as nodes_file:
    tss2 = msprime.load_text(edges=edges_file, nodes=nodes_file)
for t in tss2.trees():
    print(t.draw(format='unicode'))

print("Raw tables from `ts.simplify()`:")
print(tss.dump_tables())
print("\n\n\n...and from the trees loaded from text tables:")
print(tss2.dump_tables())
Пример #18
0
import msprime
from VisualizeTrees import *

# Load the toy node and edge tables; the `with` block closes both files
# once they have been parsed (previously the handles were never closed).
with open("toyNodeTable.txt", "r") as nodes, \
        open("toyEdgeTable.txt", "r") as edges:
    ts = msprime.load_text(nodes=nodes, edges=edges)
img = VisualizeNodes(ts, rescaled_time=False)
# Print every tree as text, then show the image.
for t in ts.trees():
    print(t.draw(format="unicode"))
img.show()
Пример #19
0
class ExplicitTestCase(FtprimeTestCase):
    r"""
    An explicit test case.

    With `(i,j,x)->k` denoting that individual `k` inherits from `i` on `[0,x)` and from `j` on `[x,1)`:

    1. Begin with an individual `a` (and another anonymous one) at `t=0`.
    2. `(a,?,1.0)->b` and `(a,?,1.0)->c` at `t=1`
    3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
    4. `(d,e,0.7)->f` at `t=3`
    5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
    6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
    7. We sample `i`, `j` and `k`.


    Here are the trees:
    ```
    t                  |              |              |             |             |             |             |             |             |

    0       --a--      |     --a--    |     --a--    |    --a--    |    --a--    |    --a--    |    --a--    |    --a--    |    --a--    |    --a--
           /  |  \     |    /  |  \   |    /     \   |   /     \   |   /     \   |   /     \   |   /     \   |   /     \   |   /     \   |   /  |  \
    1     b   |   c    |   b   |   c  |   b       c  |  b       c  |  b       c  |  b       c  |  b       c  |  b       c  |  b       c  |  b   |   c
          |\ / \ /|    |   |\   \  |  |   |\     /|  |  |\     /|  |  |\     /   |  |\     /   |   \     /   |   \     /   |   \     /   |     /   /
    2     | d   e |    |   | d   e |  |   | d   e |  |  | d   e |  |  | d   e    |  | d   e    |    d   e    |    d   e    |    d   e    |    d   e
          | |\ /| |    |   |  \  | |  |   |  \  | |  |  |  \    |  |  |  \       |  |  \       |     \       |       /     |    |  /     |    |  /
    3     | | f | |    |   |   f | |  |   |   f | |  |  |   f   |  |  |   f      |  |   f      |      f      |      f      |    | f      |    | f
          | |/ \| |    |   |  /  | |  |   |  /  | |  |  |  / \  |  |  |  / \     |  |  / \     |     / \     |     / \     |    |  \     |    |  \
    4     | g   h |    |   | g   h |  |   | g   h |  |  | g   h |  |  | g   h    |  | g   h    |    g   h    |    g   h    |    g   h    |    g   h
          |/ \ / \|    |   |  \    |  |   |  \    |  |  |  \    |  |  |  \   \   |  |    / \   |   /   / \   |   /   / \   |   /   / \   |   /   / \
    5     i   j   k    |   i   j   k  |   i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k

                       |   0.0 - 0.1  |   0.1 - 0.2  |  0.2 - 0.4  |  0.4 - 0.5  |  0.5 - 0.6  |  0.6 - 0.7  |  0.7 - 0.8  |  0.8 - 0.9  |  0.9 - 1.0
    ```

    and a labeling of the lineages

    NOTE(review): the diagram below is currently identical to the one above;
    the lineage labels it is meant to show appear to be missing.
    ```
    t                  |              |              |             |             |             |             |             |             |

    0       --a--      |     --a--    |     --a--    |    --a--    |    --a--    |    --a--    |    --a--    |    --a--    |    --a--    |    --a--
           /  |  \     |    /  |  \   |    /     \   |   /     \   |   /     \   |   /     \   |   /     \   |   /     \   |   /     \   |   /  |  \
    1     b   |   c    |   b   |   c  |   b       c  |  b       c  |  b       c  |  b       c  |  b       c  |  b       c  |  b       c  |  b   |   c
          |\ / \ /|    |   |\   \  |  |   |\     /|  |  |\     /|  |  |\     /   |  |\     /   |   \     /   |   \     /   |   \     /   |     /   /
    2     | d   e |    |   | d   e |  |   | d   e |  |  | d   e |  |  | d   e    |  | d   e    |    d   e    |    d   e    |    d   e    |    d   e
          | |\ /| |    |   |  \  | |  |   |  \  | |  |  |  \    |  |  |  \       |  |  \       |     \       |       /     |    |  /     |    |  /
    3     | | f | |    |   |   f | |  |   |   f | |  |  |   f   |  |  |   f      |  |   f      |      f      |      f      |    | f      |    | f
          | |/ \| |    |   |  /  | |  |   |  /  | |  |  |  / \  |  |  |  / \     |  |  / \     |     / \     |     / \     |    |  \     |    |  \
    4     | g   h |    |   | g   h |  |   | g   h |  |  | g   h |  |  | g   h    |  | g   h    |    g   h    |    g   h    |    g   h    |    g   h
          |/ \ / \|    |   |  \    |  |   |  \    |  |  |  \    |  |  |  \   \   |  |    / \   |   /   / \   |   /   / \   |   /   / \   |   /   / \
    5     i   j   k    |   i   j   k  |   i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k  |  i   j   k

                       |   0.0 - 0.1  |   0.1 - 0.2  |  0.2 - 0.4  |  0.4 - 0.5  |  0.5 - 0.6  |  0.6 - 0.7  |  0.7 - 0.8  |  0.8 - 0.9  |  0.9 - 1.0
    ```
    """

    def f(self, arg, lparent, rparent, breakpoint, child, btime):
        """
        Record the birth of `child` at time `btime` in the recorder `arg`.

        `child` inherits from `lparent` on `[0, breakpoint)` and from
        `rparent` on `[breakpoint, 1)`. Individuals are given by their letter
        labels and translated through `self.ids`. A side of the breakpoint
        with zero length produces no record, and the parent label on that
        side is never looked up.
        """
        child_id = self.ids[child]
        arg.add_individual(child_id, btime)
        if breakpoint > 0.0:
            arg.add_record(0.0, breakpoint, self.ids[lparent], (child_id, ))
        if breakpoint < 1.0:
            arg.add_record(breakpoint, 1.0, self.ids[rparent], (child_id, ))

    # the correct tree sequence, unsimplified
    nodes = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              5.00000000000000  # a
    1       0           -1              4.00000000000000  # b
    2       0           -1              4.00000000000000  # c
    3       0           -1              3.00000000000000  # d
    4       0           -1              3.00000000000000  # e
    5       0           -1              2.00000000000000  # f
    6       0           -1              1.00000000000000  # g
    7       0           -1              1.00000000000000  # h
    8       1           -1              0.00000000000000  # i
    9       1           -1              0.00000000000000  # j
    10      1           -1              0.00000000000000  # k
    """)
    edges = six.StringIO("""\
    id      left            right           parent  child
    0       0.40000000      0.50000000      7       10
    0       0.50000000      1.00000000      7       9
    0       0.50000000      1.00000000      7       10
    0       0.00000000      0.50000000      6       9
    0       0.60000000      1.00000000      6       8
    0       0.00000000      0.20000000      5       6
    0       0.20000000      0.80000000      5       6
    0       0.20000000      0.80000000      5       7
    0       0.80000000      1.00000000      5       7
    0       0.00000000      0.20000000      4       7
    0       0.70000000      1.00000000      4       5
    0       0.00000000      0.70000000      3       5
    0       0.80000000      1.00000000      3       6
    0       0.00000000      0.10000000      2       10
    0       0.10000000      0.40000000      2       4
    0       0.10000000      0.40000000      2       10
    0       0.40000000      1.00000000      2       4
    0       0.00000000      0.60000000      1       3
    0       0.00000000      0.60000000      1       8
    0       0.60000000      0.90000000      1       3
    0       0.00000000      0.10000000      0       1
    0       0.00000000      0.10000000      0       2
    0       0.00000000      0.10000000      0       4
    0       0.10000000      0.90000000      0       1
    0       0.10000000      0.90000000      0       2
    0       0.90000000      1.00000000      0       1
    0       0.90000000      1.00000000      0       2
    0       0.90000000      1.00000000      0       3
    """)
    true_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
    true_tss = true_ts.simplify()

    # letter label -> input id (position in the alphabetically ordered list)
    ids = {
        label: input_id for input_id, label in enumerate(
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'])
    }
    # birth time of each individual, indexed by input id
    true_times = [0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 5]
    sample_ids = ('i', 'j', 'k')
    sample_input_ids = [8, 9, 10]

    def _initial_arg(self):
        # Build a recorder seeded with a tree sequence holding just a, b, c.
        nodes = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edges = six.StringIO("""\
        id      left            right           parent  child
        0       0.00000000      1.00000000      0       1
        1       0.00000000      1.00000000      0       2
        """)
        init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
        first_gen = {self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        return ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    def test_build_ts(self):
        """
        Replay the whole pedigree and compare with the known tree sequence.

        Checks the recorded node count, maximum time, node times and sample
        flags, then compares the resulting trees against `true_tss`.
        """
        # Steps 1 and 2 of the class docstring (`a` at t=0, then `b` and `c`
        # at t=1) are taken care of by the initial tree sequence.
        arg = self._initial_arg()
        # 3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        # 4. `(d,e,0.7)->f` at `t=3`
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        # 5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        # 6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        # 7. We sample `i`, `j` and `k`.
        arg.mark_samples(samples=self.sample_input_ids)
        arg.update_times()

        arg_ids = {k: arg.node_ids[self.ids[k]] for k in self.ids}
        self.assertEqual(arg.tables.nodes.num_rows, len(self.ids))
        self.assertEqual(arg.max_time, 5.0)
        for x in self.ids:
            # node times are compared against 5.0 minus the birth time
            self.assertEqual(arg.tables.nodes.time[arg_ids[x]],
                             5.0 - self.true_times[self.ids[x]])
            if x in self.sample_ids:
                self.assertEqual(arg.tables.nodes.flags[arg_ids[x]],
                                 msprime.NODE_IS_SAMPLE)
            else:
                self.assertEqual(arg.tables.nodes.flags[arg_ids[x]], 0)

        tss = arg.tree_sequence(self.sample_input_ids)

        self.check_trees(tss, self.true_tss)

    def test_node_times_stable(self):
        """Simplifying must not change the recorded times of the samples."""
        arg = self._initial_arg()
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        arg.update_times()
        # times keyed by input id, before simplification
        node_times = {
            u: arg.tables.nodes.time[arg.node_ids[u]]
            for u in arg.node_ids
        }
        print(arg)
        arg.simplify(self.sample_input_ids)
        print(arg)
        # times keyed by input id, after simplification
        new_node_times = {
            u: arg.tables.nodes.time[arg.node_ids[u]]
            for u in arg.node_ids
        }
        for u in self.sample_input_ids:
            self.assertEqual(node_times[u], new_node_times[u])

    # `unittest.skip` must be called with a reason: used bare, the test
    # function itself is passed as the reason, which on interpreters without
    # special handling for that case replaces the test with a decorator
    # instead of skipping it.
    # NOTE(review): the original gave no reason for skipping this test.
    @unittest.skip("intermediate simplify test is disabled (no reason recorded)")
    def test_intermediate_simplify(self):
        """Simplify part-way through the pedigree, then finish building it."""
        # build initial tree sequence with just a, b, c
        # NOTE(review): unlike the other tests, this edge table uses a
        # `children` column with a comma-separated list -- confirm that the
        # loader still accepts this format before re-enabling the test.
        nodes = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edges = six.StringIO("""\
        id      left            right           parent  children
        0       0.00000000      1.00000000      0       1,2
        """)
        init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)

        first_gen = {self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        # simplify
        print(arg)
        arg.simplify(samples=[self.ids[u] for u in ['b', 'c', 'e', 'f', 'g']])
        print(arg)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        print(arg)
        tss = arg.tree_sequence(self.sample_input_ids)
        self.check_trees(tss, self.true_tss)
Пример #20
0
    def test_case_1(self):
        """
        Check diversity and Y statistics on a small hand-built tree sequence.

        The tree sequence is loaded from text tables (nodes, edgesets, sites
        and mutations). The expected values are written out as explicit
        arithmetic over the three intervals (0.0, 0.2), (0.2, 0.8) and
        (0.8, 1.0) shown in the diagram below, and are compared against
        `branch_length_diversity` / `branch_length_Y`, `ts.branch_stats` and
        `branch_stats_node_iter`.
        """
        # With mutations:
        #
        # 1.0          6
        # 0.7         / \                                    5
        #            /   X                                  / \
        # 0.5       X     4                4               /   4
        #          /     / \              / \             /   X X
        # 0.4     X     X   \            X   3           X   /   \
        #        /     /     X          /   / X         /   /     \
        # 0.0   0     1       2        1   0   2       0   1       2
        #          (0.0, 0.2),        (0.2, 0.8),       (0.8, 1.0)
        #
        # Expected pairwise values: one term per interval, each weighted by
        # the interval's width.
        true_diversity_01 = 2 * (1 * (0.2 - 0) + 0.5 * (0.8 - 0.2) + 0.7 *
                                 (1.0 - 0.8))
        true_diversity_02 = 2 * (1 * (0.2 - 0) + 0.4 * (0.8 - 0.2) + 0.7 *
                                 (1.0 - 0.8))
        true_diversity_12 = 2 * (0.5 * (0.2 - 0) + 0.5 * (0.8 - 0.2) + 0.5 *
                                 (1.0 - 0.8))
        nodes = six.StringIO("""\
        id      is_sample   time
        0       1           0
        1       1           0
        2       1           0
        3       0           0.4
        4       0           0.5
        5       0           0.7
        6       0           1.0
        """)
        edgesets = six.StringIO("""\
        left    right   parent  children
        0.2     0.8     3       0,2
        0.0     0.2     4       1,2
        0.2     0.8     4       1,3
        0.8     1.0     4       1,2
        0.8     1.0     5       0,4
        0.0     0.2     6       0,4
        """)
        sites = six.StringIO("""\
        id  position    ancestral_state
        0   0.05        0
        1   0.1         0
        2   0.11        0
        3   0.15        0
        4   0.151       0
        5   0.3         0
        6   0.6         0
        7   0.9         0
        8   0.95        0
        9   0.951       0
        """)
        mutations = six.StringIO("""\
        site    node    derived_state
        0       4       1
        1       0       1
        2       2       1
        3       0       1
        4       1       1
        5       1       1
        6       2       1
        7       0       1
        8       1       1
        9       2       1
        """)
        ts = msprime.load_text(nodes=nodes,
                               edgesets=edgesets,
                               sites=sites,
                               mutations=mutations)
        # Generic checks provided by the enclosing test case class.
        self.check_pairwise_diversity(ts)
        self.check_pairwise_diversity_mutations(ts)
        self.check_Y_stat(ts)
        self.check_vectorization(ts)

        # diversity between 0 and 1
        A = [[0], [1]]

        # Returns 1.0 when exactly one of x[0], x[1] is positive, else 0.0.
        # Presumably x holds one count per sample set in A -- TODO confirm.
        def f(x):
            return float((x[0] > 0) != (x[1] > 0))

        # branch lengths:
        self.assertAlmostEqual(branch_length_diversity(ts, [0], [1]),
                               true_diversity_01)
        self.assertAlmostEqual(ts.branch_stats(A, f), true_diversity_01)
        self.assertAlmostEqual(branch_stats_node_iter(ts, A, f),
                               true_diversity_01)

        # mean diversity between [0, 1] and [0, 2]:
        # (average over the four pairs; the (0, 0) pair contributes 0)
        true_mean_diversity = (0 + true_diversity_02 + true_diversity_01 +
                               true_diversity_12) / 4
        A = [[0, 1], [0, 2]]
        n = [len(a) for a in A]

        # Rebinds f for the two-element sample sets; n holds their sizes.
        def f(x):
            return float(x[0] * (n[1] - x[1]) + (n[0] - x[0]) * x[1]) / 4.0

        # branch lengths:
        self.assertAlmostEqual(branch_length_diversity(ts, A[0], A[1]),
                               true_mean_diversity)
        self.assertAlmostEqual(ts.branch_stats(A, f), true_mean_diversity)
        self.assertAlmostEqual(branch_stats_node_iter(ts, A, f),
                               true_mean_diversity)

        # Y-statistic for (0/12)
        A = [[0], [1, 2]]

        # True when x is (1, 0) or (0, 2); note this returns a bool, not a
        # float like the two earlier definitions of f.
        def f(x):
            return ((x[0] == 1) and (x[1] == 0)) or ((x[0] == 0) and
                                                     (x[1] == 2))

        # branch lengths:
        true_Y = 0.2 * (1 + 0.5) + 0.6 * (0.4) + 0.2 * (0.7 + 0.2)
        self.assertAlmostEqual(branch_length_Y(ts, 0, 1, 2), true_Y)
        self.assertAlmostEqual(ts.branch_stats(A, f), true_Y)
        self.assertAlmostEqual(branch_stats_node_iter(ts, A, f), true_Y)