def test_node_times_stable(self):
    # The time stored for each sample input ID must be the same before and
    # after simplify().
    #
    # Initial tree sequence: node 0 (time 1.0) is the parent of sample
    # nodes 1 and 2 (time 0.0) over [0, 1).
    node_text = six.StringIO("""\
    id is_sample population time
    0 0 -1 1.00000000000000
    1 1 -1 0.00000000000000
    2 1 -1 0.00000000000000
    """)
    edge_text = six.StringIO("""\
    id left right parent child
    0 0.00000000 1.00000000 0 1
    1 0.00000000 1.00000000 0 2
    """)
    init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                strict=False)
    # Input IDs of 'a', 'b', 'c' map onto nodes 0, 1, 2 respectively.
    first_gen = {self.ids[name]: node for node, name in enumerate('abc')}
    arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    # Each row is forwarded positionally to self.f after the recorder.
    # NOTE(review): the fields look like (parent, parent, breakpoint,
    # child, time) -- confirm against self.f.
    births = (
        ('b', 'a', 0.9, 'd', 2.0),
        ('a', 'c', 0.1, 'e', 2.0),
        ('d', 'e', 0.7, 'f', 3.0),
        ('f', 'd', 0.8, 'g', 4.0),
        ('e', 'f', 0.2, 'h', 4.0),
        ('b', 'g', 0.6, 'i', 5.0),
        ('g', 'h', 0.5, 'j', 5.0),
        ('c', 'h', 0.4, 'k', 5.0),
    )
    for birth in births:
        self.f(arg, *birth)
    arg.update_times()

    # Snapshot of input ID -> node time taken before simplification.
    times_before = {
        u: arg.nodes.time[arg.node_ids[u]] for u in arg.node_ids
    }
    print(arg)
    arg.simplify(self.sample_input_ids)
    print(arg)
    # Same lookup again: node_ids may point at different rows now.
    times_after = {
        u: arg.nodes.time[arg.node_ids[u]] for u in arg.node_ids
    }
    for sample in self.sample_input_ids:
        self.assertEqual(times_before[sample], times_after[sample])
def test_intermediate_simplify(self):
    # Simplifying part-way through recording and then continuing to record
    # must still produce trees that check_trees accepts against
    # self.true_tss.
    #
    # build initial tree sequence with just a, b, c
    nodes = six.StringIO("""\
    id is_sample population time
    0 0 -1 1.00000000000000
    1 1 -1 0.00000000000000
    2 1 -1 0.00000000000000
    """)
    # One row per (parent, child) pair under a ``child`` header: this is
    # the edge layout the other tests in this file hand to
    # msprime.load_text(edges=...). The previous ``children`` header with a
    # comma-joined "1,2" row was the legacy edgeset layout.
    edges = six.StringIO("""\
    id left right parent child
    0 0.00000000 1.00000000 0 1
    1 0.00000000 1.00000000 0 2
    """)
    init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
    first_gen = {self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
    arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    # Births recorded before the intermediate simplify.
    self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
    self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
    self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
    self.f(arg, 'f', 'd', 0.8, 'g', 4.0)

    # simplify, keeping b, c, e, f, g: the already-recorded individuals
    # that the remaining self.f calls below name as parents.
    print(arg)
    arg.simplify(samples=[self.ids[u] for u in ['b', 'c', 'e', 'f', 'g']])
    print(arg)

    # Births recorded after the intermediate simplify.
    self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
    self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
    self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
    self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
    print(arg)

    tss = arg.tree_sequence(self.sample_input_ids)
    self.check_trees(tss, self.true_tss)
def test_add_individual(self):
    # add_individual() must append exactly one node carrying the given time
    # and population, and must raise ValueError for input ID 1.
    recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
    recorder.add_individual(5, 2.0, population=2)

    # Row count, checked both relative to the initial tree sequence and as
    # an absolute number.
    self.assertEqual(recorder.nodes.num_rows, self.init_ts.num_nodes + 1)
    self.assertEqual(recorder.nodes.num_rows, 4)

    # The new node's columns hold the values that were passed in.
    new_node = recorder.node_ids[5]
    self.assertEqual(recorder.nodes.time[new_node], 2.0)
    self.assertEqual(recorder.nodes.population[new_node], 2)

    # NOTE(review): presumably rejected because input ID 1 is already
    # present in self.init_map -- confirm against the fixture.
    with self.assertRaises(ValueError):
        recorder.add_individual(1, 1.5)
def test_init(self):
    # A recorder built from init_ts/init_map must expose the same node
    # times, the same input-ID -> node-ID mapping and the same edge count.
    recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
    for input_id, node_id in self.init_map.items():
        expected_time = self.init_ts.node(node_id).time
        self.assertEqual(recorder.nodes.time[node_id], expected_time)
        self.assertEqual(recorder.node_ids[input_id], node_id)
    self.assertEqual(recorder.edges.num_rows, self.init_ts.num_edges)
def test_build_ts(self):
    # Record the full example pedigree, then check node count, max_time,
    # node times, sample flags and the resulting trees.
    #
    # Initial tree sequence holding just a, b, c (nodes 0, 1, 2).
    node_text = six.StringIO("""\
    id is_sample population time
    0 0 -1 1.00000000000000
    1 1 -1 0.00000000000000
    2 1 -1 0.00000000000000
    """)
    edge_text = six.StringIO("""\
    id left right parent child
    0 0.00000000 1.00000000 0 1
    1 0.00000000 1.00000000 0 2
    """)
    init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                strict=False)
    first_gen = {self.ids[name]: node for node, name in enumerate('abc')}
    arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    # Steps 1-2 (individual `a` at t=0, then `b` and `c` born from `a` at
    # t=1) are already encoded in init_ts, so recording starts at step 3.
    # Notation: `(x,y,r)->z` corresponds to the row (x, y, r, z, t).
    births = (
        # 3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
        ('b', 'a', 0.9, 'd', 2.0),
        ('a', 'c', 0.1, 'e', 2.0),
        # 4. `(d,e,0.7)->f` at `t=3`
        ('d', 'e', 0.7, 'f', 3.0),
        # 5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
        ('f', 'd', 0.8, 'g', 4.0),
        ('e', 'f', 0.2, 'h', 4.0),
        # 6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
        ('b', 'g', 0.6, 'i', 5.0),
        ('g', 'h', 0.5, 'j', 5.0),
        ('c', 'h', 0.4, 'k', 5.0),
    )
    for birth in births:
        self.f(arg, *birth)

    # 7. We sample `i`, `j` and `k`.
    arg.mark_samples(samples=self.sample_input_ids)
    arg.update_times()

    # Name -> node ID in the recorder, via the name -> input ID fixture.
    arg_ids = {
        name: arg.node_ids[input_id]
        for name, input_id in self.ids.items()
    }
    self.assertEqual(arg.tables.nodes.num_rows, len(self.ids))
    self.assertEqual(arg.max_time, 5.0)
    for name, input_id in self.ids.items():
        node = arg_ids[name]
        # Stored times are compared against 5.0 minus the fixture's time.
        self.assertEqual(arg.tables.nodes.time[node],
                         5.0 - self.true_times[input_id])
        expected_flags = (
            msprime.NODE_IS_SAMPLE if name in self.sample_ids else 0
        )
        self.assertEqual(arg.tables.nodes.flags[node], expected_flags)

    tss = arg.tree_sequence(self.sample_input_ids)
    self.check_trees(tss, self.true_tss)
def test_update_times(self):
    # update_times() must give the same node times whether or not it was
    # also called earlier, must be idempotent, and must match the expected
    # values.
    early = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
    # Only `early` gets an update_times() call before anything is recorded.
    early.update_times()
    late = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)

    # Record identical individuals and edges in both recorders.
    for recorder in (early, late):
        recorder.add_individual(4, 2.0, population=2)
        recorder.add_individual(5, 2.0, population=2)
        recorder.add_record(0.0, 0.5, 0, (4, 5))
        recorder.add_record(0.5, 1.0, 0, (4, ))
    early.update_times()
    late.update_times()
    self.assertArrayEqual(early.nodes.time, late.nodes.time)

    # A second call on `late` must leave its times unchanged.
    late.update_times()
    self.assertArrayEqual(early.nodes.time, late.nodes.time)

    # Finally compare against the known answer.
    expected_times = [3, 2.2, 2, 0, 0]
    self.assertArrayEqual(early.nodes.time, expected_times)
def test_simplify(self):
    # tree_sequence() alone and simplify() followed by tree_sequence() must
    # give tree sequences that check_trees treats as equal.
    sample_ids = (4, 5)
    recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
    for input_id in sample_ids:
        recorder.add_individual(input_id, 2.0, population=2)
    recorder.add_record(0.0, 0.5, 0, (4, 5))
    recorder.add_record(0.5, 1.0, 0, (4, ))
    print(recorder)

    # Sequence a: taken straight from the unsimplified recorder.
    tsa = recorder.tree_sequence(list(sample_ids))
    print("---------------- sequence a -----------")
    print(tsa.dump_tables())

    # Sequence b: taken after simplifying down to the same samples.
    recorder.simplify(list(sample_ids))
    tsb = recorder.tree_sequence(list(sample_ids))
    print("---------------- sequence b -----------")
    print(tsb.dump_tables())

    self.check_trees(tsa, tsb)
def test_add_record(self):
    # add_record() must append one edge per child without touching the
    # node table, and must raise ValueError for parent input ID 8.
    recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
    recorder.add_individual(4, 2.0, population=2)
    recorder.add_individual(5, 2.0, population=2)

    # adding edges should not change number of nodes
    expected_nodes = self.init_ts.num_nodes + 2
    self.assertEqual(recorder.nodes.num_rows, expected_nodes)
    recorder.add_record(0.0, 0.5, 0, (4, 5))
    recorder.add_record(0.5, 1.0, 0, (4, ))
    self.assertEqual(recorder.nodes.num_rows, expected_nodes)
    print(recorder)

    # initial 2 + 3 added above
    self.assertEqual(recorder.edges.num_rows, 5)
    self.assertEqual(recorder.edges.parent[2], recorder.node_ids[0])
    # (edge row, input ID of the child that row is expected to hold)
    for row, child_input_id in ((2, 4), (3, 5), (4, 4)):
        self.assertEqual(recorder.edges.child[row],
                         recorder.node_ids[child_input_id])

    # try adding record with parent who doesn't exist
    with self.assertRaises(ValueError):
        recorder.add_record(0.0, 0.5, 8, (0, 1))
if any(i < min_child_id or i >= max_child_id for i in edges_gen['child']) is True: raise RuntimeError("Bad child") assert (float(gen) == nodes['generation'].max()) if __name__ == "__main__": popsize = int(sys.argv[1]) theta = float(sys.argv[2]) nsam = int(sys.argv[3]) # sample size to take and add mutations to seed = int(sys.argv[4]) np.random.seed(seed) tracker = MockAncestryTracker() args = ftprime.ARGrecorder(node_ids=enumerate(range(2 * popsize)), ts=msprime.simulate(2 * popsize)) samples = wf(popsize, tracker, 10 * popsize, args) args.simplify(samples=range(10 * popsize * 2 * popsize, (10 * popsize + 1) * 2 * popsize)) ts = args.tree_sequence() # for x in ts.dump_tables(): # print(x) MRCAS = [t.get_time(t.get_root()) for t in ts.trees()] print("ARGrecorder MRCAS:", MRCAS) # Check that our sample IDs are as expected: if __debug__: min_sample = 10 * popsize * 2 * popsize max_sample = 10 * popsize * 2 * popsize + 2 * popsize if any(i < min_sample or i >= max_sample for i in samples) is True:
def ind_to_time(k):
    """Return the time assigned to individual index ``k``.

    Computed as ``1 + generations - floor((k - 1) / N)``, with
    ``generations`` and ``N`` read from the enclosing scope.
    """
    # NOTE(review): presumably individuals are numbered from 1 in
    # consecutive blocks of N per generation -- confirm against the input.
    return 1 + generations - math.floor((k - 1) / N)


def i2c(k, p):
    """Return the chromosome ID for individual ``k`` and ploidy index ``p``.

    ``p`` selects an entry of ``ftprime.mapa_labels``; the value returned
    by ``ftprime.ind_to_chrom`` is shifted by ``1 + 2 * nsamples``.
    """
    # individual ID to chromosome
    # "1+" is for the universal common ancestor added below
    out = 1 + 2 * nsamples + ftprime.ind_to_chrom(k, ftprime.mapa_labels[p])
    return out


# Input is of this form:
# offspringID parentID startingPloidy rec1 rec2 ....
# ... coming in *pairs*

args = ftprime.ARGrecorder()

# Add the ancestor of everyone, labeled 2 * nsamples
universal_ancestor = 2 * nsamples
args.add_individual(name=universal_ancestor,
                    time=float(1 + generations + ancestor_age))

# add initial generation: a single record over [0.0, length) makes the
# universal ancestor the parent of both chromosomes of individuals 1..N
first_gen = [i2c(k, p) for k in range(1, N + 1) for p in [0, 1]]
first_gen.sort()
# NOTE(review): this record names the first-generation chromosomes as
# children before the add_individual calls below create them -- confirm
# that ARGrecorder.add_record accepts children that are not yet recorded.
args.add_record(0.0, length, universal_ancestor, tuple(first_gen))
for k in range(1, N + 1):
    for p in [0, 1]:
        args.add_individual(i2c(k, p), ind_to_time(k))

# NOTE(review): these look like progress-logging counters for code below
# this view -- confirm.
nlines = 0
log_lines = 10000