def test_TreeIterator_iterate_mutations(self):
    """
    Check mutation iteration over trees.

    Each mutation yielded by a tree must sit at a site whose position
    lies in that tree's half-open interval [left, right).  The number of
    mutations visited must equal the size of the site table for the
    whole genome, and the number of sites falling in the window for
    each 0.1-wide window.
    """
    all_samples = list(range(2 * self.pop.N))
    sites = np.array(self.pop.tables.sites, copy=False)
    positions = sites['position']

    def count_mutations(tree_iterator):
        # Walk every tree, validate each mutation's position against the
        # tree interval, and return how many mutations were seen.
        visited = 0
        for tree in tree_iterator:
            for mut in tree.mutations():
                self.assertTrue(positions[mut.site] >= tree.left)
                self.assertTrue(positions[mut.site] < tree.right)
                visited += 1
        return visited

    # Whole genome.
    nsites_visited = count_mutations(
        fwdpy11.TreeIterator(self.pop.tables, all_samples))
    self.assertEqual(len(self.pop.tables.sites), nsites_visited)

    # Ten consecutive windows of width 0.1.
    for start in np.arange(0., 1., 0.1):
        stop = start + 0.1
        windowed = fwdpy11.TreeIterator(self.pop.tables, all_samples,
                                        False, start, stop)
        in_window = (positions >= start) & (positions < stop)
        nsites_in_interval = np.count_nonzero(in_window)
        nsites_visited = count_mutations(windowed)
        self.assertEqual(nsites_visited, nsites_in_interval)
def test_TreeIterator(self):
    """
    Check TreeIterator construction and interval-restricted iteration.
    """
    all_samples = list(range(2 * self.pop.N))

    # The first test ensures that TreeIterator
    # simply holds a reference to the input tables,
    # rather than a (deep) copy, which would have
    # a different address
    tv = fwdpy11.TreeIterator(self.pop.tables, all_samples, True, 0, 1)
    self.assertTrue(tv.tables is self.pop.tables)

    # A start coordinate greater than the end coordinate must be
    # rejected for both values of the boolean third argument.
    for flag in (True, False):
        with self.assertRaises(ValueError):
            fwdpy11.TreeIterator(self.pop.tables, all_samples, flag, 1, 0)

    # Every tree yielded for a window must overlap that window.
    for start in np.arange(0., 1., 0.1):
        stop = start + 0.1
        for flag in (True, False):
            trees = fwdpy11.TreeIterator(self.pop.tables, all_samples,
                                         flag, start, stop)
            for tree in trees:
                starts_before_end = tree.left < stop
                ends_after_start = start < tree.right
                self.assertTrue(starts_before_end and ends_after_start)
def test_TreeIterator_iterate_sites(self):
    """
    Check site iteration over trees.

    Each site yielded by a tree must have a position inside the tree's
    half-open interval [left, right).  The number of sites visited must
    match the site table size for the whole genome, and the number of
    table positions in the window for each 0.1-wide window.
    """
    # TODO: need test of empty tree sequence
    # and tree sequence where mutations aren't
    # on every tree
    all_samples = list(range(2 * self.pop.N))

    def count_sites(tree_iterator):
        # Validate each site against its tree's interval and count them.
        visited = 0
        for tree in tree_iterator:
            for site in tree.sites():
                self.assertTrue(site.position >= tree.left)
                self.assertTrue(site.position < tree.right)
                visited += 1
        return visited

    nsites_visited = count_sites(
        fwdpy11.TreeIterator(self.pop.tables, all_samples))
    self.assertEqual(len(self.pop.tables.sites), nsites_visited)

    site_table = np.array(self.pop.tables.sites, copy=False)
    positions = site_table['position']
    for start in np.arange(0., 1., 0.1):
        stop = start + 0.1
        windowed = fwdpy11.TreeIterator(self.pop.tables, all_samples,
                                        False, start, stop)
        in_window = (positions >= start) & (positions < stop)
        nsites_in_interval = len(np.where(in_window)[0])
        nsites_visited = count_sites(windowed)
        self.assertEqual(nsites_visited, nsites_in_interval)
def test_TreeIterator(self):
    """
    Check that reversed intervals are rejected and that windowed
    iteration only yields trees overlapping the requested window.
    """
    all_samples = list(range(2 * self.pop.N))

    # begin=1, end=0 is invalid for both values of the boolean
    # third argument.
    for flag in (True, False):
        with self.assertRaises(ValueError):
            fwdpy11.TreeIterator(self.pop.tables, all_samples, flag, 1, 0)

    for start in np.arange(0., 1., 0.1):
        stop = start + 0.1
        for flag in (True, False):
            trees = fwdpy11.TreeIterator(self.pop.tables, all_samples,
                                         flag, start, stop)
            for tree in trees:
                starts_before_end = tree.left < stop
                ends_after_start = start < tree.right
                self.assertTrue(starts_before_end and ends_after_start)
def test_simplify_to_sample(self):
    """
    Simplify to a sample using fwdpy11 and
    tskit, then test that total time on output
    is the same from both sources and that
    the mutation tables contain the same
    positions after simplification.
    """
    dumped_ts = self.pop.dump_tables_to_tskit()
    samples = np.arange(0, 2 * self.pop.N, 50, dtype=np.int32)
    mspts = dumped_ts.simplify(samples=samples.tolist())
    fp11ts, idmap = fwdpy11.simplify(self.pop, samples)

    # Every edge must refer to nodes that exist in the output node table.
    num_nodes = len(fp11ts.nodes)
    for edge in fp11ts.edges:
        self.assertLess(edge.parent, num_nodes)
        self.assertLess(edge.child, num_nodes)

    # Each input sample, mapped through idmap, must carry the current
    # generation as its node time.
    for s in samples:
        self.assertEqual(fp11ts.nodes[idmap[s]].time,
                         self.pop.generation)

    # NOTE: the totals are accumulated with explicit loops on purpose.
    # The comparison below is exact, so the summation order and
    # algorithm must not change.
    tt_fwd = 0.0
    tv = fwdpy11.TreeIterator(fp11ts, list(range(len(samples))))
    for t in tv:
        tt_fwd += t.total_time(fp11ts.nodes)
    tt_tskit = 0.0
    for t in mspts.trees():
        tt_tskit += t.get_total_branch_length()
    self.assertEqual(tt_fwd, tt_tskit)

    self.assertEqual(len(fp11ts.mutations),
                     len(mspts.tables.mutations))
    fp11_pos = np.sort(np.array(
        [self.pop.mutations[i.key].pos for i in fp11ts.mutations]))
    msp_pos = np.sort(mspts.tables.sites.position)
    self.assertTrue(np.array_equal(fp11_pos, msp_pos))
def test_mutation_counts_with_indexing_suppressed_no_neutral_muts_in_genomes(
        self):
    """
    A sim w/ and w/o putting neutral variants in genomes
    should give the same mutation counts.
    """
    other_pop = copy.deepcopy(self.pop)
    other_rng = fwdpy11.GSLrng(101 * 45 * 110 * 210)  # Use same seed!!!
    self.params.prune_selected = False
    other_params = copy.deepcopy(self.params)

    fwdpy11.evolvets(other_rng, other_pop, other_params, 100,
                     suppress_table_indexing=True)
    fwdpy11.evolvets(self.rng, self.pop, self.params, 100,
                     suppress_table_indexing=True)

    trees = fwdpy11.TreeIterator(self.pop.tables,
                                 list(range(2 * self.pop.N)))
    genome_counts = _count_mutations_from_diploids(self.pop)
    for tree in trees:
        for mut in tree.mutations():
            # Have to skip neutral mutations b/c they won't
            # end up in the genome-derived counts
            if other_pop.mutations[mut.key].neutral is not False:
                continue
            self.assertEqual(genome_counts[mut.key],
                             self.pop.mcounts[mut.key])
            self.assertEqual(tree.leaf_counts(mut.node),
                             other_pop.mcounts[mut.key])
def test_dump_to_tskit(self):
    """
    Compare the tables dumped to tskit against the fwdpy11 tables:
    row counts, edge column sums, and total branch length.
    """
    # TODO: test leaf counts of mutations in msprime
    # vs fwdpy11 and cross-references with self.pop.mcounts
    dumped_ts = self.pop.dump_tables_to_tskit()
    fp11_tables = self.pop.tables

    self.assertEqual(len(dumped_ts.tables.nodes), len(fp11_tables.nodes))
    self.assertEqual(len(dumped_ts.tables.edges), len(fp11_tables.edges))
    self.assertEqual(len(dumped_ts.tables.mutations),
                     len(fp11_tables.mutations))

    # Column sums are a cheap fingerprint of the edge table contents.
    eview = np.array(fp11_tables.edges, copy=False)
    for column in ('parent', 'child', 'left', 'right'):
        self.assertEqual(eview[column].sum(),
                         getattr(dumped_ts.tables.edges, column).sum())

    # Total branch length summed over all trees must agree exactly.
    tv = fwdpy11.TreeIterator(fp11_tables, list(range(2 * self.pop.N)))
    tt_fwd = 0
    for tree in tv:
        tt_fwd += tree.total_time(self.pop.tables.nodes)
    tt_tskit = 0
    for tree in dumped_ts.trees():
        tt_tskit += tree.get_total_branch_length()
    self.assertEqual(tt_fwd, tt_tskit)
def runsim(argtuple):
    """
    Run one simulation and return a scaled sum of pairwise TMRCA values.

    ``argtuple`` is used directly as the RNG seed.  After evolving the
    population, NSAM diploids are drawn at random and, for every tree
    overlapping [1/3, 2/3), the time to the most recent common ancestor
    of each pair of sampled nodes is accumulated (times two).  The
    return value is 2 * sum / (n * (n - 1)) where n is the number of
    sampled nodes.
    """
    seed = argtuple
    rng = fwdpy11.GSLrng(seed)

    params = fwdpy11.ModelParams(
        gvalue=fwdpy11.Multiplicative(2.),
        rates=(0., U / 2., R),  # The U/2. is from their eqn. 2.
        nregions=[],
        sregions=[
            fwdpy11.ConstantS(0, 1. / 3., 1, -0.02, 1.),
            fwdpy11.ConstantS(2. / 3., 1., 1, -0.02, 1.)
        ],
        recregions=[fwdpy11.Region(0, 1. / 3., 1),
                    fwdpy11.Region(2. / 3., 1., 1)],
        demography=np.array([N] * 20 * N, dtype=np.uint32),
    )
    pop = fwdpy11.DiploidPopulation(N, GENOME_LENGTH)
    fwdpy11.evolvets(rng, pop, params, 100, suppress_table_indexing=True)

    # Pick NSAM distinct diploids and collect both of their nodes.
    rdips = np.random.choice(N, NSAM, replace=False)
    md = np.array(pop.diploid_metadata, copy=False)
    rdip_nodes = md['nodes'][rdips].flatten()
    num_sampled = len(rdip_nodes)
    node_times = np.array(pop.tables.nodes, copy=False)['time']

    # Only visit trees spanning the
    # mutation-free segment of the genome
    tv = fwdpy11.TreeIterator(pop.tables, rdip_nodes,
                              begin=1. / 3., end=2. / 3.)

    # marked[u] == 1 means u is on the path from the current first
    # node of a pair up to the root.
    marked = np.zeros(len(node_times), dtype=np.int8)
    sum_pairwise_tmrca = 0
    for tree in tv:
        for i, first in enumerate(rdip_nodes[:-1]):
            # Mark the full ancestry of the first node.
            u = first
            while u != fwdpy11.NULL_NODE:
                marked[u] = 1
                u = tree.parent(u)
            # For each later node, climb until a marked ancestor is hit:
            # that ancestor is the pair's MRCA in this tree.
            for second in rdip_nodes[i + 1:]:
                u = second
                while u != fwdpy11.NULL_NODE:
                    if marked[u] == 1:
                        sum_pairwise_tmrca += 2 * \
                            (pop.generation - node_times[u])
                        break
                    u = tree.parent(u)
            marked.fill(0)
    return 2 * sum_pairwise_tmrca / (num_sampled * (num_sampled - 1))
def test_leaf_counts_vs_mcounts(self):
    """
    For every tree, the leaf count at the node of each mutation located
    within the tree's interval must equal the population's recorded
    count for that mutation.
    """
    tv = fwdpy11.TreeIterator(self.pop.tables,
                              list(range(2 * self.pop.N)))
    mutation_rows = np.array(self.pop.tables.mutations, copy=False)
    positions = self.pop.mutations_ndarray['pos']
    for tree in tv:
        left, right = tree.left, tree.right
        for row in mutation_rows:
            # Field 0 is passed to leaf_counts as a node id and
            # field 1 indexes the mutation/count containers.
            node, key = row[0], row[1]
            if left <= positions[key] < right:
                self.assertEqual(tree.leaf_counts(node),
                                 self.pop.mcounts[key])
def process_pop(pop):
    """
    Summarise selected fixations at each ancient-sample time point.

    Fixations are kept when their fixation time is greater than
    10 * pop.N and their origin time is at most 10 * pop.N + 200.
    For every distinct ancient-sample time, the tables are simplified
    to that time's sample nodes.  Each retained fixation still present
    in the simplified site table yields one ``Datum`` holding the time
    point, position, origin time, fixation time, effect size, number of
    samples carrying it, and fitness/genetic-value summaries of
    carriers and of the whole sample.

    Returns the list of ``Datum`` records.
    """
    data = []
    nodes = np.array(pop.tables.nodes, copy=False)
    # Get the metadata for ancient samples
    amd = np.array(pop.ancient_sample_metadata, copy=False)
    # Get the ancient sample time points
    times = nodes['time'][amd['nodes'][:, 0]]
    utimes = np.unique(times)

    fpos = []
    ftimes = []
    origins = []
    for i, j in zip(pop.fixation_times, pop.fixations):
        if i > 10 * pop.N and j.g <= 10 * pop.N + 200:
            fpos.append(j.pos)
            ftimes.append((i, j.pos))
            origins.append((j.g, j.pos))
    # Order all three arrays by position so they share a common index.
    ftimes = np.array([i[0] for i in sorted(ftimes, key=lambda x: x[1])])
    origins = np.array(
        [i[0] for i in sorted(origins, key=lambda x: x[1])])
    fpos = np.array(sorted(fpos))

    for ut in utimes:
        mdidx = np.where(times == ut)[0]
        samples = amd['nodes'][mdidx].flatten()
        tables, idmap = fwdpy11.simplify_tables(pop.tables, samples)
        sites = np.array(tables.sites, copy=False)
        muts = np.array(tables.mutations, copy=False)

        # Whole-sample summaries do not depend on the tree or the
        # mutation, so compute them once per time point.
        gbar = amd['g'][mdidx].mean()
        vg = amd['g'][mdidx].var()
        wbar = amd['w'][mdidx].mean()

        # update_samples=True is needed because samples_below is
        # called on each tree.
        tv = fwdpy11.TreeIterator(tables, idmap[samples],
                                  update_samples=True)
        for t in tv:
            left, right = t.left, t.right
            in_tree = np.where((fpos >= left) & (fpos < right))[0]
            for i in in_tree:
                idx = np.where(sites['position'] == fpos[i])[0]
                if len(idx) == 0:
                    # The fixation is absent from the simplified tables.
                    continue
                mut_idx = np.where(muts['site'] == idx[0])[0]
                # Exactly one mutation row is expected at this site.
                # (The original check, len(mut_idx == 1), only tested
                # that the array was non-empty.)
                assert len(mut_idx) == 1, "Bad mutation table error"
                row = mut_idx[0]
                sbelow = t.samples_below(muts['node'][row])
                # Two nodes per individual, hence the // 2.
                individuals = np.unique(sbelow // 2)
                mg = amd['g'][mdidx[individuals]].mean()
                mw = amd['w'][mdidx[individuals]].mean()
                esize = pop.mutations[muts['key'][row]].s
                data.append(
                    Datum(ut, fpos[i], origins[i], ftimes[i], esize,
                          len(sbelow), mw, mg, wbar, gbar, vg))
    return data
def count_frequencies(pop, num_neutral):
    """
    Record allele frequencies and mean fitness at each preserved time point.

    Parameters:
        pop: the population; its fixations, mutations, mutation counts,
            tables and sample time points are read.
        num_neutral: when greater than zero, neutral mutations whose
            count equals 2 * pop.N are also treated as fixations.

    Returns:
        A tuple ``(data, pop_data)``: ``data`` is a list of
        ``AlleleFreq`` records, one per mutation per time point, and
        ``pop_data`` is a list of ``PopFitness`` records, one per
        time point.
    """
    N = pop.N
    window_start = 10*pop.N
    window_end = 10*pop.N + 200

    # (origin time, position) pairs of fixations of interest.  A set
    # makes the per-mutation membership test below constant time.
    fixations = set()
    for i, j in zip(pop.fixation_times, pop.fixations):
        if i >= window_start and j.g <= window_end:
            fixations.add((j.g, j.pos))
    if num_neutral > 0:
        # Add neutral fixations, if there are any
        # NOTE: we have no idea about fixation times
        # for neutral mutations, as they were not simulated!
        for i, j in enumerate(pop.mcounts):
            if j == 2*pop.N and pop.mutations[i].neutral is True:
                # Neutral mutations have an origin time
                # randomly assigned based on branch lengths.
                # It is just uniform along the branch.
                g = pop.mutations[i].g
                if window_start <= g <= window_end:
                    fixations.add((g, pop.mutations[i].pos))

    data = []
    pop_data = []

    # Iterate over all sample nodes.
    # False means to exclude the "alive" individuals
    # at time pop.generation
    for t, n, metadata in pop.sample_timepoints(False):
        if t % N == 0:
            print(t)
        tables, idmap = fwdpy11.simplify_tables(pop.tables, n)
        muts_visited = 0

        # Remap each individual's input nodes to output node ids so the
        # leaves carrying a mutation can be traced back to individuals.
        # Flattened, entries 2*k and 2*k + 1 belong to individual k.
        remap_nodes = np.ravel(idmap[metadata['nodes']])

        # Get the distribution of diploid fitnesses for this generation
        ws = metadata['w']
        pop_data.append(PopFitness(t, ws.mean(), ws.var()))

        # NOTE: update_samples=True is required here, because
        # samples_below is called for every mutation.
        for tree in fwdpy11.TreeIterator(tables, idmap[n],
                                         update_samples=True):
            for m in tree.mutations():
                assert m.key < len(pop.mutations)
                muts_visited += 1
                mut_node = m.node
                # NOTE: infinite sites means
                # leaf counts are the frequencies
                dac = tree.leaf_counts(mut_node)
                pos = tables.sites[m.site].position
                assert pos == pop.mutations[m.key].pos
                assert pos >= tree.left and pos < tree.right
                origin = pop.mutations[m.key].g
                fixed = int((origin, pos) in fixations)

                # Get the ws for the samples carrying the mutation
                m_samples = tree.samples_below(mut_node)
                # There are two nodes per individual, hence the //2
                ws_samples = [
                    ws[np.where(remap_nodes == samp)[0][0]//2]
                    for samp in m_samples
                ]
                w_m = np.mean(ws_samples)

                # Edge case: we skip mutations that fixed prior
                # to any of these time points
                fixed_before = dac == 2*pop.N and fixed == 0
                if not fixed_before:
                    data.append(AlleleFreq(t, pos, origin, dac, fixed,
                                           int(m.neutral),
                                           pop.mutations[m.key].label,
                                           pop.mutations[m.key].s, w_m))
        assert muts_visited == len(tables.mutations)
    return data, pop_data
def testEvolve(self):
    """
    Smoke test: evolve for one generation with the recorder, then build
    a TreeIterator over the alive nodes plus the preserved nodes.
    """
    # TODO: actually test something here :)
    fwdpy11.evolvets(self.rng, self.pop, self.params, 1, self.recorder)
    alive_nodes = list(range(2*self.pop.N))
    samples = alive_nodes + self.pop.tables.preserved_nodes
    # Nothing is asserted; the constructor simply must not raise.
    fwdpy11.TreeIterator(self.pop.tables, samples)
def test_dump_to_tskit(self):
    """
    Compare the tskit dump against the fwdpy11 population: table sizes,
    edge column sums, total branch length, and decoded individual and
    mutation metadata.
    """
    import tskit

    dumped_ts = self.pop.dump_tables_to_tskit()
    fp11_tables = self.pop.tables

    self.assertEqual(len(dumped_ts.tables.nodes), len(fp11_tables.nodes))
    self.assertEqual(len(dumped_ts.tables.edges), len(fp11_tables.edges))
    self.assertEqual(len(dumped_ts.tables.mutations),
                     len(fp11_tables.mutations))

    # Column sums are a cheap fingerprint of the edge table contents.
    eview = np.array(fp11_tables.edges, copy=False)
    for column in ('parent', 'child', 'left', 'right'):
        self.assertEqual(eview[column].sum(),
                         getattr(dumped_ts.tables.edges, column).sum())

    tv = fwdpy11.TreeIterator(fp11_tables, list(range(2 * self.pop.N)))
    tt_fwd = 0
    for tree in tv:
        tt_fwd += tree.total_time(self.pop.tables.nodes)
    tt_tskit = 0
    for tree in dumped_ts.trees():
        tt_tskit += tree.get_total_branch_length()
    self.assertEqual(tt_fwd, tt_tskit)

    individual_fields = ('g', 'w', 'e', 'label', 'parents', 'sex',
                         'deme', 'geography')

    def check_individual(record, decoded):
        # Each metadata attribute must match the decoded dict entry.
        for field in individual_fields:
            self.assertEqual(getattr(record, field), decoded[field])

    # Now, we make sure that the metadata can
    # be decoded
    # NOTE(review): eval() is applied to metadata bytes written by
    # dump_tables_to_tskit; never reuse this on untrusted input.
    md = tskit.unpack_bytes(dumped_ts.tables.individuals.metadata,
                            dumped_ts.tables.individuals.metadata_offset)
    for record, raw in zip(self.pop.diploid_metadata, md):
        check_individual(record, eval(raw))

    # Test that we can go backwards from node table to individuals
    samples = np.where(
        dumped_ts.tables.nodes.flags == tskit.NODE_IS_SAMPLE)[0]
    self.assertEqual(len(samples), 2 * self.pop.N)
    for node in samples[::2]:
        ind = node // 2
        decoded = eval(md[ind])
        check_individual(self.pop.diploid_metadata[ind], decoded)

    md = tskit.unpack_bytes(dumped_ts.tables.mutations.metadata,
                            dumped_ts.tables.mutations.metadata_offset)
    for row, site_id, raw in zip(self.pop.tables.mutations,
                                 dumped_ts.tables.mutations.site, md):
        d = eval(raw)
        self.assertEqual(row.key, d['key'])
        site = dumped_ts.tables.sites[site_id]
        m = self.pop.mutations[d['key']]
        self.assertEqual(site.position, m.pos)
        self.assertEqual(d['s'], m.s)
        self.assertEqual(d['h'], m.h)
        self.assertTrue(np.array_equal(np.array(d['esizes']), m.esizes))
        self.assertTrue(np.array_equal(
            np.array(d['heffects']), m.heffects))
        self.assertEqual(d['label'], m.label)
        self.assertEqual(d['neutral'], m.neutral)
    self.assertEqual(mcounts_comparison(self.pop, dumped_ts), True)