def test_gaps(self):
    """
    Checks that SMC and SMC' simulations never yield neighbouring edgesets
    that share a parent but are separated by a non-zero distance.
    """
    def count_gaps(tree_sequence):
        # Sort by (parent, left) and count neighbouring edgesets that have
        # the same parent but do not abut.
        ordered = sorted(
            tree_sequence.edgesets(), key=lambda e: (e.parent, e.left))
        return sum(
            1 for prev, cur in zip(ordered, ordered[1:])
            if prev.right != cur.left and prev.parent == cur.parent)

    sim_args = dict(sample_size=10, recombination_rate=20, random_seed=1)
    # Run the standard model first to make sure the parameter values are
    # plausible, i.e. that gaps really do show up there.
    standard_ts = msprime.simulate(**sim_args)
    self.assertGreater(count_gaps(standard_ts), 10)  # A reasonable threshold
    # Now do the same for SMC and SMC'.
    for model in ["smc", "smc_prime"]:
        model_ts = msprime.simulate(model=model, **sim_args)
        self.assertEqual(count_gaps(model_ts), 0)
def test_gaps(self):
    """
    Checks that SMC and SMC' simulations never yield adjacent coalescence
    records that share a node but are separated by a non-zero distance.
    """
    def count_gaps(tree_sequence):
        # Count consecutive records (in the order the tree sequence
        # returns them) that have the same node but do not abut.
        records = list(tree_sequence.records())
        return sum(
            1 for prev, cur in zip(records, records[1:])
            if prev.right != cur.left and prev.node == cur.node)

    sim_args = dict(sample_size=10, recombination_rate=20, random_seed=1)
    # Run the standard model first to make sure the parameter values are
    # plausible, i.e. that gaps really do show up there.
    standard_ts = msprime.simulate(**sim_args)
    self.assertGreater(count_gaps(standard_ts), 10)  # A reasonable threshold
    # Now do the same for SMC and SMC'.
    for model in ["smc", "smc_prime"]:
        model_ts = msprime.simulate(model=model, **sim_args)
        self.assertEqual(count_gaps(model_ts), 0)
def test_simple_cases(self):
    """
    Verifies the first tree of a simulation for a range of sample sizes.
    """
    sample_sizes = list(range(2, 10)) + [11, 13, 19, 101]
    for sample_size in sample_sizes:
        first_tree = next(msprime.simulate(sample_size).trees())
        self.verify_sparse_tree(first_tree)
def test_two_populations_no_migration_one_locus(self):
    """
    Simulates two isolated populations up to a time limit, completes the
    simulation with a mass migration and checks the shape of the result.
    """
    seed = 1234
    initial_configs = [
        msprime.PopulationConfiguration(10),
        msprime.PopulationConfiguration(10)]
    ts1 = msprime.simulate(
        population_configurations=initial_configs,
        migration_matrix=np.zeros((2, 2)),
        __tmp_max_time=0.1,
        random_seed=seed)
    merge_event = msprime.MassMigration(100, 0, 1, 1.0)
    ts2 = msprime.simulate(
        population_configurations=[
            msprime.PopulationConfiguration(),
            msprime.PopulationConfiguration()],
        migration_matrix=np.zeros((2, 2)),
        from_ts=ts1,
        demographic_events=[merge_event],
        random_seed=seed)
    tree = ts2.first()
    # We should have two children at the root, and every node below
    # should be in one population.
    children_of_root = tree.children(tree.root)
    self.assertEqual(len(children_of_root), 2)
    subtree_root_by_population = {}
    for child in children_of_root:
        subtree_root_by_population[ts2.node(child).population] = child
    self.assertEqual(len(subtree_root_by_population), 2)
    for population in (0, 1):
        subtree_root = subtree_root_by_population[population]
        for u in tree.nodes(subtree_root):
            self.assertEqual(ts2.node(u).population, population)
def test_two_populations_migration(self):
    """
    Checks that simulating from a tree sequence that contains only sample
    nodes gives the same tables as a direct simulation with the same seed.
    """
    n = 10
    seed = 1234
    ts1 = msprime.simulate(
        population_configurations=[
            msprime.PopulationConfiguration(n),
            msprime.PopulationConfiguration(0)],
        migration_matrix=[[0, 1], [1, 0]],
        random_seed=seed)
    # Build the starting point by hand: two populations and n samples at
    # time zero in population 0, with no edges.
    base_tables = msprime.TableCollection(1)
    for _ in range(2):
        base_tables.populations.add_row()
    for _ in range(n):
        base_tables.nodes.add_row(
            flags=msprime.NODE_IS_SAMPLE, time=0, population=0)
    ts2 = msprime.simulate(
        from_ts=base_tables.tree_sequence(),
        start_time=0,
        population_configurations=[
            msprime.PopulationConfiguration(),
            msprime.PopulationConfiguration()],
        migration_matrix=[[0, 1], [1, 0]],
        random_seed=seed)
    # Provenance is cleared on both sides before the comparison.
    stripped = []
    for ts in (ts1, ts2):
        dumped = ts.dump_tables()
        dumped.provenances.clear()
        stripped.append(dumped)
    self.assertEqual(stripped[0], stripped[1])
def test_single_locus_max_time(self):
    """
    Completes a single-locus simulation that was stopped by the time limit.
    """
    from_ts = msprime.simulate(20, __tmp_max_time=1, random_seed=5)
    root_counts = [tree.num_roots for tree in from_ts.trees()]
    # The initial simulation must be unfinished (more than one root).
    self.assertGreater(max(root_counts), 1)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_from_single_locus_decapitated(self):
    """
    Completes a simulation starting from the output of tsutil.decapitate
    applied to a single-locus tree sequence.
    """
    full_ts = msprime.simulate(10, random_seed=5)
    half_edges = full_ts.num_edges // 2
    from_ts = tsutil.decapitate(full_ts, half_edges)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_provenance(self):
    """
    Checks that simulated tree sequences carry exactly one provenance
    record, both for a single run and for replicates.
    """
    single_ts = msprime.simulate(10)
    self.assertEqual(single_ts.num_provenances, 1)
    self.verify_provenance(single_ts.provenance(0))
    # TODO check the form of the dictionary
    replicates = msprime.simulate(10, num_replicates=10)
    for replicate_ts in replicates:
        self.assertEqual(replicate_ts.num_provenances, 1)
        self.verify_provenance(replicate_ts.provenance(0))
def test_sequence_length(self):
    """
    Completes a time-limited simulation using a sequence length of 5 for
    both the initial and the final run.
    """
    length = 5
    from_ts = msprime.simulate(
        5, __tmp_max_time=0.1, random_seed=5, length=length)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2, length=length)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_individuals(self):
    """
    Completes a time-limited simulation after individuals have been
    inserted with tsutil.insert_random_ploidy_individuals.
    """
    unfinished_ts = msprime.simulate(25, random_seed=5, __tmp_max_time=0.5)
    has_multiple_roots = any(
        tree.num_roots > 1 for tree in unfinished_ts.trees())
    self.assertTrue(has_multiple_roots)
    from_ts = tsutil.insert_random_ploidy_individuals(unfinished_ts, seed=2)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_sequence_length_mismatch(self):
    """
    Checks that a ValueError is raised when the requested length, or the
    length of the recombination map, differs from that of the base tree
    sequence (which has length 5).
    """
    base_ts = self.get_example_base(length=5)
    common_args = dict(from_ts=base_ts, start_time=100)
    for bad_length in (1, 4.99, 5.01, 100):
        # Mismatch given directly through the length argument.
        self.assertRaises(
            ValueError, msprime.simulate, length=bad_length, **common_args)
        # Mismatch given through a uniform recombination map.
        mismatched_map = msprime.RecombinationMap.uniform_map(bad_length, 1)
        self.assertRaises(
            ValueError, msprime.simulate,
            recombination_map=mismatched_map, **common_args)
def test_decapitated_mutations(self):
    """
    Completes a simulation starting from the output of tsutil.decapitate
    applied to a tree sequence that carries mutations.
    """
    mutated_ts = msprime.simulate(10, random_seed=5, mutation_rate=10)
    half_edges = mutated_ts.num_edges // 2
    from_ts = tsutil.decapitate(mutated_ts, half_edges)
    # The starting point must still contain some mutations.
    self.assertGreater(from_ts.num_mutations, 0)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_from_multi_locus_old_recombination(self):
    """
    Completes a simulation starting from the output of tsutil.decapitate
    applied to a multi-tree sequence, using the same recombination rate.
    """
    recombination_rate = 2
    multi_tree_ts = msprime.simulate(
        10, recombination_rate=recombination_rate, random_seed=5)
    self.assertGreater(multi_tree_ts.num_trees, 1)
    half_edges = multi_tree_ts.num_edges // 2
    from_ts = tsutil.decapitate(multi_tree_ts, half_edges)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2,
        recombination_rate=recombination_rate)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_models(self):
    """
    Checks that exponential growth at rate 0 and the constant size model
    produce identical first trees for the same seed.
    """
    for n in [2, 10, 100]:
        configs = [
            msprime.PopulationConfiguration(n, growth_rate=0),
            msprime.PopulationConfiguration(n, initial_size=1.0)]
        parent_dicts = []
        for config in configs:
            ts = msprime.simulate(
                random_seed=1, population_configurations=[config])
            first_tree = next(ts.trees())
            parent_dicts.append(first_tree.parent_dict)
        zero_growth, constant_size = parent_dicts
        self.assertEqual(zero_growth, constant_size)
def test_population_mismatch(self):
    """
    Checks that a ValueError is raised when fewer population
    configurations are supplied than the base tree sequence has
    populations.

    For each base with N populations (N = 1..4) every k in 1..N-1 is
    tried, so N = 1 exercises nothing.
    """
    for N in range(1, 5):
        base_ts = self.get_example_base(num_populations=N)
        start_time = max(node.time for node in base_ts.nodes())
        # range(1, N) never yields N, so every k here is a mismatch; the
        # former ``if k != N`` guard was always true and has been dropped.
        for k in range(1, N):
            with self.assertRaises(ValueError):
                msprime.simulate(
                    from_ts=base_ts, start_time=start_time,
                    population_configurations=[
                        msprime.PopulationConfiguration() for _ in range(k)])
def test_zero_recombination_rate(self):
    """
    Checks that completing a simulation made with recombination fails
    with an InputError when the recombination rate is then set to zero.
    """
    from_ts = msprime.simulate(
        sample_size=4, __tmp_max_time=1, random_seed=5, recombination_rate=1)
    root_counts = [tree.num_roots for tree in from_ts.trees()]
    self.assertGreater(max(root_counts), 1)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2,
        recombination_rate=0)
    with self.assertRaises(_msprime.InputError):
        # Raises:
        # The specified recombination map is too coarse to translate...
        msprime.simulate(**completion_args)
def test_simple_cases(self):
    """
    Verifies the trees of simulations over small grids of the three
    positional arguments passed to msprime.simulate.
    """
    # Each case is the (n, m, r) triple passed positionally to simulate.
    cases = [(n, 1, 0.1) for n in range(2, 10)]
    cases += [(4, m, 0.1) for m in range(1, 10)]
    cases += [(4, 100, r) for r in [0.001, 0.01]]
    for n, m, r in cases:
        self.verify_sparse_trees(msprime.simulate(n, m, r))
def test_small_num_loci(self):
    """
    Completes time-limited simulations that use uniform recombination
    maps with a small number of loci.
    """
    for num_loci in [1, 10, 16, 100]:
        recombination_map = msprime.RecombinationMap.uniform_map(
            10, 1, num_loci=num_loci)
        from_ts = msprime.simulate(
            sample_size=4, __tmp_max_time=1, random_seed=5,
            recombination_map=recombination_map)
        root_counts = [tree.num_roots for tree in from_ts.trees()]
        self.assertGreater(max(root_counts), 1)
        start_time = from_ts.tables.nodes.time.max()
        # The same map object is reused for the completing simulation.
        final_ts = msprime.simulate(
            from_ts=from_ts, start_time=start_time, random_seed=2,
            recombination_map=recombination_map)
        self.verify_from_tables(from_ts, final_ts, start_time)
        self.verify_simulation_completed(final_ts)
def test_fine_to_coarse_map(self):
    """
    Completes a simulation with a 10-locus uniform map after an initial
    run with recombination_rate=1, and checks the result is unfinished.
    """
    from_ts = msprime.simulate(
        sample_size=4, __tmp_max_time=1, random_seed=5, recombination_rate=1)
    initial_root_counts = [tree.num_roots for tree in from_ts.trees()]
    self.assertGreater(max(initial_root_counts), 1)
    self.assertGreater(from_ts.num_edges, 2)
    start_time = from_ts.tables.nodes.time.max()
    coarse_map = msprime.RecombinationMap.uniform_map(1, 1, num_loci=10)
    final_ts = msprime.simulate(
        from_ts=from_ts, start_time=start_time, random_seed=2,
        recombination_map=coarse_map)
    final_root_counts = [tree.num_roots for tree in final_ts.trees()]
    # We don't correctly finish the tree sequence when we get mapping problems.
    # This must be documented as a "known issue".
    self.assertGreater(max(final_root_counts), 1)
def test_nonuniform_recombination_map(self):
    """
    Completes a time-limited simulation that uses a non-uniform
    recombination map for both the initial and the final run.
    """
    recomb_map = msprime.RecombinationMap(
        [0, 0.25, 0.5, 0.75, 1],  # positions
        [1, 2, 1, 3, 0],  # rates
        100)  # num_loci
    from_ts = msprime.simulate(
        5, __tmp_max_time=0.1, random_seed=5, recombination_map=recomb_map)
    start_time = from_ts.tables.nodes.time.max()
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2,
        recombination_map=recomb_map)
    final_ts = msprime.simulate(**completion_args)
    self.verify_from_tables(from_ts, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_from_subclass(self):
    """
    Checks that an instance of a TreeSequence subclass can be used as the
    from_ts argument and that the simulation is completed from it.
    """
    from_ts = msprime.simulate(20, __tmp_max_time=1, random_seed=5)

    class MockTreeSequence(msprime.TreeSequence):
        pass

    subclass_instance = MockTreeSequence(from_ts.ll_tree_sequence)
    # assertTrue(type(x), cls) would treat cls as the failure message and
    # always pass; assertIs actually checks the concrete type.
    self.assertIs(type(subclass_instance), MockTreeSequence)
    self.assertIsInstance(subclass_instance, msprime.TreeSequence)
    self.assertGreater(
        max(tree.num_roots for tree in subclass_instance.trees()), 1)
    start_time = subclass_instance.tables.nodes.time.max()
    final_ts = msprime.simulate(
        from_ts=subclass_instance, start_time=start_time, random_seed=2)
    self.verify_from_tables(subclass_instance, final_ts, start_time)
    self.verify_simulation_completed(final_ts)
def test_single_population_id_null(self):
    """
    Checks that an InputError is raised when any single node of the base
    tree sequence has its population set to -1.
    """
    base_ts = self.get_example_base()
    tables = base_ts.dump_tables()
    node_table = tables.nodes
    for node_id in range(base_ts.num_nodes):
        # All populations are zero except the one node under test.
        populations = np.zeros_like(node_table.population)
        populations[node_id] = -1
        node_table.set_columns(
            flags=node_table.flags,
            population=populations,
            time=node_table.time)
        with self.assertRaises(_msprime.InputError):
            msprime.simulate(
                from_ts=tables.tree_sequence(),
                start_time=node_table.time.max())
def test_low_recombination_rate_interval(self):
    """
    Checks that completing a simulation fails with an InputError when the
    recombination map has an interval with a very low rate.
    """
    from_ts = msprime.simulate(
        sample_size=4, __tmp_max_time=1, random_seed=16,
        recombination_rate=1, length=10)
    root_counts = [tree.num_roots for tree in from_ts.trees()]
    self.assertGreater(max(root_counts), 1)
    start_time = from_ts.tables.nodes.time.max()
    recombination_map = msprime.RecombinationMap(
        positions=[0, 3, 7, 10], rates=[1, 1e-6, 1, 0], num_loci=100)
    completion_args = dict(
        from_ts=from_ts, start_time=start_time, random_seed=2,
        recombination_map=recombination_map)
    with self.assertRaises(_msprime.InputError):
        # Raises:
        # The specified recombination map is too coarse to translate...
        msprime.simulate(**completion_args)
def main():
    """
    Simulates 100k genomes, saves the tree sequence to
    ``large-example.hdf5`` and writes the haplotypes to
    ``large-example-haplotypes.txt``, printing the time taken by the
    simulation and by the haplotype export.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for interval timing (it reports elapsed wall-clock time).
    before = time.perf_counter()
    # Run the actual simulations
    tree_sequence = msprime.simulate(
        sample_size=10 ** 5,
        num_loci=100 * 10 ** 6,
        scaled_recombination_rate=0.001,
        scaled_mutation_rate=0.001,
        max_memory="5G",
        random_seed=1,  # Arbitrary - make this reproducible.
    )
    duration = time.perf_counter() - before
    print("Simulated 100k genomes in {0:.3f} seconds.".format(duration))

    # Write the results to file, which is small and can be quickly reloaded
    # to avoid the cost of re-running the simulation. We can reload the
    # file in a few seconds using msprime.load(filename).
    tree_sequence.dump("large-example.hdf5")

    # Now write the haplotypes to a file.
    # WARNING! This takes a lot of memory (>100G), so make sure you don't
    # crash your server. This memory requirement will be drastically reduced
    # in future versions.
    before = time.perf_counter()
    with open("large-example-haplotypes.txt", "w") as f:
        for h in tree_sequence.haplotypes():
            print(h, file=f)
    duration = time.perf_counter() - before
    print("Wrote 100k haplotypes to file in {0:.3f} seconds".format(duration))
def leaf_count_example():
    """
    Implements the algorithm to count the number of leaves under each
    node, updating the counts incrementally from the tree sequence diffs
    and checking the final state against count_leaves.
    """
    n = 20
    ts = msprime.simulate(n, 10000, scaled_recombination_rate=0.1, random_seed=1)
    table_size = ts.get_num_nodes() + 1
    # parent[u] is the current parent of u (0 means none); leaf_count[u]
    # is the number of leaves currently below u.
    parent = [0] * table_size
    leaf_count = [0] * table_size
    for leaf in range(1, n + 1):
        leaf_count[leaf] = 1
    for _, records_out, records_in in ts.diffs():
        for node, children, _time in records_out:
            # Detach the children, then remove this node's leaves from
            # the node itself and every ancestor above it.
            for child in children:
                parent[child] = 0
            leaves_lost = leaf_count[node]
            ancestor = node
            while ancestor != 0:
                leaf_count[ancestor] -= leaves_lost
                ancestor = parent[ancestor]
        for node, children, _time in records_in:
            # Attach the children, then add their leaves to this node
            # and every ancestor above it.
            leaves_gained = 0
            for child in children:
                parent[child] = node
                leaves_gained += leaf_count[child]
            ancestor = node
            while ancestor != 0:
                leaf_count[ancestor] += leaves_gained
                ancestor = parent[ancestor]
    expected = count_leaves(parent, n)
    assert expected == leaf_count
def variants_example():
    """
    Runs a small simulation with mutations and prints the index, position
    and genotypes of every variant, separated by tabs.
    """
    sim_params = dict(
        sample_size=20, Ne=1e4, length=5e3, recombination_rate=2e-8,
        mutation_rate=2e-8, random_seed=10)
    tree_sequence = msprime.simulate(**sim_params)
    num_mutations = tree_sequence.get_num_mutations()
    print("Simulated ", num_mutations, "mutations")
    for variant in tree_sequence.variants():
        fields = (variant.index, variant.position, variant.genotypes)
        print(*fields, sep="\t")
def test_individuals(self):
    """
    Verifies tables to which one individual row per sample index has been
    added, each with different flags, location and metadata.
    """
    num_individuals = 10
    ts = msprime.simulate(num_individuals, mutation_rate=1, random_seed=2)
    tables = ts.dump_tables()
    individuals = tables.individuals
    for index in range(num_individuals):
        individuals.add_row(
            flags=index,
            location=(index, index),
            metadata=b"x" * index)
    self.verify(tables)
def variable_recomb_example():
    """
    Plots the chr22 HapMap recombination rates together with the density
    of breakpoints from a simulation that uses that map, and saves the
    figure to ``_static/hapmap_chr22.svg``.
    """
    infile = "../hapmap/genetic_map_GRCh37_chr22.txt"
    # Read in the recombination map using the read_hapmap method,
    recomb_map = msprime.RecombinationMap.read_hapmap(infile)

    # Take the positions and rates from the map (dropping the first entry
    # of each) and average the rates in 500 bins.
    num_bins = 500
    positions = np.array(recomb_map.get_positions()[1:])
    rates = np.array(recomb_map.get_rates()[1:])
    binned_rates, rate_bin_edges, _ = scipy.stats.binned_statistic(
        positions, rates, bins=num_bins)
    # Only bins that have a defined (non-NaN) value are plotted.
    defined = np.logical_not(np.isnan(binned_rates))
    x = rate_bin_edges[:-1][defined]
    y = binned_rates[defined]
    fig, ax1 = pyplot.subplots(figsize=(16, 6))
    ax1.plot(x, y, color="blue")
    ax1.set_ylabel("Recombination rate")
    ax1.set_xlabel("Chromosome position")

    # Now we run the simulation for this map. We assume Ne=10^4
    # and have a sample of 100 individuals
    tree_sequence = msprime.simulate(
        sample_size=100, Ne=10**4, recombination_map=recomb_map)
    # Now plot the density of breakpoints along the chromosome
    breakpoints = np.array(list(tree_sequence.breakpoints()))
    ax2 = ax1.twinx()
    density, density_bin_edges = np.histogram(
        breakpoints, num_bins, density=True)
    ax2.plot(density_bin_edges[:-1], density, color="green")
    ax2.set_ylabel("Breakpoint density")
    ax2.set_xlim(1.5e7, 5.3e7)
    fig.savefig("_static/hapmap_chr22.svg")
def verify():
    """
    Checks that simplify() does the right thing, by comparing to the
    implementation in msprime.

    For several sample sizes, the first N samples (N = 2..9) are
    simplified with both implementations and the node times and flags and
    the edge left, right, parent and child columns are compared.
    """
    for n in [10, 100, 1000]:
        ts = msprime.simulate(n, recombination_rate=1, random_seed=1)
        nodes = ts.tables.nodes
        edges = ts.tables.edges
        print("simulated for ", n)

        for N in range(2, 10):
            sample = list(range(N))
            ts1 = simplify(sample, nodes, edges, ts.sequence_length)
            ts2 = ts.simplify(sample)

            n1 = ts1.tables.nodes
            n2 = ts2.tables.nodes
            assert np.array_equal(n1.time, n2.time)
            assert np.array_equal(n1.flags, n2.flags)
            e1 = ts1.tables.edges
            e2 = ts2.tables.edges
            assert np.array_equal(e1.left, e2.left)
            assert np.array_equal(e1.right, e2.right)
            # These two previously compared e1 with itself, which can
            # never fail; compare against the reference output instead.
            assert np.array_equal(e1.parent, e2.parent)
            assert np.array_equal(e1.child, e2.child)
def test_interleaved_migrations(self):
    """
    Simulates four samples taken at different times in four populations
    that are joined by successive mass migrations, then checks the times
    and populations of the nodes in the first tree.
    """
    t1, t2, t3 = 1.5, 10.5, 50.5
    sampling_times = [0, t1, t2, t3]
    samples = [
        msprime.Sample(0, 0),
        msprime.Sample(1, t1),
        msprime.Sample(2, t2),
        msprime.Sample(3, t3)]
    migrations = [
        msprime.MassMigration(time=t1, source=0, destination=1),
        msprime.MassMigration(time=t2, source=1, destination=2),
        msprime.MassMigration(time=t3, source=2, destination=3)]
    ts = msprime.simulate(
        Ne=1/4,
        samples=samples,
        population_configurations=[
            msprime.PopulationConfiguration(),
            msprime.PopulationConfiguration(),
            msprime.PopulationConfiguration(),
            msprime.PopulationConfiguration()],
        demographic_events=migrations)
    t = next(ts.trees())
    # Nodes 0-3 are the samples.
    for node, expected_time in enumerate(sampling_times):
        self.assertEqual(t.get_time(node), expected_time)
    # Expected population of nodes 0..6, in node order.
    for node, expected_population in enumerate([0, 1, 2, 3, 1, 2, 3]):
        self.assertEqual(t.get_population(node), expected_population)
    # Nodes 4-6 must fall between successive mass migration times.
    self.assertTrue(t1 < t.get_time(4) < t2)
    self.assertTrue(t2 < t.get_time(5) < t3)
    self.assertGreater(t.get_time(6), t3)
def test_population_mismatch_no_population_configs(self):
    """
    Checks that a ValueError is raised when the base tree sequence has
    2 to 4 populations and no population configurations are given.
    """
    for num_populations in range(2, 5):
        base_ts = self.get_example_base(num_populations=num_populations)
        node_times = [node.time for node in base_ts.nodes()]
        start_time = max(node_times)
        self.assertRaises(
            ValueError, msprime.simulate,
            from_ts=base_ts, start_time=start_time)
def test_no_mutations_with_start_time(self):
    """
    Checks that a mutation rate combined with a start_time raises a
    ValueError, while start_time=None is accepted.
    """
    self.assertRaises(
        ValueError, msprime.simulate, 10, mutation_rate=10, start_time=3)
    # But fine if we set start_time = None
    mutated_ts = msprime.simulate(
        10, mutation_rate=10, start_time=None, random_seed=1)
    self.assertGreater(mutated_ts.num_sites, 0)
def test_single_tree(self):
    """
    Runs verify on a six-sample simulation with the samples split into
    two sets of three.
    """
    single_tree_ts = msprime.simulate(6, random_seed=1)
    first_half = range(3)
    second_half = range(3, 6)
    self.verify(single_tree_ts, [first_half, second_half])
def test_jukes_cantor_n20(self):
    """
    Verifies the output of tsutil.jukes_cantor on a 20-sample simulation.
    """
    base_ts = msprime.simulate(20, random_seed=2)
    mutated_ts = tsutil.jukes_cantor(base_ts, 5, 1, seed=2)
    self.verify_jukes_cantor(mutated_ts)
def get_tree_sequence(self):
    """
    Returns a simulated tree sequence with recombination that has more
    than 10 mutations.
    """
    sim_params = dict(
        length=10, recombination_rate=1, mutation_rate=10, random_seed=3)
    tree_sequence = msprime.simulate(10, **sim_params)
    self.assertGreater(tree_sequence.get_num_mutations(), 10)
    return tree_sequence
source=0, destination=36, proportion=1) ], #t 45k: Migrate lineages 36 > 37: HG and basal Eurasians merge [ msprime.MassMigration(time=45000, source=36, destination=37, proportion=1) ] ] demog = [item for sublist in demog_list for item in sublist] demog = [item for sublist in demog_list for item in sublist] ts = msprime.simulate(Ne=10000, population_configurations=population_configurations, migration_matrix=mig_mat, mutation_rate=args.mu, recombination_rate=args.rho, length=args.length, demographic_events=demog) print("writing genotype to vcf file") with open(args.outpre + "_chr" + args.chr + ".vcf", "w") as vcf_file: ts.write_vcf(vcf_file, ploidy=2, contig_id=args.chr)
def out_of_africa(nbp,seed,samsize):
    '''
    Simulates the Gutenkunst et al. out-of-Africa model and writes the
    result to standard output as a VCF.

    :param nbp: Sequence length, passed to ``msprime.simulate`` as ``length``.
    :param seed: Random seed for the simulation.
    :param samsize: Sample size of population 2; populations 0 and 1 are
        configured with a sample size of 0.
    :return: None
    '''
    # First we set out the maximum likelihood values of the various parameters
    # given in Table 1.
    N_A = 7300
    N_B = 2100
    N_AF = 12300
    N_EU0 = 1000
    N_AS0 = 510
    # Times are provided in years, so we convert into generations.
    generation_time = 25
    T_AF = 220e3 / generation_time
    T_B = 140e3 / generation_time
    T_EU_AS = 21.2e3 / generation_time
    # We need to work out the starting (diploid) population sizes based on
    # the growth rates provided for these two populations
    r_EU = 0.004
    r_AS = 0.0055
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    # Migration rates during the various epochs.
    m_AF_B = 25e-5
    m_AF_EU = 3e-5
    m_AF_AS = 1.9e-5
    m_EU_AS = 9.6e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
    # initially.
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=N_AF),
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(
            sample_size=samsize, initial_size=N_AS, growth_rate=r_AS)
    ]
    migration_matrix = [
        [      0, m_AF_EU, m_AF_AS],
        [m_AF_EU,       0, m_EU_AS],
        [m_AF_AS, m_EU_AS,       0],
    ]
    demographic_events = [
        # CEU and CHB merge into B with rate changes at T_EU_AS
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
        # Population B merges into YRI at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0)
    ]
    # Use the demography debugger to print out the demographic history
    # that we have just described.
    # dd = msprime.DemographyDebugger(
    #     population_configurations=population_configurations,
    #     migration_matrix=migration_matrix,
    #     demographic_events=demographic_events)
    # dd.print_history()

    # Both per-base rates are divided by the same scale factor.
    scale = 100
    rho = 1e-8 / scale
    mu = 1.25e-8 / scale
    treeseq = msprime.simulate(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        length=nbp,
        recombination_rate=rho,
        mutation_rate=mu,
        random_seed=seed)
    # Write straight to stdout. Wrapping sys.stdout in a ``with`` block
    # would close it on exit and break any later output from the process.
    treeseq.write_vcf(sys.stdout, 2)
def test_many_trees_all_nodes(self):
    """
    Runs verify on a simulation with recombination, using a single set
    that holds every node ID and passing every node ID as the third
    argument.
    """
    ts = msprime.simulate(6, length=4, recombination_rate=2, random_seed=1)
    every_node = np.arange(ts.num_nodes, dtype=np.int32)
    sample_sets = [every_node]
    # A second, independent array is passed as the third argument.
    self.verify(ts, sample_sets, np.arange(ts.num_nodes, dtype=np.int32))
def test_many_trees_all_sample_sets(self):
    """
    Runs verify for every partition of the samples of a simulation that
    has more than two trees.
    """
    multi_tree_ts = msprime.simulate(6, recombination_rate=2, random_seed=1)
    self.assertGreater(multi_tree_ts.num_trees, 2)
    sample_ids = list(range(multi_tree_ts.num_samples))
    for partition in set_partitions(sample_ids):
        self.verify(multi_tree_ts, partition)
def test_single_tree_all_sample_sets(self):
    """
    Runs verify for every partition of the samples of a six-sample
    simulation without recombination.
    """
    single_tree_ts = msprime.simulate(6, random_seed=1)
    sample_ids = list(range(single_tree_ts.num_samples))
    for partition in set_partitions(sample_ids):
        self.verify(single_tree_ts, partition)
def test_single_tree_partial_samples(self):
    """
    Runs verify on a six-sample simulation using sets that cover only
    samples 0-2 and sample 3.
    """
    single_tree_ts = msprime.simulate(6, random_seed=1)
    first_set = range(3)
    second_set = range(3, 4)
    self.verify(single_tree_ts, [first_set, second_set])
def test_single_tree_all_nodes(self):
    """
    Runs verify on a ten-sample simulation, using a single set that holds
    every node ID and passing every node ID as the third argument.
    """
    ts = msprime.simulate(10, random_seed=1)
    every_node = np.arange(ts.num_nodes, dtype=np.int32)
    sample_sets = [every_node]
    # A second, independent array is passed as the third argument.
    self.verify(ts, sample_sets, np.arange(ts.num_nodes, dtype=np.int32))
def f(n, m, r):
    """
    Runs msprime.simulate with sample size n, length m and recombination
    rate r, and returns its result.
    """
    sim_args = {
        "sample_size": n,
        "length": m,
        "recombination_rate": r,
    }
    return msprime.simulate(**sim_args)
def test_simple_recombination(self):
    """
    Runs verify on a simulation with a low recombination rate that is
    stopped at end_time=0.5.
    """
    sim_args = dict(recombination_rate=0.1, random_seed=1, end_time=0.5)
    truncated_ts = msprime.simulate(10, **sim_args)
    self.verify(truncated_ts)
def run(self, ngens):
    """
    Runs the forward-in-time simulation for ``ngens`` generations and
    returns the resulting table collection.

    The initial generation is placed at time ``ngens``. It is either
    taken from an msprime simulation (when ``self.deep_history`` is set)
    or created as ``self.N`` bare nodes. Time then counts down from
    ``ngens - 1`` to 0. In each generation every individual for which
    ``self.rng.random() > self.survival`` is replaced by a new node with
    two parents drawn from the current population; the new node inherits
    ``[0, bp)`` from the first parent and ``[bp, L)`` from the second,
    where ``bp`` comes from ``self.random_breakpoint()``. Finally, the
    nodes of the last population are flagged as samples.

    :param ngens: Number of generations to simulate.
    :return: The populated ``tskit.TableCollection``.
    """
    # Sequence length: 1 unless num_loci has been set.
    L = 1
    if self.num_loci is not None:
        L = self.num_loci
    tables = tskit.TableCollection(sequence_length=L)
    tables.populations.add_row()
    if self.deep_history:
        # initial population
        init_ts = msprime.simulate(self.N, recombination_rate=1.0, length=L, random_seed=self.seed)
        init_tables = init_ts.dump_tables()
        flags = init_tables.nodes.flags
        if not self.initial_generation_samples:
            flags = np.zeros_like(init_tables.nodes.flags)
        # Shift the simulated history back by ngens so that it precedes
        # the forward generations. Note that no population column is
        # passed here.
        tables.nodes.set_columns(time=init_tables.nodes.time + ngens, flags=flags)
        tables.edges.set_columns(
            left=init_tables.edges.left,
            right=init_tables.edges.right,
            parent=init_tables.edges.parent,
            child=init_tables.edges.child,
        )
    else:
        flags = 0
        if self.initial_generation_samples:
            flags = tskit.NODE_IS_SAMPLE
        for _ in range(self.N):
            tables.nodes.add_row(flags=flags, time=ngens, population=0)

    # pop[j] is the node ID of the individual currently in slot j.
    # NOTE(review): in the deep_history branch this assumes node IDs
    # 0..N-1 of the msprime output are its samples -- confirm.
    pop = list(range(self.N))
    for t in range(ngens - 1, -1, -1):
        if self.debug:
            print("t:", t)
            print("pop:", pop)

        dead = [self.rng.random() > self.survival for k in pop]
        # sample these first so that all parents are from the previous gen
        new_parents = [(self.rng.choice(pop), self.rng.choice(pop)) for k in range(sum(dead))]
        # k indexes the next unused parent pair.
        k = 0
        if self.debug:
            print("Replacing", sum(dead), "individuals.")
        for j in range(self.N):
            if dead[j]:
                # this is: offspring ID, lparent, rparent, breakpoint
                offspring = len(tables.nodes)
                tables.nodes.add_row(time=t, population=0)
                lparent, rparent = new_parents[k]
                k += 1
                bp = self.random_breakpoint()
                if self.debug:
                    print("--->", offspring, lparent, rparent, bp)
                pop[j] = offspring
                # Zero-length edges are skipped when the breakpoint sits
                # at either end of the sequence.
                if bp > 0.0:
                    tables.edges.add_row(left=0.0, right=bp, parent=lparent, child=offspring)
                if bp < L:
                    tables.edges.add_row(left=bp, right=L, parent=rparent, child=offspring)

    if self.debug:
        print("Done! Final pop:")
        print(pop)
    # Flag the final population as samples, keeping all other flags.
    flags = tables.nodes.flags
    flags[pop] = tskit.NODE_IS_SAMPLE
    tables.nodes.set_columns(flags=flags, time=tables.nodes.time, population=tables.nodes.population)
    return tables
def test_no_recombination(self):
    """
    Runs verify on a simulation without recombination that is stopped at
    end_time=0.5.
    """
    sim_args = dict(random_seed=2, end_time=0.5)
    truncated_ts = msprime.simulate(10, **sim_args)
    self.verify(truncated_ts)
def test_many_trees_infinite_sites(self):
    """
    Runs verify on a simulation that has sites and more than two trees.
    """
    sim_args = dict(recombination_rate=2, mutation_rate=2, random_seed=1)
    mutated_ts = msprime.simulate(6, **sim_args)
    # Guard against a degenerate example before verifying.
    self.assertGreater(mutated_ts.num_sites, 0)
    self.assertGreater(mutated_ts.num_trees, 2)
    self.verify(mutated_ts)
def test_no_recombination_time_zero(self):
    """
    Runs verify on a simulation without recombination that is stopped at
    end_time=0.0.
    """
    sim_args = dict(random_seed=3, end_time=0.0)
    truncated_ts = msprime.simulate(10, **sim_args)
    self.verify(truncated_ts)
def test_no_mutations(self):
    """
    Checks that a simulation without a mutation rate has no mutations and
    yields no variants.
    """
    plain_ts = msprime.simulate(10)
    self.assertEqual(plain_ts.get_num_mutations(), 0)
    num_variants = len(list(plain_ts.variants()))
    self.assertEqual(num_variants, 0)
def test_single_tree_simulated_mutations(self):
    """
    Verifies the matrix and max distance on a single-tree simulation
    whose sites have been subsampled to self.num_test_sites.
    """
    mutated_ts = msprime.simulate(20, mutation_rate=10, random_seed=15)
    subsampled_ts = tsutil.subsample_sites(mutated_ts, self.num_test_sites)
    for check in (self.verify_matrix, self.verify_max_distance):
        check(subsampled_ts)
def test_fails_deletion_mutations(self):
    """
    Checks that consuming the haplotypes raises a TypeError once a site
    with an empty ancestral state has been added.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    tables = base_ts.tables
    tables.sites.add_row(0, "")
    with_empty_state = tables.tree_sequence()
    # haplotypes() is called outside the assertion; only consuming its
    # result is expected to raise.
    haplotypes = with_empty_state.haplotypes()
    with self.assertRaises(TypeError):
        list(haplotypes)
def test_single_tree_regular_mutations(self):
    """
    Checks that both verifications fail with a LibraryError after
    tsutil.insert_branch_mutations has been applied.
    """
    num_sites = self.num_test_sites
    base_ts = msprime.simulate(num_sites, length=num_sites)
    mutated_ts = tsutil.insert_branch_mutations(base_ts)
    # We don't support back mutations, so this should fail.
    for check in (self.verify_matrix, self.verify_max_distance):
        with self.assertRaises(_tskit.LibraryError):
            check(mutated_ts)
def test_bug_instance2(self):
    """
    Runs verify on a fixed-seed simulation with a high recombination rate
    that is stopped at end_time=1.0.
    """
    sim_args = dict(recombination_rate=10, end_time=1.0, random_seed=61)
    truncated_ts = msprime.simulate(10, **sim_args)
    self.verify(truncated_ts)
def test_mutation_generator_unsupported(self):
    """
    Checks that passing a mutation_generator to simulate raises a
    ValueError.
    """
    n = 10
    rng = msprime.RandomGenerator(1)
    mutgen = msprime.MutationGenerator(rng, 1)
    self.assertRaises(
        ValueError, msprime.simulate, n, mutation_generator=mutgen)
def test_no_sites(self):
    """
    Runs verify on a simulation that has no sites.
    """
    siteless_ts = msprime.simulate(12, random_seed=3)
    self.assertEqual(siteless_ts.num_sites, 0)
    self.verify(siteless_ts, 3, random_seed=7)
def test_start_time_less_than_zero(self):
    """
    Checks that a negative start_time raises a ValueError.
    """
    base_ts = self.get_example_base()
    self.assertRaises(
        ValueError, msprime.simulate, from_ts=base_ts, start_time=-1)
def test_large_recombination(self):
    """
    Runs verify on a 15-sample simulation with recombination rate 1.0
    that is stopped at end_time=0.25.
    """
    sim_args = dict(recombination_rate=1.0, random_seed=2, end_time=0.25)
    truncated_ts = msprime.simulate(15, **sim_args)
    self.verify(truncated_ts)
def test_start_time_less_than_base_nodes(self):
    """
    Checks that a start_time below the oldest node time of the base tree
    sequence raises an InputError.
    """
    base_ts = self.get_example_base()
    node_times = [node.time for node in base_ts.nodes()]
    max_time = max(node_times)
    too_early = [0, max_time - 1, max_time - 1e-6]
    for start_time in too_early:
        self.assertRaises(
            _msprime.InputError, msprime.simulate,
            from_ts=base_ts, start_time=start_time)
def test_single_tree_internal_reference_sets(self):
    """
    Runs verify using one singleton set per child of the root of the
    first tree, passing the samples as the third argument.
    """
    ts = msprime.simulate(10, random_seed=1)
    first_tree = ts.first()
    reference_sets = []
    for child in first_tree.children(first_tree.root):
        reference_sets.append([child])
    self.verify(ts, reference_sets, ts.samples())
def test_numpy_random_seed(self):
    """
    Checks that a numpy int64 scalar is accepted as random_seed and gives
    identical node tables on repeated runs.
    """
    seed_array = np.array([12345], dtype=np.int64)
    seed = seed_array[0]
    self.assertEqual(seed.dtype, np.int64)
    first_run, second_run = (
        msprime.simulate(10, random_seed=seed) for _ in range(2))
    self.assertEqual(first_run.tables.nodes, second_run.tables.nodes)