def test_keep_metadata(self):
    # Existing sites that carry metadata must be retained when mutating
    # again with keep=True.
    base = msprime.simulate(12, random_seed=3)
    base = msprime.mutate(base, rate=1, random_seed=1)
    self.assertGreater(base.num_sites, 2)
    # Attach random metadata so that we can be sure the original
    # mutations are the ones that are kept.
    annotated = tsutil.add_random_metadata(base)
    remutated = msprime.mutate(annotated, rate=1, random_seed=1, keep=True)
    self.verify_sites(annotated, remutated)
def test_identical_seed_alphabets(self):
    # The same seed must give the same site positions and mutation nodes
    # whether the binary or the nucleotide alphabet is used.
    seed = 2
    ts = msprime.simulate(10, random_seed=seed)
    binary = msprime.mutate(ts, rate=1, random_seed=seed)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    nucleotides = msprime.mutate(
        ts, rate=1, random_seed=seed, model=nucleotide_model)
    self.assertGreater(binary.num_sites, 0)
    self.assertGreater(binary.num_mutations, 0)
    self.assertEqual(binary.num_sites, nucleotides.num_sites)
    self.assertEqual(binary.num_mutations, nucleotides.num_mutations)
    site_pairs = zip(binary.sites(), nucleotides.sites())
    for binary_site, nucleotide_site in site_pairs:
        self.assertEqual(binary_site.position, nucleotide_site.position)
        self.assertEqual(
            binary_site.mutations[0].node, nucleotide_site.mutations[0].node)
def verify(self, ts, rate, random_seed):
    """
    Check that mutating ``ts`` with ``keep=True`` gives the existing sites
    plus the sites generated by the same call without ``keep``.
    """

    def positions(tree_sequence):
        return {site.position for site in tree_sequence.sites()}

    fresh = msprime.mutate(ts, rate=rate, random_seed=random_seed)
    self.assertGreater(fresh.num_sites, 0)
    combined = msprime.mutate(
        ts, rate=rate, random_seed=random_seed, keep=True)
    # Position collisions are assumed not to happen (very unlikely).
    self.assertEqual(ts.num_sites + fresh.num_sites, combined.num_sites)
    # Mutations are all infinite sites, so the counts must add up too.
    self.assertEqual(
        ts.num_mutations + fresh.num_mutations, combined.num_mutations)
    old = positions(ts)
    new = positions(fresh)
    both = positions(combined)
    self.assertEqual(old | new, both)
    self.verify_sites(ts, combined)
def test_same_seeds(self):
    # Mutating twice with the same seed and keep=True must not produce
    # duplicate site positions, and must retain every original site.
    ts = msprime.simulate(12, random_seed=3)
    self.assertEqual(ts.num_sites, 0)
    ts = msprime.mutate(ts, rate=1, random_seed=1)
    updated = msprime.mutate(ts, rate=1, random_seed=1, keep=True)
    # We should rejection sample away all the sites that we have already
    # generated so we just get another random sample.
    seen_positions = set()
    for updated_site in updated.sites():
        position = updated_site.position
        self.assertNotIn(position, seen_positions)
        seen_positions.add(position)
    for original_site in ts.sites():
        self.assertIn(original_site.position, seen_positions)
def main():
    """
    Simulate a two-population model with msprime and write the result as a
    VCF file and a ``.trees`` file.

    The output index is read from ``sys.argv[1]`` and used in the output
    file names (``sim<index>.vcf`` / ``sim<index>.trees``) inside the
    hard-coded output directory.
    """
    index = sys.argv[1]
    directory = '/exports/csce/eddie/biology/groups/lohselab/sims/output/msprime/current_winner/'
    #directory = '/users/s1854903/sims/scripts/HeliconiusPopHist/msprime/test/'
    num_replicates = 1
    sample_size = 5
    mig = 3.8866e-7
    seqLength = 32e3
    recr = 1.84675e-8
    Ne0 = 2.3241e6  # Cydno = ancestral
    Ne1 = 9.8922e5  # Mel_rosina = derived
    splitT = 4.8580e6
    secT = 1e5
    proportion = 0.1
    mu = 1.9e-9

    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=sample_size, initial_size=Ne0),
        msprime.PopulationConfiguration(
            sample_size=sample_size, initial_size=Ne1),
    ]

    # Demographic events: specify in the order they occur backwards in time.
    demographic_events = [
        msprime.MassMigration(
            time=secT, source=1, destination=0, proportion=proportion),
        msprime.PopulationParametersChange(
            time=splitT, initial_size=Ne0, population_id=0),
        msprime.MassMigration(
            time=splitT, source=1, destination=0, proportion=1.0),
    ]

    # Draw one scalar integer seed; the previous code passed a 1-element
    # NumPy array as ``random_seed``.
    seed = int(np.random.randint(1, 2**32 - 1, dtype=np.int64))

    replicates = msprime.simulate(
        # Use the local setting rather than a second hard-coded literal.
        num_replicates=num_replicates,
        length=seqLength,
        recombination_rate=recr,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        migration_matrix=[[0, 0], [mig, 0]],
        mutation_rate=mu,
        random_seed=seed)

    for replicate, ts in enumerate(replicates):
        # Mutations are already placed by simulate() through
        # ``mutation_rate``. The earlier extra call
        # ``msprime.mutate(ts, rate=mu, keep=True)`` discarded its return
        # value, so it never changed the output; it has been dropped.
        if num_replicates == 1:
            name = str(index)
        else:
            # Keep replicates from overwriting each other's files.
            name = '{}_{}'.format(index, replicate)
        with open(directory + 'sim{}.vcf'.format(name), 'w') as vcf_file:
            ts.write_vcf(vcf_file, ploidy=2)
        ts.dump(directory + 'sim{}.trees'.format(name))
def guess_gen_time(ts):
    """
    Estimate a generation-time divisor ``g`` by random search.

    ``ts`` is handed to ``pyslim.load``. Starting from ``g = 4``, the
    loaded tree sequence is re-mutated with msprime at rate ``1e-8 / g``
    (the SLiM mutation rate was set to 1e-8). Random steps of ``g`` in
    [-1, 1) are accepted whenever they bring the msprime mutation count
    closer to the SLiM count, until the two counts differ by at most 1%
    of the SLiM count.

    Returns the accepted value of ``g`` as a float.
    """
    t = pyslim.load(ts)
    slim_muts = t.num_mutations
    g = 4.0
    msp_muts = msprime.mutate(t, 1e-8 / g, keep=False).num_mutations
    while abs(msp_muts - slim_muts) > 0.01 * slim_muts:
        # Draw a scalar step so that the rate passed to msprime is a plain
        # number rather than a 1-element array.
        new_g = g + np.random.uniform(-1, 1)
        if new_g <= 0:
            # A non-positive divisor would give an invalid mutation rate.
            continue
        new_msp_muts = msprime.mutate(
            t, 1e-8 / new_g, keep=False).num_mutations
        if abs(new_msp_muts - slim_muts) < abs(msp_muts - slim_muts):
            msp_muts = new_msp_muts
            g = new_g
    # Previously ``g[0]`` was returned, which failed with a TypeError when
    # the initial guess was already within tolerance (g was still an int).
    return float(g)
def _recap_and_rescale(self, ts, seed, recap_epoch, contig, mutation_rate,
                       slim_frac, slim_scaling_factor):
    """
    Post-process a tree sequence produced by SLiM.

    The steps, in order, are: rescale node and migration times (and the
    SLiM generation) by ``slim_scaling_factor``, recapitate using the
    populations and migration matrix of ``recap_epoch``, simplify via
    ``_simplify_remembered``, and add neutral mutations with msprime.
    All random seeds are derived from ``seed``.
    """
    # Node times come from SLiM generation numbers, which may have been
    # divided by a scaling factor for computational tractability.
    tables = ts.dump_tables()
    tables.nodes.time *= slim_scaling_factor
    tables.migrations.time *= slim_scaling_factor
    ts = pyslim.SlimTreeSequence.load_tables(tables)
    ts.slim_generation *= slim_scaling_factor

    # The seeds are drawn in a fixed order: recapitation, SLiM-part
    # mutations, then (further down) recapitated-part mutations.
    rng = random.Random(seed)
    recap_seed = rng.randrange(1, 2**32)
    slim_part_seed = rng.randrange(1, 2**32)

    population_configurations = []
    for pop in recap_epoch.populations:
        population_configurations.append(
            msprime.PopulationConfiguration(
                initial_size=pop.start_size, growth_rate=pop.growth_rate))

    mean_rate = contig.recombination_map.mean_recombination_rate
    ts = ts.recapitate(
        recombination_rate=mean_rate,
        population_configurations=population_configurations,
        migration_matrix=recap_epoch.migration_matrix,
        random_seed=recap_seed)
    ts = self._simplify_remembered(ts)

    if slim_frac < 1:
        # Add mutations to SLiM part of trees.
        slim_part_rate = (1 - slim_frac) * mutation_rate
        slim_part = msprime.mutate(
            ts, rate=slim_part_rate, keep=True, random_seed=slim_part_seed,
            end_time=ts.slim_generation)
        ts = pyslim.SlimTreeSequence(slim_part)

    # Add mutations to recapitated part of trees.
    recap_part_seed = rng.randrange(1, 2**32)
    recap_part = msprime.mutate(
        ts, rate=mutation_rate, keep=True, random_seed=recap_part_seed,
        start_time=ts.slim_generation)
    return pyslim.SlimTreeSequence(recap_part)
def test_keep_multichar_muts(self):
    # Sites whose ancestral/derived states are longer than one character
    # must be kept intact by mutate(keep=True).
    ts = msprime.simulate(12, random_seed=3)
    ts = msprime.mutate(ts, rate=1, random_seed=1)
    self.assertGreater(ts.num_sites, 2)
    tables = ts.dump_tables()
    tables.sites.clear()
    tables.mutations.clear()
    # Rebuild the tables with state strings whose length equals the site
    # id ("A" * id ancestral, "T" * id derived).
    for site in ts.sites():
        width = site.id
        tables.sites.add_row(
            position=site.position, ancestral_state="A" * width)
        for mutation in site.mutations:
            tables.mutations.add_row(
                site=site.id, node=mutation.node, derived_state="T" * width)
    original = tables.tree_sequence()
    updated = msprime.mutate(original, rate=1, random_seed=1, keep=True)
    self.verify_sites(original, updated)
def test_identical_seed_alphabets(self):
    # Binary and nucleotide alphabets generated from the same seed must
    # agree on where the mutations fall.
    ts = msprime.simulate(10, random_seed=2)
    binary = msprime.mutate(ts, rate=1, random_seed=2)
    model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    nucleotides = msprime.mutate(ts, rate=1, random_seed=2, model=model)
    self.assertGreater(binary.num_sites, 0)
    self.assertGreater(binary.num_mutations, 0)
    self.assertEqual(binary.num_sites, nucleotides.num_sites)
    self.assertEqual(binary.num_mutations, nucleotides.num_mutations)
    for b_site, n_site in zip(binary.sites(), nucleotides.sites()):
        self.assertEqual(b_site.position, n_site.position)
        b_node = b_site.mutations[0].node
        n_node = n_site.mutations[0].node
        self.assertEqual(b_node, n_node)
def test_current_ts(self):
    # Parsing the provenance record written by mutate() must give back
    # the "mutate" command and the input tree sequence.
    source = msprime.simulate(5, random_seed=1)
    mutated = msprime.mutate(source)
    record = mutated.provenance(1)
    command, prov = msprime.provenance.parse_provenance(record, source)
    assert command == "mutate"
    assert prov["tree_sequence"] == source
def test_infinite_sites_acgt_n2(self):
    # Two samples, nucleotide infinite-sites model.
    genealogy = msprime.simulate(2, random_seed=1)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    mutated = msprime.mutate(
        genealogy, rate=3, model=nucleotide_model, random_seed=1)
    self.verify(mutated)
def test_alphabet_binary(self):
    # An explicitly requested BINARY alphabet is checked with
    # verify_binary_alphabet.
    ts = msprime.simulate(10, random_seed=2)
    binary_model = msprime.InfiniteSites(msprime.BINARY)
    mutated = msprime.mutate(ts, rate=1, random_seed=2, model=binary_model)
    self.verify_binary_alphabet(mutated)
def test_keep_mutation_parent(self):
    # Start from a tree sequence populated by tsutil.insert_branch_mutations
    # and check that keep=True adds sites while retaining the existing ones.
    base = msprime.simulate(12, recombination_rate=3, random_seed=3)
    base = tsutil.insert_branch_mutations(base)
    self.assertGreater(base.num_sites, 2)
    extended = msprime.mutate(base, rate=1, random_seed=1, keep=True)
    self.assertGreater(extended.num_sites, base.num_sites)
    self.verify_sites(base, extended)
def go(self):
    """
    A wrapper for the admixture simulation.

    Runs the SLiM stage, loads its output, optionally subsamples the
    present-day populations, recapitates with msprime, overlays neutral
    mutations and, when ``self.out_file`` is set, dumps the result there.

    Returns the final ``pyslim.SlimTreeSequence``.
    """
    print(self.slim_out)
    print('Simulating recent history with SLiM...')
    self.simulate_recent_history()
    if not os.path.isfile(self.slim_out):
        # NOTE(review): StringError is not a Python builtin; presumably it
        # is defined elsewhere in this project -- confirm.
        # The message is built from adjacent literals; the former
        # backslash continuation inside the string embedded the source
        # indentation in the text shown to the user.
        raise StringError(
            "The supplied SLiM outfile does not match the one specified "
            "in the script.")
    ts = tskit.load(self.slim_out)
    if self.need_to_subsample:
        print('Taking samples from present day populations...')
        ts = TreeSequenceToSample(
            ts,
            populations_to_sample_from=self.populations,
            sample_sizes=self.sample_sizes)
        ts = ts.subsample()
    ts = pyslim.SlimTreeSequence.load_tables(ts.tables)
    print('Simulating ancient history with msprime...')
    ts = ts.recapitate(
        recombination_rate=self.ancient_recombination_rate,
        population_configurations=self.ancient_population_configurations,
        demographic_events=self.ancient_demographic_events,
        keep_first_generation=True,  # needed to get local ancestors
    )
    print('Adding variation...')
    ts = pyslim.SlimTreeSequence(
        msprime.mutate(ts, rate=self.neutral_mutation_rate, keep=True))
    if self.out_file is not None:
        ts.dump(self.out_file)
    return ts
def test_keep(self):
    # The ``keep`` argument must be recorded in the last provenance record.
    ts = msprime.simulate(10, random_seed=1)
    for keep in (True, False):
        mutated = msprime.mutate(ts, rate=1, keep=keep)
        last_provenance = mutated.provenance(mutated.num_provenances - 1)
        record = json.loads(last_provenance.record)
        parameters = record["parameters"]
        self.assertEqual(parameters["command"], "mutate")
        self.assertEqual(parameters["keep"], keep)
def test_current_ts(self):
    # Parsing the provenance record written by mutate() must give back
    # the "mutate" command and the input tree sequence.
    ts1 = msprime.simulate(5, random_seed=1)
    ts2 = msprime.mutate(ts1)
    command, prov = msprime.provenance.parse_provenance(
        ts2.provenance(1), ts1)
    # assertEqual replaces the assertEquals alias, which was deprecated
    # and then removed from unittest in Python 3.12.
    self.assertEqual(command, "mutate")
    self.assertEqual(prov["tree_sequence"], ts1)
def test_simple_nucleotide(self):
    # Run the keep checks on a tree sequence that already carries
    # nucleotide mutations.
    genealogy = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(
        genealogy, rate=1, random_seed=2, model=nucleotide_model)
    self.assertGreater(ts.num_sites, 0)
    self.verify(ts, 2, random_seed=3)
def benchmark_on_trees():
    """
    Time msprime.mutate on every ``*.trees`` file under ``tmp/mutations``
    (the files generated by the generate_trees command).

    CPU information is written to ``data/mutations_benchmark_cpu.txt`` and
    the timing table to ``data/mutations_perf.csv``.
    """
    cpu = cpuinfo.get_cpu_info()
    with open("data/mutations_benchmark_cpu.txt", "w") as cpu_file:
        for key, value in cpu.items():
            print(key, "\t", value, file=cpu_file)

    rows = []
    tree_files = pathlib.Path("tmp/mutations").glob("*.trees")
    for path in tree_files:
        ts = tskit.load(path)
        for rate in (1e-7, 1e-8, 1e-9):
            durations = []
            mutation_counts = []
            # Ten repetitions per (file, rate) combination.
            for _ in range(10):
                started = time.perf_counter()
                mutated = msprime.mutate(ts, rate=rate)
                durations.append(time.perf_counter() - started)
                mutation_counts.append(mutated.num_mutations)
            row = {
                "n": ts.num_samples,
                "L": ts.sequence_length,
                "time": np.mean(durations),
                "rate": rate,
                "num_mutations": np.mean(mutation_counts),
                "num_trees": ts.num_trees,
                "num_edges": ts.num_edges,
            }
            rows.append(row)
            # Rewrite the CSV after every measurement and echo the newest
            # row to stdout.
            df = pd.DataFrame(rows)
            print(rows[-1])
            df.to_csv("data/mutations_perf.csv")
def produce_data(queue, thread_id, sample_size, time_midpoints,
                 lower_recombination_rate, upper_recombination_rate,
                 lower_mutation_rate, upper_mutation_rate,
                 mutations_per_tree):
    """
    Endlessly generate training examples and put them on ``queue``.

    Each iteration draws population sizes from ``brownian_motion``,
    simulates a tree sequence under those sizes, and then overlays
    mutations ``mutations_per_tree`` times. For every mutated tree
    sequence a ``(tdat, pop_sizes)`` tuple is queued, where ``tdat`` holds
    the normalised allele frequencies and normalised site positions as
    two columns.

    ``thread_id`` seeds ``numpy.random``. Recombination and mutation rates
    are drawn log-uniformly between the given lower and upper bounds.
    This function never returns.
    """
    npr.seed(thread_id)
    # The log-bounds do not change between iterations.
    log_recomb_bounds = (np.log10(lower_recombination_rate),
                         np.log10(upper_recombination_rate))
    log_mutation_bounds = (np.log10(lower_mutation_rate),
                           np.log10(upper_mutation_rate))
    while True:
        pop_sizes = np.exp(brownian_motion(0.0, time_midpoints, 1))
        demo_events = [
            msprime.PopulationParametersChange(t, size)
            for t, size in zip(time_midpoints, pop_sizes)
        ]
        recombination_rate = 10**npr.uniform(*log_recomb_bounds)
        # Pass the size changes to the simulation. They were previously
        # built but never used, so every tree sequence came from a
        # constant-size population while ``pop_sizes`` was still queued
        # as its label.
        # NOTE(review): assumes time_midpoints is in increasing order and
        # each ``size`` is a scalar -- confirm against brownian_motion.
        trees = msprime.simulate(sample_size=sample_size,
                                 Ne=1,
                                 length=chromosome_length,
                                 recombination_rate=recombination_rate,
                                 demographic_events=demo_events)
        for _ in range(mutations_per_tree):
            mutation_rate = 10**npr.uniform(*log_mutation_bounds)
            mut_trees = msprime.mutate(trees, rate=mutation_rate)
            gens = mut_trees.genotype_matrix()
            afs = gens.sum(1) / sample_size
            norm_afs = (afs - af_mean) / af_std
            positions = np.array(
                [var.site.position for var in mut_trees.variants()])
            norm_positions = (positions - position_mean) / position_std
            tdat = np.transpose(np.vstack((norm_afs, norm_positions)))
            queue.put((tdat, pop_sizes))
def slimcoal(self):
    """
    Overlay mutations (rate 1e-9, keeping existing ones) on ``self.ts``,
    store the result in ``self.tscoal`` and print a short summary.
    """
    mutated = msprime.mutate(self.ts, rate=1e-9, keep=True)
    self.tscoal = pyslim.SlimTreeSequence(mutated)
    summary = (
        f"The tree sequence now has {self.tscoal.num_mutations} mutations, "
        f"and mean pairwise nucleotide diversity is {self.tscoal.diversity()}."
    )
    print(summary)
def test_keep(self):
    # Both values of ``keep`` must round-trip through the provenance record.
    ts = msprime.simulate(10, random_seed=1)
    for keep_flag in [True, False]:
        mutated = msprime.mutate(ts, rate=1, keep=keep_flag)
        newest = mutated.num_provenances - 1
        params = json.loads(mutated.provenance(newest).record)["parameters"]
        self.assertEqual(params["command"], "mutate")
        self.assertEqual(params["keep"], keep_flag)
def test_simple_nucleotide(self):
    # Nucleotide mutations are laid down first, then the keep checks run.
    base = msprime.simulate(10, random_seed=2)
    model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(base, rate=1, random_seed=2, model=model)
    self.assertGreater(ts.num_sites, 0)
    self.verify(ts, 2, random_seed=3)
def test_mutation_rate(self):
    # The rate passed positionally must appear in the last provenance
    # record, together with a non-negative random seed.
    ts = msprime.simulate(10, random_seed=1)
    for mutation_rate in (0, 1, 1e-5):
        mutated = msprime.mutate(ts, mutation_rate)
        last_provenance = mutated.provenance(mutated.num_provenances - 1)
        parameters = json.loads(last_provenance.record)["parameters"]
        self.assertEqual(parameters["command"], "mutate")
        self.assertEqual(parameters["rate"], mutation_rate)
        self.assertTrue(parameters["random_seed"] >= 0)
def test_mutate_model(self):
    # A model instance passed to mutate() is recorded by class name.
    ts = msprime.simulate(5, random_seed=1)
    ts = msprime.mutate(ts, model=msprime.JukesCantor())
    record = ts.provenance(1).record
    decoded = self.decode(record)
    self.assertEqual(decoded.schema_version, "1.0.0")
    params = decoded.parameters
    self.assertEqual(params.command, "mutate")
    self.assertEqual(
        params.model["__class__"], "msprime.mutations.JukesCantor")
def test_default_seeds(self):
    # Ten calls without an explicit seed must record ten distinct seeds.
    ts = msprime.simulate(20, random_seed=2)
    recorded_seeds = []
    for _ in range(10):
        mutated = msprime.mutate(ts, 0)
        last_provenance = mutated.provenance(mutated.num_provenances - 1)
        parameters = json.loads(last_provenance.record)["parameters"]
        recorded_seeds.append(parameters["random_seed"])
    distinct_seeds = set(recorded_seeds)
    self.assertEqual(len(recorded_seeds), len(distinct_seeds))
def test_seed(self):
    # An explicit random_seed must be written to the provenance verbatim.
    ts = msprime.simulate(10, random_seed=1)
    for seed in range(1, 10):
        mutated = msprime.mutate(ts, rate=1, random_seed=seed)
        last_provenance = mutated.provenance(mutated.num_provenances - 1)
        parameters = json.loads(last_provenance.record)["parameters"]
        self.assertEqual(parameters["command"], "mutate")
        self.assertEqual(parameters["rate"], 1)
        self.assertEqual(parameters["random_seed"], seed)
def test_start_time(self):
    # start_time (zero, positive and negative) must be recorded in the
    # provenance alongside a non-negative random seed.
    ts = msprime.simulate(10, random_seed=1)
    for start_time in (0, 1, -1):
        mutated = msprime.mutate(ts, 1, start_time=start_time)
        last_provenance = mutated.provenance(mutated.num_provenances - 1)
        parameters = json.loads(last_provenance.record)["parameters"]
        self.assertEqual(parameters["command"], "mutate")
        self.assertEqual(parameters["start_time"], start_time)
        self.assertTrue(parameters["random_seed"] >= 0)
def verify(self, ts, rate, random_seed):
    """
    Compare mutate() with and without ``keep`` on ``ts``: the keep=True
    result must contain the existing sites plus the newly generated ones.
    """
    mutate_args = dict(rate=rate, random_seed=random_seed)
    no_keep = msprime.mutate(ts, **mutate_args)
    self.assertGreater(no_keep.num_sites, 0)
    keep = msprime.mutate(ts, keep=True, **mutate_args)
    # Can assume there's no collisions here, very unlikely.
    expected_sites = ts.num_sites + no_keep.num_sites
    self.assertEqual(expected_sites, keep.num_sites)
    # Mutations are all infinite sites, so must be equal
    expected_mutations = ts.num_mutations + no_keep.num_mutations
    self.assertEqual(expected_mutations, keep.num_mutations)
    old, new, both = (
        set(site.position for site in tree_sequence.sites())
        for tree_sequence in (ts, no_keep, keep)
    )
    self.assertEqual(old | new, both)
    self.verify_sites(ts, keep)
def test_mutate_model(self):
    # A model given by name ("jc69") is recorded as its model class.
    ts = msprime.simulate(5, random_seed=1)
    ts = msprime.mutate(ts, model="jc69")
    decoded = self.decode(ts.provenance(1).record)
    assert decoded.schema_version == "1.0.0"
    params = decoded.parameters
    assert params.command == "mutate"
    recorded_class = params.model["__class__"]
    assert recorded_class == "msprime.mutations.JC69MutationModel"
def test_alphabet_nucleotide(self):
    # The NUCLEOTIDES alphabet is checked with verify_nucleotides_alphabet.
    ts = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    mutated = msprime.mutate(
        ts, rate=1, random_seed=2, model=nucleotide_model)
    self.verify_nucleotides_alphabet(mutated)
def test_keep_multichar_muts(self):
    # keep=True must preserve sites whose allelic states are multi-character
    # strings.
    simulated = msprime.simulate(12, random_seed=3)
    simulated = msprime.mutate(simulated, rate=1, random_seed=1)
    self.assertGreater(simulated.num_sites, 2)
    tables = simulated.dump_tables()
    tables.sites.clear()
    tables.mutations.clear()
    for site in simulated.sites():
        # State strings are repeated site.id times.
        ancestral = "A" * site.id
        derived = "T" * site.id
        tables.sites.add_row(
            position=site.position, ancestral_state=ancestral)
        for mutation in site.mutations:
            tables.mutations.add_row(
                site=site.id, node=mutation.node, derived_state=derived)
    original = tables.tree_sequence()
    updated = msprime.mutate(original, rate=1, random_seed=1, keep=True)
    self.verify_sites(original, updated)
def parsimony():
    """
    Worked examples of ``Tree.map_mutations``.

    Three small examples map categorical genotypes (including entries of
    -1) onto a simulated tree, print the reconstruction and draw the tree
    to ``_static/parsimony{1,2,3}.svg``. In between, the sites and
    mutations of a mutated tree sequence are re-inferred from its variants
    and asserted to equal the original tables.
    """

    def reconstruct_and_draw(alleles, genotypes, svg_path):
        # Map the genotypes onto a fresh 6-sample tree, report the result
        # and draw the tree coloured by the sample alleles.
        tree = msprime.simulate(6, random_seed=42).first()
        node_colours = {j: alleles[g] for j, g in enumerate(genotypes)}
        ancestral_state, mutations = tree.map_mutations(genotypes, alleles)
        print("Ancestral state = ", ancestral_state)
        for mut in mutations:
            print(f"Mutation: node = {mut.node} derived_state = {mut.derived_state}")
        tree.draw(svg_path, node_colours=node_colours)

    reconstruct_and_draw(
        ["red", "blue", "green"], [0, 0, 0, 0, 1, 2],
        "_static/parsimony1.svg")

    ts = msprime.simulate(6, random_seed=23)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(ts, rate=3, model=nucleotide_model, random_seed=2)
    tree = ts.first()
    tables = ts.dump_tables()
    # Reinfer the sites and mutations from the variants.
    tables.sites.clear()
    tables.mutations.clear()
    for var in ts.variants():
        ancestral_state, mutations = tree.map_mutations(
            var.genotypes, var.alleles)
        tables.sites.add_row(
            var.site.position, ancestral_state=ancestral_state)
        # Parent ids returned by map_mutations are shifted by the number
        # of mutation rows already present in the table.
        parent_offset = len(tables.mutations)
        for mutation in mutations:
            if mutation.parent != tskit.NULL:
                parent = mutation.parent + parent_offset
            else:
                parent = mutation.parent
            tables.mutations.add_row(
                var.index,
                node=mutation.node,
                parent=parent,
                derived_state=mutation.derived_state)

    assert tables.sites == ts.tables.sites
    assert tables.mutations == ts.tables.mutations
    print(tables.sites)
    print(tables.mutations)

    reconstruct_and_draw(
        ["red", "blue", "green", "white"], [-1, 0, 0, 0, 1, 2],
        "_static/parsimony2.svg")

    reconstruct_and_draw(
        ["red", "blue", "white"], [1, -1, 0, 0, 0, 0],
        "_static/parsimony3.svg")
def test_default_seeds(self):
    # Without an explicit seed every call should record a different one.
    ts = msprime.simulate(20, random_seed=2)

    def recorded_seed(mutated):
        newest = mutated.provenance(mutated.num_provenances - 1)
        return json.loads(newest.record)["parameters"]["random_seed"]

    seeds = [recorded_seed(msprime.mutate(ts, 0)) for _ in range(10)]
    self.assertEqual(len(seeds), len(set(seeds)))
def test_zero_mutation_rate(self):
    # A zero rate leaves the site and mutation tables equal to the input's.
    original = msprime.simulate(10, random_seed=1)
    mutated = msprime.mutate(original, 0)
    before = original.dump_tables()
    after = mutated.dump_tables()
    self.verify_topology(before, after)
    self.verify_provenance(before, after)
    self.assertEqual(before.sites, after.sites)
    self.assertEqual(before.mutations, after.mutations)
def test_mutation_rate(self):
    # Each tested rate must be stored under parameters["rate"].
    ts = msprime.simulate(10, random_seed=1)
    for rate in [0, 1, 1e-5]:
        mutated = msprime.mutate(ts, rate)
        newest = mutated.num_provenances - 1
        params = json.loads(mutated.provenance(newest).record)["parameters"]
        self.assertEqual(params["command"], "mutate")
        self.assertEqual(params["rate"], rate)
        self.assertTrue(params["random_seed"] >= 0)
def test_identical_seed(self):
    # Nine runs with the same seed must give identical site and mutation
    # tables.
    ts = msprime.simulate(10, random_seed=2)
    mutated = []
    for _ in range(9):
        mutated.append(msprime.mutate(ts, rate=1, random_seed=2))
    first = mutated[0]
    self.assertGreater(first.num_sites, 0)
    self.assertGreater(first.num_mutations, 0)
    tables = [other_ts.dump_tables() for other_ts in mutated]
    reference, others = tables[0], tables[1:]
    self.assertTrue(all(reference.sites == t.sites for t in others))
    self.assertTrue(all(reference.mutations == t.mutations for t in others))
def test_wright_fisher_simplified(self):
    # Forward Wright-Fisher simulation, sorted and simplified, then mutated.
    tables = wf.wf_sim(
        9,
        10,
        seed=1,
        deep_history=True,
        initial_generation_samples=False,
        num_loci=5,
    )
    tables.sort()
    simplified = tables.tree_sequence().simplify()
    mutated = msprime.mutate(simplified, rate=0.01, random_seed=1234)
    self.assertGreater(mutated.num_sites, 0)
    self.verify(mutated)
def test_mutate_round_trip(self):
    # Call mutate() with every argument given explicitly, then hand the
    # result to verify.
    mutate_args = dict(
        rate=2, random_seed=1, start_time=0, end_time=100, keep=False)
    simulated = msprime.simulate(5, random_seed=1)
    mutated = msprime.mutate(simulated, **mutate_args)
    self.verify(mutated)
def test_wright_fisher_initial_generation(self):
    # Wright-Fisher simulation with initial_generation_samples=True,
    # sorted and simplified in the tables before mutating.
    tables = wf.wf_sim(
        6,
        5,
        seed=3,
        deep_history=True,
        initial_generation_samples=True,
        num_loci=2,
    )
    tables.sort()
    tables.simplify()
    unmutated = tables.tree_sequence()
    mutated = msprime.mutate(unmutated, rate=0.08, random_seed=2)
    self.assertGreater(mutated.num_sites, 0)
    self.verify(mutated)
def test_start_time(self):
    # Each tested start_time must be stored under parameters["start_time"].
    ts = msprime.simulate(10, random_seed=1)
    for start in [0, 1, -1]:
        mutated = msprime.mutate(ts, 1, start_time=start)
        newest = mutated.num_provenances - 1
        params = json.loads(mutated.provenance(newest).record)["parameters"]
        self.assertEqual(params["command"], "mutate")
        self.assertEqual(params["start_time"], start)
        self.assertTrue(params["random_seed"] >= 0)
def test_seed(self):
    # Seeds 1..9 must each be stored under parameters["random_seed"].
    ts = msprime.simulate(10, random_seed=1)
    for seed in range(1, 10):
        mutated = msprime.mutate(ts, rate=1, random_seed=seed)
        newest = mutated.num_provenances - 1
        params = json.loads(mutated.provenance(newest).record)["parameters"]
        self.assertEqual(params["command"], "mutate")
        self.assertEqual(params["rate"], 1)
        self.assertEqual(params["random_seed"], seed)
def test_keep_mutation_parent_zero_rate(self):
    # With rate=0 and keep=True the tables must be unchanged apart from
    # provenance.
    base = msprime.simulate(12, recombination_rate=3, random_seed=3)
    base = tsutil.insert_branch_mutations(base)
    self.assertGreater(base.num_sites, 2)
    unchanged = msprime.mutate(base, rate=0, random_seed=1, keep=True)
    before = base.dump_tables()
    after = unchanged.dump_tables()
    # Provenance differs by construction, so drop it before comparing.
    before.provenances.clear()
    after.provenances.clear()
    self.assertEqual(before, after)
def verify(self, ts, rate=100):
    """
    Mutate ``ts`` within four time windows derived from its node times and
    pass each result to ``verify_mutations`` with the window bounds
    (``None`` for an open bound).
    """
    node_times = [node.time for node in ts.nodes()]
    root_time = max(node_times)
    leaf_time = min(node_times)
    length = root_time - leaf_time
    midpoint = root_time - length / 2
    lower_quarter = leaf_time + length / 4

    windows = [
        (None, midpoint),
        (lower_quarter, midpoint),
        (midpoint, root_time),
        (midpoint, None),
    ]
    for start, end in windows:
        # Only pass the bounds that are set so that msprime's defaults
        # apply to the open side.
        bounds = {}
        if start is not None:
            bounds["start_time"] = start
        if end is not None:
            bounds["end_time"] = end
        tsm = msprime.mutate(ts, rate=rate, **bounds)
        self.verify_mutations(tsm, start, end)
def test_mutation_overwrite(self):
    # Calling mutate() without keep on a tree sequence that already has
    # mutations must discard them (rate 0 leaves the tables empty).
    original = msprime.simulate(10, mutation_rate=5, random_seed=2)
    self.assertGreater(original.num_sites, 0)
    self.assertGreater(original.num_mutations, 0)
    mutated = msprime.mutate(original, 0)
    before = original.dump_tables()
    self.assertEqual(len(before.sites), original.num_sites)
    after = mutated.dump_tables()
    self.verify_topology(before, after)
    self.verify_provenance(before, after)
    self.assertEqual(len(after.sites), 0)
    self.assertEqual(len(after.mutations), 0)
def test_mutate(self):
    # Every argument given to mutate() must be present in the decoded
    # provenance record.
    simulated = msprime.simulate(5, random_seed=1)
    mutated = msprime.mutate(
        simulated,
        rate=2,
        random_seed=1,
        start_time=0,
        end_time=100,
        keep=False,
    )
    decoded = self.decode(mutated.provenance(1).record)
    self.assertEqual(decoded.schema_version, "1.0.0")
    self.assertEqual(decoded.parameters.command, "mutate")
    self.assertEqual(decoded.parameters.random_seed, 1)
    self.assertEqual(decoded.parameters.rate, 2)
    self.assertEqual(decoded.parameters.start_time, 0)
    self.assertEqual(decoded.parameters.end_time, 100)
    self.assertEqual(decoded.parameters.keep, False)
def test_stick_tree(self):
    # A "stick" tree: node 0 (sample, time 0) -> node 1 (time 1) ->
    # node 2 (time 2). Mutations falling in [0, 1] sit on the branch
    # above node 0 and those in [1, 2] on the branch above node 1.
    tables = msprime.TableCollection(1.0)
    tables.nodes.add_row(flags=msprime.NODE_IS_SAMPLE, time=0)
    tables.nodes.add_row(flags=0, time=1)
    tables.nodes.add_row(flags=0, time=2)
    tables.edges.add_row(0, 1, 1, 0)
    tables.edges.add_row(0, 1, 2, 1)
    ts = tables.tree_sequence()

    # (time window, node every mutation must be attached to)
    cases = [
        ({"end_time": 1}, 0),
        ({"start_time": 0, "end_time": 1}, 0),
        ({"start_time": 0.5, "end_time": 1}, 0),
        ({"start_time": 1}, 1),
        ({"start_time": 1, "end_time": 2}, 1),
        # The window [1.5, 2] lies entirely on the branch above node 1.
        ({"start_time": 1.5, "end_time": 2}, 1),
    ]
    for window, expected_node in cases:
        tsm = msprime.mutate(ts, rate=100, random_seed=1, **window)
        self.assertGreater(tsm.num_sites, 0)
        # Inspect the mutated tree sequence. The earlier version iterated
        # over ``ts.mutations()``, which is empty, so the check passed
        # vacuously whatever node was expected.
        self.assertTrue(
            all(mut.node == expected_node for mut in tsm.mutations()))
def test_populations(self):
    # A zero-rate mutate() on a two-population simulation with recorded
    # migrations must leave the site and mutation tables as they were.
    population_configurations = [
        msprime.PopulationConfiguration(10),
        msprime.PopulationConfiguration(10),
    ]
    original = msprime.simulate(
        population_configurations=population_configurations,
        migration_matrix=[[0, 1], [1, 0]],
        record_migrations=True,
        random_seed=1,
    )
    mutated = msprime.mutate(original, 0)
    before = original.dump_tables()
    self.assertEqual(len(before.populations), 2)
    self.assertGreater(len(before.migrations), 0)
    after = mutated.dump_tables()
    self.verify_topology(before, after)
    self.verify_provenance(before, after)
    self.assertEqual(before.sites, after.sites)
    self.assertEqual(before.mutations, after.mutations)
def test_mutate_numpy(self):
    # NumPy-typed arguments must be recorded in the provenance as their
    # plain values.
    simulated = msprime.simulate(5, random_seed=1)
    # NOTE(review): end_time and keep are written as np.array([x][0]),
    # while the other arguments use np.array([x])[0] -- confirm that this
    # difference in bracket placement is intended.
    numpy_args = dict(
        rate=np.array([2])[0],
        random_seed=np.array([1])[0],
        start_time=np.array([0])[0],
        end_time=np.array([100][0]),
        keep=np.array([False][0]),
    )
    mutated = msprime.mutate(simulated, **numpy_args)
    decoded = self.decode(mutated.provenance(1).record)
    self.assertEqual(decoded.schema_version, "1.0.0")
    self.assertEqual(decoded.parameters.command, "mutate")
    self.assertEqual(decoded.parameters.random_seed, 1)
    self.assertEqual(decoded.parameters.rate, 2)
    self.assertEqual(decoded.parameters.start_time, 0)
    self.assertEqual(decoded.parameters.end_time, 100)
    self.assertEqual(decoded.parameters.keep, False)
def test_mutate(self):
    # The provenance record written by mutate() must validate with
    # tskit.validate_provenance and name the "mutate" command.
    simulated = msprime.simulate(5, random_seed=1)
    mutated = msprime.mutate(simulated, rate=1, random_seed=1)
    record = mutated.provenance(1).record
    prov = json.loads(record)
    tskit.validate_provenance(prov)
    self.assertEqual(prov["parameters"]["command"], "mutate")
def test_default_alphabet(self):
    # With no model given, the result is checked with
    # verify_binary_alphabet.
    simulated = msprime.simulate(10, random_seed=2)
    mutated = msprime.mutate(simulated, rate=1, random_seed=2)
    self.verify_binary_alphabet(mutated)
def test_errors(self):
    # A start_time later than end_time must be rejected with ValueError.
    ts = msprime.simulate(10, random_seed=2)
    for start, end in [(-2, -3), (1, 0), (1e6, 1e5)]:
        with self.assertRaises(ValueError):
            # Use the loop values: the call used to hard-code
            # (-2.0, -3.0), so the other two pairs were never exercised.
            msprime.mutate(ts, start_time=start, end_time=end)
def test_alphabet_binary(self):
    # Requesting the BINARY alphabet explicitly.
    simulated = msprime.simulate(10, random_seed=2)
    model = msprime.InfiniteSites(msprime.BINARY)
    mutated = msprime.mutate(simulated, rate=1, random_seed=2, model=model)
    self.verify_binary_alphabet(mutated)
def test_alphabet_nucleotide(self):
    # Requesting the NUCLEOTIDES alphabet explicitly.
    simulated = msprime.simulate(10, random_seed=2)
    model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    mutated = msprime.mutate(simulated, rate=1, random_seed=2, model=model)
    self.verify_nucleotides_alphabet(mutated)