def test_bad_params(self):
    """Check that invalid SLiM engine parameters raise ``ValueError``.

    Zero and negative scaling factors, and negative burn-in values, are
    each passed to a dry-run ``simulate`` call on its own.
    """
    slim = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    chrom = homsap.get_contig("chr1")
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    shared_kwargs = dict(
        demographic_model=demography,
        contig=chrom,
        samples=demography.get_samples(10),
        dry_run=True,
    )

    # Scaling factors are checked first, then burn-in values.
    rejected = [{"slim_scaling_factor": value} for value in (0, -1, -1e-6)]
    rejected += [{"slim_burn_in": value} for value in (-1, -1e-6)]

    for bad_kwargs in rejected:
        with self.assertRaises(ValueError):
            slim.simulate(**shared_kwargs, **bad_kwargs)
def test_script_generation(self):
    """Check that ``slim_script=True`` prints a script for three models.

    A generic constant-size model and two catalog models are each run and
    the captured stdout must contain ``sim.registerLateEvent``.
    """
    slim = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    chrom = homsap.get_contig("chr1")

    def check_script(demography, sample_set):
        # Only the captured stdout is inspected; stderr is discarded.
        out, _ = capture_output(
            slim.simulate,
            demographic_model=demography,
            contig=chrom,
            samples=sample_set,
            slim_script=True,
        )
        self.assertTrue("sim.registerLateEvent" in out)

    generic = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    generic_samples = generic.get_samples(10)
    generic.generation_time = homsap.generation_time
    check_script(generic, generic_samples)

    catalog_cases = (
        ("AncientEurasia_9K19", (1, 2, 3, 4, 5, 6, 7)),
        ("AmericanAdmixture_4B11", (10, 10, 10)),
    )
    for model_id, counts in catalog_cases:
        catalog_model = homsap.get_demographic_model(model_id)
        check_script(catalog_model, catalog_model.get_samples(*counts))
def run_simulation(args):
    """Run the simulation described by the parsed CLI arguments.

    Builds the demographic model (a generic constant-size model when no
    model was requested), the contig and the engine, writes a summary,
    warns when the model has no QC implementation, runs the engine and
    writes the output and citations.

    :param args: Parsed command line namespace.
    """
    generic_requested = args.demographic_model is None
    if generic_requested:
        model = stdpopsim.PiecewiseConstantSize(species.population_size)
        model.generation_time = species.generation_time
        model.citations.extend(species.population_size_citations)
        model.citations.extend(species.generation_time_citations)
        # The generic model has no separate QC implementation to wait for.
        qc_complete = True
    else:
        model = get_model_wrapper(species, args.demographic_model)
        qc_complete = model.qc_model is not None

    if len(args.samples) > model.num_sampling_populations:
        exit(
            f"Cannot sample from more than {model.num_sampling_populations} "
            "populations"
        )
    samples = model.get_samples(*args.samples)

    contig_options = dict(
        genetic_map=args.genetic_map,
        length_multiplier=args.length_multiplier,
        length=args.length,
        inclusion_mask=args.inclusion_mask,
        exclusion_mask=args.exclusion_mask,
    )
    contig = species.get_contig(args.chromosome, **contig_options)
    engine = stdpopsim.get_engine(args.engine)

    logger.info(
        f"Running simulation model {model.id} for {species.id} on "
        f"{contig} with {len(samples)} samples using {engine.id}."
    )
    write_simulation_summary(
        engine=engine, model=model, contig=contig, samples=samples, seed=args.seed
    )

    if not qc_complete:
        qc_message = (
            f"{model.id} has not been QCed. Use at your own risk! "
            "Demographic models that have not undergone stdpopsim's "
            "Quality Control procedure may contain implementation "
            "errors, leading to differences between simulations "
            "and the model described in the original publication. "
            "More information about the QC process can be found in "
            "the developer documentation. "
            "https://stdpopsim.readthedocs.io/en/latest/development.html"
            "#demographic-model-review-process"
        )
        warnings.warn(stdpopsim.QCMissingWarning(qc_message))

    # extract simulate() parameters from CLI args
    simulate_params = inspect.signature(engine.simulate).parameters.keys()
    kwargs = {
        name: value for name, value in vars(args).items() if name in simulate_params
    }
    kwargs.update(demographic_model=model, contig=contig, samples=samples)

    ts = engine.simulate(**kwargs)
    summarise_usage()
    if ts is not None:
        write_output(ts, args)

    # Non-QCed models shouldn't be used in publications, so we skip the
    # "If you use this simulation in published work..." citation request.
    if qc_complete:
        write_citations(engine, model, contig, species)
    if args.bibtex_file is not None:
        write_bibtex(engine, model, contig, species, args.bibtex_file)
def test_number_of_calls(self):
    """Check that ``fetch_bibtex`` is called once per distinct DOI."""
    # Test that genetic map citations are converted.
    homsap = stdpopsim.get_species("HomSap")
    gmap = homsap.get_genetic_map("HapMapII_GRCh37")
    chrom = homsap.get_contig("chr22", genetic_map=gmap.id)
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    default_engine = stdpopsim.get_default_engine()

    citation_groups = [
        gmap.citations,
        demography.citations,
        default_engine.citations,
        homsap.genome.mutation_rate_citations,
        homsap.genome.recombination_rate_citations,
        homsap.genome.assembly_citations,
    ]
    distinct_dois = {ref.doi for group in citation_groups for ref in group}
    ncite = len(distinct_dois)

    # Patch out writing to a file, then
    # ensure that the method is called
    # the correct number of times.
    with mock.patch("builtins.open", mock.mock_open()), open(
        'tmp.bib', 'w'
    ) as bib, mock.patch.object(
        stdpopsim.citations.Citation, "fetch_bibtex"
    ) as mock_bib:
        cli.write_bibtex(default_engine, demography, chrom, homsap, bib)
        self.assertEqual(mock_bib.call_count, ncite)
def run_simulation(args):
    """Run the simulation described by the parsed CLI arguments.

    Builds the demographic model (a generic constant-size model when no
    model was requested), the contig and the engine, then optionally
    writes a summary, runs the engine and writes output and citations.

    :param args: Parsed command line namespace. It is not modified.
    """
    if args.demographic_model is None:
        model = stdpopsim.PiecewiseConstantSize(species.population_size)
        model.generation_time = species.generation_time
        model.citations.extend(species.population_size_citations)
        model.citations.extend(species.generation_time_citations)
    else:
        model = get_model_wrapper(species, args.demographic_model)
    if len(args.samples) > model.num_sampling_populations:
        exit(
            f"Cannot sample from more than {model.num_sampling_populations} "
            "populations"
        )
    samples = model.get_samples(*args.samples)

    contig = species.get_contig(
        args.chromosome,
        genetic_map=args.genetic_map,
        length_multiplier=args.length_multiplier,
    )
    engine = stdpopsim.get_engine(args.engine)
    logger.info(
        f"Running simulation model {model.id} for {species.id} on "
        f"{contig} with {len(samples)} samples using {engine.id}."
    )

    # vars(args) is the namespace's own __dict__. Copy it so that adding
    # the simulate() inputs below does not overwrite args.demographic_model
    # and args.samples, which are still read after this point.
    kwargs = dict(vars(args))
    kwargs.update(demographic_model=model, contig=contig, samples=samples)

    if not args.quiet:
        write_simulation_summary(
            engine=engine, model=model, contig=contig, samples=samples, seed=args.seed
        )
    if not args.dry_run:
        ts = engine.simulate(**kwargs)
        summarise_usage()
        if ts is not None:
            write_output(ts, args)
    if not args.quiet:
        write_citations(engine, model, contig, species)
    if args.bibtex_file is not None:
        write_bibtex(engine, model, contig, species, args.bibtex_file)
def test_number_of_calls(self):
    """Check that ``fetch_bibtex`` is called once per merged citation."""
    # Test that genetic map citations are converted.
    homsap = stdpopsim.get_species("HomSap")
    gmap = homsap.get_genetic_map("HapMapII_GRCh37")
    chrom = homsap.get_contig("chr20", genetic_map=gmap.id)
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    default_engine = stdpopsim.get_default_engine()

    expected_cites = stdpopsim.Citation.merge(
        [stdpopsim.citations._stdpopsim_citation]
        + gmap.citations
        + demography.citations
        + default_engine.citations
        + homsap.genome.citations
        + homsap.citations
    )
    ncite = len({ref.doi for ref in expected_cites})
    # After merging, every citation should carry a distinct DOI.
    assert ncite == len(expected_cites)

    from_cli = cli.get_citations(default_engine, demography, chrom, homsap)
    assert len(from_cli) == len(expected_cites)

    # Patch out writing to a file, then
    # ensure that the method is called
    # the correct number of times.
    with mock.patch("builtins.open", mock.mock_open()), open(
        "tmp.bib", "w"
    ) as bib, mock.patch.object(
        stdpopsim.citations.Citation, "fetch_bibtex", autospec=True
    ) as mock_bib:
        cli.write_bibtex(default_engine, demography, chrom, homsap, bib)
        assert mock_bib.call_count == ncite
def generic_models_example():
    """Simulate a generic constant-size model with the default engine.

    Uses ten samples on a tenth of human chr22.
    """
    homsap = stdpopsim.get_species("HomSap")
    short_chr22 = homsap.get_contig("chr22", length_multiplier=0.1)
    constant_size = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    ten_samples = constant_size.get_samples(10)
    default_engine = stdpopsim.get_default_engine()
    ts = default_engine.simulate(
        constant_size,
        short_chr22,
        ten_samples,
    )
class PiecewiseConstantSizeMixin:
    """
    Mixin providing a small two-epoch demographic model, a short contig,
    samples and a single mutation type as class attributes.
    """

    species = stdpopsim.get_species("HomSap")
    contig = species.get_contig("chr22", length_multiplier=0.001)  # ~50 kb
    N0 = 1000  # size in the present
    N1 = 500  # ancestral size
    T = 500  # generations since size change occurred
    T_mut = 300  # introduce a mutation at this generation
    model = stdpopsim.PiecewiseConstantSize(N0, (T, N1))
    model.generation_time = 1
    samples = model.get_samples(100)
    mutation_types = [
        stdpopsim.ext.MutationType(convert_to_substitution=False),
    ]
    mut_id = len(mutation_types)

    def allele_frequency(self, ts):
        """
        Return the fraction of sample nodes carrying the only mutation.

        The tree sequence must contain exactly one mutation.
        """
        # surely there's a simpler way!
        assert ts.num_mutations == 1
        sample_nodes = ts.samples()
        mutation = next(ts.mutations())
        position = ts.site(mutation.site).position
        local_tree = ts.at(position)
        # A sample carries the mutation when it descends from its node.
        carriers = sum(
            1
            for node in sample_nodes
            if local_tree.is_descendant(node, mutation.node)
        )
        return carriers / len(sample_nodes)
def test_no_msprime_DFE(self):
    """Check that msprime raises ``ValueError`` for a contig with this DFE."""
    # test we cannot simulate a non-neutral DFE with msprime
    selected_type = stdpopsim.ext.MutationType(
        dominance_coeff=0.2, distribution_type="e", distribution_args=[0.1]
    )
    dfe = stdpopsim.DFE(
        id="abc",
        description="test test",
        long_description="test test 🐢",
        mutation_types=[selected_type],
    )

    contig = stdpopsim.Contig.basic_contig(length=10000, mutation_rate=1e-6)
    contig.clear_genomic_mutation_types()
    # The DFE is applied to the first half of the contig only.
    first_half = np.array([[0, contig.length / 2]], dtype="int")
    contig.add_DFE(intervals=first_half, DFE=dfe)

    demography = stdpopsim.PiecewiseConstantSize(1000)
    two_samples = demography.get_samples(2)
    msprime_engine = stdpopsim.get_engine("msprime")
    with pytest.raises(ValueError):
        _ = msprime_engine.simulate(demography, contig, two_samples)
def test_bad_population_size_addSubPop(self):
    """Expect an ``UnspecifiedSLiMWarning`` from a scaled dry run.

    The model has size 100 and the run uses ``slim_scaling_factor=10``.
    """
    engine, _species, contig = self.triplet()
    small_model = stdpopsim.PiecewiseConstantSize(100)
    sim_kwargs = dict(
        demographic_model=small_model,
        contig=contig,
        samples=small_model.get_samples(2),
        slim_scaling_factor=10,
        dry_run=True,
    )
    with self.assertWarns(stdpopsim.UnspecifiedSLiMWarning):
        engine.simulate(**sim_kwargs)
def test_sample_size_too_big(self):
    """Expect ``SLiMException`` from a scaled dry run with 300 samples.

    The model has size 1000 and the run uses ``slim_scaling_factor=10``.
    """
    engine, _species, contig = self.triplet()
    demography = stdpopsim.PiecewiseConstantSize(1000)
    sim_kwargs = dict(
        demographic_model=demography,
        contig=contig,
        samples=demography.get_samples(300),
        slim_scaling_factor=10,
        dry_run=True,
    )
    with self.assertRaises(stdpopsim.SLiMException):
        engine.simulate(**sim_kwargs)
def test_no_populations_in_generation_1(self):
    """Expect ``SLiMException`` when the scaling factor exceeds the size.

    The model has size 100 and the run uses ``slim_scaling_factor=200``.
    """
    engine, _species, contig = self.triplet()
    demography = stdpopsim.PiecewiseConstantSize(100)
    sim_kwargs = dict(
        demographic_model=demography,
        contig=contig,
        samples=demography.get_samples(2),
        slim_scaling_factor=200,
        dry_run=True,
    )
    with self.assertRaises(stdpopsim.SLiMException):
        engine.simulate(**sim_kwargs)
def test_genetic_map(self):
    """Check that ``cli.write_citations`` writes to stdout and not stderr."""
    homsap = stdpopsim.get_species("homsap")
    chr22 = homsap.get_contig("chr22", genetic_map="HapmapII_GRCh37")
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    captured_out, captured_err = capture_output(
        cli.write_citations, chr22, demography
    )
    self.assertEqual(len(captured_err), 0)
    # TODO Parse out the output for the model and check that the text is
    # in there.
    self.assertGreater(len(captured_out), 0)
def test_recombination_map(self):
    """Dry-run the SLiM engine on a contig that uses a genetic map.

    The test passes if the call completes; the captured output is not
    inspected.
    """
    slim = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    mapped_contig = homsap.get_contig("chr1", genetic_map="HapMapII_GRCh37")
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    sim_kwargs = dict(
        demographic_model=demography,
        contig=mapped_contig,
        samples=demography.get_samples(10),
        dry_run=True,
    )
    out, _ = capture_output(slim.simulate, **sim_kwargs)
def test_genetic_map_citations(self):
    """Check that citations are written to stderr only.

    The stderr text is handed to ``self.check_citations``.
    """
    homsap = stdpopsim.get_species("HomSap")
    gmap = homsap.get_genetic_map("HapMapII_GRCh37")
    chr22 = homsap.get_contig("chr22", genetic_map=gmap.id)
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    default_engine = stdpopsim.get_default_engine()

    captured_out, captured_err = capture_output(
        cli.write_citations, default_engine, demography, chr22, homsap
    )
    self.assertEqual(len(captured_out), 0)
    self.check_citations(default_engine, homsap, gmap, demography, captured_err)
def test_bad_population_size_addSubPop(self):
    """Expect exactly one ``warnings.warn`` call from a scaled dry run.

    The model has size 100 and the run uses ``slim_scaling_factor=10``.
    """
    engine, _species, contig = self.triplet()
    small_model = stdpopsim.PiecewiseConstantSize(100)
    sim_kwargs = dict(
        demographic_model=small_model,
        contig=contig,
        samples=small_model.get_samples(2),
        slim_scaling_factor=10,
        dry_run=True,
    )
    with mock.patch("warnings.warn", autospec=True) as mock_warning:
        engine.simulate(**sim_kwargs)
    mock_warning.assert_called_once()
def test_simulate(self):
    """Run a short SLiM simulation and check the resulting tree sequence.

    The result must have ten samples and a single root in every tree.
    """
    slim = stdpopsim.get_engine("slim")
    aratha = stdpopsim.get_species("AraTha")
    short_chr5 = aratha.get_contig("chr5", length_multiplier=0.001)
    demography = stdpopsim.PiecewiseConstantSize(aratha.population_size)
    sim_kwargs = dict(
        demographic_model=demography,
        contig=short_chr5,
        samples=demography.get_samples(10),
        slim_scaling_factor=10,
        slim_burn_in=0,
    )
    ts = slim.simulate(**sim_kwargs)

    self.assertEqual(ts.num_samples, 10)
    every_tree_coalesced = all(tree.num_roots == 1 for tree in ts.trees())
    self.assertTrue(every_tree_coalesced)
def test_genetic_map_citations(self, caplog):
    """Check citations with no DFE: nothing on stdout, text in the log.

    The captured log text is handed to ``self.check_citations``.
    """
    homsap = stdpopsim.get_species("HomSap")
    gmap = homsap.get_genetic_map("HapMapII_GRCh37")
    chr20 = homsap.get_contig("chr20", genetic_map=gmap.id)
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    default_engine = stdpopsim.get_default_engine()
    no_dfe = None

    captured_out, captured_err = capture_output(
        cli.write_citations, default_engine, demography, chr20, homsap, no_dfe
    )
    assert len(captured_out) == 0
    self.check_citations(default_engine, homsap, gmap, demography, caplog.text)
def test_dfe_citations(self, caplog):
    """Check that the DFE citation appears in the logged citations.

    ``cli.write_citations`` is called with the ``Gamma_K17`` DFE. Nothing
    may be written to stdout, and the log must contain the DFE section
    label and the "Kim et al., 2017" reference.
    """
    species = stdpopsim.get_species("HomSap")
    genetic_map = species.get_genetic_map("HapMapII_GRCh37")
    contig = species.get_contig("chr20", genetic_map=genetic_map.id)
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    engine = stdpopsim.get_default_engine()
    dfe = species.get_dfe("Gamma_K17")
    stdout, stderr = capture_output(
        cli.write_citations, engine, model, contig, species, dfe
    )
    assert len(stdout) == 0
    assert "[distribution of fitness effects]" in caplog.text
    assert "Kim et al., 2017" in caplog.text
def _onepop_PC(engine_id, out_dir, seed, N0=1000, *size_changes, **sim_kwargs):
    """Simulate a one-population piecewise-constant model and time the run.

    :param engine_id: Identifier passed to ``stdpopsim.get_engine``.
    :param out_dir: Directory for the output; combined with the file name
        using the ``/`` operator.
    :param seed: Simulation seed, also used as the output file stem.
    :param N0: First argument to ``PiecewiseConstantSize``.
    :param size_changes: Further positional arguments to
        ``PiecewiseConstantSize``.
    :param sim_kwargs: Extra keyword arguments for ``engine.simulate``.
    :return: Tuple of the ``.trees`` path and the seconds spent inside
        ``engine.simulate``.
    """
    canfam = stdpopsim.get_species("CanFam")
    short_chr35 = canfam.get_contig("chr35", length_multiplier=0.01)  # ~265 kb
    demography = stdpopsim.PiecewiseConstantSize(N0, *size_changes)
    demography.generation_time = canfam.generation_time
    sample_set = demography.get_samples(100)
    engine = stdpopsim.get_engine(engine_id)

    # Only the simulate() call is timed.
    started = time.perf_counter()
    ts = engine.simulate(demography, short_chr35, sample_set, seed=seed, **sim_kwargs)
    finished = time.perf_counter()

    trees_path = out_dir / f"{seed}.trees"
    ts.dump(trees_path)
    return trees_path, finished - started
def test_recombination_map(self):
    """Check SLiM script output for a contig that uses a genetic map.

    The captured stdout must contain ``sim.registerLateEvent``.
    """
    slim = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    mapped_contig = homsap.get_contig("chr1", genetic_map="HapMapII_GRCh37")
    demography = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    ten_samples = demography.get_samples(10)
    demography.generation_time = homsap.generation_time

    script_text, _ = capture_output(
        slim.simulate,
        demographic_model=demography,
        contig=mapped_contig,
        samples=ten_samples,
        slim_script=True,
    )
    self.assertTrue("sim.registerLateEvent" in script_text)
def run_simulation(args):
    """Run the simulation described by the parsed CLI arguments.

    Uses a generic constant-size model when no model was requested,
    simulates via ``model.simulate``, writes the output and, unless
    ``args.quiet`` is set, the citations.

    :param args: Parsed command line namespace.
    """
    model_requested = args.model is not None
    if model_requested:
        model = species.get_model(args.model)
    else:
        model = stdpopsim.PiecewiseConstantSize(species.population_size)
        model.citations = species.population_size_citations

    if len(args.samples) > model.num_sampling_populations:
        exit(
            f"Cannot sample from more than {model.num_sampling_populations} "
            "populations"
        )
    samples = model.get_samples(*args.samples)

    contig = species.get_contig(
        args.chromosome,
        genetic_map=args.genetic_map,
        length_multiplier=args.length_multiplier,
    )
    logger.info(
        f"Running simulation model {model.name} for {species.name} on "
        f"{contig} with {len(samples)} samples"
    )

    ts = model.simulate(contig, samples, args.seed)
    summarise_usage()
    write_output(ts, args)
    if args.quiet:
        return
    write_citations(contig, model)
def run_simulation(args):
    """Run the simulation described by the parsed CLI arguments.

    Steps:

    1. Build the demographic model. When none was requested a generic
       constant-size model is used, with the species citations for
       population size and generation time attached.
    2. Build the contig and the engine.
    3. If a DFE was requested, work out the intervals it applies to and
       add it to the contig. The intervals come from ``--dfe-interval``,
       ``--dfe-annotation`` or ``--dfe-bed-file``. At most one of these
       may be given; with none, the whole contig is used.
    4. Write the summary, warn about models with no QC implementation,
       simulate, and write output and citations.

    Conflicting or incomplete DFE options end the program through
    ``exit`` with an explanatory message.

    :param args: Parsed command line namespace.
    """
    if args.demographic_model is None:
        model = stdpopsim.PiecewiseConstantSize(species.population_size)
        model.generation_time = species.generation_time
        # Same reasons for every citation, so build the set once.
        reasons = {
            stdpopsim.CiteReason.POP_SIZE,
            stdpopsim.CiteReason.GEN_TIME,
        }
        for citation in species.citations:
            if len(citation.reasons & reasons) > 0:
                model.citations.append(citation)
        qc_complete = True
    else:
        model = get_model_wrapper(species, args.demographic_model)
        qc_complete = model.qc_model is not None
    if len(args.samples) > model.num_sampling_populations:
        exit(
            f"Cannot sample from more than {model.num_sampling_populations} "
            "populations"
        )
    samples = model.get_samples(*args.samples)

    contig = species.get_contig(
        args.chromosome,
        genetic_map=args.genetic_map,
        length_multiplier=args.length_multiplier,
        length=args.length,
        inclusion_mask=args.inclusion_mask,
        exclusion_mask=args.exclusion_mask,
        mutation_rate=model.mutation_rate,
    )
    engine = stdpopsim.get_engine(args.engine)
    logger.info(
        f"Running simulation model {model.id} for {species.id} on "
        f"{contig} with {len(samples)} samples using {engine.id}."
    )

    # DFE assignment
    dfe = None
    intervals_summary_str = None
    if args.dfe is None:
        if args.dfe_interval is not None:
            exit(
                "A DFE interval has been assigned without a DFE. "
                "Please specify a DFE."
            )
        if args.dfe_annotation is not None:
            exit(
                "A DFE annotation has been assigned without a DFE. "
                "Please specify a DFE."
            )
        if args.dfe_bed_file is not None:
            exit(
                "A DFE bed file has been assigned without a DFE. "
                "Please specify a DFE."
            )
    else:
        # Exactly one branch below may set the intervals. The whole-contig
        # default is the final ``else`` so that it can never overwrite
        # intervals the user asked for.
        if args.dfe_interval is not None:
            if args.dfe_annotation is not None:
                exit(
                    "A DFE annotation and a DFE interval have been "
                    "selected. Please only use one."
                )
            if args.dfe_bed_file is not None:
                exit(
                    "A DFE bed file and a DFE interval have been "
                    "selected. Please only use one."
                )
            left, right = args.dfe_interval.split(",")
            intervals = np.array([[int(left), int(right)]])
            intervals_summary_str = f"[{left}, {right})"
        elif args.dfe_annotation is not None:
            if args.dfe_bed_file is not None:
                exit(
                    "A DFE bed file and a DFE annotation have been "
                    "selected. Please only use one."
                )
            annot = species.get_annotations(args.dfe_annotation)
            intervals = annot.get_chromosome_annotations(args.chromosome)
            intervals_summary_str = f"{annot.id} elements on {args.chromosome}"
        elif args.dfe_bed_file is not None:
            # ndmin=2 keeps a single-row BED file as one (start, end) row
            # instead of collapsing it to a flat array.
            intervals = np.loadtxt(
                args.dfe_bed_file, usecols=[1, 2], dtype="int", ndmin=2
            )
            left = np.min(intervals)
            right = np.max(intervals)
            intervals_summary_str = f"[{left}, {right})"
        else:
            # case where no intervals specified but we have a DFE
            intervals = np.array(
                [[0, int(contig.recombination_map.sequence_length)]]
            )
            intervals_summary_str = f"[{intervals[0][0]}, {intervals[0][1]})"

        dfe = species.get_dfe(args.dfe)
        contig.add_dfe(
            intervals=intervals,
            DFE=dfe,
        )
        logger.info(
            f"Applying selection under the DFE model {dfe.id} "
            f"in intervals {intervals_summary_str}."
        )

    write_simulation_summary(
        engine=engine,
        model=model,
        contig=contig,
        samples=samples,
        dfe=args.dfe,
        dfe_interval=intervals_summary_str,
        seed=args.seed,
    )
    if not qc_complete:
        warnings.warn(
            stdpopsim.QCMissingWarning(
                f"{model.id} has not been QCed. Use at your own risk! "
                "Demographic models that have not undergone stdpopsim's "
                "Quality Control procedure may contain implementation "
                "errors, leading to differences between simulations "
                "and the model described in the original publication. "
                "More information about the QC process can be found in "
                "the developer documentation. "
                "https://popsim-consortium.github.io/stdpopsim-docs/"
                "latest/development.html#demographic-model-review-process"
            )
        )

    # extract simulate() parameters from CLI args
    accepted_params = inspect.signature(engine.simulate).parameters.keys()
    kwargs = {k: v for k, v in vars(args).items() if k in accepted_params}
    kwargs.update(demographic_model=model, contig=contig, samples=samples)
    ts = engine.simulate(**kwargs)
    summarise_usage()
    if ts is not None:
        write_output(ts, args)
    # Non-QCed models shouldn't be used in publications, so we skip the
    # "If you use this simulation in published work..." citation request.
    if qc_complete:
        write_citations(engine, model, contig, species, dfe)
    if args.bibtex_file is not None:
        write_bibtex(engine, model, contig, species, args.bibtex_file, dfe)
assert len(mut_info.keys()) > 0 # number of mutations assert num_nonneutral > 0 # nonneutral mutations @pytest.mark.skipif(IS_WINDOWS, reason="SLiM not available on windows") class CatalogDFEModelTestMixin(DFEModelTestMixin): """ Mixin for demographic models in the catalog. """ def test_id_valid(self): assert utils.is_valid_dfe_id(self.dfe.id) qc_test_classes = [] for species in stdpopsim.all_species(): for dfe in species.dfes: model = stdpopsim.PiecewiseConstantSize(1000) superclasses = [] superclasses.append(CatalogDFEModelTestMixin) classname = f"Test{species.id}{model.id}{dfe.id}" cls = type(classname, tuple(superclasses), dict(model=model, dfe=dfe)) qc_test_classes.append(cls) # Basic sanity checks to double check that no errors get introduced # that lead to these qc tests being skipped silently. assert len(qc_test_classes) > 0 for cls in qc_test_classes: assert issubclass(cls, DFEModelTestMixin) # Insert the class into the current test module's namespace. setattr(sys.modules[__name__], cls.__name__, cls)
#!/usr/bin/env python3 """script to simulate data based on the recombination map of human chromosome 2.""" import numpy as np import scipy.stats import stdpopsim import matplotlib.pyplot as pyplot import collections # specify genome of interest species = stdpopsim.get_species("HomSap") contig = species.get_contig("chr2", genetic_map="HapMapII_GRCh37") model = stdpopsim.PiecewiseConstantSize(species.population_size) mutation_rate = np.array(contig.mutation_rate) # specify simulation properties num_samples = 40 seed = 48 lenght_bin_sim = 7000 # simulate tree sequence samples = model.get_samples(num_samples) engine = stdpopsim.get_engine("msprime") ts = engine.simulate(model, contig, samples, seed=seed) # compute genotype matrix: G = ts.genotype_matrix() assert (G.shape[1] == num_samples) # compute site frequency spectrum: sum over columns of the genotype matrix a = G.sum(axis=1)
def generic_models_example():
    """Simulate a generic constant-size model through ``model.run``.

    Uses ten samples on a tenth of human chr22.
    """
    homsap = stdpopsim.get_species("homsap")
    short_chr22 = homsap.get_contig("chr22", length_multiplier=0.1)
    constant_size = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    ten_samples = constant_size.get_samples(10)
    ts = constant_size.run(
        short_chr22,
        ten_samples,
    )
ax1.legend() ax1.set_title("Selected mutations") ax2 = plt.subplot(1, 2, 2) ax2.semilogy(fs_neu, "-o", ms=6, lw=1, mfc="w", label="Slim") ax2.semilogy(exp_neu, "-o", ms=3, lw=1, label="Moments") ax2.set_xlabel("Allele frequency") ax2.set_title("Nuetral mutations") fig.tight_layout() plt.savefig(args.plotting) if __name__ == "__main__": parser = make_parser() args = parser.parse_args(sys.argv[1:]) species = stdpopsim.get_species("HomSap") model = stdpopsim.PiecewiseConstantSize(args.population_size) samples = model.get_samples(args.num_samples) contig = set_up_contig(args, species) engine = stdpopsim.get_engine("slim") np.random.seed(args.seed) seeds = np.random.randint(0, np.iinfo(np.uint32).max, args.replicates) fs_sel = np.zeros(args.num_samples + 1) fs_neu = np.zeros(args.num_samples + 1) for ii, random_seed in enumerate(seeds): print("running replicate", ii + 1, "of", args.replicates) ts = engine.simulate( model,