id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=7e-9, recombination_rate=_recombination_rate_data[name], ) ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ stdpopsim.Citation( author="Ossowski et al.", year="2010", doi="https://doi.org/10.1126/science.1180677", reasons={stdpopsim.CiteReason.MUT_RATE}, ) ], recombination_rate_citations=[ stdpopsim.Citation( author="Huber et al.", year="2014", doi="https://doi.org/10.1093/molbev/msu247", reasons={stdpopsim.CiteReason.REC_RATE}, ) ], assembly_citations=[ stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gkm965", year="2007",
class _SLiMEngine(stdpopsim.Engine):
    """
    Simulation engine that generates a SLiM script for a demographic model,
    runs the ``slim`` executable on it, and post-processes the resulting
    tree sequence (time rescaling, optional recapitation, simplification
    and optional neutral mutation overlay).
    """

    id = "slim"  #:
    description = "SLiM forward-time Wright-Fisher simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1111/1755-0998.12968",
            year=2019,
            author="Haller et al.",
            reasons={stdpopsim.CiteReason.ENGINE}),
    ]

    def slim_path(self):
        """
        Return the SLiM command to execute: the value of the ``SLIM``
        environment variable if set, otherwise ``"slim"``.
        """
        return os.environ.get("SLIM", "slim")

    def get_version(self):
        """
        Return the version string reported by ``slim -v``.

        The third whitespace-separated token of the output is used, with any
        trailing comma removed.
        """
        s = subprocess.check_output([self.slim_path(), "-v"])
        return s.split()[2].decode("ascii").rstrip(",")

    def simulate(
            self, demographic_model=None, contig=None, samples=None, seed=None,
            verbosity=0, slim_path=None, slim_script=False,
            slim_scaling_factor=10, slim_no_recapitation=False,
            slim_no_burnin=False, **kwargs):
        """
        Simulate the demographic model using SLiM.
        See :meth:`.Engine.simulate()` for definitions of the
        ``demographic_model``, ``contig``, and ``samples`` parameters.

        :param seed: The seed for the random number generator.
        :type seed: int
        :param verbosity: If 0, SLiM's standard output is discarded;
            otherwise it is inherited from the current process. The value
            is also forwarded to the SLiM script generator.
        :type verbosity: int
        :param slim_path: The full path to the slim executable, or the name of
            a command in the current PATH.
        :type slim_path: str
        :param slim_script: If true, the simulation will not be executed.
            Instead the generated SLiM script will be printed to stdout.
        :type slim_script: bool
        :param slim_scaling_factor: Rescale model parameters by the given value,
            to speed up simulation. Population sizes and generation times are
            divided by this factor, whereas the mutation rate, recombination
            rate, and growth rates are multiplied by the factor.
            See SLiM manual: `5.5 Rescaling population sizes to improve
            simulation performance.`
        :type slim_scaling_factor: float
        :param slim_no_recapitation: Do an explicit burn in, and add
            mutations, within the SLiM simulation. This may be much slower than
            the defaults (recapitation and neutral mutation overlay with
            msprime). The burn in behaviour is to wait until all individuals in
            the ancestral populations have a common ancestor within their
            respective population, and then wait another 10*N generations.
        :type slim_no_recapitation: bool
        :param slim_no_burnin: Do not perform a burn in at the start of the
            simulation.  This option is only relevant when
            ``slim_no_recapitation=True``.
        :type slim_no_burnin: bool
        :param \\**kwargs: Additional keyword arguments are accepted but not
            used by this method.
        :return: The simulated tree sequence, or ``None`` when
            ``slim_script`` is true.
        """

        run_slim = not slim_script
        do_recap = not slim_no_recapitation
        # The coalescence check (explicit burn in) only applies when SLiM
        # itself is responsible for the whole history.
        check_coalescence = slim_no_recapitation and not slim_no_burnin

        if slim_path is None:
            slim_path = self.slim_path()

        if do_recap:
            # Remember the real rate; mutations are overlaid after
            # recapitation instead.
            mutation_rate = contig.mutation_rate
            # Ensure no mutations are introduced by SLiM.
            contig = stdpopsim.Contig(
                recombination_map=contig.recombination_map,
                mutation_rate=0,
                genetic_map=contig.genetic_map)

        slim_cmd = [slim_path]
        if seed is not None:
            slim_cmd.extend(["-s", f"{seed}"])

        mktemp = functools.partial(tempfile.NamedTemporaryFile, mode="w")

        @contextlib.contextmanager
        def script_file_f():
            # Write the script to a temporary file when SLiM will be run, or
            # straight to stdout when only the script was requested.
            f = mktemp(suffix=".slim") if not slim_script else sys.stdout
            try:
                yield f
            finally:
                # Close the temporary script even if script generation or
                # SLiM itself raised.  Don't close sys.stdout.
                if not slim_script:
                    f.close()

        with script_file_f() as script_file, mktemp(suffix=".ts") as ts_file:

            recap_epoch = slim_makescript(
                script_file, ts_file.name,
                demographic_model, contig, samples,
                slim_scaling_factor, check_coalescence, verbosity)

            # Make sure the whole script is on disk (or on stdout) before
            # SLiM is started or we return.
            script_file.flush()

            if not run_slim:
                return None

            slim_cmd.append(script_file.name)
            stdout = subprocess.DEVNULL if verbosity == 0 else None
            subprocess.check_call(slim_cmd, stdout=stdout)

            # Load while the temporary output file still exists.
            ts = pyslim.load(ts_file.name)

        # Node times come from SLiM generation numbers, which may have been
        # divided by a scaling factor for computational tractability.
        tables = ts.dump_tables()
        for table in (tables.nodes, tables.migrations):
            table.time *= slim_scaling_factor
        ts = pyslim.SlimTreeSequence.load_tables(tables)
        ts.slim_generation *= slim_scaling_factor

        if do_recap:
            # Derive independent seeds for recapitation (s1) and for the
            # mutation overlay (s2) from the user-supplied seed.
            rng = random.Random(seed)
            s1, s2 = rng.randrange(1, 2**32), rng.randrange(1, 2**32)

            population_configurations = [
                msprime.PopulationConfiguration(
                    initial_size=pop.start_size,
                    growth_rate=pop.growth_rate)
                for pop in recap_epoch.populations]
            ts = ts.recapitate(
                recombination_rate=contig.recombination_map.mean_recombination_rate,
                population_configurations=population_configurations,
                migration_matrix=recap_epoch.migration_matrix,
                random_seed=s1)

        ts = simplify_remembered(ts)

        if do_recap:
            # Add neutral mutations.
            ts = pyslim.SlimTreeSequence(msprime.mutate(
                ts, rate=mutation_rate, keep=True, random_seed=s2))

        return ts
import collections import msprime import stdpopsim from . import genome_data ########################################################### # # Genome definition # ########################################################### # citations _LiAndStephan = stdpopsim.Citation( author="Li et al.", year=2006, doi="https://doi.org/10.1371/journal.pgen.0020166") _SchriderEtAl = stdpopsim.Citation( author="Schrider et al.", year=2013, doi="https://doi.org/10.1534/genetics.113.151670") _DosSantosEtAl = stdpopsim.Citation(doi="https://doi.org/10.1093/nar/gku1099", year="2015", author="dos Santos et al.", reasons={stdpopsim.CiteReason.ASSEMBLY}) _genome_wide_estimate = 8.4e-9 # WRONG, underestimate used in S&S! _recombination_rate_data = collections.defaultdict(
import stdpopsim

from . import genome_data

# Literature references. The ``reasons`` set on a citation records what the
# paper is cited for.

# Cited for generation time and population size.
_LiAndStephan = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.0020166",
    author="Li et al.",
    year=2006,
    reasons={stdpopsim.CiteReason.POP_SIZE, stdpopsim.CiteReason.GEN_TIME},
)

# No citation reason is attached here.
_SchriderEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/genetics.113.151670",
    author="Schrider et al.",
    year=2013,
)

# Cited for the genome assembly.
_DosSantosEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1093/nar/gku1099",
    author="dos Santos et al.",
    year=2015,
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Cited for the genome assembly.
_HoskinsEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1101/gr.185579.114",
    author="Hoskins et al.",
    year=2015,
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)
_an = stdpopsim.Annotation( species=_species, id="ensembl_havana_104_exons", description="Ensembl Havana exon annotations on GRCh38", url=("ftp://ftp.ensembl.org/pub/release-104/" "gff3/homo_sapiens/Homo_sapiens.GRCh38.104.gff3.gz"), gff_sha256= "313ad46bd4af78b45b9f5d8407bbcbd3f87f4be0747060e84b3b5eb931530ec1", intervals_url=("https://stdpopsim.s3-us-west-2.amazonaws.com/" "annotations/HomSap/ensembl_havana_104_exons.tar.gz"), intervals_sha256= "5c356d092b31fa40bfce434994de276e9040ed9a80fc047a5e3b94410157f1cf", citations=[ stdpopsim.Citation( year=2018, author="Hunt et al", doi="https://doi.org/10.1093/database/bay119", reasons={stdpopsim.CiteReason.ANNOTATION}, ) ], file_pattern="ensembl_havana_exons_{id}.txt", annotation_source="ensembl_havana", annotation_type="exon", ) _species.add_annotations(_an) # add CDS _an2 = stdpopsim.Annotation( species=_species, id="ensembl_havana_104_CDS", description="Ensembl Havana CDS annotations on GRCh38", url=("ftp://ftp.ensembl.org/pub/release-104/"
import msprime import stdpopsim from . import genome_data ########################################################### # # Genome definition # ########################################################### # De novo assembly of the cattle reference genome with single-molecule sequencing. _RosenEtAl = stdpopsim.Citation( doi="https://doi.org/10.1093/gigascience/giaa021", year="2020", author="Rosen et al.", ) # Frequency of mosaicism points towards mutation-prone early cleavage # cell divisions in cattle. _HarlandEtAl = stdpopsim.Citation( author="Harland et al.", year="2017", # BioRxiv preprint doi="https://doi.org/10.1101/079863", ) # Cattle Sex-Specific Recombination and Genetic Control from a # Large Pedigree Analysis. _MaEtAl = stdpopsim.Citation(
class _MsprimeEngine(Engine):
    """
    Simulation engine that runs a demographic model through
    :func:`msprime.simulate`.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]
    # We default to the first model in the list.
    supported_models = ["hudson", "dtwf", "smc", "smc_prime"]
    # Extra citations that apply only when a particular model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    def _add_model_citations(self, model):
        """
        Record the citations associated with ``model``, if any.

        ``citations`` is a class-level list, so entries are only appended
        when not already present; otherwise every simulation would add
        another copy of the same citation.
        """
        for citation in self.model_citations.get(model, []):
            if citation not in self.citations:
                self.citations.append(citation)

    def simulate(
        self,
        demographic_model=None,
        contig=None,
        samples=None,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time
            specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to
            :meth:`msprime.simulate()` to initialise the simulation and then
            immediately return.
        :type dry_run: bool
        :return: The simulated tree sequence, or ``None`` if ``dry_run`` is
            true.
        :raises ValueError: If ``msprime_model`` or a model named in
            ``msprime_change_model`` is not in ``supported_models``.
        """
        if msprime_model is None:
            msprime_model = self.supported_models[0]
        else:
            if msprime_model not in self.supported_models:
                raise ValueError(f"Unrecognised model '{msprime_model}'")
            self._add_model_citations(msprime_model)

        # Work on a copy so the model's own event list is never modified.
        demographic_events = demographic_model.demographic_events.copy()
        if msprime_change_model is not None:
            for t, model in msprime_change_model:
                if model not in self.supported_models:
                    raise ValueError(f"Unrecognised model '{model}'")
                model_change = msprime.SimulationModelChange(t, model)
                demographic_events.append(model_change)
                self._add_model_citations(model)
            # The appended model changes must be interleaved with the
            # existing events in time order.
            demographic_events.sort(key=lambda x: x.time)

        ts = msprime.simulate(
            samples=samples,
            recombination_map=contig.recombination_map,
            mutation_rate=contig.mutation_rate,
            population_configurations=demographic_model.population_configurations,
            migration_matrix=demographic_model.migration_matrix,
            demographic_events=demographic_events,
            random_seed=seed,
            model=msprime_model,
            end_time=0 if dry_run else None,
        )

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

        if dry_run:
            ts = None
        return ts

    def get_version(self):
        """Return the version string of the installed msprime package."""
        return msprime.__version__
"15": 1.3835785893339787e-08, "16": 1.4834607113882717e-08, "17": 1.582489036239487e-08, "18": 1.5075956950023575e-08, "19": 1.8220141872466202e-08, "20": 1.7178269031631664e-08, "21": 1.3045214034879191e-08, "22": 1.4445022767788226e-08, "X": 1.164662223273842e-08, "Y": 0.0, "MT": 0.0, } _genome2001 = stdpopsim.Citation( doi="http://dx.doi.org/10.1038/35057062", year=2001, author="International Human Genome Sequencing Consortium", reasons={stdpopsim.CiteReason.ASSEMBLY}, ) _hapmap2007 = stdpopsim.Citation( doi="https://doi.org/10.1038/nature06258", year=2007, author="The International HapMap Consortium", ) _takahata1993 = stdpopsim.Citation( doi="https://doi.org/10.1093/oxfordjournals.molbev.a039995", year=1993, author="Takahata", reasons={stdpopsim.CiteReason.POP_SIZE}, )
import stdpopsim _hapmap2007 = stdpopsim.Citation( doi="https://doi.org/10.1038/nature06258", year=2007, author="The International HapMap Consortium", ) _species = stdpopsim.get_species("HomSap") _gm = stdpopsim.GeneticMap( species=_species, id="HapMapII_GRCh37", description="HapMap Phase II lifted over to GRCh37", long_description=""" This genetic map is from the Phase II Hapmap project and based on 3.1 million genotyped SNPs from 270 individuals across four populations (YRI, CEU, CHB and JPT). Genome wide recombination rates were estimated using LDHat. This version of the HapMap genetic map was lifted over to GRCh37 (and adjusted in regions where the genome assembly had rearranged) for use in the 1000 Genomes project. Please see the README file on the 1000 Genomes download site for details of these adjustments. ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110106_recombination_hotspots """, url=( "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/" "HomSap/HapmapII_GRCh37_RecombinationHotspots.tar.gz" ), sha256="80f22d9e6cb0e497074ed1bc277e765fa9d8e22f21b2f66c3b10286520f6b68f",
import math import msprime import stdpopsim _species = stdpopsim.get_species("PonAbe") _locke2011 = stdpopsim.Citation( author="Locke et al.", year=2011, doi="http://doi.org/10.1038/nature09687" ) def _orangutan(): id = "TwoSpecies_2L11" description = "Two population orangutan model" long_description = """ The two orang-utan species, Sumatran (Pongo abelii) and Bornean (Pongo pygmaeus) inferred from the joint-site frequency spectrum with ten individuals from each population. This model is an isolation-with- migration model, with exponential growth or decay in each population after the split. The Sumatran population grows in size, while the Bornean population slightly declines. """ citations = [_locke2011.because(stdpopsim.CiteReason.DEM_MODEL)] populations = [ stdpopsim.Population("Bornean", "Pongo pygmaeus (Bornean) population"),
import msprime import stdpopsim _species = stdpopsim.get_species("DroMel") _LiAndStephan = stdpopsim.Citation( author="Li et al.", year=2006, doi="https://doi.org/10.1371/journal.pgen.0020166") # population definitions that are reused. _afr_population = stdpopsim.Population( id="AFR", description="African D. melanogaster population") _eur_population = stdpopsim.Population( id="EUR", description="European D. melanogaster population") def _afr_3epoch(): id = "African3Epoch_1S16" description = "Three epoch African population" long_description = """ The three epoch (modern, bottleneck, ancestral) model estimated for a single African Drosophila Melanogaster population from Sheehan and Song (2016). Population sizes are estimated by a deep learning model trained on simulation data. NOTE: Due to differences in coalescence units between PSMC (2N) and msms (4N) the number of generations were doubled from PSMC estimates when simulating data from msms in the original publication. We have faithfully represented the published model here. """ populations = [_afr_population]
import collections

import stdpopsim

from . import genome_data

# Rosen et al. (2020): "De novo assembly of the cattle reference genome with
# single-molecule sequencing."  Cited for the assembly.
_RosenEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1093/gigascience/giaa021",
    author="Rosen et al.",
    year=2020,
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Harland et al. (2017): "Frequency of mosaicism points towards
# mutation-prone early cleavage cell divisions in cattle."  Cited for the
# mutation rate.
_HarlandEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1101/079863",
    author="Harland et al.",
    year=2017,
    reasons={stdpopsim.CiteReason.MUT_RATE},
)

# Ma et al. (2015): "Cattle Sex-Specific Recombination and Genetic Control
# from a Large Pedigree Analysis."  Cited for the recombination rate.
_MaEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.1005387",
    author="Ma et al.",
    year=2015,
    reasons={stdpopsim.CiteReason.REC_RATE},
)
class _MsprimeEngine(Engine):
    """
    Simulation engine that runs a demographic model through
    :func:`msprime.sim_ancestry` followed by :func:`msprime.sim_mutations`.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]
    # We default to the first model in the list.
    supported_models = ["hudson", "dtwf", "smc", "smc_prime"]
    # Extra citations that apply only when a particular model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    def _add_model_citations(self, model):
        """
        Record the citations associated with ``model``, if any.

        ``citations`` is a class-level list, so entries are only appended
        when not already present; otherwise every simulation would add
        another copy of the same citation.
        """
        for citation in self.model_citations.get(model, []):
            if citation not in self.citations:
                self.citations.append(citation)

    def simulate(
        self,
        demographic_model,
        contig,
        samples,
        *,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
        **kwargs,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param seed: Seed from which the two seeds for ancestry and mutation
            simulation are drawn. May alternatively be given as the
            ``random_seed`` keyword argument, but not both.
        :type seed: int
        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time
            specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to
            :meth:`msprime.sim_ancestry()` to initialise the simulation and
            then immediately return.
        :type dry_run: bool
        :param \\**kwargs: Further arguments passed to
            :meth:`msprime.sim_ancestry()`
        :return: The simulated tree sequence, or ``None`` if ``dry_run`` is
            true.
        :raises ValueError: If a model name is not in ``supported_models``,
            or if both ``seed`` and ``random_seed`` are given.
        """
        if msprime_model is None:
            msprime_model = self.supported_models[0]
        else:
            if msprime_model not in self.supported_models:
                raise ValueError(f"Unrecognised model '{msprime_model}'")
            self._add_model_citations(msprime_model)

        if msprime_change_model is not None:
            # Build the model list: the initial model name followed by one
            # (time, model) entry per requested change.
            msprime_model = [msprime_model]
            for t, model in msprime_change_model:
                if model not in self.supported_models:
                    raise ValueError(f"Unrecognised model '{model}'")
                msprime_model.append((t, model))
                self._add_model_citations(model)

        # Accept ``random_seed`` as an alias for ``seed``, but never both.
        if "random_seed" in kwargs:
            if seed is not None:
                raise ValueError("Cannot set both seed and random_seed")
            seed = kwargs.pop("random_seed")

        # TODO: remove this after a release or two. See #745.
        self._warn_zigzag(demographic_model)

        # Draw separate seeds for the ancestry and mutation simulations.
        rng = np.random.default_rng(seed)
        seeds = rng.integers(1, 2**31 - 1, size=2)

        ts = msprime.sim_ancestry(
            samples=samples,
            recombination_rate=contig.recombination_map,
            demography=demographic_model.model,
            ploidy=2,
            random_seed=seeds[0],
            model=msprime_model,
            end_time=0 if dry_run else None,
            **kwargs,
        )
        ts = msprime.sim_mutations(
            ts,
            end_time=0 if dry_run else None,
            random_seed=seeds[1],
            rate=contig.mutation_rate,
        )

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

        if dry_run:
            ts = None
        return ts

    def get_version(self):
        """Return the version string of the installed msprime package."""
        return msprime.__version__
def _sma_1pop():
    """
    Build the single-population, piecewise-constant "SouthMiddleAtlas_1D17"
    demographic model.
    """
    # Epoch boundaries: sizes[k] applies from times[k] up to times[k+1].
    times = np.array(
        [
            699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
            41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
            185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
            693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500, 2110100,
        ]
    )
    sizes = np.array(
        [
            42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
            78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
            111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
            217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989,
        ]
    )

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al 2017),
    # so the estimates outside that window are flattened.
    # Entries 0..7 are overwritten with the entry at index 8 (~40Kya).
    sizes[:8] = sizes[8]
    # Entries 30..31 are overwritten with the entry at index 30 (~1.6Mya).
    sizes[30:32] = sizes[30]

    # One size change per epoch boundary, all on the single population.
    size_changes = [
        msprime.PopulationParametersChange(
            time=t, initial_size=sz, population_id=0
        )
        for sz, t in zip(sizes, times)
    ]

    atlas_population = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population",
    )
    populations = [atlas_population]

    durvasula2017 = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )

    # The present-day size is the (flattened) first entry.
    present_day = msprime.PopulationConfiguration(
        initial_size=sizes[0], metadata=populations[0].asdict()
    )

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        long_description="""
            This model comes from MSMC using two randomly sampled homozygous
            individuals (Khe32 and Ifr4) from the South Middle Atlas region
            from the Middle Atlas Mountains in Morocco. The model is estimated
            with 32 time periods. Because estimates from the recent and ancient
            past are less accurate, we set the population size in the first 7
            time periods equal to the size at the 8th time period and the size
            during last 2 time periods equal to the size in the 30th time
            period.
        """,
        populations=populations,
        citations=[durvasula2017],
        generation_time=1,
        demographic_events=size_changes,
        population_configurations=[present_day],
    )
"31": 1.1397713284329192e-08, "32": 1.1555927931648279e-08, "33": 1.3339402745926785e-08, "34": 1.0483812411227089e-08, "35": 1.4299102611645524e-08, "36": 1.187517782077471e-08, "37": 1.3834580623461596e-08, "38": 1.4363726512881696e-08, "X": 9.506483722244087e-09, "MT": 0, } _LindbladTohEtAl = stdpopsim.Citation( # Genome sequence, comparative analysis and haplotype structure of the # domestic dog. author="Lindblad-Toh et al.", year=2005, doi="https://doi.org/10.1038/nature04338", ) _SkoglundEtAl = stdpopsim.Citation( # Ancient wolf genome reveals an early divergence of domestic dog # ancestors and admixture into high-latitude breeds. author="Skoglund et al.", year=2015, doi="https://doi.org/10.1016/j.cub.2015.04.019", ) _FranzEtAl = stdpopsim.Citation( # Genomic and archaeological evidence suggest a dual origin of # domestic dogs.
"14": 4.70e-9, "15": 4.82e-9, "16": 6.12e-9, "17": 7.26e-9, "18": 4.57e-9, "19": 7.56e-9, "20": 5.83e-9, "21": 4.98e-9, "22": 6.03e-9, "X": 9.50e-9, "MT": 0, } _locke2011 = stdpopsim.Citation( author="Locke et al.", year=2011, doi="http://doi.org/10.1038/nature09687", reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.POP_SIZE}, ) _nater2017 = stdpopsim.Citation( author="Nater et al.", year=2017, doi="https://doi.org/10.1016/j.cub.2017.09.047", reasons={stdpopsim.CiteReason.MUT_RATE, stdpopsim.CiteReason.REC_RATE}, ) _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name,
# https://www.ncbi.nlm.nih.gov/genome/?term=drosophila+melanogaster. # FIXME: add mean mutation and recombination rate data to this table. _chromosome_data = """\ chrX 23542271 chr2L 23513712 chr2R 25286936 chr3L 28110227 chr3R 32079331 chr4 1348131 chrY 3667352 chrM 19524 """ # citations _LiAndStephan = stdpopsim.Citation( author="Li et al.", year=2006, doi="https://doi.org/10.1371/journal.pgen.0020166") _SchriderEtAl = stdpopsim.Citation( author="Schrider et al.", year=2013, doi="https://doi.org/10.1534/genetics.113.151670") _chromosomes = [] for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=5.49e-9, # citation: _SchriderEtAl
inbred lines of D. melanogaster. This is based on the products of 5,860 female meioses from whole genome sequencing data. Recombination rates were calculated from the density of individual recombination events that were detected in crosses. This map was subsequently lifted over to the dm6 assembly. """, url=( "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/" "DroMel/comeron2012_maps.tar.gz" ), sha256="08185a0e3b0ad26eefe69fc6bdb8f3f599a760e11e87dd343335b33d1563f62a", file_pattern="genetic_map_comeron2012_dm6_chr{id}.txt", citations=[ stdpopsim.Citation( author="Comeron et al", doi="https://doi.org/10.1371/journal.pgen.1002905", year=2012, reasons={stdpopsim.CiteReason.GEN_MAP}, ) ], ) _species.add_genetic_map(_gm) _gm = stdpopsim.GeneticMap( species=_species, id="ComeronCrossoverV2_dm6", description="Crossover map from meioses products of 8 lab crosses", long_description=""" The crossover map from a study of 8 crosses of 12 highly inbred lines of D. melanogaster. This is based on the products of 5,860 female meioses from whole genome sequencing data. Recombination rates were calculated from the density of individual
"""
Genome and demographic model definitions for Escherichia coli.
"""
import stdpopsim

###########################################################
#
# Genome definition
#
###########################################################

# Literature references.  Publication years are given as integers, matching
# the other citation definitions, so that years compare and sort
# consistently across citations.
_lapierre_et_al = stdpopsim.Citation(
    author="Lapierre et al.",
    year=2016,
    doi="https://doi.org/10.1093/molbev/msw048")

_sezonov_et_al = stdpopsim.Citation(
    author="Sezonov et al.",
    year=2007,
    doi="https://doi.org/10.1128/JB.01368-07")

_perfeito_et_al = stdpopsim.Citation(
    author="Perfeito et al.",
    year=2007,
    doi="https://doi.org/10.1126/science.1142284")

_kibota_and_lynch = stdpopsim.Citation(
    author="Kibota and Lynch",
    year=1996,
    doi="https://doi.org/10.1038/381694a0")
import stdpopsim

_species = stdpopsim.get_species("CanFam")

# Campbell et al. (2016): "A Pedigree-Based Map of Recombination in the
# Domestic Dog Genome."
_CampbellEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/g3.116.034678",
    author="Campbell et al.",
    year=2016,
)

# Define the genetic map and register it on the species object.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="Campbell2016_CanFam3_1",
    description="Pedigree-based crossover map from 237 individuals",
    long_description="""
        Sex-averaged crossover frequency map based on 163,400 autosomal SNPs
        genotyped in a pedigree of 237 Labrador Retriever x Greyhound crosses.
        Genotypes were phased without respect to the pedigree, using SHAPEIT2,
        recombinations were called using duoHMM, and genetic distances were
        obtained using Haldane's map function.
        """,
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "CanFam/dog_genetic_maps.tar.gz"
    ),
    sha256="585afb424615e2fb0825d807db0b10fe1c797a6dbb804ecbb3fef5e8387d194f",
    # ``{id}`` is a placeholder left unformatted in this template.
    file_pattern="chr{id}_average_canFam3.1.txt",
    citations=[_CampbellEtAl.because(stdpopsim.CiteReason.GEN_MAP)],
)
_species.add_genetic_map(_gm)
class _MsprimeEngine(Engine):
    """
    Simulation engine that runs a demographic model through
    :func:`msprime.sim_ancestry` followed by :func:`msprime.sim_mutations`.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]
    # Maps each supported model name to the msprime ancestry model class
    # that implements it.
    model_class_map = {
        "hudson": msprime.StandardCoalescent,
        "dtwf": msprime.DiscreteTimeWrightFisher,
        "smc": msprime.SmcApproxCoalescent,
        "smc_prime": msprime.SmcPrimeApproxCoalescent,
    }
    # Extra citations that apply only when a particular model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    @property
    def supported_models(self):
        """The list of model names accepted by :meth:`simulate`."""
        return list(self.model_class_map.keys())

    def _convert_model_spec(self, model_str, model_changes):
        """
        Convert the specified model specification into a form suitable
        for sim_ancestry. The model param is a string or None. The
        model_changes is either None or list of (time, model_str) tuples.
        Also return the appropriate extra citations.

        :return: A ``(model, citations)`` tuple. ``model`` is the model name
            when ``model_changes`` is None, otherwise a list of model
            instances, each but the last with the duration for which it
            applies.
        :raises ValueError: If any model name is not recognised.
        """
        citations = []
        if model_str is None:
            model_str = "hudson"
        else:
            if model_str not in self.model_class_map:
                raise ValueError(f"Unrecognised model '{model_str}'")
            if model_str in self.model_citations:
                citations.extend(self.model_citations[model_str])

        if model_changes is None:
            model = model_str
        else:
            model_list = []
            last_t = 0
            last_model = model_str
            for t, model in model_changes:
                if model not in self.model_class_map:
                    raise ValueError(f"Unrecognised model '{model}'")
                if model in self.model_citations:
                    citations.extend(self.model_citations[model])
                # The previous model runs from its own start time until
                # this change.
                duration = t - last_t
                model_list.append(
                    self.model_class_map[last_model](duration=duration))
                last_model = model
                last_t = t
            # The final model has no end time.
            model_list.append(self.model_class_map[last_model](duration=None))
            model = model_list
        return model, citations

    def simulate(
        self,
        demographic_model,
        contig,
        samples,
        *,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
        **kwargs,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param seed: Seed from which the two seeds for ancestry and mutation
            simulation are drawn. May alternatively be given as the
            ``random_seed`` keyword argument, but not both.
        :type seed: int
        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time
            specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to
            :meth:`msprime.sim_ancestry()` to initialise the simulation and
            then immediately return.
        :type dry_run: bool
        :param \\**kwargs: Further arguments passed to
            :meth:`msprime.sim_ancestry()`
        :return: The simulated tree sequence, or ``None`` if ``dry_run`` is
            true.
        :raises ValueError: If a model name is not recognised, or if both
            ``seed`` and ``random_seed`` are given.
        """
        model, citations = self._convert_model_spec(
            msprime_model, msprime_change_model)
        # ``citations`` is a class-level list, so only add entries that are
        # not already present; otherwise every simulation would append
        # another copy of the same citation.
        for citation in citations:
            if citation not in self.citations:
                self.citations.append(citation)

        # Accept ``random_seed`` as an alias for ``seed``, but never both.
        if "random_seed" in kwargs:
            if seed is not None:
                raise ValueError("Cannot set both seed and random_seed")
            seed = kwargs.pop("random_seed")

        # TODO: remove this after a release or two. See #745.
        self._warn_zigzag(demographic_model)
        self._warn_mutation_rate_mismatch(contig, demographic_model)

        # Draw separate seeds for the ancestry and mutation simulations.
        rng = np.random.default_rng(seed)
        seeds = rng.integers(1, 2**31 - 1, size=2)

        ts = msprime.sim_ancestry(
            samples=samples,
            recombination_rate=contig.recombination_map,
            demography=demographic_model.model,
            ploidy=2,
            random_seed=seeds[0],
            model=model,
            end_time=0 if dry_run else None,
            **kwargs,
        )
        ts = msprime.sim_mutations(
            ts,
            end_time=0 if dry_run else None,
            random_seed=seeds[1],
            rate=contig.mutation_rate,
        )

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

        if dry_run:
            ts = None
        return ts

    def get_version(self):
        """Return the version string of the installed msprime package."""
        return msprime.__version__
class _SLiMEngine(stdpopsim.Engine):
    """
    Engine that writes a SLiM script for a demographic model, runs it with
    the ``slim`` executable, and post-processes the resulting tree sequence
    (time rescaling, recapitation, simplification and mutation overlay).
    """

    id = "slim"  #:
    description = "SLiM forward-time Wright-Fisher simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1111/1755-0998.12968",
            year=2019,
            author="Haller et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        ),
    ]

    def slim_path(self):
        """
        Return the SLiM executable to use: the value of the ``SLIM``
        environment variable if set, otherwise ``"slim"``.
        """
        return os.environ.get("SLIM", "slim")

    def get_version(self):
        """
        Return the SLiM version, parsed from the third whitespace-separated
        field of the output of ``slim -v`` (trailing comma removed).
        """
        s = subprocess.check_output([self.slim_path(), "-v"])
        return s.split()[2].decode("ascii").rstrip(",")

    def simulate(
        self,
        demographic_model=None,
        contig=None,
        samples=None,
        seed=None,
        mutation_types=None,
        extended_events=None,
        slim_path=None,
        slim_script=False,
        slim_scaling_factor=1.0,
        slim_burn_in=10.0,
        dry_run=False,
    ):
        """
        Simulate the demographic model using SLiM.
        See :meth:`.Engine.simulate()` for definitions of the
        ``demographic_model``, ``contig``, and ``samples`` parameters.

        :param seed: The seed for the random number generator.
        :type seed: int
        :param mutation_types: Passed to
            :func:`stdpopsim.ext.slim_mutation_frac` to determine the fraction
            of the mutation rate that SLiM itself simulates, and forwarded to
            the script generator.
        :param extended_events: Forwarded unchanged to the script generator.
        :param slim_path: The full path to the slim executable, or the name of
            a command in the current PATH.
        :type slim_path: str
        :param slim_script: If true, the simulation will not be executed.
            Instead the generated SLiM script will be printed to stdout.
        :type slim_script: bool
        :param slim_scaling_factor: Rescale model parameters by the given value,
            to speed up simulation. Population sizes and generation times are
            divided by this factor, whereas the mutation rate, recombination
            rate, and growth rates are multiplied by the factor.
            See SLiM manual: `5.5 Rescaling population sizes to improve
            simulation performance.`
        :type slim_scaling_factor: float
        :param slim_burn_in: Length of the burn-in phase, in units of N
            generations.
        :type slim_burn_in: float
        :param dry_run: If True, run the first generation setup and then end
            the simulation.
        :type dry_run: bool
        :raises ValueError: If ``slim_scaling_factor`` is not positive or
            ``slim_burn_in`` is negative.
        :raises SLiMException: If SLiM exits with a non-zero code or writes
            to stderr.
        :return: The post-processed tree sequence, or ``None`` when
            ``slim_script`` or ``dry_run`` is true.
        """
        if slim_scaling_factor <= 0:
            raise ValueError("slim_scaling_factor must be positive")
        if slim_burn_in < 0:
            raise ValueError("slim_burn_in must be non-negative")

        if slim_scaling_factor != 1:
            warnings.warn(
                stdpopsim.SLiMScalingFactorWarning(
                    f"You're using a scaling factor ({slim_scaling_factor}). "
                    "This should give similar results for many situations, "
                    "but is not equivalent, especially in the presence of selection. "
                    "When using rescaling, you should be careful---do checks and "
                    "compare results across different values of the scaling factor."
                ))

        run_slim = not slim_script

        # Ensure only "weighted" mutations are introduced by SLiM.
        mutation_rate = contig.mutation_rate
        slim_frac = stdpopsim.ext.slim_mutation_frac(mutation_types)
        contig = stdpopsim.Contig(
            recombination_map=contig.recombination_map,
            mutation_rate=slim_frac * mutation_rate,
            genetic_map=contig.genetic_map,
            inclusion_mask=contig.inclusion_mask,
            exclusion_mask=contig.exclusion_mask,
        )

        mktemp = functools.partial(tempfile.NamedTemporaryFile, mode="w")

        @contextlib.contextmanager
        def script_file_f():
            # Write the script to a temporary file, or to stdout when the
            # caller only wants the script printed.
            f = mktemp(suffix=".slim") if not slim_script else sys.stdout
            try:
                yield f
            finally:
                # Close (and thereby remove) the temporary file even when the
                # body of the ``with`` raises. Don't close sys.stdout.
                if not slim_script:
                    f.close()

        with script_file_f() as script_file, mktemp(suffix=".ts") as ts_file:

            recap_epoch = slim_makescript(
                script_file,
                ts_file.name,
                demographic_model,
                contig,
                samples,
                mutation_types,
                extended_events,
                slim_scaling_factor,
                slim_burn_in,
            )

            script_file.flush()

            if not run_slim:
                return None

            self._run_slim(script_file.name, slim_path=slim_path, seed=seed,
                           dry_run=dry_run)

            if dry_run:
                return None

            # Load the output before the temporary file is removed on exit.
            ts = pyslim.load(ts_file.name)

        ts = self._recap_and_rescale(ts, seed, recap_epoch, contig,
                                     mutation_rate, slim_frac,
                                     slim_scaling_factor)

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

        return ts

    def _run_slim(self, script_file, slim_path=None, seed=None, dry_run=False):
        """
        Run SLiM.

        We capture the output using Popen's line-oriented text buffering
        (bufsize=1, universal_newlines=True) and redirect all messages to
        Python's logging module.
        By convention, messages from SLiM prefixed with "ERROR: " or
        "WARNING: " are treated as ERROR or WARN loglevels respectively.
        All other output on stdout is given the DEBUG loglevel.
        ERROR messages, and any output from SLiM on stderr, will raise a
        SLiMException here.

        :param script_file: Path of the SLiM script to execute.
        :param slim_path: Executable to run; defaults to :meth:`slim_path`.
        :param seed: If not None, passed to SLiM with ``-s``.
        :param dry_run: If true, ``-d dry_run=T`` is added to the command.
        :raises SLiMException: On a non-zero exit code or any stderr output.
        """
        if slim_path is None:
            slim_path = self.slim_path()
        slim_cmd = [slim_path]
        if seed is not None:
            slim_cmd.extend(["-s", f"{seed}"])
        if dry_run:
            slim_cmd.extend(["-d", "dry_run=T"])
        slim_cmd.append(script_file)

        with subprocess.Popen(
            slim_cmd,
            bufsize=1,
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as proc:
            for line in proc.stdout:
                line = line.rstrip()
                if line.startswith("ERROR: "):
                    logger.error(line[len("ERROR: "):])
                elif line.startswith("WARNING: "):
                    warnings.warn(
                        stdpopsim.UnspecifiedSLiMWarning(
                            line[len("WARNING: "):]))
                else:
                    # filter `dbg` function calls that generate output
                    line = line.replace("dbg(self.source); ", "")
                    logger.debug(line)
            # NOTE(review): stderr is only drained once stdout reaches EOF; a
            # child that writes a large amount to stderr first could block.
            stderr = proc.stderr.read()

        # The return code is only set once the ``with`` block has waited for
        # the process, so this check must come after it.
        if proc.returncode != 0 or stderr:
            raise SLiMException(
                f"{slim_path} exited with code {proc.returncode}.\n{stderr}")

    def _simplify_remembered(self, ts):
        """
        Remove all samples except those individuals that were explicitly
        sampled in SLiM with sim.treeSeqRememberIndividuals().
        """
        nodes = itertools.chain.from_iterable(
            i.nodes for i in ts.individuals()
            if i.flags & pyslim.INDIVIDUAL_REMEMBERED)
        return ts.simplify(samples=list(nodes), filter_populations=False)

    def _recap_and_rescale(
        self,
        ts,
        seed,
        recap_epoch,
        contig,
        mutation_rate,
        slim_frac,
        slim_scaling_factor,
    ):
        """
        Apply post-SLiM transformations to ``ts``. This rescales node times,
        does recapitation, simplification, and adds neutral mutations.

        :param ts: The tree sequence produced by SLiM.
        :param seed: Seed for the generator from which the recapitation and
            mutation seeds are drawn.
        :param recap_epoch: Epoch returned by the script generator; its
            populations and migration matrix configure the recapitation.
        :param contig: Contig whose recombination map supplies the mean
            recombination rate used for recapitation.
        :param mutation_rate: The full (unweighted) mutation rate.
        :param slim_frac: Fraction of ``mutation_rate`` already simulated by
            SLiM; the remainder is overlaid here.
        :param slim_scaling_factor: Factor by which times are multiplied.
        :return: The transformed tree sequence.
        """
        # Node times come from SLiM generation numbers, which may have been
        # divided by a scaling factor for computational tractability.
        tables = ts.dump_tables()
        for table in (tables.nodes, tables.migrations):
            table.time *= slim_scaling_factor
        ts = pyslim.SlimTreeSequence.load_tables(tables)
        ts.slim_generation *= slim_scaling_factor

        # The order of these draws determines the seeds; keep it stable.
        rng = random.Random(seed)
        s1, s2 = rng.randrange(1, 2**32), rng.randrange(1, 2**32)

        population_configurations = [
            msprime.PopulationConfiguration(initial_size=pop.start_size,
                                            growth_rate=pop.growth_rate)
            for pop in recap_epoch.populations
        ]
        ts = ts.recapitate(
            recombination_rate=contig.recombination_map.mean_recombination_rate,
            population_configurations=population_configurations,
            migration_matrix=recap_epoch.migration_matrix,
            random_seed=s1,
        )

        ts = self._simplify_remembered(ts)

        if slim_frac < 1:
            # Add mutations to SLiM part of trees.
            rate = (1 - slim_frac) * mutation_rate
            ts = pyslim.SlimTreeSequence(
                msprime.mutate(
                    ts,
                    rate=rate,
                    keep=True,
                    random_seed=s2,
                    end_time=ts.slim_generation,
                ))

        # Add mutations to recapitated part of trees.
        s3 = rng.randrange(1, 2**32)
        ts = pyslim.SlimTreeSequence(
            msprime.mutate(
                ts,
                rate=mutation_rate,
                keep=True,
                random_seed=s3,
                start_time=ts.slim_generation,
            ))

        return ts

    def recap_and_rescale(
        self,
        ts,
        demographic_model,
        contig,
        samples,
        mutation_types=None,
        extended_events=None,
        slim_scaling_factor=1.0,
        seed=None,
        **kwargs,
    ):
        """
        Apply post-SLiM transformations to ``ts``. This rescales node times,
        does recapitation, simplification, and adds neutral mutations.

        If the SLiM engine was used to output a SLiM script, and the script was
        run outside of stdpopsim, this function can be used to transform the
        SLiM tree sequence following the procedure that would have been used
        if stdpopsim had run SLiM itself.
        The parameters after ``ts`` have the same meaning as for
        :func:`simulate`, and the values for ``demographic_model``, ``contig``,
        ``samples``, and ``slim_scaling_factor`` should match those that were
        used to generate the SLiM script with :func:`simulate`.
        As in :func:`simulate`, the contig's inclusion and exclusion masks
        (if any) are applied to the result. Additional keyword arguments are
        accepted and ignored.

        :param ts: The tree sequence output by SLiM.
        :type ts: :class:`pyslim.SlimTreeSequence`
        :return: The transformed tree sequence.

        .. warning::
            The :func:`recap_and_rescale` function is provided in the hope that
            it will be useful. But as we can't anticipate what changes you'll
            make to the SLiM code before using it, the stdpopsim source code
            should be consulted to determine if the behaviour is appropriate
            for your case.
        """
        # Only "weighted" mutations are introduced by SLiM.
        mutation_rate = contig.mutation_rate
        slim_frac = stdpopsim.ext.slim_mutation_frac(mutation_types)
        contig = stdpopsim.Contig(
            recombination_map=contig.recombination_map,
            mutation_rate=slim_frac * mutation_rate,
            genetic_map=contig.genetic_map,
            # Carry the masks over so the result matches simulate().
            inclusion_mask=contig.inclusion_mask,
            exclusion_mask=contig.exclusion_mask,
        )

        # The script itself is discarded; generating it is only needed to
        # obtain the epoch that configures recapitation.
        with open(os.devnull, "w") as script_file:
            recap_epoch = slim_makescript(
                script_file,
                "unused.trees",
                demographic_model,
                contig,
                samples,
                mutation_types,
                extended_events,
                slim_scaling_factor,
                1,
            )

        ts = self._recap_and_rescale(ts, seed, recap_epoch, contig,
                                     mutation_rate, slim_frac,
                                     slim_scaling_factor)

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

        return ts
chr12 136387465 5.44e-9
chr13 117095149 4.91e-9
chr14 108868599 4.70e-9
chr15 99152023 4.82e-9
chr16 77800216 6.12e-9
chr17 73212453 7.26e-9
chr18 94050890 4.57e-9
chr19 60714840 7.56e-9
chr20 62736349 5.83e-9
chr21 48394510 4.98e-9
chr22 46535552 6.03e-9
chrX 156195299 9.50e-9
"""

# Citation objects for the publications referenced by this species module.
_locke2011 = stdpopsim.Citation(author="Locke et al.", year=2011,
                                doi="http://doi.org/10.1038/nature09687")

_nater2017 = stdpopsim.Citation(
    author="Nater et al.", year=2017,
    doi="https://doi.org/10.1016/j.cub.2017.09.047")

# Build one Chromosome per row of the table above. Each row supplies, in
# order, the chromosome name, its length and its recombination rate; every
# chromosome is given the same mutation rate of 1.5e-8.
_chromosomes = []
for line in _chromosome_data.splitlines():
    # Only the first three whitespace-separated fields are used.
    name, length, mean_rr = line.split()[:3]
    _chromosomes.append(
        stdpopsim.Chromosome(id=name,
                             length=int(length),
                             mutation_rate=1.5e-8,
                             recombination_rate=float(mean_rr)))
import stdpopsim

from . import genome_data

# Per-chromosome recombination rates, keyed by chromosome id.
# These are in Table 1 of Juneja et al:
# NOTE(review): the magnitudes look like cM/Mb figures rather than per-base
# probabilities -- confirm the units expected by the consumer of this dict.
_recombination_rate = {"1": 0.306, "2": 0.249, "3": 0.291, "MT": 0}

# Cited as the source of the recombination rate.
_JunejaEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pntd.0002652",
    year=2014,
    author="Juneja et al.",
    reasons={stdpopsim.CiteReason.REC_RATE},
)

# Cited for generation time, population size and mutation rate.
_CrawfordEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1186/s12915-017-0351-0",
    year=2017,
    author="Crawford et al.",
    reasons={
        stdpopsim.CiteReason.GEN_TIME,
        stdpopsim.CiteReason.POP_SIZE,
        stdpopsim.CiteReason.MUT_RATE,
    },
)

_KeightleyEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1101/gr.091231.109",
    year=2009,
    author="Keightley et al.",
    reasons={
def test_get_bibtex_bad_connection(self):
    """fetch_bibtex() must raise ValueError when the DOI is not a valid URL."""
    bogus = stdpopsim.Citation(doi='DOI', author="Authors", year="2000")
    self.assertRaises(ValueError, bogus.fetch_bibtex)
"13": 7.56e-10, "14": 8.96e-10, "15": 6.91e-10, "16": 9.59e-10, "17": 1.05e-9, } _genome = stdpopsim.Genome.from_data( genome_data.data, recombination_rate=_recombination_rate, mutation_rate=_mutation_rate, citations=[ stdpopsim.Citation( author="Merchant et al", year=2007, doi="https://doi.org/10.1126/science.1143609", reasons={stdpopsim.CiteReason.ASSEMBLY }, # v5 - v6 assembly still en route! ), stdpopsim.Citation( author="Hasan and Ness", year=2020, doi="https://doi.org/10.6084/m9.figshare.14608239.v1", reasons={stdpopsim.CiteReason.REC_RATE}, ), stdpopsim.Citation( author="Ness et al", year=2015, doi="https://doi.org/10.6084/m9.figshare.14700156.v1", reasons={stdpopsim.CiteReason.MUT_RATE}, ),
"13": 4.91e-9,
    "14": 4.70e-9,
    "15": 4.82e-9,
    "16": 6.12e-9,
    "17": 7.26e-9,
    "18": 4.57e-9,
    "19": 7.56e-9,
    "20": 5.83e-9,
    "21": 4.98e-9,
    "22": 6.03e-9,
    "X": 9.50e-9,
    "MT": 0,
}

# Citation objects for the publications referenced by this species module.
_locke2011 = stdpopsim.Citation(author="Locke et al.", year=2011,
                                doi="http://doi.org/10.1038/nature09687")

_nater2017 = stdpopsim.Citation(
    author="Nater et al.", year=2017,
    doi="https://doi.org/10.1016/j.cub.2017.09.047")

# Build one Chromosome per entry of genome_data.data["chromosomes"], taking
# the length and synonyms from that entry.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Nater et al. 2017 used mu=1.5e-8 per generation, based on the
import stdpopsim from . import genome_data _LovernEtAl = stdpopsim.Citation( doi="https://doi.org/10.1093/ilar.45.1.54", year=2004, author="Lovern et al.", reasons={stdpopsim.CiteReason.GEN_TIME}, ) _BourgeoisEtAl = stdpopsim.Citation( doi="https://doi.org/10.1093/gbe/evz110", year=2019, author="Pombi et al.", reasons={ stdpopsim.CiteReason.POP_SIZE, stdpopsim.CiteReason.MUT_RATE, stdpopsim.CiteReason.REC_RATE, }, ) # No recombination rate yet for this species. # Author of BourgeoisEtAl is sending the recombination map # Placeholder rate of 1cM/Mb used _recombo_rate = 1e-8 _recombination_rate = { "1": _recombo_rate, "2": _recombo_rate, "3": _recombo_rate,
def hominin_composite():
    """
    Build the four-population out-of-Africa demographic model with
    Neandertal admixture into CEU.

    Parameter values are combined from Kuhlwilm et al. (2016),
    Ragsdale & Gravel (2019) and Prüfer et al. (2017).
    Times are converted from years to generations using a generation
    time of 29 years.

    :return: The assembled :class:`stdpopsim.DemographicModel`.
    """
    model_id = "HomininComposite_4G20"
    summary = "Four population out of Africa with Neandertal admixture"
    details = """
        A composite of demographic parameters from multiple sources
    """

    # samples:
    # T_Altai = 115e3
    # T_Vindija = 55e3
    # n_YRI = 108
    # n_CEU = 99

    yri = stdpopsim.Population(id="YRI", description="1000 Genomes YRI (Yorubans)")
    ceu = stdpopsim.Population(
        id="CEU",
        description=(
            "1000 Genomes CEU (Utah Residents (CEPH) with Northern and "
            "Western European Ancestry"
        ),
    )
    nea = stdpopsim.Population(id="Nea", description="Neandertal lineage")
    anc = stdpopsim.Population(
        id="Anc", description="Ancestral hominins", sampling_time=None
    )
    populations = [yri, ceu, nea, anc]
    # Map each population id to its position in ``populations``.
    index_of = {p.id: i for i, p in enumerate(populations)}

    sources = [
        stdpopsim.Citation(
            author="Kuhlwilm et al.",
            year=2016,
            doi="https://doi.org/10.1038/nature16544",
        ),
        stdpopsim.Citation(
            author="Prüfer et al.",
            year=2017,
            doi="https://doi.org/10.1126/science.aao1887",
        ),
        stdpopsim.Citation(
            author="Ragsdale and Gravel",
            year=2019,
            doi="https://doi.org/10.1371/journal.pgen.1008204",
        ),
    ]

    generation_time = 29

    # Kuhlwilm et al. 2016
    N_YRI = 27000
    N_Nea = 3400
    N_Anc = 18500

    # Ragsdale & Gravel 2019
    N_CEU0 = 1450
    r_CEU = 0.00202
    T_CEU_exp = 31.9e3 / generation_time
    # Present-day CEU size after exponential growth from N_CEU0.
    N_CEU = N_CEU0 * math.exp(r_CEU * T_CEU_exp)
    T_YRI_CEU_split = 65.7e3 / generation_time
    N_ooa_bottleneck = 1080

    # Prüfer et al. 2017
    T_Nea_human_split = 550e3 / generation_time
    T_Nea_CEU_mig = 55e3 / generation_time
    m_Nea_CEU = 0.0225

    # One configuration per population, in the same order as ``populations``;
    # only CEU has a growth rate.
    config_kwargs = [
        {"initial_size": N_YRI},
        {"initial_size": N_CEU, "growth_rate": r_CEU},
        {"initial_size": N_Nea},
        {"initial_size": N_Anc},
    ]
    population_configurations = [
        msprime.PopulationConfiguration(metadata=p.asdict(), **kw)
        for p, kw in zip(populations, config_kwargs)
    ]

    def merge_into_anc(time, source_id):
        # Move every lineage of ``source_id`` into the ancestral population.
        return msprime.MassMigration(
            time=time, source=index_of[source_id], destination=index_of["Anc"]
        )

    # out-of-Africa bottleneck
    ooa_bottleneck = msprime.PopulationParametersChange(
        time=T_CEU_exp,
        initial_size=N_ooa_bottleneck,
        growth_rate=0,
        population_id=index_of["CEU"],
    )
    # Neandertal -> CEU admixture
    nea_admixture = msprime.MassMigration(
        time=T_Nea_CEU_mig,
        proportion=m_Nea_CEU,
        source=index_of["CEU"],
        destination=index_of["Nea"],
    )
    demographic_events = [
        ooa_bottleneck,
        nea_admixture,
        # population splits
        merge_into_anc(T_YRI_CEU_split, "CEU"),
        merge_into_anc(T_YRI_CEU_split, "YRI"),
        merge_into_anc(T_Nea_human_split, "Nea"),
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        populations=populations,
        citations=sources,
        generation_time=generation_time,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
    )
"""
Genome and demographic model definitions for Escherichia coli.
"""
import stdpopsim

from . import genome_data

###########################################################
#
# Genome definition
#
###########################################################

# Citation objects for the publications referenced by this species module.
_hartl_et_al = stdpopsim.Citation(
    author="Hartl, Moriyama, and Sawyer",
    year="1994",
    # doesn't have a doi
    doi="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1206133/",
)

_sezonov_et_al = stdpopsim.Citation(author="Sezonov et al.",
                                    year="2007",
                                    doi="https://doi.org/10.1128/JB.01368-07")

_wielgoss_et_al = stdpopsim.Citation(
    author="Wielgoss et al.",
    year="2011",
    doi="https://doi.org/10.1534/g3.111.000406")

# The DOI is given as a full URL like the other citations; a bare
# identifier is not a resolvable address.
_blattner_et_al = stdpopsim.Citation(
    author="Blattner et al.",
    year="1997",
    doi="https://doi.org/10.1126/science.277.5331.1453")