Пример #1
0
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=7e-9,
            recombination_rate=_recombination_rate_data[name],
        )
    )

_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    mutation_rate_citations=[
        stdpopsim.Citation(
            author="Ossowski et al.",
            year="2010",
            doi="https://doi.org/10.1126/science.1180677",
            reasons={stdpopsim.CiteReason.MUT_RATE},
        )
    ],
    recombination_rate_citations=[
        stdpopsim.Citation(
            author="Huber et al.",
            year="2014",
            doi="https://doi.org/10.1093/molbev/msu247",
            reasons={stdpopsim.CiteReason.REC_RATE},
        )
    ],
    assembly_citations=[
        stdpopsim.Citation(
            doi="https://doi.org/10.1093/nar/gkm965",
            year="2007",
Пример #2
0
class _SLiMEngine(stdpopsim.Engine):
    """
    Engine that generates a SLiM script for a demographic model and runs the
    external ``slim`` program on it.
    """

    id = "slim"  #:
    description = "SLiM forward-time Wright-Fisher simulator"  #:
    citations = [
            stdpopsim.Citation(
                doi="https://doi.org/10.1111/1755-0998.12968",
                year=2019,
                author="Haller et al.",
                reasons={stdpopsim.CiteReason.ENGINE}),
            ]

    def slim_path(self):
        """
        Return the SLiM executable to invoke: the value of the ``SLIM``
        environment variable if it is set, otherwise ``"slim"``.
        """
        return os.environ.get("SLIM", "slim")

    def get_version(self):
        """
        Run ``slim -v`` and return the third whitespace-separated token of its
        output, decoded as ASCII with any trailing comma removed.
        """
        s = subprocess.check_output([self.slim_path(), "-v"])
        return s.split()[2].decode("ascii").rstrip(",")

    def simulate(
            self, demographic_model=None, contig=None, samples=None, seed=None,
            verbosity=0, slim_path=None, slim_script=False, slim_scaling_factor=10,
            slim_no_recapitation=False, slim_no_burnin=False, **kwargs):
        """
        Simulate the demographic model using SLiM.
        See :meth:`.Engine.simulate()` for definitions of the
        ``demographic_model``, ``contig``, and ``samples`` parameters.

        :param seed: The seed for the random number generator.
        :type seed: int
        :param verbosity: Passed on to the script generator. When ``0``,
            the standard output of the SLiM process is discarded.
        :type verbosity: int
        :param slim_path: The full path to the slim executable, or the name of
            a command in the current PATH.
        :type slim_path: str
        :param slim_script: If true, the simulation will not be executed.
            Instead the generated SLiM script will be printed to stdout.
        :type slim_script: bool
        :param slim_scaling_factor: Rescale model parameters by the given value,
            to speed up simulation. Population sizes and generation times are
            divided by this factor, whereas the mutation rate, recombination
            rate, and growth rates are multiplied by the factor.
            See SLiM manual: `5.5 Rescaling population sizes to improve
            simulation performance.`
        :type slim_scaling_factor: float
        :param slim_no_recapitation: Do an explicit burn in, and add
            mutations, within the SLiM simulation. This may be much slower than
            the defaults (recapitation and neutral mutation overlay with
            msprime). The burn in behaviour is to wait until all individuals in
            the ancestral populations have a common ancestor within their
            respective population, and then wait another 10*N generations.
        :type slim_no_recapitation: bool
        :param slim_no_burnin: Do not perform a burn in at the start of the
            simulation.  This option is only relevant when
            ``slim_no_recapitation=True``.
        :type slim_no_burnin: bool
        :param \\**kwargs: Accepted for interface compatibility; not used here.
        :return: The simulated tree sequence, or ``None`` when
            ``slim_script`` is true.
        """

        run_slim = not slim_script
        do_recap = not slim_no_recapitation
        check_coalescence = slim_no_recapitation and not slim_no_burnin

        if slim_path is None:
            slim_path = self.slim_path()

        if do_recap:
            # Remember the real rate; mutations are overlaid after recapitation.
            mutation_rate = contig.mutation_rate
            # Ensure no mutations are introduced by SLiM.
            contig = stdpopsim.Contig(
                    recombination_map=contig.recombination_map,
                    mutation_rate=0,
                    genetic_map=contig.genetic_map)

        slim_cmd = [slim_path]
        if seed is not None:
            slim_cmd.extend(["-s", f"{seed}"])

        mktemp = functools.partial(tempfile.NamedTemporaryFile, mode="w")

        @contextlib.contextmanager
        def script_file_f():
            f = mktemp(suffix=".slim") if not slim_script else sys.stdout
            try:
                yield f
            finally:
                # Close the temporary script even if the body raised (e.g.
                # SLiM exited with an error). Don't close sys.stdout.
                if not slim_script:
                    f.close()

        with script_file_f() as script_file, mktemp(suffix=".ts") as ts_file:

            recap_epoch = slim_makescript(
                    script_file, ts_file.name,
                    demographic_model, contig, samples,
                    slim_scaling_factor, check_coalescence, verbosity)

            # Make sure the whole script is on disk (or stdout) before use.
            script_file.flush()

            if not run_slim:
                return None

            slim_cmd.append(script_file.name)
            stdout = subprocess.DEVNULL if verbosity == 0 else None
            subprocess.check_call(slim_cmd, stdout=stdout)

            # Load while the temporary file still exists.
            ts = pyslim.load(ts_file.name)

        # Node times come from SLiM generation numbers, which may have been
        # divided by a scaling factor for computational tractability.
        tables = ts.dump_tables()
        for table in (tables.nodes, tables.migrations):
            table.time *= slim_scaling_factor
        ts = pyslim.SlimTreeSequence.load_tables(tables)
        ts.slim_generation *= slim_scaling_factor

        if do_recap:
            # Derive two seeds from the user seed: one for recapitation (s1)
            # and one for the mutation overlay (s2).
            rng = random.Random(seed)
            s1, s2 = rng.randrange(1, 2**32), rng.randrange(1, 2**32)

            population_configurations = [
                    msprime.PopulationConfiguration(
                        initial_size=pop.start_size,
                        growth_rate=pop.growth_rate)
                    for pop in recap_epoch.populations]
            ts = ts.recapitate(
                    recombination_rate=contig.recombination_map.mean_recombination_rate,
                    population_configurations=population_configurations,
                    migration_matrix=recap_epoch.migration_matrix,
                    random_seed=s1)

        ts = simplify_remembered(ts)

        if do_recap:
            # Add neutral mutations.
            ts = pyslim.SlimTreeSequence(msprime.mutate(
                ts, rate=mutation_rate, keep=True, random_seed=s2))

        return ts
Пример #3
0
import collections

import msprime

import stdpopsim
from . import genome_data

###########################################################
#
# Genome definition
#
###########################################################

# citations
# Li et al. (2006).
_LiAndStephan = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.0020166",
    author="Li et al.",
    year=2006,
)

# Schrider et al. (2013).
_SchriderEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/genetics.113.151670",
    author="Schrider et al.",
    year=2013,
)

# dos Santos et al. (2015), cited for the genome assembly.
_DosSantosEtAl = stdpopsim.Citation(
    author="dos Santos et al.",
    year="2015",
    doi="https://doi.org/10.1093/nar/gku1099",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

_genome_wide_estimate = 8.4e-9  # WRONG, underestimate used in S&S!

_recombination_rate_data = collections.defaultdict(
Пример #4
0
import stdpopsim
from . import genome_data


# Li et al. (2006): cited for generation time and population size.
_LiAndStephan = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.0020166",
    author="Li et al.",
    year=2006,
    reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.POP_SIZE},
)

# Schrider et al. (2013): no citation reasons attached here.
_SchriderEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/genetics.113.151670",
    author="Schrider et al.",
    year=2013,
)

# dos Santos et al. (2015): cited for the genome assembly.
_DosSantosEtAl = stdpopsim.Citation(
    author="dos Santos et al.",
    year=2015,
    doi="https://doi.org/10.1093/nar/gku1099",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Hoskins et al. (2015): cited for the genome assembly.
_HoskinsEtAl = stdpopsim.Citation(
    author="Hoskins et al.",
    year=2015,
    doi="https://doi.org/10.1101/gr.185579.114",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)
Пример #5
0
# Exon annotation track: Ensembl Havana release 104 on GRCh38.
_an = stdpopsim.Annotation(
    species=_species,
    id="ensembl_havana_104_exons",
    description="Ensembl Havana exon annotations on GRCh38",
    # Source GFF3 file and its checksum.
    url=(
        "ftp://ftp.ensembl.org/pub/release-104/"
        "gff3/homo_sapiens/Homo_sapiens.GRCh38.104.gff3.gz"
    ),
    gff_sha256="313ad46bd4af78b45b9f5d8407bbcbd3f87f4be0747060e84b3b5eb931530ec1",
    # Interval archive and its checksum.
    intervals_url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/"
        "annotations/HomSap/ensembl_havana_104_exons.tar.gz"
    ),
    intervals_sha256=(
        "5c356d092b31fa40bfce434994de276e9040ed9a80fc047a5e3b94410157f1cf"
    ),
    citations=[
        stdpopsim.Citation(
            doi="https://doi.org/10.1093/database/bay119",
            author="Hunt et al",
            year=2018,
            reasons={stdpopsim.CiteReason.ANNOTATION},
        )
    ],
    file_pattern="ensembl_havana_exons_{id}.txt",
    annotation_source="ensembl_havana",
    annotation_type="exon",
)
# Register the annotation on the species object.
_species.add_annotations(_an)

# add CDS
_an2 = stdpopsim.Annotation(
    species=_species,
    id="ensembl_havana_104_CDS",
    description="Ensembl Havana CDS annotations on GRCh38",
    url=("ftp://ftp.ensembl.org/pub/release-104/"
Пример #6
0
import msprime

import stdpopsim
from . import genome_data

###########################################################
#
# Genome definition
#
###########################################################

# De novo assembly of the cattle reference genome with single-molecule sequencing.
_RosenEtAl = stdpopsim.Citation(
    author="Rosen et al.",
    year="2020",
    doi="https://doi.org/10.1093/gigascience/giaa021",
)

# Paper title: "Frequency of mosaicism points towards mutation-prone early
# cleavage cell divisions in cattle."
_HarlandEtAl = stdpopsim.Citation(
    # The DOI below points to a BioRxiv preprint.
    doi="https://doi.org/10.1101/079863",
    author="Harland et al.",
    year="2017",
)

# Cattle Sex-Specific Recombination and Genetic Control from a
# Large Pedigree Analysis.
_MaEtAl = stdpopsim.Citation(
Пример #7
0
class _MsprimeEngine(Engine):
    """
    Engine that runs simulations with the msprime coalescent simulator via
    :func:`msprime.simulate`.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]
    # We default to the first model in the list.
    supported_models = ["hudson", "dtwf", "smc", "smc_prime"]
    # Extra citations to add when a particular model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    def _add_model_citations(self, model):
        """
        Add the citations associated with ``model`` (if any) to
        ``self.citations``, skipping entries that are already present.

        ``citations`` is a list that outlives a single call to
        :meth:`simulate`, so appending unconditionally would add a duplicate
        entry every time the same model is simulated.
        """
        for citation in self.model_citations.get(model, []):
            if citation not in self.citations:
                self.citations.append(citation)

    def simulate(
        self,
        demographic_model=None,
        contig=None,
        samples=None,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to :meth:`msprime.simulate()`
            to initialise the simulation and then immediately return.
        :type dry_run: bool
        :return: The simulated tree sequence, or ``None`` if ``dry_run`` is true.
        :raises ValueError: If ``msprime_model`` or any model named in
            ``msprime_change_model`` is not in ``supported_models``.
        """
        if msprime_model is None:
            msprime_model = self.supported_models[0]
        else:
            if msprime_model not in self.supported_models:
                raise ValueError(f"Unrecognised model '{msprime_model}'")
            self._add_model_citations(msprime_model)

        # Work on a copy so the model's own event list is left untouched.
        demographic_events = demographic_model.demographic_events.copy()
        if msprime_change_model is not None:
            for t, model in msprime_change_model:
                if model not in self.supported_models:
                    raise ValueError(f"Unrecognised model '{model}'")
                model_change = msprime.SimulationModelChange(t, model)
                demographic_events.append(model_change)
                self._add_model_citations(model)
            # Re-sort by time now that model changes have been appended.
            demographic_events.sort(key=lambda x: x.time)

        ts = msprime.simulate(
            samples=samples,
            recombination_map=contig.recombination_map,
            mutation_rate=contig.mutation_rate,
            population_configurations=demographic_model.population_configurations,
            migration_matrix=demographic_model.migration_matrix,
            demographic_events=demographic_events,
            random_seed=seed,
            model=msprime_model,
            end_time=0 if dry_run else None,
        )

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask,
                                                    False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask,
                                                    True)

        if dry_run:
            ts = None
        return ts

    def get_version(self):
        """Return the version string of the installed msprime package."""
        return msprime.__version__
Пример #8
0
    "15": 1.3835785893339787e-08,
    "16": 1.4834607113882717e-08,
    "17": 1.582489036239487e-08,
    "18": 1.5075956950023575e-08,
    "19": 1.8220141872466202e-08,
    "20": 1.7178269031631664e-08,
    "21": 1.3045214034879191e-08,
    "22": 1.4445022767788226e-08,
    "X": 1.164662223273842e-08,
    "Y": 0.0,
    "MT": 0.0,
}

# International Human Genome Sequencing Consortium (2001): genome assembly.
_genome2001 = stdpopsim.Citation(
    author="International Human Genome Sequencing Consortium",
    year=2001,
    doi="http://dx.doi.org/10.1038/35057062",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# The International HapMap Consortium (2007): no reasons attached here.
_hapmap2007 = stdpopsim.Citation(
    author="The International HapMap Consortium",
    year=2007,
    doi="https://doi.org/10.1038/nature06258",
)

# Takahata (1993): population size.
_takahata1993 = stdpopsim.Citation(
    author="Takahata",
    year=1993,
    doi="https://doi.org/10.1093/oxfordjournals.molbev.a039995",
    reasons={stdpopsim.CiteReason.POP_SIZE},
)
Пример #9
0
import stdpopsim


# The International HapMap Consortium (2007).
_hapmap2007 = stdpopsim.Citation(
    author="The International HapMap Consortium",
    year=2007,
    doi="https://doi.org/10.1038/nature06258",
)

# Species object looked up by the ID "HomSap".
_species = stdpopsim.get_species("HomSap")

_gm = stdpopsim.GeneticMap(
    species=_species,
    id="HapMapII_GRCh37",
    description="HapMap Phase II lifted over to GRCh37",
    long_description="""
        This genetic map is from the Phase II Hapmap project
        and based on 3.1 million genotyped SNPs
        from 270 individuals across four populations (YRI, CEU, CHB and JPT).
        Genome wide recombination rates were estimated using LDHat.
        This version of the HapMap genetic map was lifted over to GRCh37
        (and adjusted in regions where the genome assembly had rearranged)
        for use in the 1000 Genomes project. Please see the README file on
        the 1000 Genomes download site for details of these adjustments.
        ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110106_recombination_hotspots
        """,
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "HomSap/HapmapII_GRCh37_RecombinationHotspots.tar.gz"
    ),
    sha256="80f22d9e6cb0e497074ed1bc277e765fa9d8e22f21b2f66c3b10286520f6b68f",
Пример #10
0
import math

import msprime

import stdpopsim


# Species object looked up by the ID "PonAbe".
_species = stdpopsim.get_species("PonAbe")


# Locke et al. (2011).
_locke2011 = stdpopsim.Citation(
    doi="http://doi.org/10.1038/nature09687",
    author="Locke et al.",
    year=2011,
)


def _orangutan():
    id = "TwoSpecies_2L11"
    description = "Two population orangutan model"
    long_description = """
        The two orang-utan species, Sumatran (Pongo abelii) and Bornean (Pongo
        pygmaeus) inferred from the joint-site frequency spectrum with ten
        individuals from each population. This model is an isolation-with-
        migration model, with exponential growth or decay in each population
        after the split. The Sumatran population grows in size, while the
        Bornean population slightly declines.
    """

    citations = [_locke2011.because(stdpopsim.CiteReason.DEM_MODEL)]

    populations = [
        stdpopsim.Population("Bornean", "Pongo pygmaeus (Bornean) population"),
Пример #11
0
import msprime

import stdpopsim

# Species object looked up by the ID "DroMel".
_species = stdpopsim.get_species("DroMel")

# Li et al. (2006).
_LiAndStephan = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.0020166",
    author="Li et al.",
    year=2006,
)

# population definitions that are reused.
_afr_population = stdpopsim.Population(
    id="AFR",
    description="African D. melanogaster population",
)
_eur_population = stdpopsim.Population(
    id="EUR",
    description="European D. melanogaster population",
)


def _afr_3epoch():
    id = "African3Epoch_1S16"
    description = "Three epoch African population"
    long_description = """
        The three epoch (modern, bottleneck, ancestral) model estimated for a
        single African Drosophila Melanogaster population from Sheehan and Song (2016).
        Population sizes are estimated by a
        deep learning model trained on simulation data. NOTE: Due to differences in
        coalescence units between PSMC (2N) and msms (4N) the number of generations were
        doubled from PSMC estimates when simulating data from msms in the original
        publication. We have faithfully represented the published model here.
    """
    populations = [_afr_population]
Пример #12
0
import collections
import stdpopsim

from . import genome_data

# De novo assembly of the cattle reference genome with single-molecule sequencing.
_RosenEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1093/gigascience/giaa021",
    author="Rosen et al.",
    year=2020,
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Paper title: "Frequency of mosaicism points towards mutation-prone early
# cleavage cell divisions in cattle." Cited for the mutation rate.
_HarlandEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1101/079863",
    author="Harland et al.",
    year=2017,
    reasons={stdpopsim.CiteReason.MUT_RATE},
)

# Paper title: "Cattle Sex-Specific Recombination and Genetic Control from a
# Large Pedigree Analysis." Cited for the recombination rate.
_MaEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.1005387",
    author="Ma et al.",
    year=2015,
    reasons={stdpopsim.CiteReason.REC_RATE},
)
Пример #13
0
class _MsprimeEngine(Engine):
    """
    Engine that runs simulations with the msprime coalescent simulator via
    :func:`msprime.sim_ancestry` followed by :func:`msprime.sim_mutations`.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]
    # We default to the first model in the list.
    supported_models = ["hudson", "dtwf", "smc", "smc_prime"]
    # Extra citations to add when a particular model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    def _add_model_citations(self, model):
        """
        Add the citations associated with ``model`` (if any) to
        ``self.citations``, skipping entries that are already present.

        ``citations`` is a list that outlives a single call to
        :meth:`simulate`, so appending unconditionally would add a duplicate
        entry every time the same model is simulated.
        """
        for citation in self.model_citations.get(model, []):
            if citation not in self.citations:
                self.citations.append(citation)

    def simulate(
        self,
        demographic_model,
        contig,
        samples,
        *,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
        **kwargs,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to :meth:`msprime.simulate()`
            to initialise the simulation and then immediately return.
        :type dry_run: bool
        :param \\**kwargs: Further arguments passed to :meth:`msprime.simulate()`
        :return: The simulated tree sequence, or ``None`` if ``dry_run`` is true.
        :raises ValueError: If a model name is not in ``supported_models``, or
            if both ``seed`` and ``random_seed`` are given.
        """
        if msprime_model is None:
            msprime_model = self.supported_models[0]
        else:
            if msprime_model not in self.supported_models:
                raise ValueError(f"Unrecognised model '{msprime_model}'")
            self._add_model_citations(msprime_model)

        if msprime_change_model is not None:
            # Build a list: the initial model followed by (time, model) changes.
            msprime_model = [msprime_model]
            for t, model in msprime_change_model:
                if model not in self.supported_models:
                    raise ValueError(f"Unrecognised model '{model}'")
                msprime_model.append((t, model))
                self._add_model_citations(model)

        # Accept ``random_seed`` as an alias of ``seed``, but not both at once.
        if "random_seed" in kwargs:
            if seed is not None:
                raise ValueError("Cannot set both seed and random_seed")
            seed = kwargs.pop("random_seed")

        # TODO: remove this after a release or two. See #745.
        self._warn_zigzag(demographic_model)

        # Derive separate seeds for the ancestry and mutation simulations.
        rng = np.random.default_rng(seed)
        seeds = rng.integers(1, 2**31 - 1, size=2)

        ts = msprime.sim_ancestry(
            samples=samples,
            recombination_rate=contig.recombination_map,
            demography=demographic_model.model,
            ploidy=2,
            random_seed=seeds[0],
            model=msprime_model,
            end_time=0 if dry_run else None,
            **kwargs,
        )
        ts = msprime.sim_mutations(
            ts,
            end_time=0 if dry_run else None,
            random_seed=seeds[1],
            rate=contig.mutation_rate,
        )

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask,
                                                    False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask,
                                                    True)

        if dry_run:
            ts = None
        return ts

    def get_version(self):
        """Return the version string of the installed msprime package."""
        return msprime.__version__
Пример #14
0
def _sma_1pop():
    """
    Build the single-population, piecewise-constant-size
    ``SouthMiddleAtlas_1D17`` demographic model.

    :return: A :class:`stdpopsim.DemographicModel` with one population and one
        population-size change event per entry of the time grid.
    """
    # the size during the interval times[k] to times[k+1] = sizes[k]
    times = np.array([
        699, 2796, 6068, 9894,
        14370, 19606, 25730, 32894,
        41275, 51077, 62544, 75958,
        91648, 110001, 131471, 156584,
        185960, 220324, 260520, 307540,
        362541, 426879, 502139, 590173,
        693151, 813610, 954517, 1119341,
        1312147, 1537686, 1801500, 2110100,
    ])
    sizes = np.array([
        42252426, 42252426, 60323, 72174,
        40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283,
        87661, 83932, 83829, 91813,
        111644, 143456, 181571, 217331,
        241400, 246984, 238593, 228222,
        217752, 198019, 165210, 121796,
        121796, 73989, 73989, 73989,
    ])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al 2017)
    # set the first 7 sizes
    # equal to the size at 8 (~40Kya)
    sizes[:8] = sizes[8]
    # set the last 2 entries equal
    # to the size at 30 (~1.6Mya)
    sizes[30:32] = sizes[30]

    # One size-change event per (size, time) pair, all on population 0.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=epoch_start, initial_size=epoch_size, population_id=0
        )
        for epoch_size, epoch_start in zip(sizes, times)
    ]

    populations = [
        stdpopsim.Population(
            id="SouthMiddleAtlas",
            description="Arabidopsis Thaliana South Middle Atlas population",
        )
    ]

    durvasula2017 = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )

    # The present-day size is the (adjusted) first entry of ``sizes``.
    present_day = msprime.PopulationConfiguration(
        initial_size=sizes[0], metadata=populations[0].asdict()
    )

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        long_description="""
            This model comes from MSMC using two randomly sampled homozygous
            individuals (Khe32 and Ifr4) from the South Middle Atlas region
            from the Middle Atlas Mountains in Morocco. The model is estimated
            with 32 time periods. Because estimates from the recent and ancient
            past are less accurate, we set the population size in the first 7
            time periods equal to the size at the 8th time period and the size
            during last 2 time periods equal to the size in the 30th time
            period.
        """,
        populations=populations,
        citations=[durvasula2017],
        generation_time=1,
        demographic_events=demographic_events,
        population_configurations=[present_day],
    )
Пример #15
0
    "31": 1.1397713284329192e-08,
    "32": 1.1555927931648279e-08,
    "33": 1.3339402745926785e-08,
    "34": 1.0483812411227089e-08,
    "35": 1.4299102611645524e-08,
    "36": 1.187517782077471e-08,
    "37": 1.3834580623461596e-08,
    "38": 1.4363726512881696e-08,
    "X": 9.506483722244087e-09,
    "MT": 0,
}

# Paper title: "Genome sequence, comparative analysis and haplotype structure
# of the domestic dog."
_LindbladTohEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1038/nature04338",
    author="Lindblad-Toh et al.",
    year=2005,
)

# Paper title: "Ancient wolf genome reveals an early divergence of domestic
# dog ancestors and admixture into high-latitude breeds."
_SkoglundEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1016/j.cub.2015.04.019",
    author="Skoglund et al.",
    year=2015,
)

_FranzEtAl = stdpopsim.Citation(
    # Genomic and archaeological evidence suggest a dual origin of
    # domestic dogs.
Пример #16
0
    "14": 4.70e-9,
    "15": 4.82e-9,
    "16": 6.12e-9,
    "17": 7.26e-9,
    "18": 4.57e-9,
    "19": 7.56e-9,
    "20": 5.83e-9,
    "21": 4.98e-9,
    "22": 6.03e-9,
    "X": 9.50e-9,
    "MT": 0,
}

# Locke et al. (2011): generation time and population size.
_locke2011 = stdpopsim.Citation(
    doi="http://doi.org/10.1038/nature09687",
    author="Locke et al.",
    year=2011,
    reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.POP_SIZE},
)

# Nater et al. (2017): mutation rate and recombination rate.
_nater2017 = stdpopsim.Citation(
    doi="https://doi.org/10.1016/j.cub.2017.09.047",
    author="Nater et al.",
    year=2017,
    reasons={stdpopsim.CiteReason.MUT_RATE, stdpopsim.CiteReason.REC_RATE},
)

_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
# https://www.ncbi.nlm.nih.gov/genome/?term=drosophila+melanogaster.
# FIXME: add mean mutation and recombination rate data to this table.
_chromosome_data = """\
chrX   23542271
chr2L   23513712
chr2R   25286936
chr3L   28110227
chr3R   32079331
chr4   1348131
chrY   3667352
chrM   19524
"""

# citations
# Li et al. (2006).
_LiAndStephan = stdpopsim.Citation(
    doi="https://doi.org/10.1371/journal.pgen.0020166",
    author="Li et al.",
    year=2006,
)

# Schrider et al. (2013).
_SchriderEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/genetics.113.151670",
    author="Schrider et al.",
    year=2013,
)

_chromosomes = []
for line in _chromosome_data.splitlines():
    name, length = line.split()[:2]
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            mutation_rate=5.49e-9,  # citation: _SchriderEtAl
Пример #18
0
        inbred lines of D. melanogaster. This is based on the
        products of 5,860 female meioses from whole genome sequencing data.
        Recombination rates were calculated from the density of individual
        recombination events that were detected in crosses. This map was
        subsequently lifted over to the dm6 assembly.
        """,
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "DroMel/comeron2012_maps.tar.gz"
    ),
    sha256="08185a0e3b0ad26eefe69fc6bdb8f3f599a760e11e87dd343335b33d1563f62a",
    file_pattern="genetic_map_comeron2012_dm6_chr{id}.txt",
    citations=[
        stdpopsim.Citation(
            author="Comeron et al",
            doi="https://doi.org/10.1371/journal.pgen.1002905",
            year=2012,
            reasons={stdpopsim.CiteReason.GEN_MAP},
        )
    ],
)
_species.add_genetic_map(_gm)

_gm = stdpopsim.GeneticMap(
    species=_species,
    id="ComeronCrossoverV2_dm6",
    description="Crossover map from meioses products of 8 lab crosses",
    long_description="""
        The crossover map from a study of 8 crosses of 12 highly
        inbred lines of D. melanogaster. This is based on the
        products of 5,860 female meioses from whole genome sequencing data.
        Recombination rates were calculated from the density of individual
Пример #19
0
"""
Genome and demographic model definitions for Escherichia coli.
"""
import stdpopsim

###########################################################
#
# Genome definition
#
###########################################################

# Lapierre et al. (2016).
_lapierre_et_al = stdpopsim.Citation(
    doi="https://doi.org/10.1093/molbev/msw048",
    author="Lapierre et al.",
    year="2016",
)

# Sezonov et al. (2007).
_sezonov_et_al = stdpopsim.Citation(
    doi="https://doi.org/10.1128/JB.01368-07",
    author="Sezonov et al.",
    year="2007",
)

# Perfeito et al. (2007).
_perfeito_et_al = stdpopsim.Citation(
    doi="https://doi.org/10.1126/science.1142284",
    author="Perfeito et al.",
    year="2007",
)

# Kibota and Lynch (1996).
_kibota_and_lynch = stdpopsim.Citation(
    doi="https://doi.org/10.1038/381694a0",
    author="Kibota and Lynch",
    year="1996",
)
Пример #20
0
import stdpopsim

# Species object looked up by the ID "CanFam".
_species = stdpopsim.get_species("CanFam")

# Paper title: "A Pedigree-Based Map of Recombination in the Domestic Dog
# Genome."
_CampbellEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/g3.116.034678",
    author="Campbell et al.",
    year=2016,
)

# Genetic map definition; registered on the species below.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="Campbell2016_CanFam3_1",
    description="Pedigree-based crossover map from 237 individuals",
    long_description="""
        Sex-averaged crossover frequency map based on 163,400 autosomal SNPs
        genotyped in a pedigree of 237 Labrador Retriever x Greyhound crosses.
        Genotypes were phased without respect to the pedigree, using SHAPEIT2,
        recombinations were called using duoHMM, and genetic distances were
        obtained using Haldane's map function.
        """,
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "CanFam/dog_genetic_maps.tar.gz"
    ),
    sha256="585afb424615e2fb0825d807db0b10fe1c797a6dbb804ecbb3fef5e8387d194f",
    file_pattern="chr{id}_average_canFam3.1.txt",
    # Tag the shared citation with the genetic-map reason.
    citations=[_CampbellEtAl.because(stdpopsim.CiteReason.GEN_MAP)],
)
_species.add_genetic_map(_gm)
Пример #21
0
class _MsprimeEngine(Engine):
    """
    Engine that runs simulations with msprime.

    Ancestry is simulated with :func:`msprime.sim_ancestry` and mutations are
    then overlaid with :func:`msprime.sim_mutations`.
    """

    id = "msprime"  #:
    description = "Msprime coalescent simulator"  #:
    # Citations that always apply to this engine. This list is shared by all
    # instances and must never be mutated in place (see ``simulate``).
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pcbi.1004842",
            year="2016",
            author="Kelleher et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        )
    ]

    # We default to the first model in the list.
    model_class_map = {
        "hudson": msprime.StandardCoalescent,
        "dtwf": msprime.DiscreteTimeWrightFisher,
        "smc": msprime.SmcApproxCoalescent,
        "smc_prime": msprime.SmcPrimeApproxCoalescent,
    }

    # Extra citations required when a particular ancestry model is used.
    model_citations = {
        "dtwf": [
            stdpopsim.Citation(
                doi="https://doi.org/10.1371/journal.pgen.1008619",
                year="2020",
                author="Nelson et al.",
                reasons={stdpopsim.CiteReason.ENGINE},
            )
        ]
    }

    @property
    def supported_models(self):
        """Names of the msprime ancestry models accepted by this engine."""
        return list(self.model_class_map.keys())

    def _convert_model_spec(self, model_str, model_changes):
        """
        Convert the specified model specification into a form suitable
        for sim_ancestry. The model param is a string or None. The
        model_changes is either None or list of (time, model_str) tuples.
        Also return the appropriate extra citations.

        :return: A ``(model, citations)`` tuple. ``model`` is the model name
            when ``model_changes`` is None, otherwise a list of model
            instances, each lasting until the next change time (the last one
            has ``duration=None``).
        :raises ValueError: If any model name is not in ``model_class_map``.
        """
        citations = []

        def check_and_cite(name):
            # Single validation path so that the initial model and every
            # model change are checked against the same table.
            if name not in self.model_class_map:
                raise ValueError(f"Unrecognised model '{name}'")
            citations.extend(self.model_citations.get(name, []))

        if model_str is None:
            model_str = "hudson"
        else:
            check_and_cite(model_str)

        if model_changes is None:
            return model_str, citations

        model_list = []
        last_t = 0
        last_model = model_str
        for t, next_model in model_changes:
            check_and_cite(next_model)
            # The previous model runs from last_t up to this change time.
            model_list.append(
                self.model_class_map[last_model](duration=t - last_t))
            last_model = next_model
            last_t = t
        # The final model runs until the simulation completes.
        model_list.append(self.model_class_map[last_model](duration=None))

        return model_list, citations

    def simulate(
        self,
        demographic_model,
        contig,
        samples,
        *,
        seed=None,
        msprime_model=None,
        msprime_change_model=None,
        dry_run=False,
        **kwargs,
    ):
        """
        Simulate the demographic model using msprime.
        See :meth:`.Engine.simulate()` for definitions of parameters defined
        for all engines.

        :param msprime_model: The msprime simulation model to be used.
            One of ``hudson``, ``dtwf``, ``smc``, or ``smc_prime``.
            See msprime API documentation for details.
        :type msprime_model: str
        :param msprime_change_model: A list of (time, model) tuples, which
            changes the simulation model to the new model at the time specified.
        :type msprime_change_model: list of (float, str) tuples
        :param dry_run: If True, ``end_time=0`` is passed to
            :meth:`msprime.sim_ancestry()` and :meth:`msprime.sim_mutations()`
            to initialise the simulation and then immediately return ``None``.
        :type dry_run: bool
        :param \\**kwargs: Further arguments passed to :meth:`msprime.sim_ancestry()`
        :raises ValueError: If a model name is not recognised, or if both
            ``seed`` and ``random_seed`` are given.
        """

        model, citations = self._convert_model_spec(msprime_model,
                                                    msprime_change_model)
        # Rebind on the instance instead of extending the class-level list in
        # place: the latter is shared by every instance, so model citations
        # would pile up (with duplicates) from one call to the next.
        self.citations = [*type(self).citations, *citations]

        # ``random_seed`` is accepted as an alias for ``seed``.
        if "random_seed" in kwargs:
            if seed is not None:
                raise ValueError("Cannot set both seed and random_seed")
            seed = kwargs.pop("random_seed")

        # TODO: remove this after a release or two. See #745.
        self._warn_zigzag(demographic_model)
        self._warn_mutation_rate_mismatch(contig, demographic_model)

        # Derive independent seeds for the ancestry and mutation steps.
        rng = np.random.default_rng(seed)
        seeds = rng.integers(1, 2**31 - 1, size=2)

        end_time = 0 if dry_run else None
        ts = msprime.sim_ancestry(
            samples=samples,
            recombination_rate=contig.recombination_map,
            demography=demographic_model.model,
            ploidy=2,
            random_seed=seeds[0],
            model=model,
            end_time=end_time,
            **kwargs,
        )
        ts = msprime.sim_mutations(
            ts,
            end_time=end_time,
            random_seed=seeds[1],
            rate=contig.mutation_rate,
        )

        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask,
                                                    False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask,
                                                    True)

        # A dry run exercises the whole pipeline but returns nothing.
        return None if dry_run else ts

    def get_version(self):
        """Return the version string of the installed msprime package."""
        return msprime.__version__
Пример #22
0
class _SLiMEngine(stdpopsim.Engine):
    """
    Engine that generates a SLiM script for a demographic model, runs SLiM,
    and then post-processes the resulting tree sequence (time rescaling,
    recapitation, simplification and overlay of remaining mutations).
    """

    id = "slim"  #:
    description = "SLiM forward-time Wright-Fisher simulator"  #:
    citations = [
        stdpopsim.Citation(
            doi="https://doi.org/10.1111/1755-0998.12968",
            year=2019,
            author="Haller et al.",
            reasons={stdpopsim.CiteReason.ENGINE},
        ),
    ]

    def slim_path(self):
        """
        Return the SLiM executable to use: the ``SLIM`` environment variable
        if set, otherwise ``"slim"``.
        """
        return os.environ.get("SLIM", "slim")

    def get_version(self):
        """
        Return the SLiM version, parsed from the third whitespace-separated
        token printed by ``slim -v`` (with any trailing comma removed).
        """
        s = subprocess.check_output([self.slim_path(), "-v"])
        return s.split()[2].decode("ascii").rstrip(",")

    def _slim_contig(self, contig, mutation_types):
        """
        Return ``(slim_contig, mutation_rate, slim_frac)`` where
        ``slim_contig`` is a copy of ``contig`` whose mutation rate is scaled
        down to the fraction ``slim_frac`` that SLiM itself introduces, and
        ``mutation_rate`` is the contig's original rate.
        """
        mutation_rate = contig.mutation_rate
        slim_frac = stdpopsim.ext.slim_mutation_frac(mutation_types)
        slim_contig = stdpopsim.Contig(
            recombination_map=contig.recombination_map,
            mutation_rate=slim_frac * mutation_rate,
            genetic_map=contig.genetic_map,
            inclusion_mask=contig.inclusion_mask,
            exclusion_mask=contig.exclusion_mask,
        )
        return slim_contig, mutation_rate, slim_frac

    def _apply_contig_masks(self, ts, contig):
        """
        Apply the contig's inclusion mask and then its exclusion mask to
        ``ts`` (each only if set) and return the resulting tree sequence.
        """
        if contig.inclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask,
                                                    False)
        if contig.exclusion_mask is not None:
            ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask,
                                                    True)
        return ts

    def simulate(
        self,
        demographic_model=None,
        contig=None,
        samples=None,
        seed=None,
        mutation_types=None,
        extended_events=None,
        slim_path=None,
        slim_script=False,
        slim_scaling_factor=1.0,
        slim_burn_in=10.0,
        dry_run=False,
    ):
        """
        Simulate the demographic model using SLiM.
        See :meth:`.Engine.simulate()` for definitions of the
        ``demographic_model``, ``contig``, and ``samples`` parameters.

        :param seed: The seed for the random number generator.
        :type seed: int
        :param mutation_types: Mutation types forwarded to the SLiM script
            generator. They also determine (via
            ``stdpopsim.ext.slim_mutation_frac``) which fraction of the
            contig's mutation rate is simulated by SLiM itself.
        :param extended_events: Extended events forwarded unchanged to the
            SLiM script generator.
        :param slim_path: The full path to the slim executable, or the name of
            a command in the current PATH.
        :type slim_path: str
        :param slim_script: If true, the simulation will not be executed.
            Instead the generated SLiM script will be printed to stdout.
        :type slim_script: bool
        :param slim_scaling_factor: Rescale model parameters by the given value,
            to speed up simulation. Population sizes and generation times are
            divided by this factor, whereas the mutation rate, recombination
            rate, and growth rates are multiplied by the factor.
            See SLiM manual: `5.5 Rescaling population sizes to improve
            simulation performance.`
        :type slim_scaling_factor: float
        :param slim_burn_in: Length of the burn-in phase, in units of N
            generations.
        :type slim_burn_in: float
        :param dry_run: If True, run the first generation setup and then end the
            simulation.
        :type dry_run: bool
        :return: The post-processed tree sequence, or ``None`` when
            ``slim_script`` or ``dry_run`` is set.
        :raises ValueError: If ``slim_scaling_factor`` is not positive or
            ``slim_burn_in`` is negative.
        """

        if slim_scaling_factor <= 0:
            raise ValueError("slim_scaling_factor must be positive")
        if slim_burn_in < 0:
            raise ValueError("slim_burn_in must be non-negative")

        if slim_scaling_factor != 1:
            warnings.warn(
                stdpopsim.SLiMScalingFactorWarning(
                    f"You're using a scaling factor ({slim_scaling_factor}). "
                    "This should give similar results for many situations, "
                    "but is not equivalent, especially in the presence of selection. "
                    "When using rescaling, you should be careful---do checks and "
                    "compare results across different values of the scaling factor."
                ))

        run_slim = not slim_script

        # Ensure only "weighted" mutations are introduced by SLiM.
        contig, mutation_rate, slim_frac = self._slim_contig(
            contig, mutation_types)

        mktemp = functools.partial(tempfile.NamedTemporaryFile, mode="w")

        @contextlib.contextmanager
        def script_file_f():
            f = mktemp(suffix=".slim") if not slim_script else sys.stdout
            try:
                yield f
            finally:
                # Close the temporary script even if the body raised, but
                # don't close sys.stdout.
                if not slim_script:
                    f.close()

        with script_file_f() as script_file, mktemp(suffix=".ts") as ts_file:

            recap_epoch = slim_makescript(
                script_file,
                ts_file.name,
                demographic_model,
                contig,
                samples,
                mutation_types,
                extended_events,
                slim_scaling_factor,
                slim_burn_in,
            )

            # Make sure the whole script is on disk (or on stdout) before
            # SLiM is asked to read it.
            script_file.flush()

            if not run_slim:
                return None

            self._run_slim(script_file.name,
                           slim_path=slim_path,
                           seed=seed,
                           dry_run=dry_run)

            if dry_run:
                return None

            # Load while the temporary output file still exists.
            ts = pyslim.load(ts_file.name)

        ts = self._recap_and_rescale(ts, seed, recap_epoch, contig,
                                     mutation_rate, slim_frac,
                                     slim_scaling_factor)

        return self._apply_contig_masks(ts, contig)

    def _run_slim(self, script_file, slim_path=None, seed=None, dry_run=False):
        """
        Run SLiM.

        We capture the output using Popen's line-oriented text buffering
        (bufsize=1, universal_newlines=True) and redirect all messages to
        Python's logging module.
        By convention, messages from SLiM prefixed with "ERROR: " or
        "WARNING: " are treated as ERROR or WARN loglevels respectively.
        All other output on stdout is given the DEBUG loglevel.
        ERROR messages, and any output from SLiM on stderr, will raise a
        SLiMException here.

        :param script_file: Path of the SLiM script to execute.
        :param slim_path: SLiM executable; defaults to :meth:`slim_path`.
        :param seed: If not None, passed to SLiM with ``-s``.
        :param dry_run: If True, ``-d dry_run=T`` is passed to SLiM.
        :raises SLiMException: If SLiM exits with a non-zero code or writes
            anything to stderr.
        """
        if slim_path is None:
            slim_path = self.slim_path()
        slim_cmd = [slim_path]
        if seed is not None:
            slim_cmd.extend(["-s", f"{seed}"])
        if dry_run:
            slim_cmd.extend(["-d", "dry_run=T"])
        slim_cmd.append(script_file)

        # NOTE(review): stdout is drained completely before stderr is read.
        # If SLiM ever wrote enough to stderr to fill the pipe while still
        # producing stdout, both sides could block -- confirm whether this
        # needs handling.
        with subprocess.Popen(
                slim_cmd,
                bufsize=1,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
        ) as proc:
            for line in proc.stdout:
                line = line.rstrip()
                if line.startswith("ERROR: "):
                    logger.error(line[len("ERROR: "):])
                elif line.startswith("WARNING: "):
                    warnings.warn(
                        stdpopsim.UnspecifiedSLiMWarning(
                            line[len("WARNING: "):]))
                else:
                    # filter `dbg` function calls that generate output
                    line = line.replace("dbg(self.source); ", "")
                    logger.debug(line)
            stderr = proc.stderr.read()

        # Leaving the ``with`` block waits for the process, so returncode is
        # set by the time it is inspected here.
        if proc.returncode != 0 or stderr:
            raise SLiMException(
                f"{slim_path} exited with code {proc.returncode}.\n{stderr}")

    def _simplify_remembered(self, ts):
        """
        Remove all samples except those individuals that were explicitly
        sampled in SLiM with sim.treeSeqRememberIndividuals().
        """
        nodes = itertools.chain.from_iterable(
            i.nodes for i in ts.individuals()
            if i.flags & pyslim.INDIVIDUAL_REMEMBERED)
        return ts.simplify(samples=list(nodes), filter_populations=False)

    def _recap_and_rescale(
        self,
        ts,
        seed,
        recap_epoch,
        contig,
        mutation_rate,
        slim_frac,
        slim_scaling_factor,
    ):
        """
        Apply post-SLiM transformations to ``ts``. This rescales node times,
        does recapitation, simplification, and adds neutral mutations.

        :param ts: Tree sequence produced by SLiM.
        :param seed: Seed for the generator from which the recapitation and
            mutation seeds are drawn.
        :param recap_epoch: Epoch (as returned by ``slim_makescript``) whose
            populations and migration matrix drive recapitation.
        :param contig: Contig supplying the recombination map.
        :param mutation_rate: The full (unscaled by ``slim_frac``) mutation
            rate.
        :param slim_frac: Fraction of ``mutation_rate`` already simulated by
            SLiM.
        :param slim_scaling_factor: Factor by which times are multiplied.
        :return: The transformed :class:`pyslim.SlimTreeSequence`.
        """
        # Node times come from SLiM generation numbers, which may have been
        # divided by a scaling factor for computational tractability.
        tables = ts.dump_tables()
        for table in (tables.nodes, tables.migrations):
            table.time *= slim_scaling_factor
        ts = pyslim.SlimTreeSequence.load_tables(tables)
        ts.slim_generation *= slim_scaling_factor

        rng = random.Random(seed)
        s1, s2 = rng.randrange(1, 2**32), rng.randrange(1, 2**32)

        population_configurations = [
            msprime.PopulationConfiguration(initial_size=pop.start_size,
                                            growth_rate=pop.growth_rate)
            for pop in recap_epoch.populations
        ]
        ts = ts.recapitate(
            recombination_rate=(
                contig.recombination_map.mean_recombination_rate),
            population_configurations=population_configurations,
            migration_matrix=recap_epoch.migration_matrix,
            random_seed=s1,
        )

        ts = self._simplify_remembered(ts)

        if slim_frac < 1:
            # Add mutations to SLiM part of trees.
            rate = (1 - slim_frac) * mutation_rate
            ts = pyslim.SlimTreeSequence(
                msprime.mutate(
                    ts,
                    rate=rate,
                    keep=True,
                    random_seed=s2,
                    end_time=ts.slim_generation,
                ))

        # Add mutations to recapitated part of trees.
        s3 = rng.randrange(1, 2**32)
        ts = pyslim.SlimTreeSequence(
            msprime.mutate(
                ts,
                rate=mutation_rate,
                keep=True,
                random_seed=s3,
                start_time=ts.slim_generation,
            ))

        return ts

    def recap_and_rescale(
        self,
        ts,
        demographic_model,
        contig,
        samples,
        mutation_types=None,
        extended_events=None,
        slim_scaling_factor=1.0,
        seed=None,
        **kwargs,
    ):
        """
        Apply post-SLiM transformations to ``ts``. This rescales node times,
        does recapitation, simplification, and adds neutral mutations.

        If the SLiM engine was used to output a SLiM script, and the script was
        run outside of stdpopsim, this function can be used to transform the
        SLiM tree sequence following the procedure that would have been used
        if stdpopsim had run SLiM itself.
        The parameters after ``ts`` have the same meaning as for :func:`simulate`,
        and the values for ``demographic_model``, ``contig``, ``samples``,
        and ``slim_scaling_factor`` should match those that were used to
        generate the SLiM script with :func:`simulate`.

        As in :func:`simulate`, the contig's inclusion and exclusion masks
        (if any) are applied to the result.

        :param ts: The tree sequence output by SLiM.
        :type ts: :class:`pyslim.SlimTreeSequence`
        :param \\**kwargs: Accepted so that the keyword arguments used for
            :func:`simulate` can be passed unchanged; they are ignored here.
        :return: The transformed tree sequence.

        .. warning::
            The :func:`recap_and_rescale` function is provided in the hope that
            it will be useful. But as we can't anticipate what changes you'll
            make to the SLiM code before using it, the stdpopsim source code
            should be consulted to determine if the behaviour is appropriate
            for your case.
        """
        # Only "weighted" mutations are introduced by SLiM.
        contig, mutation_rate, slim_frac = self._slim_contig(
            contig, mutation_types)

        # The script itself is discarded; generating it is only needed to
        # recover the epoch used for recapitation.
        with open(os.devnull, "w") as script_file:
            recap_epoch = slim_makescript(
                script_file,
                "unused.trees",
                demographic_model,
                contig,
                samples,
                mutation_types,
                extended_events,
                slim_scaling_factor,
                1,
            )

        ts = self._recap_and_rescale(ts, seed, recap_epoch, contig,
                                     mutation_rate, slim_frac,
                                     slim_scaling_factor)
        return self._apply_contig_masks(ts, contig)
Пример #23
0
chr12    136387465   5.44e-9
chr13    117095149   4.91e-9
chr14    108868599   4.70e-9
chr15    99152023    4.82e-9
chr16    77800216    6.12e-9
chr17    73212453    7.26e-9
chr18    94050890    4.57e-9
chr19    60714840    7.56e-9
chr20    62736349    5.83e-9
chr21    48394510    4.98e-9
chr22    46535552    6.03e-9
chrX     156195299   9.50e-9
"""

# Literature references for this species' genome and models.
_locke2011 = stdpopsim.Citation(
    doi="http://doi.org/10.1038/nature09687",
    year=2011,
    author="Locke et al.",
)

_nater2017 = stdpopsim.Citation(
    doi="https://doi.org/10.1016/j.cub.2017.09.047",
    year=2017,
    author="Nater et al.",
)

# Build one Chromosome per row of the whitespace-separated table in
# ``_chromosome_data``; the first three columns are the name, the length and
# the mean recombination rate. Every chromosome gets the same mutation rate.
_chromosomes = []
for line in _chromosome_data.splitlines():
    name, length, mean_rr = line.split()[:3]
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            recombination_rate=float(mean_rr),
            mutation_rate=1.5e-8,
        )
    )
Пример #24
0
import stdpopsim

from . import genome_data

# These are in Table 1 of Juneja et al., which reports chromosome-wide
# recombination rates in cM/Mb. Recombination rates here are expressed per
# base pair per generation, so convert using 1 cM/Mb == 1e-8.
_recombination_rate = {
    "1": 0.306 * 1e-8,
    "2": 0.249 * 1e-8,
    "3": 0.291 * 1e-8,
    "MT": 0,
}

# Cited for the recombination rates.
_JunejaEtAl = stdpopsim.Citation(
    author="Juneja et al.",
    year=2014,
    doi="https://doi.org/10.1371/journal.pntd.0002652",
    reasons={stdpopsim.CiteReason.REC_RATE},
)


# Cited for generation time, population size and mutation rate.
_CrawfordEtAl = stdpopsim.Citation(
    author="Crawford et al.",
    year=2017,
    doi="https://doi.org/10.1186/s12915-017-0351-0",
    reasons={
        stdpopsim.CiteReason.GEN_TIME,
        stdpopsim.CiteReason.POP_SIZE,
        stdpopsim.CiteReason.MUT_RATE,
    },
)

_KeightleyEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1101/gr.091231.109",
    year=2009,
    author="Keightley et al.",
    reasons={
Пример #25
0
 def test_get_bibtex_bad_connection(self):
     """fetch_bibtex must raise ValueError when the DOI is an invalid URL."""
     bad_citation = stdpopsim.Citation(author="Authors", year="2000", doi='DOI')
     self.assertRaises(ValueError, bad_citation.fetch_bibtex)
Пример #26
0
    "13": 7.56e-10,
    "14": 8.96e-10,
    "15": 6.91e-10,
    "16": 9.59e-10,
    "17": 1.05e-9,
}

_genome = stdpopsim.Genome.from_data(
    genome_data.data,
    recombination_rate=_recombination_rate,
    mutation_rate=_mutation_rate,
    citations=[
        stdpopsim.Citation(
            author="Merchant et al",
            year=2007,
            doi="https://doi.org/10.1126/science.1143609",
            reasons={stdpopsim.CiteReason.ASSEMBLY
                     },  # v5 - v6 assembly still en route!
        ),
        stdpopsim.Citation(
            author="Hasan and Ness",
            year=2020,
            doi="https://doi.org/10.6084/m9.figshare.14608239.v1",
            reasons={stdpopsim.CiteReason.REC_RATE},
        ),
        stdpopsim.Citation(
            author="Ness et al",
            year=2015,
            doi="https://doi.org/10.6084/m9.figshare.14700156.v1",
            reasons={stdpopsim.CiteReason.MUT_RATE},
        ),
Пример #27
0
    "13": 4.91e-9,
    "14": 4.70e-9,
    "15": 4.82e-9,
    "16": 6.12e-9,
    "17": 7.26e-9,
    "18": 4.57e-9,
    "19": 7.56e-9,
    "20": 5.83e-9,
    "21": 4.98e-9,
    "22": 6.03e-9,
    "X": 9.50e-9,
    "MT": 0,
}

# Literature references for this species' genome and models.
_locke2011 = stdpopsim.Citation(
    doi="http://doi.org/10.1038/nature09687",
    year=2011,
    author="Locke et al.",
)

_nater2017 = stdpopsim.Citation(
    doi="https://doi.org/10.1016/j.cub.2017.09.047",
    year=2017,
    author="Nater et al.",
)

_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Nater et al. 2017 used mu=1.5e-8 per generation, based on the
Пример #28
0
import stdpopsim

from . import genome_data

# Cited for the generation time.
_LovernEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1093/ilar.45.1.54",
    year=2004,
    author="Lovern et al.",
    reasons={stdpopsim.CiteReason.GEN_TIME},
)

# Cited for population size, mutation rate and recombination rate.
# The author string must match the constant's name: it previously read
# "Pombi et al.", which does not correspond to this reference.
_BourgeoisEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1093/gbe/evz110",
    year=2019,
    author="Bourgeois et al.",
    reasons={
        stdpopsim.CiteReason.POP_SIZE,
        stdpopsim.CiteReason.MUT_RATE,
        stdpopsim.CiteReason.REC_RATE,
    },
)

# No recombination rate yet for this species.
# Author of BourgeoisEtAl is sending the recombination map
# Placeholder rate of 1cM/Mb used
_recombo_rate = 1e-8

_recombination_rate = {
    "1": _recombo_rate,
    "2": _recombo_rate,
    "3": _recombo_rate,
Пример #29
0
def hominin_composite():
    """
    Build the ``HomininComposite_4G20`` demographic model.

    The model has four populations (YRI, CEU, Nea and an ancestral
    population). Looking backwards in time it contains: the end of CEU
    exponential growth (out-of-Africa bottleneck), a pulse of Neandertal
    admixture into CEU, the merger of CEU and YRI into the ancestral
    population, and finally the merger of the Neandertal lineage into it.

    :return: The assembled :class:`stdpopsim.DemographicModel`.
    """
    model_id = "HomininComposite_4G20"
    summary = "Four population out of Africa with Neandertal admixture"
    details = """
                A composite of demographic parameters from multiple sources
                """
    # samples:
    # T_Altai = 115e3
    # T_Vindija = 55e3
    # n_YRI = 108
    # n_CEU = 99

    yri = stdpopsim.Population(
        id="YRI", description="1000 Genomes YRI (Yorubans)"
    )
    ceu = stdpopsim.Population(
        id="CEU",
        description=(
            "1000 Genomes CEU (Utah Residents (CEPH) with Northern and "
            "Western European Ancestry"
        ),
    )
    nea = stdpopsim.Population(id="Nea", description="Neandertal lineage")
    anc = stdpopsim.Population(
        id="Anc", description="Ancestral hominins", sampling_time=None
    )
    populations = [yri, ceu, nea, anc]
    # Population name -> index into ``populations``.
    pop_index = {p.id: i for i, p in enumerate(populations)}

    sources = [
        stdpopsim.Citation(
            author="Kuhlwilm et al.",
            year=2016,
            doi="https://doi.org/10.1038/nature16544",
        ),
        stdpopsim.Citation(
            author="Prüfer et al.",
            year=2017,
            doi="https://doi.org/10.1126/science.aao1887",
        ),
        stdpopsim.Citation(
            author="Ragsdale and Gravel",
            year=2019,
            doi="https://doi.org/10.1371/journal.pgen.1008204",
        ),
    ]

    generation_time = 29

    # Kuhlwilm et al. 2016
    N_YRI = 27000
    N_Nea = 3400
    N_Anc = 18500

    # Ragsdale & Gravel 2019
    N_CEU0 = 1450
    r_CEU = 0.00202
    T_CEU_exp = 31.9e3 / generation_time
    # Present-day CEU size after growing at rate r_CEU since T_CEU_exp.
    N_CEU = N_CEU0 * math.exp(r_CEU * T_CEU_exp)
    T_YRI_CEU_split = 65.7e3 / generation_time
    N_ooa_bottleneck = 1080

    # Prüfer et al. 2017
    T_Nea_human_split = 550e3 / generation_time
    T_Nea_CEU_mig = 55e3 / generation_time
    m_Nea_CEU = 0.0225

    # Per-population msprime settings, in the same order as ``populations``;
    # only CEU has a non-default growth rate.
    config_kwargs = [
        {"initial_size": N_YRI},
        {"initial_size": N_CEU, "growth_rate": r_CEU},
        {"initial_size": N_Nea},
        {"initial_size": N_Anc},
    ]
    population_configurations = [
        msprime.PopulationConfiguration(metadata=p.asdict(), **settings)
        for p, settings in zip(populations, config_kwargs)
    ]

    # out-of-Africa bottleneck
    bottleneck = msprime.PopulationParametersChange(
        time=T_CEU_exp,
        initial_size=N_ooa_bottleneck,
        growth_rate=0,
        population_id=pop_index["CEU"],
    )
    # Neandertal -> CEU admixture
    admixture = msprime.MassMigration(
        time=T_Nea_CEU_mig,
        proportion=m_Nea_CEU,
        source=pop_index["CEU"],
        destination=pop_index["Nea"],
    )
    # population splits: every lineage ends up in the ancestral population
    splits = [
        msprime.MassMigration(
            time=when, source=pop_index[who], destination=pop_index["Anc"]
        )
        for when, who in (
            (T_YRI_CEU_split, "CEU"),
            (T_YRI_CEU_split, "YRI"),
            (T_Nea_human_split, "Nea"),
        )
    ]
    demographic_events = [bottleneck, admixture, *splits]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        populations=populations,
        citations=sources,
        generation_time=generation_time,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
    )
Пример #30
0
"""
Genome and demographic model definitions for Escherichia coli.
"""
import stdpopsim
from . import genome_data

###########################################################
#
# Genome definition
#
###########################################################

# Literature references used by the Escherichia coli definitions. The ``doi``
# field holds a full URL for every entry, so each is written as one.
_hartl_et_al = stdpopsim.Citation(
    author="Hartl, Moriyama, and Sawyer",
    year="1994",
    # doesn't have a doi
    doi="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1206133/",
)

_sezonov_et_al = stdpopsim.Citation(
    author="Sezonov et al.",
    year="2007",
    doi="https://doi.org/10.1128/JB.01368-07",
)

_wielgoss_et_al = stdpopsim.Citation(
    author="Wielgoss et al.",
    year="2011",
    doi="https://doi.org/10.1534/g3.111.000406",
)

# Previously a bare DOI ("10.1126/..."), which is not a resolvable URL like
# the other entries.
_blattner_et_al = stdpopsim.Citation(
    author="Blattner et al.",
    year="1997",
    doi="https://doi.org/10.1126/science.277.5331.1453",
)