Пример #1
0
 def verify_simulation(self, n, m, r):
     """
     Run a single simulation and check its post-run bookkeeping.

     The simulator is built for ``n`` samples with a discrete uniform
     recombination map created from ``m`` and ``r`` and a random generator
     seeded with 1.
     """
     uniform_map = msprime.RecombinationMap.uniform_map(m, r, discrete=True)
     generator = _msprime.RandomGenerator(1)
     sim = msprime.simulator_factory(
         n,
         recombination_map=uniform_map,
         random_generator=generator,
     )
     self.assertEqual(sim.random_generator, generator)
     sim.run()
     self.assertEqual(sim.num_breakpoints, len(sim.breakpoints))

     # Each of these must be strictly positive once the simulation has run.
     # Attributes are read one at a time so the first failure stops the scan.
     for attribute in (
         "time",
         "num_avl_node_blocks",
         "num_segment_blocks",
         "num_node_mapping_blocks",
     ):
         self.assertGreater(getattr(sim, attribute), 0)

     # The simulator's final time must equal the largest node time, with
     # 0.0 as the floor when no node is older than that.
     ts = sim.get_tree_sequence()
     oldest_time = max([0.0, *(node.time for node in ts.nodes())])
     self.assertEqual(sim.time, oldest_time)

     self.assertGreater(sim.num_common_ancestor_events, 0)
     self.assertGreaterEqual(sim.num_recombination_events, 0)
     self.assertGreaterEqual(np.sum(sim.num_migration_events), 0)
     self.assertGreaterEqual(sim.num_multiple_recombination_events, 0)
Пример #2
0
 def test_recombination_n100(self):
     """Verify a 100-sample full-ARG simulation with recombination rate 0.2."""
     simulator = msprime.simulator_factory(
         100,
         recombination_rate=0.2,
         record_full_arg=True,
         random_generator=_msprime.RandomGenerator(100),
     )
     self.verify(simulator)
Пример #3
0
 def test_no_recombination(self):
     """
     With no recombination rate supplied, simplifying the verified
     full-ARG tree sequence must leave its node and edge tables unchanged.
     """
     simulator = msprime.simulator_factory(
         10,
         random_generator=_msprime.RandomGenerator(1),
         record_full_arg=True,
     )
     full_ts = self.verify(simulator)
     simplified_ts = full_ts.simplify()
     before, after = full_ts.tables, simplified_ts.tables
     for table_name in ("nodes", "edges"):
         self.assertEqual(getattr(before, table_name),
                          getattr(after, table_name))
Пример #4
0
 def test_smc_variants(self):
     """
     For both SMC model names, a run must reject at least one common
     ancestor event and exceed the event-count threshold for common
     ancestor and recombination events.
     """
     min_events = 20
     for model_name in ("smc", "smc_prime"):
         simulator = msprime.simulator_factory(
             sample_size=10,
             recombination_rate=5,
             model=model_name,
             random_generator=_msprime.RandomGenerator(3),
         )
         simulator.run()
         self.assertGreater(
             simulator.num_rejected_common_ancestor_events, 0)
         self.assertGreater(
             simulator.num_common_ancestor_events, min_events)
         self.assertGreater(
             simulator.num_recombination_events, min_events)
Пример #5
0
 def test_multimerger(self):
     """
     Verify a full-ARG simulation that includes an instantaneous
     bottleneck, with multiple mergers enabled in the verification.
     """
     simulator = msprime.simulator_factory(
         100,
         recombination_rate=0.1,
         record_full_arg=True,
         random_generator=_msprime.RandomGenerator(1234),
         demographic_events=[
             msprime.InstantaneousBottleneck(
                 time=0.1, population=0, strength=5),
         ],
     )
     self.verify(simulator, multiple_mergers=True)
Пример #6
0
    def test_hudson(self):
        """
        The default model and an explicit ``model="hudson"`` must produce
        the same event counts for the same seed, with no rejected common
        ancestor events in either run.
        """
        min_events = 20

        def run_simulation(**model_args):
            # Same sample size, recombination rate and seed for every run;
            # only the optional model argument varies.
            simulator = msprime.simulator_factory(
                sample_size=10,
                recombination_rate=10,
                random_generator=_msprime.RandomGenerator(2),
                **model_args,
            )
            simulator.run()
            return simulator

        default_sim = run_simulation()
        self.assertGreater(default_sim.num_common_ancestor_events, min_events)
        self.assertGreater(default_sim.num_recombination_events, min_events)
        self.assertEqual(default_sim.num_rejected_common_ancestor_events, 0)

        hudson_sim = run_simulation(model="hudson")
        self.assertEqual(hudson_sim.num_common_ancestor_events,
                         default_sim.num_common_ancestor_events)
        self.assertEqual(hudson_sim.num_recombination_events,
                         default_sim.num_recombination_events)
        self.assertEqual(hudson_sim.num_rejected_common_ancestor_events, 0)
Пример #7
0
def mutate(tree_sequence,
           rate=None,
           random_seed=None,
           model=None,
           keep=False,
           start_time=None,
           end_time=None):
    """
    Simulates mutations on the specified ancestry and returns the resulting
    :class:`tskit.TreeSequence`. Mutations are generated at the specified
    rate, measured in generations. Mutations are generated under the infinite
    sites model, and so the rate of new mutations is per unit of sequence
    length per generation.

    If a random seed is specified, this is used to seed the random number
    generator. If the same seed is specified and all other parameters are equal
    then the same mutations will be generated. If no random seed is specified
    then one is generated automatically.

    If the ``model`` parameter is specified, this determines the model under
    which mutations are generated. Currently only the :class:`.InfiniteSites`
    mutation model is supported. This parameter is useful if you wish to obtain
    sequences with letters from the nucleotide alphabet rather than the default
    0/1 states. By default mutations from the infinite sites model with a binary
    alphabet are generated.

    By default, sites and mutations in the parameter tree sequence are
    discarded. If the ``keep`` parameter is true, however, *additional*
    mutations are simulated. Under the infinite sites mutation model, all new
    mutations generated will occur at distinct positions from each other and
    from any existing mutations (by rejection sampling).

    The time interval over which mutations can occur may be controlled
    using the ``start_time`` and ``end_time`` parameters. The ``start_time``
    defines the lower bound (in time-ago) on this interval and ``end_time``
    the upper bound. Note that we may have mutations associated with
    nodes with time <= ``start_time`` since mutations store the node at the
    bottom (i.e., towards the leaves) of the branch that they occur on.

    A provenance record describing this call (the rate, seed, ``keep`` flag
    and any explicitly supplied time bounds) is appended to the returned
    tree sequence.

    :param tskit.TreeSequence tree_sequence: The tree sequence onto which we
        wish to throw mutations.
    :param float rate: The rate of mutation per generation. (Default: 0).
    :param int random_seed: The random seed. If this is `None`, a
        random seed will be automatically generated. Valid random
        seeds must be between 1 and :math:`2^{32} - 1`.
    :param MutationModel model: The mutation model to use when generating
        mutations. If not specified or None, the :class:`.InfiniteSites`
        mutation model is used.
    :param bool keep: Whether to keep existing mutations (default: False).
    :param float start_time: The minimum time at which a mutation can
        occur. (Default: no restriction.)
    :param float end_time: The maximum time at which a mutation can occur
        (Default: no restriction).
    :return: The :class:`tskit.TreeSequence` object resulting from overlaying
        mutations on the input tree sequence.
    :rtype: :class:`tskit.TreeSequence`
    :raises ValueError: If ``tree_sequence`` has no ``tables`` attribute, or
        if ``start_time`` is greater than ``end_time``.
    :raises TypeError: If ``model`` has no ``alphabet`` attribute.
    """
    try:
        tables = tree_sequence.tables
    except AttributeError as err:
        raise ValueError(
            "First argument must be a TreeSequence instance.") from err

    # Resolve and validate the time window first so that an invalid interval
    # is rejected before a seed is drawn or any low-level object is built.
    # Only bounds the caller actually supplied are recorded in provenance.
    time_parameters = {}
    if start_time is None:
        start_time = -sys.float_info.max
    else:
        start_time = float(start_time)
        time_parameters["start_time"] = start_time
    if end_time is None:
        end_time = sys.float_info.max
    else:
        end_time = float(end_time)
        time_parameters["end_time"] = end_time
    if start_time > end_time:
        raise ValueError("start_time must be <= end_time")

    if random_seed is None:
        random_seed = simulations._get_random_seed()
    random_seed = int(random_seed)
    rng = _msprime.RandomGenerator(random_seed)

    if model is None:
        model = InfiniteSites()
    try:
        alphabet = model.alphabet
    except AttributeError as err:
        raise TypeError("model must be an InfiniteSites instance") from err

    rate = float(0 if rate is None else rate)
    keep = bool(keep)

    # Key order is kept stable (command, rate, random_seed, keep, then any
    # time bounds) so the serialised provenance record does not change.
    parameters = {
        "command": "mutate",
        "rate": rate,
        "random_seed": random_seed,
        "keep": keep,
    }
    parameters.update(time_parameters)
    # TODO Add a JSON representation of the model to the provenance.
    provenance_dict = provenance.get_provenance_dict(parameters)

    mutation_generator = _msprime.MutationGenerator(rng,
                                                    rate,
                                                    alphabet=alphabet,
                                                    start_time=start_time,
                                                    end_time=end_time)
    # The generator operates on a low-level table collection; copy the input
    # tables into one, generate, then convert the result back to tskit.
    lwt = _msprime.LightweightTableCollection()
    lwt.fromdict(tables.asdict())
    mutation_generator.generate(lwt, keep=keep)

    tables = tskit.TableCollection.fromdict(lwt.asdict())
    tables.provenances.add_row(json.dumps(provenance_dict))
    return tables.tree_sequence()
Пример #8
0
    def __init__(
        self,
        sample_size=1,
        num_loci=1,
        scaled_recombination_rate=0,
        num_replicates=1,
        migration_matrix=None,
        population_configurations=None,
        demographic_events=None,
        scaled_mutation_rate=0,
        print_trees=False,
        precision=3,
        random_seeds=None,
        scaled_gene_conversion_rate=0,
        gene_conversion_track_length=1,
        hotspots=None,
    ):
        """
        Store the run settings and build the recombination map, random
        generator, simulator and mutation generator used by this instance.

        When ``hotspots`` is given the recombination map is derived from it;
        otherwise a discrete uniform map over ``num_loci`` is used. When
        ``random_seeds`` is None a fresh set of seeds is generated.
        """
        # Plain settings copied straight from the arguments.
        self._sample_size = sample_size
        self._num_loci = num_loci
        self._num_replicates = num_replicates
        self._recombination_rate = scaled_recombination_rate
        self._mutation_rate = scaled_mutation_rate

        # For strict ms-compatibility we want to have m non-recombining loci.
        if hotspots is not None:
            self._recomb_map = hotspots_to_recomb_map(
                hotspots, self._recombination_rate, num_loci)
        else:
            self._recomb_map = msprime.RecombinationMap.uniform_map(
                num_loci, self._recombination_rate, discrete=True)

        # Reduce the ms-style seeds to the single seed that drives the
        # random generator, keeping the original seeds alongside it.
        ms_seeds = generate_seeds() if random_seeds is None else random_seeds
        self._random_generator = _msprime.RandomGenerator(
            get_single_seed(ms_seeds))
        self._ms_random_seeds = ms_seeds

        # If any population_configurations were specified we don't want to
        # give the overall sample size.
        if population_configurations is not None:
            effective_sample_size = None
        else:
            effective_sample_size = self._sample_size

        # msprime measures time in units of generations, given a specific
        # Ne value, whereas ms uses coalescent time. To be compatible with
        # ms, we therefore need to use an Ne value of 1/4.
        self._simulator = msprime.simulator_factory(
            Ne=0.25,
            sample_size=effective_sample_size,
            recombination_map=self._recomb_map,
            population_configurations=population_configurations,
            migration_matrix=migration_matrix,
            demographic_events=demographic_events,
            gene_conversion_rate=scaled_gene_conversion_rate,
            gene_conversion_track_length=gene_conversion_track_length,
            random_generator=self._random_generator,
        )

        self._precision = precision
        self._print_trees = print_trees
        self._mutation_generator = mutations._simple_mutation_generator(
            self._mutation_rate,
            self._simulator.sequence_length,
            self._random_generator,
        )
Пример #9
0
 def test_mutation_generator_unsupported(self):
     """``msprime.simulate`` must raise ValueError for ``mutation_generator``."""
     sample_size = 10
     generator = msprime.mutations._simple_mutation_generator(
         1, 1, _msprime.RandomGenerator(1))
     with self.assertRaises(ValueError):
         msprime.simulate(sample_size, mutation_generator=generator)
Пример #10
0
 def test_random_seed(self):
     """
     The simulator must expose the random generator it was given, and
     that generator must report the seed it was created with.
     """
     expected_seed = 12345
     generator = _msprime.RandomGenerator(expected_seed)
     simulator = msprime.simulator_factory(10, random_generator=generator)
     self.assertEqual(generator, simulator.random_generator)
     self.assertEqual(generator.get_seed(), expected_seed)
Пример #11
0
def mutate(
    tree_sequence,
    rate=None,
    random_seed=None,
    model=None,
    keep=False,
    start_time=None,
    end_time=None,
    discrete=False,
):
    """
    Simulates mutations on the specified ancestry and returns the resulting
    :class:`tskit.TreeSequence`. Mutations are generated at the specified
    rate, measured per unit of sequence length per generation.

    If a random seed is specified, this is used to seed the random number
    generator. If the same seed is specified and all other parameters are equal
    then the same mutations will be generated. If no random seed is specified
    then one is generated automatically.

    If the ``model`` parameter is specified, this determines the model under
    which mutations are generated; it must be a :class:`.MutationModel`
    instance. If it is omitted, the :class:`.BinaryMutations` model is used.

    By default, sites and mutations in the parameter tree sequence are
    discarded. If the ``keep`` parameter is true, however, *additional*
    mutations are simulated. Under the infinite sites mutation model, all new
    mutations generated will occur at distinct positions from each other and
    from any existing mutations (by rejection sampling).

    The time interval over which mutations can occur may be controlled
    using the ``start_time`` and ``end_time`` parameters. The ``start_time``
    defines the lower bound (in time-ago) on this interval and ``end_time``
    the upper bound. Note that we may have mutations associated with
    nodes with time <= ``start_time`` since mutations store the node at the
    bottom (i.e., towards the leaves) of the branch that they occur on.

    A provenance record describing this call is appended to the returned
    tree sequence.

    :param tskit.TreeSequence tree_sequence: The tree sequence onto which we
        wish to throw mutations.
    :param float rate: The rate of mutation per generation, as either a
        single number (for a uniform rate) or as a
        :class:`.MutationMap`. (Default: 0).
    :param int random_seed: The random seed. If this is `None`, a
        random seed will be automatically generated. Valid random
        seeds must be between 1 and :math:`2^{32} - 1`.
    :param MutationModel model: The mutation model to use when generating
        mutations. If not specified or None, the :class:`.BinaryMutations`
        mutation model is used.
    :param bool keep: Whether to keep existing mutations (default: False).
    :param float start_time: The minimum time ago at which a mutation can
        occur. (Default: no restriction.)
    :param float end_time: The maximum time ago at which a mutation can occur
        (Default: no restriction).
    :param bool discrete: Whether to generate mutations at only integer positions
        along the genome.  Default is False, which produces infinite-sites
        mutations at floating-point positions.
    :return: The :class:`tskit.TreeSequence` object resulting from overlaying
        mutations on the input tree sequence.
    :rtype: :class:`tskit.TreeSequence`
    :raises ValueError: If ``tree_sequence`` has no ``tables`` attribute, or
        if ``start_time`` is greater than ``end_time``.
    :raises TypeError: If ``rate`` is neither convertible to float nor a
        :class:`.MutationMap`, or if ``model`` is not a
        :class:`.MutationModel`.
    """
    try:
        tables = tree_sequence.tables
    except AttributeError as err:
        raise ValueError(
            "First argument must be a TreeSequence instance.") from err
    seed = random_seed
    if random_seed is None:
        seed = core.get_random_seed()
    else:
        seed = int(seed)

    if rate is None:
        rate = 0
    # Only the float conversion is guarded: a TypeError here means ``rate``
    # is not a number and is treated as a candidate MutationMap. Errors from
    # building the uniform map itself are allowed to propagate unchanged.
    try:
        rate = float(rate)
    except TypeError:
        rate_map = rate
    else:
        rate_map = MutationMap(position=[0.0, tree_sequence.sequence_length],
                               rate=[rate, 0.0])
    if not isinstance(rate_map, MutationMap):
        raise TypeError("rate must be a float or a MutationMap")

    if start_time is None:
        start_time = -sys.float_info.max
    else:
        start_time = float(start_time)
    if end_time is None:
        end_time = sys.float_info.max
    else:
        end_time = float(end_time)
    if start_time > end_time:
        raise ValueError("start_time must be <= end_time")
    keep = bool(keep)
    discrete = bool(discrete)

    if model is None:
        model = BinaryMutations()
    if not isinstance(model, MutationModel):
        raise TypeError("model must be a MutationModel")

    # NOTE: this snapshot reads the *current* values bound to the argument
    # names, i.e. after the normalisation above. The reassignments of
    # ``rate``, ``start_time``, ``end_time``, ``keep``, ``discrete`` and
    # ``model`` must therefore stay above this point.
    argspec = inspect.getargvalues(inspect.currentframe())
    parameters = {
        "command": "mutate",
        **{arg: argspec.locals[arg]
           for arg in argspec.args},
    }
    # Record the seed actually used, which differs from the ``random_seed``
    # argument when that was None.
    parameters["random_seed"] = seed
    encoded_provenance = provenance.json_encode_provenance(
        provenance.get_provenance_dict(parameters))

    rng = _msprime.RandomGenerator(seed)
    mutation_generator = _msprime.MutationGenerator(random_generator=rng,
                                                    rate_map=rate_map._ll_map,
                                                    model=model)
    # The generator operates on a low-level table collection; copy the input
    # tables into one, generate, then convert the result back to tskit.
    lwt = _msprime.LightweightTableCollection()
    lwt.fromdict(tables.asdict())
    mutation_generator.generate(lwt,
                                keep=keep,
                                start_time=start_time,
                                end_time=end_time,
                                discrete=discrete)

    tables = tskit.TableCollection.fromdict(lwt.asdict())
    tables.provenances.add_row(encoded_provenance)
    return tables.tree_sequence()