示例#1
0
    def test_gaps(self):
        # SMC simulations should never have adjacent edgesets with
        # a non-zero distance between them and the same parent.
        def count_gaps(tree_sequence):
            # Count neighbouring pairs, in (parent, left) order, that share a
            # parent while the first does not end where the second begins.
            ordered = sorted(
                tree_sequence.edgesets(), key=lambda e: (e.parent, e.left))
            return sum(
                1 for prev, cur in zip(ordered, ordered[1:])
                if prev.parent == cur.parent and prev.right != cur.left)

        common_args = dict(
            sample_size=10, recombination_rate=20, random_seed=1)

        # First we do a simulation with the standard model to make sure
        # we have plausible parameter values.
        standard_ts = msprime.simulate(**common_args)
        self.assertGreater(count_gaps(standard_ts), 10)  # Reasonable threshold

        # Now do the same for SMC and SMC'.
        for model in ["smc", "smc_prime"]:
            model_ts = msprime.simulate(model=model, **common_args)
            self.assertEqual(count_gaps(model_ts), 0)
示例#2
0
    def test_gaps(self):
        # SMC simulations should never have adjacent coalescence records with
        # a non-zero distance between them and the same time/node value.
        def count_gaps(tree_sequence):
            # Count consecutive record pairs, in the order records() yields
            # them, that share a node while the first does not end where the
            # second begins.
            recs = list(tree_sequence.records())
            return sum(
                1 for prev, cur in zip(recs, recs[1:])
                if prev.node == cur.node and prev.right != cur.left)

        common_args = dict(
            sample_size=10, recombination_rate=20, random_seed=1)

        # First we do a simulation with the standard model to make sure
        # we have plausible parameter values.
        standard_ts = msprime.simulate(**common_args)
        self.assertGreater(count_gaps(standard_ts), 10)  # Reasonable threshold

        # Now do the same for SMC and SMC'.
        for model in ["smc", "smc_prime"]:
            model_ts = msprime.simulate(model=model, **common_args)
            self.assertEqual(count_gaps(model_ts), 0)
示例#3
0
 def test_simple_cases(self):
     # Verify the first tree of a default simulation for a run of small
     # sample sizes followed by a few larger ones.
     sample_sizes = list(range(2, 10)) + [11, 13, 19, 101]
     for sample_size in sample_sizes:
         first_tree = next(msprime.simulate(sample_size).trees())
         self.verify_sparse_tree(first_tree)
 def test_two_populations_no_migration_one_locus(self):
     # Simulate two populations with a zero migration matrix up to a time
     # limit, then continue from that state with a mass migration event.
     seed = 1234
     initial_ts = msprime.simulate(
         population_configurations=[
             msprime.PopulationConfiguration(10),
             msprime.PopulationConfiguration(10)],
         migration_matrix=np.zeros((2, 2)),
         __tmp_max_time=0.1,
         random_seed=seed)
     merge_event = msprime.MassMigration(100, 0, 1, 1.0)
     completed_ts = msprime.simulate(
         population_configurations=[
             msprime.PopulationConfiguration(),
             msprime.PopulationConfiguration()],
         migration_matrix=np.zeros((2, 2)),
         from_ts=initial_ts,
         demographic_events=[merge_event],
         random_seed=seed)
     tree = completed_ts.first()
     # We should have two children at the root, and every node below
     # should be in one population.
     children_of_root = tree.children(tree.root)
     self.assertEqual(len(children_of_root), 2)
     subtree_root_by_pop = {}
     for child in children_of_root:
         subtree_root_by_pop[completed_ts.node(child).population] = child
     self.assertEqual(len(subtree_root_by_pop), 2)
     for pop in (0, 1):
         for u in tree.nodes(subtree_root_by_pop[pop]):
             self.assertEqual(completed_ts.node(u).population, pop)
 def test_two_populations_migration(self):
     # A simulation started from a bare set of sample nodes via from_ts
     # should produce the same tables (ignoring provenance) as the
     # equivalent direct simulation with the same seed.
     n = 10
     seed = 1234
     symmetric_migration = [[0, 1], [1, 0]]
     direct_ts = msprime.simulate(
         population_configurations=[
             msprime.PopulationConfiguration(n),
             msprime.PopulationConfiguration(0)],
         migration_matrix=symmetric_migration,
         random_seed=seed)

     # Hand-build the starting state: two populations and n samples at
     # time zero, all in population 0.
     start_tables = msprime.TableCollection(1)
     for _ in range(2):
         start_tables.populations.add_row()
     for _ in range(n):
         start_tables.nodes.add_row(
             flags=msprime.NODE_IS_SAMPLE, time=0, population=0)
     resumed_ts = msprime.simulate(
         from_ts=start_tables.tree_sequence(), start_time=0,
         population_configurations=[
             msprime.PopulationConfiguration(),
             msprime.PopulationConfiguration()],
         migration_matrix=[[0, 1], [1, 0]],
         random_seed=seed)

     direct_tables = direct_ts.dump_tables()
     resumed_tables = resumed_ts.dump_tables()
     for table_collection in (direct_tables, resumed_tables):
         table_collection.provenances.clear()
     self.assertEqual(direct_tables, resumed_tables)
 def test_single_locus_max_time(self):
     # Stop a simulation early (leaving more than one root somewhere), then
     # finish it from the oldest node time and check the result.
     partial_ts = msprime.simulate(20, __tmp_max_time=1, random_seed=5)
     most_roots = max(tree.num_roots for tree in partial_ts.trees())
     self.assertGreater(most_roots, 1)
     resume_time = partial_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=partial_ts, start_time=resume_time, random_seed=2)
     self.verify_from_tables(partial_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
 def test_from_single_locus_decapitated(self):
     # Decapitate a finished simulation with tsutil.decapitate, then
     # re-simulate from the oldest remaining node time.
     full_ts = msprime.simulate(10, random_seed=5)
     truncated_ts = tsutil.decapitate(full_ts, full_ts.num_edges // 2)
     resume_time = truncated_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=truncated_ts, start_time=resume_time, random_seed=2)
     self.verify_from_tables(truncated_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
示例#8
0
 def test_provenance(self):
     # A single simulation, and each of a set of replicates, should carry
     # exactly one provenance record that passes verify_provenance.
     single_ts = msprime.simulate(10)
     self.assertEqual(single_ts.num_provenances, 1)
     self.verify_provenance(single_ts.provenance(0))
     # TODO check the form of the dictionary
     replicates = msprime.simulate(10, num_replicates=10)
     for replicate_ts in replicates:
         self.assertEqual(replicate_ts.num_provenances, 1)
         self.verify_provenance(replicate_ts.provenance(0))
 def test_sequence_length(self):
     # Resume a time-limited simulation, passing the same explicit length
     # (5) to both the initial and the continuing simulation.
     seq_length = 5
     partial_ts = msprime.simulate(
         5, __tmp_max_time=0.1, random_seed=5, length=seq_length)
     resume_time = partial_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=partial_ts, start_time=resume_time, random_seed=2,
         length=seq_length)
     self.verify_from_tables(partial_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
 def test_individuals(self):
     # Resume a time-limited simulation after individuals have been added
     # to it with tsutil.insert_random_ploidy_individuals.
     partial_ts = msprime.simulate(25, random_seed=5, __tmp_max_time=0.5)
     has_multiple_roots = any(
         tree.num_roots > 1 for tree in partial_ts.trees())
     self.assertTrue(has_multiple_roots)
     partial_ts = tsutil.insert_random_ploidy_individuals(partial_ts, seed=2)
     resume_time = partial_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=partial_ts, start_time=resume_time, random_seed=2)
     self.verify_from_tables(partial_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
 def test_sequence_length_mismatch(self):
     # A length that differs from the base tree sequence's length of 5 must
     # be rejected, whether given directly or through a recombination map.
     base_ts = self.get_example_base(length=5)
     for wrong_length in [1, 4.99, 5.01, 100]:
         self.assertRaises(
             ValueError, msprime.simulate,
             from_ts=base_ts, start_time=100, length=wrong_length)
         wrong_map = msprime.RecombinationMap.uniform_map(wrong_length, 1)
         self.assertRaises(
             ValueError, msprime.simulate,
             from_ts=base_ts, start_time=100, recombination_map=wrong_map)
 def test_decapitated_mutations(self):
     # Re-simulate from a decapitated tree sequence that still carries
     # mutations.
     full_ts = msprime.simulate(10, random_seed=5, mutation_rate=10)
     truncated_ts = tsutil.decapitate(full_ts, full_ts.num_edges // 2)
     self.assertGreater(truncated_ts.num_mutations, 0)
     resume_time = truncated_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=truncated_ts, start_time=resume_time, random_seed=2)
     self.verify_from_tables(truncated_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
 def test_from_multi_locus_old_recombination(self):
     # Decapitate a multi-tree simulation and re-simulate from it using
     # the same recombination rate.
     rate = 2
     full_ts = msprime.simulate(10, recombination_rate=rate, random_seed=5)
     self.assertGreater(full_ts.num_trees, 1)
     truncated_ts = tsutil.decapitate(full_ts, full_ts.num_edges // 2)
     resume_time = truncated_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=truncated_ts, start_time=resume_time, random_seed=2,
         recombination_rate=rate)
     self.verify_from_tables(truncated_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
示例#14
0
 def test_models(self):
     # Exponential growth of 0 and constant model should be identical.
     for n in [2, 10, 100]:
         zero_growth = msprime.PopulationConfiguration(n, growth_rate=0)
         constant_size = msprime.PopulationConfiguration(n, initial_size=1.0)
         # Same seed for both configurations; compare the first tree of each.
         first_trees = [
             next(msprime.simulate(
                 random_seed=1, population_configurations=[config]).trees())
             for config in (zero_growth, constant_size)]
         self.assertEqual(
             first_trees[0].parent_dict, first_trees[1].parent_dict)
 def test_population_mismatch(self):
     # Supplying fewer population configurations than the base tree
     # sequence has populations must raise a ValueError.
     for N in range(1, 5):
         base_ts = self.get_example_base(num_populations=N)
         start_time = max(node.time for node in base_ts.nodes())
         # k only ranges over 1..N-1, so it can never equal N; the former
         # ``if k != N`` guard was always true and has been removed.
         for k in range(1, N):
             with self.assertRaises(ValueError):
                 msprime.simulate(
                     from_ts=base_ts, start_time=start_time,
                     population_configurations=[
                         msprime.PopulationConfiguration() for _ in range(k)])
 def test_zero_recombination_rate(self):
     # Continuing a recombining simulation with recombination_rate=0 is
     # expected to be rejected with an InputError.
     partial_ts = msprime.simulate(
         sample_size=4, __tmp_max_time=1, random_seed=5, recombination_rate=1)
     most_roots = max(tree.num_roots for tree in partial_ts.trees())
     self.assertGreater(most_roots, 1)
     resume_time = partial_ts.tables.nodes.time.max()
     # Raises:
     # The specified recombination map is too coarse to translate...
     self.assertRaises(
         _msprime.InputError, msprime.simulate,
         from_ts=partial_ts, start_time=resume_time, random_seed=2,
         recombination_rate=0)
示例#17
0
 def test_simple_cases(self):
     # Sweep each of the three positional simulate() arguments in turn
     # while holding the other two fixed.
     for n in range(2, 10):
         self.verify_sparse_trees(msprime.simulate(n, 1, 0.1))
     for m in range(1, 10):
         self.verify_sparse_trees(msprime.simulate(4, m, 0.1))
     for r in [0.001, 0.01]:
         self.verify_sparse_trees(msprime.simulate(4, 100, r))
 def test_small_num_loci(self):
     # Resume a time-limited simulation using uniform recombination maps
     # built with a range of small num_loci values.
     for num_loci in [1, 10, 16, 100]:
         recomb_map = msprime.RecombinationMap.uniform_map(
             10, 1, num_loci=num_loci)
         partial_ts = msprime.simulate(
             sample_size=4, __tmp_max_time=1, random_seed=5,
             recombination_map=recomb_map)
         most_roots = max(tree.num_roots for tree in partial_ts.trees())
         self.assertGreater(most_roots, 1)
         resume_time = partial_ts.tables.nodes.time.max()
         completed_ts = msprime.simulate(
             from_ts=partial_ts, start_time=resume_time, random_seed=2,
             recombination_map=recomb_map)
         self.verify_from_tables(partial_ts, completed_ts, resume_time)
         self.verify_simulation_completed(completed_ts)
 def test_fine_to_coarse_map(self):
     # Continue a simulation made with recombination_rate=1 using a
     # uniform map restricted to 10 loci.
     partial_ts = msprime.simulate(
         sample_size=4, __tmp_max_time=1, random_seed=5, recombination_rate=1)
     most_roots_before = max(tree.num_roots for tree in partial_ts.trees())
     self.assertGreater(most_roots_before, 1)
     self.assertGreater(partial_ts.num_edges, 2)
     resume_time = partial_ts.tables.nodes.time.max()
     coarse_map = msprime.RecombinationMap.uniform_map(1, 1, num_loci=10)
     final_ts = msprime.simulate(
         from_ts=partial_ts, start_time=resume_time, random_seed=2,
         recombination_map=coarse_map)
     most_roots_after = max(tree.num_roots for tree in final_ts.trees())
     # We don't correctly finish the tree sequence when we get mapping problems.
     # This must be documented as a "known issue".
     self.assertGreater(most_roots_after, 1)
 def test_nonuniform_recombination_map(self):
     # Resume a time-limited simulation under a piecewise recombination
     # map, using the same map for both stages.
     recomb_map = msprime.RecombinationMap(
         [0, 0.25, 0.5, 0.75, 1], [1, 2, 1, 3, 0], 100)
     partial_ts = msprime.simulate(
         5, __tmp_max_time=0.1, random_seed=5, recombination_map=recomb_map)
     resume_time = partial_ts.tables.nodes.time.max()
     completed_ts = msprime.simulate(
         from_ts=partial_ts, start_time=resume_time, random_seed=2,
         recombination_map=recomb_map)
     self.verify_from_tables(partial_ts, completed_ts, resume_time)
     self.verify_simulation_completed(completed_ts)
    def test_from_subclass(self):
        # simulate() should accept an instance of a TreeSequence subclass
        # as its from_ts argument.
        from_ts = msprime.simulate(20, __tmp_max_time=1, random_seed=5)

        class MockTreeSequence(msprime.TreeSequence):
            pass

        subclass_instance = MockTreeSequence(from_ts.ll_tree_sequence)
        # assertTrue(type(x), cls) treats ``cls`` as the failure message and
        # always passes because a type object is truthy; assertIs actually
        # checks that the instance has exactly the subclass type.
        self.assertIs(type(subclass_instance), MockTreeSequence)
        self.assertIsInstance(subclass_instance, msprime.TreeSequence)
        self.assertGreater(max(tree.num_roots for tree in subclass_instance.trees()), 1)
        start_time = subclass_instance.tables.nodes.time.max()
        final_ts = msprime.simulate(
            from_ts=subclass_instance, start_time=start_time, random_seed=2)
        self.verify_from_tables(subclass_instance, final_ts, start_time)
        self.verify_simulation_completed(final_ts)
    def test_single_population_id_null(self):
        # Setting the population of any single node to -1 (all others 0)
        # must make simulate() raise an InputError.
        base_ts = self.get_example_base()
        tables = base_ts.dump_tables()
        node_table = tables.nodes

        for node_id in range(base_ts.num_nodes):
            population_column = np.zeros_like(node_table.population)
            population_column[node_id] = -1
            node_table.set_columns(
                flags=node_table.flags,
                population=population_column,
                time=node_table.time)
            with self.assertRaises(_msprime.InputError):
                msprime.simulate(
                    from_ts=tables.tree_sequence(),
                    start_time=node_table.time.max())
    def test_low_recombination_rate_interval(self):
        # Continuing with a map whose middle interval has a very low rate
        # (1e-6) is expected to be rejected with an InputError.
        partial_ts = msprime.simulate(
            sample_size=4, __tmp_max_time=1, random_seed=16, recombination_rate=1,
            length=10)
        most_roots = max(tree.num_roots for tree in partial_ts.trees())
        self.assertGreater(most_roots, 1)
        resume_time = partial_ts.tables.nodes.time.max()

        low_rate_map = msprime.RecombinationMap(
            positions=[0, 3, 7, 10], rates=[1, 1e-6, 1, 0], num_loci=100)
        # Raises:
        # The specified recombination map is too coarse to translate...
        self.assertRaises(
            _msprime.InputError, msprime.simulate,
            from_ts=partial_ts, start_time=resume_time, random_seed=2,
            recombination_map=low_rate_map)
示例#24
0
def main():
    """
    Run a large simulation, dump it to ``large-example.hdf5`` and write all
    haplotypes to ``large-example-haplotypes.txt``, printing how long each
    stage took.

    Timing uses ``time.perf_counter()`` because ``time.clock()`` was removed
    in Python 3.8. Note that ``perf_counter`` reports elapsed wall-clock
    time, whereas ``time.clock()`` reported CPU time on Unix.
    """
    before = time.perf_counter()
    # Run the actual simulations
    tree_sequence = msprime.simulate(
        sample_size=10 ** 5,
        num_loci=100 * 10 ** 6,
        scaled_recombination_rate=0.001,
        scaled_mutation_rate=0.001,
        max_memory="5G",
        random_seed=1,  # Arbitrary - make this reproducible.
    )
    duration = time.perf_counter() - before
    print("Simulated 100k genomes in {0:.3f} seconds.".format(duration))

    # Write the results to file, which is small and can be quickly reloaded
    # to avoid the cost of re-running the simulation. We can reload the
    # file in a few seconds using msprime.load(filename).
    tree_sequence.dump("large-example.hdf5")

    # Now write the haplotypes to a file.
    # WARNING! This takes a lot of memory (>100G), so make sure you don't
    # crash your server. This memory requirement will be drastically reduced
    # in future versions.
    before = time.perf_counter()
    with open("large-example-haplotypes.txt", "w") as f:
        for h in tree_sequence.haplotypes():
            print(h, file=f)
    duration = time.perf_counter() - before
    print("Wrote 100k haplotypes to file in {0:.3f} seconds".format(duration))
示例#25
0
文件: dev.py 项目: terhorst/msprime
def leaf_count_example():
    """
    Incrementally maintain the number of leaves below each node while
    iterating over ``ts.diffs()``, asserting after every step that the
    running counts equal the result of ``count_leaves``.
    """
    n = 20
    ts = msprime.simulate(n, 10000, scaled_recombination_rate=0.1, random_seed=1)
    table_size = ts.get_num_nodes() + 1
    # parent[u] == 0 is used as the "no parent" marker, which is what stops
    # the upward walks below.
    parent = [0] * table_size
    leaf_count = [0] * table_size
    for leaf in range(1, n + 1):
        leaf_count[leaf] = 1
    for _, records_out, records_in in ts.diffs():
        for node, children, _ in records_out:
            # Detach the children, then remove this node's leaves from it
            # and from every ancestor above it.
            for child in children:
                parent[child] = 0
            removed = leaf_count[node]
            ancestor = node
            while ancestor != 0:
                leaf_count[ancestor] -= removed
                ancestor = parent[ancestor]
        for node, children, _ in records_in:
            # Attach the children, then add their combined leaves to this
            # node and to every ancestor above it.
            added = 0
            for child in children:
                parent[child] = node
                added += leaf_count[child]
            ancestor = node
            while ancestor != 0:
                leaf_count[ancestor] += added
                ancestor = parent[ancestor]
        expected = count_leaves(parent, n)
        assert expected == leaf_count
示例#26
0
def variants_example():
    """
    Simulate a small sample with mutations, then print the mutation count
    followed by one tab-separated line (index, position, genotypes) per
    variant.
    """
    simulation_args = dict(
        sample_size=20, Ne=1e4, length=5e3, recombination_rate=2e-8,
        mutation_rate=2e-8, random_seed=10)
    tree_sequence = msprime.simulate(**simulation_args)
    print("Simulated ", tree_sequence.get_num_mutations(), "mutations")
    for var in tree_sequence.variants():
        fields = (var.index, var.position, var.genotypes)
        print(*fields, sep="\t")
 def test_individuals(self):
     # Add n individuals with varying flags, locations and metadata to the
     # tables of a simulation, then run self.verify on the tables.
     n = 10
     simulated = msprime.simulate(n, mutation_rate=1, random_seed=2)
     tables = simulated.dump_tables()
     for index in range(n):
         tables.individuals.add_row(
             flags=index, location=(index, index), metadata=b"x" * index)
     self.verify(tables)
示例#28
0
def variable_recomb_example():
    """
    Plot binned recombination rates from a HapMap-format genetic map together
    with the breakpoint density of a simulation run under that map, saving
    the figure to ``_static/hapmap_chr22.svg``.
    """
    num_bins = 500
    # Read in the recombination map using the read_hapmap method,
    recomb_map = msprime.RecombinationMap.read_hapmap(
        "../hapmap/genetic_map_GRCh37_chr22.txt")

    # Now we get the positions and rates from the recombination
    # map and plot these using 500 bins.
    map_positions = np.array(recomb_map.get_positions()[1:])
    map_rates = np.array(recomb_map.get_rates()[1:])
    binned_rates, rate_edges, _ = scipy.stats.binned_statistic(
        map_positions, map_rates, bins=num_bins)
    # Bins whose statistic is NaN are left out of the plotted line.
    has_value = ~np.isnan(binned_rates)
    fig, rate_axis = pyplot.subplots(figsize=(16, 6))
    rate_axis.plot(
        rate_edges[:-1][has_value], binned_rates[has_value], color="blue")
    rate_axis.set_ylabel("Recombination rate")
    rate_axis.set_xlabel("Chromosome position")

    # Now we run the simulation for this map. We assume Ne=10^4
    # and have a sample of 100 individuals
    tree_sequence = msprime.simulate(
        sample_size=100,
        Ne=10**4,
        recombination_map=recomb_map)
    # Now plot the density of breakpoints along the chromosome
    breakpoints = np.array(list(tree_sequence.breakpoints()))
    density_axis = rate_axis.twinx()
    density, density_edges = np.histogram(breakpoints, num_bins, density=True)
    density_axis.plot(density_edges[:-1], density, color="green")
    density_axis.set_ylabel("Breakpoint density")
    density_axis.set_xlim(1.5e7, 5.3e7)
    fig.savefig("_static/hapmap_chr22.svg")
def verify():
    """
    Checks that simplify() does the right thing, by comparing to the implementation
    in msprime.

    For several simulated tree sequences and sample subsets, the node columns
    (time, flags) and edge columns (left, right, parent, child) produced by
    the local ``simplify`` are asserted equal to those of ``ts.simplify``.
    """
    for n in [10, 100, 1000]:
        ts = msprime.simulate(n, recombination_rate=1, random_seed=1)
        nodes = ts.tables.nodes
        edges = ts.tables.edges
        print("simulated for ", n)

        for N in range(2, 10):
            sample = list(range(N))
            ts1 = simplify(sample, nodes, edges, ts.sequence_length)
            ts2 = ts.simplify(sample)

            n1 = ts1.tables.nodes
            n2 = ts2.tables.nodes
            assert np.array_equal(n1.time, n2.time)
            assert np.array_equal(n1.flags, n2.flags)
            e1 = ts1.tables.edges
            e2 = ts2.tables.edges
            assert np.array_equal(e1.left, e2.left)
            assert np.array_equal(e1.right, e2.right)
            # These two checks previously compared e1 with itself and so
            # could never fail; compare against the reference output.
            assert np.array_equal(e1.parent, e2.parent)
            assert np.array_equal(e1.child, e2.child)
示例#30
0
 def test_interleaved_migrations(self):
     # Four samples, one per population, taken at increasing times, with a
     # mass migration at each sampling time moving lineages into the next
     # population. Check the times and populations of all seven nodes.
     t1 = 1.5
     t2 = 10.5
     t3 = 50.5
     sampling_times = [0, t1, t2, t3]
     ts = msprime.simulate(
         Ne=1/4,
         samples=[
             msprime.Sample(0, 0),
             msprime.Sample(1, t1),
             msprime.Sample(2, t2),
             msprime.Sample(3, t3)],
         population_configurations=[
             msprime.PopulationConfiguration() for _ in range(4)],
         demographic_events=[
             msprime.MassMigration(time=t1, source=0, destination=1),
             msprime.MassMigration(time=t2, source=1, destination=2),
             msprime.MassMigration(time=t3, source=2, destination=3)])
     tree = next(ts.trees())
     for node, expected_time in enumerate(sampling_times):
         self.assertEqual(tree.get_time(node), expected_time)
     # Nodes 0-3 are the samples; nodes 4-6 are the internal nodes.
     expected_populations = [0, 1, 2, 3, 1, 2, 3]
     for node, expected_pop in enumerate(expected_populations):
         self.assertEqual(tree.get_population(node), expected_pop)
     self.assertTrue(t1 < tree.get_time(4) < t2)
     self.assertTrue(t2 < tree.get_time(5) < t3)
     self.assertGreater(tree.get_time(6), t3)
示例#31
0
 def test_population_mismatch_no_population_configs(self):
     # A base tree sequence with 2-4 populations and no
     # population_configurations argument must raise a ValueError.
     for num_pops in range(2, 5):
         base_ts = self.get_example_base(num_populations=num_pops)
         oldest = max(node.time for node in base_ts.nodes())
         self.assertRaises(
             ValueError, msprime.simulate, from_ts=base_ts, start_time=oldest)
示例#32
0
 def test_no_mutations_with_start_time(self):
     # Combining a mutation rate with a non-None start_time is rejected.
     self.assertRaises(
         ValueError, msprime.simulate, 10, mutation_rate=10, start_time=3)
     # But fine if we set start_time = None
     mutated_ts = msprime.simulate(
         10, mutation_rate=10, start_time=None, random_seed=1)
     self.assertGreater(mutated_ts.num_sites, 0)
示例#33
0
 def test_single_tree(self):
     # Run self.verify with the six samples split into two sets of three.
     ts = msprime.simulate(6, random_seed=1)
     first_half, second_half = range(3), range(3, 6)
     self.verify(ts, [first_half, second_half])
示例#34
0
 def test_jukes_cantor_n20(self):
     # Apply tsutil.jukes_cantor to a 20-sample simulation and verify it.
     base_ts = msprime.simulate(20, random_seed=2)
     mutated_ts = tsutil.jukes_cantor(base_ts, 5, 1, seed=2)
     self.verify_jukes_cantor(mutated_ts)
示例#35
0
 def get_tree_sequence(self):
     # Return a fixed-seed simulation with recombination and mutation,
     # after asserting that it has more than 10 mutations.
     simulation_args = dict(
         length=10, recombination_rate=1, mutation_rate=10, random_seed=3)
     tree_sequence = msprime.simulate(10, **simulation_args)
     self.assertGreater(tree_sequence.get_num_mutations(), 10)
     return tree_sequence
示例#36
0
                              source=0,
                              destination=36,
                              proportion=1)
    ],

    #t 45k: Migrate lineages 36 > 37: HG and basal Eurasians merge
    [
        msprime.MassMigration(time=45000,
                              source=36,
                              destination=37,
                              proportion=1)
    ]
]

# Flatten the per-epoch lists in demog_list into one list of events.
# (This statement used to appear twice in a row; once is sufficient.)
demog = [item for sublist in demog_list for item in sublist]

ts = msprime.simulate(Ne=10000,
                      population_configurations=population_configurations,
                      migration_matrix=mig_mat,
                      mutation_rate=args.mu,
                      recombination_rate=args.rho,
                      length=args.length,
                      demographic_events=demog)

print("writing genotype to vcf file")

# The output path is built from the --outpre and --chr command-line values.
with open(args.outpre + "_chr" + args.chr + ".vcf", "w") as vcf_file:
    ts.write_vcf(vcf_file, ploidy=2, contig_id=args.chr)
def out_of_africa(nbp,seed,samsize):
    '''
    Gutenkunst et al. out-of-Africa model

    Simulates the three-population model configured below and writes the
    result to standard output as VCF (ploidy 2).

    :param nbp: passed to ``msprime.simulate`` as ``length``.
    :param seed: passed to ``msprime.simulate`` as ``random_seed``.
    :param samsize: sample size of the population with index 2; the other
        two populations are given a sample size of 0.
    :return: None
    '''
    # First we set out the maximum likelihood values of the various parameters
    # given in Table 1.
    N_A = 7300
    N_B = 2100
    N_AF = 12300
    N_EU0 = 1000
    N_AS0 = 510
    # Times are provided in years, so we convert into generations.
    generation_time = 25
    T_AF = 220e3 / generation_time
    T_B = 140e3 / generation_time
    T_EU_AS = 21.2e3 / generation_time
    # We need to work out the starting (diploid) population sizes based on
    # the growth rates provided for these two populations
    r_EU = 0.004
    r_AS = 0.0055
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    # Migration rates during the various epochs.
    m_AF_B = 25e-5
    m_AF_EU = 3e-5
    m_AF_AS = 1.9e-5
    m_EU_AS = 9.6e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
    # initially. Only population 2 is sampled.
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=N_AF),
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(
            sample_size=samsize, initial_size=N_AS, growth_rate=r_AS)
    ]
    migration_matrix = [
        [      0, m_AF_EU, m_AF_AS],
        [m_AF_EU,       0, m_EU_AS],
        [m_AF_AS, m_EU_AS,       0],
    ]
    demographic_events = [
        # CEU and CHB merge into B with rate changes at T_EU_AS
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
        # Population B merges into YRI at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0)
    ]

    # Both per-base rates are divided by the same scale factor.
    scale = 100
    rho = 1e-8 / scale
    mu = 1.25e-8 / scale
    treeseq = msprime.simulate(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        length=nbp,
        recombination_rate=rho,
        mutation_rate=mu,
        random_seed=seed)
    # Write straight to sys.stdout. Wrapping sys.stdout in a ``with`` block
    # closes the process's standard output on exit, which breaks any later
    # print() in the caller.
    treeseq.write_vcf(sys.stdout, 2)
示例#38
0
 def test_many_trees_all_nodes(self):
     # Run self.verify with every node id as the single reference set and
     # again as the third argument.
     ts = msprime.simulate(6, length=4, recombination_rate=2, random_seed=1)
     reference_set = np.arange(ts.num_nodes, dtype=np.int32)
     focal_nodes = np.arange(ts.num_nodes, dtype=np.int32)
     self.verify(ts, [reference_set], focal_nodes)
示例#39
0
 def test_many_trees_all_sample_sets(self):
     # Run self.verify for every partition of the samples yielded by
     # set_partitions, on a tree sequence with more than two trees.
     ts = msprime.simulate(6, recombination_rate=2, random_seed=1)
     self.assertGreater(ts.num_trees, 2)
     sample_ids = list(range(ts.num_samples))
     for partition in set_partitions(sample_ids):
         self.verify(ts, partition)
示例#40
0
 def test_single_tree_all_sample_sets(self):
     # Run self.verify for every partition of the samples yielded by
     # set_partitions, on a simulation without recombination.
     ts = msprime.simulate(6, random_seed=1)
     sample_ids = list(range(ts.num_samples))
     for partition in set_partitions(sample_ids):
         self.verify(ts, partition)
示例#41
0
 def test_single_tree_partial_samples(self):
     # Run self.verify with sets that cover only samples 0-3 of the six.
     ts = msprime.simulate(6, random_seed=1)
     first_set, second_set = range(3), range(3, 4)
     self.verify(ts, [first_set, second_set])
示例#42
0
 def test_single_tree_all_nodes(self):
     # Run self.verify with every node id as the single reference set and
     # again as the third argument.
     ts = msprime.simulate(10, random_seed=1)
     reference_set = np.arange(ts.num_nodes, dtype=np.int32)
     focal_nodes = np.arange(ts.num_nodes, dtype=np.int32)
     self.verify(ts, [reference_set], focal_nodes)
示例#43
0
 def f(n, m, r):
     """Simulate with sample size n, length m and recombination rate r."""
     simulation_args = {
         "sample_size": n, "length": m, "recombination_rate": r}
     return msprime.simulate(**simulation_args)
示例#44
0
 def test_simple_recombination(self):
     # Verify a simulation with recombination that is stopped at end_time.
     simulation_args = dict(
         recombination_rate=0.1, random_seed=1, end_time=0.5)
     self.verify(msprime.simulate(10, **simulation_args))
示例#45
0
    def run(self, ngens):
        """
        Build and return a table collection recording ``ngens`` generations
        of replacement in a population of ``self.N`` nodes.

        The initial generation is either taken from an msprime simulation
        (when ``self.deep_history`` is set) or created as ``self.N`` bare
        nodes at time ``ngens``. Time then counts down from ``ngens - 1`` to
        0; in each step some members are replaced by new nodes, each with two
        parents drawn from the current population. The members of the final
        population are flagged as samples before the tables are returned.

        The order of calls on ``self.rng`` determines the result, so
        statements must not be reordered.
        """
        # Sequence length: 1 unless self.num_loci is set.
        L = 1
        if self.num_loci is not None:
            L = self.num_loci
        tables = tskit.TableCollection(sequence_length=L)
        tables.populations.add_row()
        if self.deep_history:
            # initial population
            init_ts = msprime.simulate(self.N,
                                       recombination_rate=1.0,
                                       length=L,
                                       random_seed=self.seed)
            init_tables = init_ts.dump_tables()
            flags = init_tables.nodes.flags
            if not self.initial_generation_samples:
                flags = np.zeros_like(init_tables.nodes.flags)
            # Shift the simulated node times back by ngens so that they lie
            # before the generations added below. The population column is
            # not passed here.
            tables.nodes.set_columns(time=init_tables.nodes.time + ngens,
                                     flags=flags)
            tables.edges.set_columns(
                left=init_tables.edges.left,
                right=init_tables.edges.right,
                parent=init_tables.edges.parent,
                child=init_tables.edges.child,
            )
        else:
            flags = 0
            if self.initial_generation_samples:
                flags = tskit.NODE_IS_SAMPLE
            for _ in range(self.N):
                tables.nodes.add_row(flags=flags, time=ngens, population=0)

        # Node ids of the current population, one per slot.
        pop = list(range(self.N))
        for t in range(ngens - 1, -1, -1):
            if self.debug:
                print("t:", t)
                print("pop:", pop)

            # One draw per member; a member is replaced when its draw exceeds
            # self.survival.
            dead = [self.rng.random() > self.survival for k in pop]
            # sample these first so that all parents are from the previous gen
            new_parents = [(self.rng.choice(pop), self.rng.choice(pop))
                           for k in range(sum(dead))]
            k = 0
            if self.debug:
                print("Replacing", sum(dead), "individuals.")
            for j in range(self.N):
                if dead[j]:
                    # this is: offspring ID, lparent, rparent, breakpoint
                    offspring = len(tables.nodes)
                    tables.nodes.add_row(time=t, population=0)
                    lparent, rparent = new_parents[k]
                    k += 1
                    # NOTE(review): random_breakpoint() is defined elsewhere;
                    # presumably it returns a position in [0, L] - confirm.
                    bp = self.random_breakpoint()
                    if self.debug:
                        print("--->", offspring, lparent, rparent, bp)
                    pop[j] = offspring
                    # [0, bp) is inherited from lparent and [bp, L) from
                    # rparent; an empty interval adds no edge.
                    if bp > 0.0:
                        tables.edges.add_row(left=0.0,
                                             right=bp,
                                             parent=lparent,
                                             child=offspring)
                    if bp < L:
                        tables.edges.add_row(left=bp,
                                             right=L,
                                             parent=rparent,
                                             child=offspring)

        if self.debug:
            print("Done! Final pop:")
            print(pop)
        # Overwrite the flags of the final population with the sample flag;
        # all other node flags are left as they are.
        flags = tables.nodes.flags
        flags[pop] = tskit.NODE_IS_SAMPLE
        tables.nodes.set_columns(flags=flags,
                                 time=tables.nodes.time,
                                 population=tables.nodes.population)
        return tables
示例#46
0
 def test_no_recombination(self):
     # Verify a simulation without recombination that stops at end_time.
     truncated_ts = msprime.simulate(10, random_seed=2, end_time=0.5)
     self.verify(truncated_ts)
示例#47
0
 def test_many_trees_infinite_sites(self):
     # Verify a simulation that has both sites and more than two trees.
     simulation_args = dict(
         recombination_rate=2, mutation_rate=2, random_seed=1)
     ts = msprime.simulate(6, **simulation_args)
     self.assertGreater(ts.num_sites, 0)
     self.assertGreater(ts.num_trees, 2)
     self.verify(ts)
示例#48
0
 def test_no_recombination_time_zero(self):
     # Verify a simulation that is stopped at end_time=0.0.
     truncated_ts = msprime.simulate(10, random_seed=3, end_time=0.0)
     self.verify(truncated_ts)
示例#49
0
 def test_no_mutations(self):
     # A simulation without a mutation rate has no mutations and yields
     # no variants.
     ts = msprime.simulate(10)
     self.assertEqual(ts.get_num_mutations(), 0)
     all_variants = list(ts.variants())
     self.assertEqual(len(all_variants), 0)
示例#50
0
 def test_single_tree_simulated_mutations(self):
     # Subsample the sites of a mutated simulation to self.num_test_sites
     # and run both verification helpers on the result.
     full_ts = msprime.simulate(20, mutation_rate=10, random_seed=15)
     subsampled_ts = tsutil.subsample_sites(full_ts, self.num_test_sites)
     self.verify_matrix(subsampled_ts)
     self.verify_max_distance(subsampled_ts)
示例#51
0
 def test_fails_deletion_mutations(self):
     # Add a site whose second argument is an empty string, then check
     # that consuming haplotypes() raises a TypeError.
     ts = msprime.simulate(10, random_seed=2)
     tables = ts.tables
     tables.sites.add_row(0, "")
     modified_ts = tables.tree_sequence()
     # Call haplotypes() outside the assertion so that only the list()
     # consumption is expected to raise.
     haplotype_iter = modified_ts.haplotypes()
     with self.assertRaises(TypeError):
         list(haplotype_iter)
示例#52
0
 def test_single_tree_regular_mutations(self):
     # Insert mutations with tsutil.insert_branch_mutations and check that
     # both verification helpers fail.
     num_sites = self.num_test_sites
     ts = msprime.simulate(num_sites, length=num_sites)
     ts = tsutil.insert_branch_mutations(ts)
     # We don't support back mutations, so this should fail.
     for failing_check in (self.verify_matrix, self.verify_max_distance):
         with self.assertRaises(_tskit.LibraryError):
             failing_check(ts)
示例#53
0
 def test_bug_instance2(self):
     # Verify one specific parameter and seed combination.
     simulation_args = dict(
         recombination_rate=10, end_time=1.0, random_seed=61)
     self.verify(msprime.simulate(10, **simulation_args))
示例#54
0
 def test_mutation_generator_unsupported(self):
     # Passing mutation_generator to simulate() must raise a ValueError.
     n = 10
     rng = msprime.RandomGenerator(1)
     mutgen = msprime.MutationGenerator(rng, 1)
     self.assertRaises(
         ValueError, msprime.simulate, n, mutation_generator=mutgen)
示例#55
0
 def test_no_sites(self):
     # Run self.verify on a tree sequence that has no sites.
     siteless_ts = msprime.simulate(12, random_seed=3)
     self.assertEqual(siteless_ts.num_sites, 0)
     self.verify(siteless_ts, 3, random_seed=7)
示例#56
0
 def test_start_time_less_than_zero(self):
     # A negative start_time must raise a ValueError.
     base_ts = self.get_example_base()
     self.assertRaises(
         ValueError, msprime.simulate, from_ts=base_ts, start_time=-1)
示例#57
0
 def test_large_recombination(self):
     # Verify a simulation with recombination rate 1.0 that is stopped
     # at end_time.
     simulation_args = dict(
         recombination_rate=1.0, random_seed=2, end_time=0.25)
     self.verify(msprime.simulate(15, **simulation_args))
示例#58
0
 def test_start_time_less_than_base_nodes(self):
     # Any start_time below the oldest node time in the base tree sequence
     # must raise an InputError.
     base_ts = self.get_example_base()
     oldest = max(node.time for node in base_ts.nodes())
     too_early = [0, oldest - 1, oldest - 1e-6]
     for start in too_early:
         self.assertRaises(
             _msprime.InputError, msprime.simulate,
             from_ts=base_ts, start_time=start)
示例#59
0
 def test_single_tree_internal_reference_sets(self):
     # Use each child of the root as its own single-node reference set and
     # pass the samples as the third argument.
     ts = msprime.simulate(10, random_seed=1)
     first_tree = ts.first()
     reference_sets = []
     for child in first_tree.children(first_tree.root):
         reference_sets.append([child])
     self.verify(ts, reference_sets, ts.samples())
示例#60
0
 def test_numpy_random_seed(self):
     # A numpy int64 scalar is accepted as random_seed, and two runs with
     # it produce equal node tables.
     seed = np.array([12345], dtype=np.int64)[0]
     self.assertEqual(seed.dtype, np.int64)
     first_nodes, second_nodes = [
         msprime.simulate(10, random_seed=seed).tables.nodes
         for _ in range(2)]
     self.assertEqual(first_nodes, second_nodes)