Python LdCalculator примеры, msprime.LdCalculator Python примеры использования

Пример #1

0

Показать файл

 def verify_max_distance(self, ts):
     """
     Check that ``max_distance`` bounds the arrays from ``get_r2_array``.

     Starting from the middle mutation, the distance to the k-th mutation
     on either side is used as ``max_distance`` and the returned array is
     compared with the matching slice of the full r^2 matrix.  Finally the
     whole sequence length is used as the limit from the first and from
     the last mutation.
     """
     muts = list(ts.mutations())
     num_muts = len(muts)
     calculator = msprime.LdCalculator(ts)
     full = calculator.get_r2_matrix()
     focal = num_muts // 2
     for width in range(focal):
         # Forward: the window ends at the mutation ``width`` places ahead.
         span = muts[focal + width].position - muts[focal].position
         forward = calculator.get_r2_array(focal, max_distance=span)
         self.assertEqual(forward.shape[0], width)
         expected = full[focal, focal + 1:focal + 1 + width]
         self.assertTrue(np.allclose(expected, forward))
         # Reverse: the window ends at the mutation ``width`` places behind.
         span = muts[focal].position - muts[focal - width].position
         backward = calculator.get_r2_array(
             focal, max_distance=span, direction=msprime.REVERSE)
         self.assertEqual(backward.shape[0], width)
         expected = full[focal, focal - width:focal]
         self.assertTrue(np.allclose(expected, backward[::-1]))

     # With the full sequence length as the limit, every other mutation
     # must be reported from either end.
     seq_len = ts.get_sequence_length()
     others = num_muts - 1
     from_first = calculator.get_r2_array(0, max_distance=seq_len)
     self.assertEqual(from_first.shape[0], others)
     self.assertTrue(np.allclose(full[0, 1:], from_first))
     from_last = calculator.get_r2_array(
         others, max_distance=seq_len, direction=msprime.REVERSE)
     self.assertEqual(from_last.shape[0], others)
     self.assertTrue(np.allclose(full[others, :-1], from_last[::-1]))

Пример #2

0

Показать файл

Файл: simNe_functions_simulate.py Проект: rwaples/simNe

def get_msprime_ld(sim_data):
    """Show the r^2 matrix of ``sim_data`` as a heat map.

    The matrix comes from ``msprime.LdCalculator(sim_data).get_r2_matrix()``
    and is drawn with the colour scale fixed to [0, 1] and no axis ticks.
    Nothing is returned; the figure is displayed via ``plt.show()``.
    """
    r2_matrix = msprime.LdCalculator(sim_data).get_r2_matrix()
    image_options = {
        "interpolation": "none",
        "vmin": 0,
        "vmax": 1,
        "cmap": "Blues",
    }
    plt.imshow(r2_matrix, **image_options)
    # Remove tick marks on both axes.
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks([])
    plt.show()

Пример #3

0

Показать файл

    def verify_matrix(self, ts):
        """
        Cross-check the three ways of obtaining r^2 values for ``ts``.

        The full matrix from ``LdCalculator.get_r2_matrix`` is compared
        with the module-level ``get_r2_matrix(ts)``, then with the per-site
        arrays from ``get_r2_array`` in both directions, and finally with
        the single-pair values from ``get_r2``.
        """
        num_sites = ts.get_num_sites()
        calculator = msprime.LdCalculator(ts)
        matrix = calculator.get_r2_matrix()
        self.assertEqual(matrix.shape, (num_sites, num_sites))
        reference = get_r2_matrix(ts)
        self.assertTrue(np.allclose(matrix, reference))

        # Row by row: the forward array must equal the part of the row to
        # the right of the diagonal, the reversed backward array the part
        # to the left of it.
        for site in range(num_sites):
            ahead = num_sites - site - 1
            forward = calculator.get_r2_array(
                site, direction=msprime.FORWARD)
            right_of_diag = matrix[site, site + 1:]
            self.assertEqual(forward.shape[0], ahead)
            self.assertEqual(right_of_diag.shape[0], ahead)
            self.assertTrue(np.allclose(forward, right_of_diag))
            backward = calculator.get_r2_array(
                site, direction=msprime.REVERSE)
            left_of_diag = matrix[site, :site]
            self.assertEqual(backward.shape[0], site)
            self.assertEqual(left_of_diag.shape[0], site)
            self.assertTrue(np.allclose(backward[::-1], left_of_diag))

        # Cell by cell against the pairwise accessor.
        for row in range(num_sites):
            for col, expected in enumerate(matrix[row]):
                self.assertAlmostEqual(calculator.get_r2(row, col), expected)

Пример #4

0

Показать файл

def clump_variants(simulation, summary_stats, nhaps, r2_threshold,
                   window_size):
    """
    Greedily clump variants by significance and r^2.

    Positions are visited in ascending order of the last element of their
    ``summary_stats`` entry (the p-value, according to the original
    comments).  For every position that is still usable, all neighbours
    within a fixed distance whose r^2 with it exceeds ``r2_threshold`` are
    removed from the usable set.  r^2 is computed on the sample subset
    ``range(nhaps[0], nhaps[0] + nhaps[1])`` of ``simulation``.

    Parameters
    ----------
    simulation
        Simulated tree sequence; must provide ``variants()``,
        ``get_num_mutations()`` and ``subset()``.
    summary_stats : dict
        Maps position -> sequence whose last element is the sort key.
    nhaps
        Indexable; ``nhaps[0]`` is the first sample index of the subset and
        ``nhaps[1]`` the number of samples in it.
    r2_threshold : float
        Neighbours with r^2 strictly above this value are dropped.
    window_size
        NOTE(review): accepted but not used by this function; the LD
        search distance is fixed at 125e3 on each side.  Confirm whether
        it was meant to drive ``max_distance``.

    Returns
    -------
    tuple
        ``(clumped_snps, usable_positions)``: the set of surviving
        positions and the dict position -> variant for those positions.
    """
    eprint('Subsetting variants to usable list' + current_time())
    usable_positions = {}  # position -> variant (simulation indices)
    for variant in tqdm(simulation.variants(),
                        total=simulation.get_num_mutations()):
        if variant.position in summary_stats:
            usable_positions[variant.position] = variant

    # Most significant first: sort on the last field of each entry.
    ordered_positions = sorted(summary_stats.keys(),
                               key=lambda x: summary_stats[x][-1])

    # LD is computed on the sample subset, so mutation indices have to be
    # translated between positions and subset-local indices.
    eur_subset = simulation.subset(range(nhaps[0], (nhaps[0] + nhaps[1])))
    eur_index_pos = {}
    eur_pos_index = {}
    for mutation in tqdm(eur_subset.mutations(),
                         total=eur_subset.get_num_mutations()):
        eur_index_pos[mutation.index] = mutation.position
        eur_pos_index[mutation.position] = mutation.index
    ld_calc = msprime.LdCalculator(eur_subset)

    max_distance = 125e3  # fixed search distance on each side of the focal SNP

    # Compute LD and prune in order of significance.
    for position in ordered_positions:
        if position not in usable_positions:
            continue
        focal_index = eur_pos_index[position]
        r2_forward = ld_calc.get_r2_array(focal_index,
                                          direction=msprime.FORWARD,
                                          max_distance=max_distance)
        # Entry i of the forward array is the (i + 1)-th mutation after
        # the focal one in the subset.
        for i in np.where(r2_forward > r2_threshold)[0]:
            usable_positions.pop(eur_index_pos[focal_index + i + 1], None)
        r2_reverse = ld_calc.get_r2_array(focal_index,
                                          direction=msprime.REVERSE,
                                          max_distance=max_distance)
        # Entry i of the reverse array is the (i + 1)-th mutation before
        # the focal one.
        for i in np.where(r2_reverse > r2_threshold)[0]:
            usable_positions.pop(eur_index_pos[focal_index - i - 1], None)

    clumped_snps = set(usable_positions.keys())

    eprint('Starting SNPs: ' + str(len(ordered_positions)) +
           '; SNPs after clumping: ' + str(len(clumped_snps)) + current_time())

    return (clumped_snps, usable_positions)

Пример #5

0

Показать файл

    def pos_r2(ts):
        """Obtain vectors of position differences and r^2 per pair of sites.

        Each unordered pair of sites (i, j) with i > j contributes one
        entry, provided the float32 position difference is non-zero.

        Arguments
        ---------
        ts : msprime.TreeSequence
            tree sequence object

        Returns
        -------
        pos_diff : np.array
            position difference for pairs of snps (float32)

        r2 : np.array
            r^2 as computed between the different sites, in the same pair
            order as ``pos_diff``

        """
        ld_calc = msp.LdCalculator(ts)
        r2_est = ld_calc.r2_matrix()
        # Positions are kept as float32 so the returned dtype is unchanged.
        pos = np.array([s.position for s in ts.sites()], dtype=np.float32)
        # |pos[i] - pos[j]| for every pair via broadcasting; np.tril with
        # k=-1 keeps only the strict lower triangle (i > j) and zeroes the
        # rest, which is the matrix the former nested loops filled in.
        pos_diff_mat = np.tril(np.abs(pos[:, None] - pos[None, :]), k=-1)

        # Extract entries that matter (and are matched): the same boolean
        # mask is applied to both matrices so the pairs line up.
        pair_mask = pos_diff_mat > 0
        r2 = r2_est[pair_mask]
        pos_diff = pos_diff_mat[pair_mask]
        return (pos_diff, r2)

Пример #6

0

Показать файл

    def _pos_r2(ts):
        """Obtain vectors of position differences and r^2 per pair of sites.

        Each unordered pair of sites (i, j) with i > j contributes one
        entry, provided the float32 position difference is non-zero.
        NaN r^2 values are replaced by 1.0 in the returned vector.

        Arguments
        ---------
        ts : msprime.TreeSequence
            tree sequence object

        Returns
        -------
        pos_diff : np.array
            position difference for pairs of snps (float32)

        r2 : np.array
            r^2 as computed between the different sites, in the same pair
            order as ``pos_diff``

        """
        ld_calc = msp.LdCalculator(ts)
        r2_est = ld_calc.r2_matrix()
        # Positions are kept as float32 so the returned dtype is unchanged.
        pos = np.array([s.position for s in ts.sites()], dtype=np.float32)
        # |pos[i] - pos[j]| for every pair via broadcasting; np.tril with
        # k=-1 keeps only the strict lower triangle (i > j) and zeroes the
        # rest, which is the matrix the former nested loops filled in.
        pos_diff_mat = np.tril(np.abs(pos[:, None] - pos[None, :]), k=-1)

        # Extract entries that matter (and are matched): the same boolean
        # mask is applied to both matrices so the pairs line up.
        pair_mask = pos_diff_mat > 0
        r2 = r2_est[pair_mask]
        pos_diff = pos_diff_mat[pair_mask]

        # Set undefined values to be 1 (due to non-segregating issues...).
        # Boolean-mask indexing returned a copy, so r2_est is not modified.
        r2[np.isnan(r2)] = 1.0
        return (pos_diff, r2)

Пример #7

0

Показать файл

 def test_deprecated_aliases(self):
     """
     The ``get_``-prefixed and unprefixed ``LdCalculator`` methods must
     return equal results for the matrix, a single array and a single pair.
     """
     simulated = msprime.simulate(20, mutation_rate=10, random_seed=15)
     ts = tsutil.subsample_sites(simulated, self.num_test_sites)
     calculator = msprime.LdCalculator(ts)
     # Full matrix.
     self.assertTrue(
         np.array_equal(calculator.get_r2_matrix(), calculator.r2_matrix()))
     # Array for the first site.
     self.assertTrue(
         np.array_equal(calculator.get_r2_array(0), calculator.r2_array(0)))
     # Single pair of sites.
     self.assertEqual(calculator.get_r2(0, 1), calculator.r2(0, 1))

Пример #8

0

Показать файл

Файл: simulations.py Проект: dortegadelv/analysis

 def thread_worker(thread_index):
     """
     Mark a thinned set of sites in ``mask`` for this worker's chunk.

     The worker starts at the first index of its chunk, marks it, and then
     repeatedly jumps to the first later site whose r^2 with the current
     site is ``<= thresh``.  It stops once the current index has passed
     the end of the chunk, no later sites remain, or no later site meets
     the threshold.

     ``ts``, ``mask``, ``num_threads`` and ``thresh`` are taken from the
     enclosing scope.

     Parameters
     ----------
     thread_index : int
         Zero-based index of this worker; selects the chunk of ``mask``.
     """
     chunk_size = int(math.ceil(len(mask) / num_threads))
     nextSite = thread_index * chunk_size
     if nextSite >= len(mask):
         # chunk_size is rounded up, so trailing workers can be left with
         # an empty chunk (e.g. 5 sites over 4 threads); indexing mask
         # there would raise IndexError.
         return
     ld = msp.LdCalculator(ts)
     stop = nextSite + chunk_size
     while True:
         mask[nextSite] = True
         # Boolean array: entry i refers to site nextSite + 1 + i.
         r2 = (ld.r2_array(nextSite) <= thresh)
         if nextSite > stop or len(r2) == 0 or not np.any(r2):
             break
         # np.argmax on a boolean array gives the first True entry.
         nextSite += (1 + np.argmax(r2))

Пример #9

0

Показать файл

 def verify_max_mutations(self, ts):
     """
     Check that ``max_mutations`` bounds the arrays from ``get_r2_array``.

     From the middle mutation, every limit k below that index is tried in
     both directions; the result must have exactly k entries and match
     the corresponding slice of the full r^2 matrix.
     """
     calculator = msprime.LdCalculator(ts)
     full = calculator.get_r2_matrix()
     focal = len(list(ts.mutations())) // 2
     for limit in range(focal):
         forward = calculator.get_r2_array(focal, max_mutations=limit)
         self.assertEqual(forward.shape[0], limit)
         expected = full[focal, focal + 1:focal + 1 + limit]
         self.assertTrue(np.allclose(expected, forward))
         backward = calculator.get_r2_array(
             focal, max_mutations=limit, direction=msprime.REVERSE)
         self.assertEqual(backward.shape[0], limit)
         expected = full[focal, focal - limit:focal]
         self.assertTrue(np.allclose(expected, backward[::-1]))

Пример #10

0

Показать файл

 def thread_worker(thread_index):
     """
     Find, for each focal mutation in this worker's chunk, the mutations
     within ``max_distance`` whose r^2 with it is at least ``r2_threshold``.

     The matching mutation indexes (lower ones first, then higher ones)
     are stored in ``results[focal_mutation]`` and the progress bar is
     advanced once per focal mutation.  ``tree_sequence``,
     ``focal_mutations``, ``num_threads``, ``max_distance``,
     ``r2_threshold``, ``results`` and ``progress_bar`` come from the
     enclosing scope.
     """
     calculator = msprime.LdCalculator(tree_sequence)
     per_worker = int(math.ceil(len(focal_mutations) / num_threads))
     first = thread_index * per_worker
     last = first + per_worker
     for focal in focal_mutations[first: last]:
         # Entry i of the reverse array is mutation focal - i - 1.
         behind = calculator.get_r2_array(
             focal, max_distance=max_distance,
             direction=msprime.REVERSE)
         hits_behind = np.nonzero(behind >= r2_threshold)[0]
         rev_indexes = focal - hits_behind - 1
         # Entry i of the forward array is mutation focal + i + 1.
         ahead = calculator.get_r2_array(
             focal, max_distance=max_distance,
             direction=msprime.FORWARD)
         hits_ahead = np.nonzero(ahead >= r2_threshold)[0]
         fwd_indexes = focal + hits_ahead + 1
         # Flip the reverse hits so the combined array is ascending.
         results[focal] = np.concatenate((rev_indexes[::-1], fwd_indexes))
         progress_bar.update()

Пример #11

0

Показать файл

def ld_matrix_example():
    """Simulate a tree sequence and save its r^2 matrix as an SVG heat map.

    The figure is square; its side is the larger of the default figure
    width and the matrix size divided by the ``savefig.dpi`` setting.
    Ticks and axis spines are hidden, a colour bar is attached, and the
    result is written to ``_static/ld.svg``.
    """
    ts = msprime.simulate(
        100, recombination_rate=10, mutation_rate=20, random_seed=1)
    r2_matrix = msprime.LdCalculator(ts).get_r2_matrix()

    # Now plot this matrix.
    settings = pyplot.rcParams
    side = max(
        r2_matrix.shape[0] / settings['savefig.dpi'],
        settings['figure.figsize'][0],
    )
    fig, ax = pyplot.subplots(figsize=(side, side))
    fig.tight_layout(pad=0)
    image = ax.imshow(
        r2_matrix, interpolation="none", vmin=0, vmax=1, cmap="Blues")
    ax.set_xticks([])
    ax.set_yticks([])
    for edge in ('top', 'bottom', 'left', 'right'):
        ax.spines[edge].set_visible(False)
    pyplot.gcf().colorbar(image, shrink=.5, pad=0)
    pyplot.savefig("_static/ld.svg")

Пример #12

0

Показать файл

def two_bins(NA, N1, N2, Ts, M1, M2):
    """
    Simulate a two-population model and summarise the mutation counts.

    Population 0 has no samples and initial size ``N1``; population 1 has
    50 samples and initial size ``N2``.  The initial migration matrix has
    rate ``M2`` at index (0, 1).  At time ``Ts / 2`` that rate is changed
    to ``M1``, and at time ``Ts`` all lineages of population 1 are moved
    to population 0.

    Parameters
    ----------
    NA
        Passed to ``msprime.simulate`` as ``Ne``.
    N1, N2
        Initial sizes of population 0 and population 1.
    Ts
        Time of the mass migration; the rate change happens at ``Ts / 2``.
    M1
        Migration rate set at index (0, 1) from time ``Ts / 2``.
    M2
        Initial migration rate at index (0, 1).

    Returns
    -------
    np.ndarray
        One-element array holding the variance, across replicates, of the
        number of mutations per replicate.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=0, initial_size=N1),
        msprime.PopulationConfiguration(sample_size=50, initial_size=N2)
    ]
    migration_matrix = [[0, M2], [0, 0]]
    demographic_events = [
        msprime.MigrationRateChange(time=Ts / 2, rate=M1, matrix_index=(0, 1)),
        #msprime.MigrationRateChange(time=Ts/2, rate=M1, matrix_index=(1, 0)),
        msprime.MassMigration(time=Ts, source=1, destination=0, proportion=1.0)
    ]

    # Uncomment to print the demographic history while debugging:
    #dp = msprime.DemographyDebugger(
    #    Ne=NA,
    #    population_configurations=population_configurations,
    #    migration_matrix=migration_matrix,
    #    demographic_events=demographic_events)
    #dp.print_history()

    replicates = 500000
    sim = msprime.simulate(Ne=NA,
                           population_configurations=population_configurations,
                           migration_matrix=migration_matrix,
                           demographic_events=demographic_events,
                           mutation_rate=1e-7,
                           recombination_rate=1e-8,
                           length=100000,
                           num_replicates=replicates)
    # Only the mutation count feeds the returned statistic.  Earlier
    # variants also collected pairwise diversity and the variance of the
    # r^2 matrix per replicate, but those values were never returned, so
    # they are no longer computed; re-add the accumulators if a richer
    # summary vector is needed.
    seg = np.zeros(replicates)
    for j, s in enumerate(sim):
        seg[j] = s.get_num_mutations()

    return (np.array([np.var(seg)]))

Пример #13

0

Показать файл