示例#1
0
def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts,
                                                      chunks):
    """Check windowed ``divergence`` against a scikit-allel moving statistic.

    A tree sequence of length 200 is simulated for ``sample_size`` samples,
    converted to a dataset with the requested ``chunks``, split into
    ``n_cohorts`` cohorts and windowed in blocks of 25 before ``divergence``
    is computed. The reference value is built with scikit-allel from the
    allele counts of the first sample versus all remaining samples.
    """
    window_size = 25

    tree_seq = simulate_ts(sample_size, length=200)
    dataset = ts_to_dataset(tree_seq,
                            chunks)  # type: ignore[no-untyped-call]
    dataset, _subsets = add_cohorts(
        dataset, tree_seq, n_cohorts)  # type: ignore[no-untyped-call]
    dataset = divergence(window(dataset, size=window_size))
    observed = dataset["stat_divergence"].values
    # Only off-diagonal entries are under test: overwrite the diagonal of the
    # two trailing axes (first two positions) with NaN.
    diagonal = np.arange(2)
    observed[:, diagonal, diagonal] = np.nan

    def _reference_allele_counts(sample_ids):
        # Allele counts for a dataset restricted to the given sample ids.
        counted = count_variant_alleles(
            ts_to_dataset(tree_seq,
                          samples=sample_ids))  # type: ignore[no-untyped-call]
        return counted["variant_allele_count"].values

    # Reference: scikit-allel's moving_statistic rather than
    # windowed_divergence, because the latter treats the last window
    # differently.
    first_sample_counts = _reference_allele_counts(tree_seq.samples()[:1])
    other_sample_counts = _reference_allele_counts(tree_seq.samples()[1:])
    per_variant = allel.mean_pairwise_difference_between(first_sample_counts,
                                                         other_sample_counts,
                                                         fill=0)
    expected = allel.moving_statistic(per_variant, np.sum,
                                      size=window_size)  # noqa: F841
    # TODO: investigate why numbers are different
    # scikit-allel has the final window missing, so drop ours before comparing.
    np.testing.assert_allclose(observed[:-1], expected)
示例#2
0
    def test_mean_pairwise_divergence(self):
        """Check ``mean_pairwise_difference_between`` on a tiny hand-made case.

        Each row of the haplotype array is one variant; columns 0-1 form the
        first population and columns 2-3 the second, so every variant has four
        between-population haplotype pairs. Variants where a population has
        only missing calls (-1) are expected to yield the ``fill`` value.
        """
        # Simplest case: two haplotypes in each population.
        haplotypes = HaplotypeArray([
            [0, 0, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 1],
            [0, 1, 1, 1],
            [1, 1, 1, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [0, 1, -1, -1],
            [-1, -1, -1, -1],
        ])
        ac1 = haplotypes.take([0, 1], axis=1).count_alleles()
        ac2 = haplotypes.take([2, 3], axis=1).count_alleles()

        # Number of differing pairs (out of 4) for the seven variants that
        # have calls in both populations, followed by two filled entries.
        differing_pairs = (0, 2, 4, 2, 0, 4, 3)
        expect = [n_diff / 4 for n_diff in differing_pairs] + [-1, -1]
        actual = allel.mean_pairwise_difference_between(ac1, ac2, fill=-1)
        aeq(expect, actual)
示例#3
0
    def test_pairwise_distance_multidim(self):
        """Check ``pairwise_distance`` with a callable metric on allele counts.

        The genotype literal holds 11 variants for 2 samples, so there is a
        single sample pair and the expected result is a one-element list: the
        metric applied to the allele counts of sample 0 and sample 1.
        """
        genotypes = GenotypeArray(
            [
                [[0, 0], [0, 0]],
                [[1, 1], [1, 1]],
                [[1, 1], [2, 2]],
                [[0, 0], [0, 1]],
                [[0, 0], [0, 2]],
                [[1, 1], [1, 2]],
                [[0, 1], [0, 1]],
                [[0, 1], [1, 2]],
                [[0, 0], [-1, -1]],
                [[0, 1], [-1, -1]],
                [[-1, -1], [-1, -1]],
            ],
            dtype='i1')
        gac = genotypes.to_allele_counts()

        def summed_mpd(ac1, ac2):
            # Sum over variants of the mean pairwise difference between the
            # two allele-count arrays, using 0 as the fill value.
            return allel.mean_pairwise_difference_between(ac1, ac2,
                                                          fill=0).sum()

        expect = [summed_mpd(gac[:, 0], gac[:, 1])]
        actual = allel.pairwise_distance(gac, summed_mpd)
        aeq(expect, actual)
示例#4
0
 def metric(ac1, ac2):
     """Return the summed mean pairwise difference between two count arrays.

     ``ac1`` and ``ac2`` are passed straight to
     ``allel.mean_pairwise_difference_between`` with ``fill=0``; the result is
     reduced with ``.sum()``.
     """
     return allel.mean_pairwise_difference_between(ac1, ac2, fill=0).sum()
示例#5
0
def RecombinationRepper(
        pooled_args):  #  provide r_rate, model_function, reps, samples
    """Simulate replicate trees and summarise population statistics per tree.

    ``pooled_args`` is an indexable whose first four items are, in order,
    ``r_rate``, ``model_function``, ``reps`` and ``samples``.

    For each of ``reps`` replicates a tree is simulated with
    ``migration_simulation_2patch(r_rate)``. Mutations are then overlaid
    ``samples`` times, and for every overlay Fst (value and standard error,
    plus the standard error on shuffled genotypes), Dxy, Tajima's D,
    diversity and Garud's H12 are computed with scikit-allel.

    Returns a list ``[r_rate, mean_Fst, mean_SE, mean_SE_shuffled, sd_Fst,
    tree_counts, mean_Dxy, mean_Tajima, mean_diversity, mean_H12]`` in which
    every entry after ``r_rate`` is a list with one value per replicate.
    """
    r_rate, reps, samples = pooled_args[0], pooled_args[2], pooled_args[3]
    # NOTE(review): model_function is read but never called below; the
    # simulation always goes through migration_simulation_2patch -- confirm
    # whether that is intended.
    model_function = pooled_args[1]

    # Hard-coded layout: haplotypes 0-99 vs 100-199 for the allele counts,
    # and diploid individuals 0-49 vs 50-99 for Fst.
    deme1_haplotypes = list(range(0, 100))
    deme2_haplotypes = list(range(100, 200))
    fst_subpops = [list(range(0, 50)), list(range(50, 100))]
    # Divisor applied to the summed per-site values for Dxy and diversity
    # (presumably the simulated sequence length -- TODO confirm).
    per_site_divisor = 10000.

    mean_Fst_dists = []
    # NOTE(review): despite the name this holds sqrt(variance), i.e. a
    # standard deviation, per replicate.
    var_Fst_dists = []
    mean_SE_dists = []
    mean_SE_dists_shuf = []
    tree_counts = []
    mean_Dxy_dists = []
    mean_Tajima_dists = []
    mean_diversity_dists = []
    mean_H12_dists = []

    for rep in range(reps):
        print(rep)

        new_tree = migration_simulation_2patch(r_rate)
        # Record how many trees the simulated sequence yields.
        tree_counts.append(sum(1 for _tree in new_tree.trees()))

        # Per-overlay values for this replicate, keyed by statistic.
        draws = {
            stat: []
            for stat in ("Fst", "SE", "SE_shuf", "Tajima", "Dxy", "H12",
                         "diversity")
        }

        for _overlay in range(samples):
            # Redraw mutations until at least one variant is present.
            n_variants = 0
            while n_variants == 0:
                mutated_tree = msprime.mutate(new_tree, 1.25e-7)
                n_variants = sum(1 for _var in mutated_tree.variants())

            # Wrap msprime's genotype matrix for scikit-allel and view the
            # haplotypes as diploid genotypes.
            haplotype_array = allel.HaplotypeArray(
                mutated_tree.genotype_matrix())
            genotype_array = haplotype_array.to_genotypes(ploidy=2)
            shuffled_genotypes = shuffle(genotype_array, random_state=0)

            ac1 = haplotype_array.count_alleles(subpop=deme1_haplotypes)
            ac2 = haplotype_array.count_alleles(subpop=deme2_haplotypes)

            # Tajima's D for the first group of haplotypes.
            tajimas_d = allel.tajima_d(ac1)

            # Dxy between the two groups of haplotypes.
            dxy = sum(allel.mean_pairwise_difference_between(
                ac1, ac2)) / per_site_divisor

            # Garud's H statistics on the first 400 variants of the first
            # 100 haplotypes.
            h_stats = allel.garud_h(haplotype_array[:400, 0:100])

            # Diversity within the first group of haplotypes.
            diversity = sum(
                allel.mean_pairwise_difference(ac1)) / per_site_divisor

            fst = allel.average_weir_cockerham_fst(genotype_array,
                                                   blen=100,
                                                   subpops=fst_subpops)
            fst_shuffled = allel.average_weir_cockerham_fst(
                shuffled_genotypes, blen=100, subpops=fst_subpops)

            draws["Fst"].append(fst[0])
            draws["SE"].append(fst[1])
            draws["SE_shuf"].append(fst_shuffled[1])
            draws["Tajima"].append(tajimas_d)
            draws["Dxy"].append(dxy)
            draws["H12"].append(h_stats[1])
            draws["diversity"].append(diversity)

        # Collapse the per-overlay values into one summary per replicate.
        mean_Fst_dists.append(np.mean(draws["Fst"]))
        var_Fst_dists.append(np.sqrt(np.var(draws["Fst"])))
        mean_SE_dists.append(np.mean(draws["SE"]))
        mean_SE_dists_shuf.append(np.mean(draws["SE_shuf"]))
        mean_Dxy_dists.append(np.mean(draws["Dxy"]))
        mean_Tajima_dists.append(np.mean(draws["Tajima"]))
        mean_H12_dists.append(np.mean(draws["H12"]))
        mean_diversity_dists.append(np.mean(draws["diversity"]))

    return [
        r_rate, mean_Fst_dists, mean_SE_dists, mean_SE_dists_shuf,
        var_Fst_dists, tree_counts, mean_Dxy_dists, mean_Tajima_dists,
        mean_diversity_dists, mean_H12_dists
    ]