示例#1
0
def test_diversity__windowed(sample_size):
    """Check windowed ``stat_diversity`` for cohort ``co_0`` against two references.

    The value computed over fixed-size variant windows is compared first with
    tskit's windowed ``diversity`` and then with a scikit-allel moving sum of
    the mean pairwise difference.
    """
    variants_per_window = 25

    tree_seq = simulate_ts(sample_size, length=200)
    dataset = ts_to_dataset(tree_seq)  # type: ignore[no-untyped-call]
    dataset, _ = add_cohorts(dataset, tree_seq, cohort_key_names=["cohorts"])  # type: ignore[no-untyped-call]
    dataset = diversity(window(dataset, size=variants_per_window))
    observed = dataset["stat_diversity"].sel(cohorts="co_0").compute()

    # Reference 1: tskit windows.
    # Window edges are placed at every 25th site position, so that each
    # tskit window holds a fixed number of variants.
    site_positions = tree_seq.tables.sites.position
    interior_edges = site_positions[::variants_per_window][1:]
    window_edges = np.concatenate(
        ([0], interior_edges, [tree_seq.sequence_length])
    )
    expected_tskit = tree_seq.diversity(
        windows=window_edges, span_normalise=False
    )
    np.testing.assert_allclose(observed, expected_tskit)

    # Reference 2: scikit-allel moving_statistic.
    # (windowed_diversity is avoided because it treats the last window
    # differently.)
    counted = count_variant_alleles(ts_to_dataset(tree_seq))  # type: ignore[no-untyped-call]
    allele_counts = counted["variant_allele_count"].values
    pairwise_diff = allel.mean_pairwise_difference(allele_counts, fill=0)
    expected_allel = allel.moving_statistic(
        pairwise_diff, np.sum, size=variants_per_window
    )
    # scikit-allel omits the final window, so drop it from our result too.
    np.testing.assert_allclose(observed[:-1], expected_allel)
示例#2
0
def test_diversity(sample_size, chunks, cohort_allele_count):
    """Check ``stat_diversity`` summed along axis 0 against tskit for cohort ``co_0``.

    When ``cohort_allele_count`` is not ``None`` the cohort allele counts are
    computed up front, stored under that variable name, and the name is
    passed to ``diversity``; otherwise ``diversity`` is called with defaults.
    """
    tree_seq = simulate_ts(sample_size)
    dataset = ts_to_dataset(tree_seq, chunks)
    dataset, _ = add_cohorts(dataset, tree_seq, cohort_key_names=["cohorts"])

    cohort_coords = {"cohorts": ["co_0"]}
    if cohort_allele_count is None:
        result = diversity(dataset.assign_coords(cohort_coords))
    else:
        # Precompute the counts and expose them under the custom name.
        counts_only = count_cohort_alleles(dataset, merge=False)
        renamed = counts_only.rename(
            {variables.cohort_allele_count: cohort_allele_count}
        )
        result = diversity(
            renamed.assign_coords(cohort_coords),
            cohort_allele_count=cohort_allele_count,
        )

    summed = result.stat_diversity.sum(axis=0, skipna=False)
    observed = summed.sel(cohorts="co_0").values
    expected = tree_seq.diversity(span_normalise=False)
    np.testing.assert_allclose(observed, expected)
示例#3
0
def test_diversity(sample_size, chunks, cohort_allele_count):
    """Check ``stat_diversity`` summed along axis 0 against tskit for cohort ``co_0``.

    The tree sequence comes from ``msprime.simulate`` with a fixed random
    seed. When ``cohort_allele_count`` is not ``None`` the cohort allele
    counts are computed up front, stored under that variable name, and the
    name is passed to ``diversity``.
    """
    simulation_args = dict(length=100, mutation_rate=0.05, random_seed=42)
    tree_seq = msprime.simulate(sample_size, **simulation_args)

    dataset = ts_to_dataset(tree_seq, chunks)  # type: ignore[no-untyped-call]
    dataset, _ = add_cohorts(dataset, tree_seq, cohort_key_names=["cohorts"])  # type: ignore[no-untyped-call]

    cohort_coords = {"cohorts": ["co_0"]}
    diversity_kwargs = {}
    if cohort_allele_count is not None:
        # Precompute the counts and expose them under the custom name.
        dataset = count_cohort_alleles(dataset, merge=False).rename(
            {variables.cohort_allele_count: cohort_allele_count}
        )
        diversity_kwargs["cohort_allele_count"] = cohort_allele_count
    dataset = dataset.assign_coords(cohort_coords)
    dataset = diversity(dataset, **diversity_kwargs)

    summed = dataset.stat_diversity.sum(axis=0, skipna=False)
    observed = summed.sel(cohorts="co_0").values
    expected = tree_seq.diversity(span_normalise=False)
    np.testing.assert_allclose(observed, expected)
示例#4
0
def test_diversity__missing_call_genotype():
    """Expect ``diversity`` on an empty dataset to raise a ``ValueError``.

    The error message must match "call_genotype not present".
    """
    empty_dataset = xr.Dataset()
    expected_message = "call_genotype not present"
    with pytest.raises(ValueError, match=expected_message):
        diversity(empty_dataset)