Exemplo n.º 1
0
def test_divergence__missing_calls():
    """Check that divergence is NaN when a cohort has only missing calls.

    Builds a single-variant, three-sample dataset in which both samples
    assigned to cohort 1 carry ``-1`` genotype calls, runs ``divergence`` and
    asserts that ``stat_divergence`` at index ``[0, 1]`` is NaN.
    """
    genotype_calls = [
        # all of cohort 1 calls are missing
        [[0, 0], [-1, -1], [-1, -1]],
    ]
    # First sample belongs to cohort 0, the remaining two to cohort 1.
    cohort_of_sample = np.array([0, 1, 1])

    dataset = get_dataset(genotype_calls)
    dataset["sample_cohort"] = xr.DataArray(cohort_of_sample, dims="samples")
    result = divergence(dataset)

    observed = result["stat_divergence"].values[0, 1]
    # assert_equal treats NaN as equal to NaN, unlike the == operator.
    np.testing.assert_equal(observed, np.nan)
Exemplo n.º 2
0
def test_divergence__windowed(sample_size, n_cohorts, chunks):
    """Compare windowed ``divergence`` output with tskit, window by window.

    A tree sequence is simulated with msprime (fixed seed), converted to a
    dataset, split into ``n_cohorts`` cohorts and windowed in groups of 25
    variants. The between-cohort (off-diagonal) entries of
    ``stat_divergence`` are then checked against ``ts.divergence`` evaluated
    over genomic windows whose edges sit at every 25th site position.

    The arguments are presumably supplied by pytest parametrization declared
    outside this block:

    * ``sample_size`` -- number of samples passed to ``msprime.simulate``.
    * ``n_cohorts`` -- number of cohorts passed to ``add_cohorts``.
    * ``chunks`` -- chunking argument forwarded to ``ts_to_dataset``.
    """
    ts = msprime.simulate(
        sample_size, length=200, mutation_rate=0.05, random_seed=42
    )
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
    ds = window(ds, size=25)
    ds = divergence(ds)
    div = ds["stat_divergence"].values
    # Test off-diagonal entries only, by replacing the diagonal of every
    # window with NaNs. The index is derived from n_cohorts (rather than a
    # literal 2) so the masking stays correct for any number of cohorts.
    diagonal = np.arange(n_cohorts)
    div[:, diagonal, diagonal] = np.nan

    # Calculate divergence using tskit windows.
    # Find the variant positions so we can have windows with a fixed number
    # of variants: the edges are 0, every 25th site position, and the end of
    # the sequence.
    positions = ts.tables.sites.position
    windows = np.concatenate(([0], positions[::25][1:], [ts.sequence_length]))
    n_windows = len(windows) - 1
    # The diagonal stays NaN so that it lines up with the masked `div`.
    ts_div = np.full([n_windows, n_cohorts, n_cohorts], np.nan)
    for i, j in itertools.combinations(range(n_cohorts), 2):
        ts_div[:, i, j] = ts.divergence(
            [subsets[i], subsets[j]], windows=windows, span_normalise=False
        )
        # Mirror the value into the lower triangle.
        ts_div[:, j, i] = ts_div[:, i, j]
    np.testing.assert_allclose(div, ts_div)
Exemplo n.º 3
0
def test_divergence__windowed_scikit_allel_comparison(
    sample_size, n_cohorts, chunks
):
    """Compare windowed ``divergence`` output with a scikit-allel computation.

    The dataset is built from a simulated tree sequence, split into cohorts,
    windowed in groups of 25 variants and passed through ``divergence``. The
    reference is computed with scikit-allel from allele counts of the first
    sample versus all remaining samples, summed over moving windows of 25.

    NOTE(review): the TODO below records that the two results are known to
    differ; this block leaves the comparison logic itself unchanged.

    The arguments are presumably supplied by pytest parametrization declared
    outside this block:

    * ``sample_size`` -- number of samples passed to ``simulate_ts``.
    * ``n_cohorts`` -- number of cohorts passed to ``add_cohorts``.
    * ``chunks`` -- chunking argument forwarded to ``ts_to_dataset``.
    """
    ts = simulate_ts(sample_size, length=200)
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    # The per-cohort sample subsets returned here are not needed below.
    ds, _ = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
    ds = window(ds, size=25)
    ds = divergence(ds)
    div = ds["stat_divergence"].values
    # Test off-diagonal entries only, by replacing the diagonal of every
    # window with NaNs. The index is derived from n_cohorts (rather than a
    # literal 2) so the masking stays correct for any number of cohorts.
    diagonal = np.arange(n_cohorts)
    div[:, diagonal, diagonal] = np.nan

    # Calculate divergence using scikit-allel moving_statistic
    # (Don't use windowed_divergence, since it treats the last window differently)
    ds1 = count_variant_alleles(
        ts_to_dataset(ts, samples=ts.samples()[:1])  # type: ignore[no-untyped-call]
    )
    ds2 = count_variant_alleles(
        ts_to_dataset(ts, samples=ts.samples()[1:])  # type: ignore[no-untyped-call]
    )
    ac1 = ds1["variant_allele_count"].values
    ac2 = ds2["variant_allele_count"].values
    mpd = allel.mean_pairwise_difference_between(ac1, ac2, fill=0)
    ska_div = allel.moving_statistic(mpd, np.sum, size=25)
    # TODO: investigate why numbers are different
    # scikit-allel has the final window missing, so drop it from `div` too.
    np.testing.assert_allclose(div[:-1], ska_div)
Exemplo n.º 4
0
def test_divergence(sample_size, n_cohorts, chunks):
    """Check un-windowed ``divergence`` output against tskit statistics.

    ``stat_divergence`` is summed over its first axis, giving one
    cohort-by-cohort matrix. Diagonal entries are compared with
    ``ts.diversity`` of the matching cohort, and off-diagonal entries with
    ``ts.divergence`` of the matching ordered cohort pair.

    The arguments are presumably supplied by pytest parametrization declared
    outside this block.
    """
    ts = msprime.simulate(
        sample_size, length=100, mutation_rate=0.05, random_seed=42
    )
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
    ds = divergence(ds)
    # skipna=False lets any NaN in the first axis propagate into the total.
    observed = ds.stat_divergence.sum(axis=0, skipna=False).values

    # entries on the diagonal are diversity values
    for cohort in range(n_cohorts):
        expected_diversity = ts.diversity([subsets[cohort]], span_normalise=False)
        np.testing.assert_allclose(observed[cohort, cohort], expected_diversity)

    # Reference matrix for the off-diagonal entries: every ordered pair of
    # distinct cohorts is filled in, and the diagonal is left as NaN.
    expected = np.full([n_cohorts, n_cohorts], np.nan)
    for a, b in itertools.permutations(range(n_cohorts), 2):
        expected[a, b] = ts.divergence(
            [subsets[a], subsets[b]], span_normalise=False
        )

    # test off-diagonal entries, by replacing diagonal with NaNs
    np.fill_diagonal(observed, np.nan)
    np.testing.assert_allclose(observed, expected)