Пример #1
0
def test_observed_heterozygosity__scikit_allel_comparison(
        n_variant, n_sample, missing_pct, window_size, seed):
    ds = simulate_genotype_call_dataset(
        n_variant=n_variant,
        n_sample=n_sample,
        n_ploidy=2,
        missing_pct=missing_pct,
        seed=seed,
    )
    ds["sample_cohort"] = (
        ["samples"],
        np.zeros(n_sample, int),
    )
    ds = window(ds, size=window_size)
    ho_sg = observed_heterozygosity(ds)["stat_observed_heterozygosity"].values
    if n_sample % window_size:
        # scikit-allel will drop the ragged end
        ho_sg = ho_sg[0:-1]
    # calculate with scikit-allel
    ho_sa = allel.moving_statistic(
        allel.heterozygosity_observed(ds["call_genotype"]),
        np.sum,
        size=window_size,
    )
    # add cohort dimension to scikit-allel result
    np.testing.assert_almost_equal(ho_sg, ho_sa[..., None])
Пример #2
0
def test_observed_heterozygosity(chunks):
    ds = simulate_genotype_call_dataset(
        n_variant=4,
        n_sample=6,
        n_ploidy=4,
    )
    ds["call_genotype"] = (
        ["variants", "samples", "ploidy"],
        da.asarray([
            [
                [0, 0, 0, 0],
                [0, 0, 0, 0],
                [1, 1, 1, 1],
                [1, 1, 1, 1],
                [0, 0, 0, 0],
                [1, 1, 1, 1],
            ],
            [
                [0, 0, 0, 0],
                [0, 0, 0, 1],
                [0, 0, 1, 1],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
            ],
            [
                [0, 0, 0, 0],
                [0, 0, 0, 0],
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [4, 5, 6, 7],
            ],
            [
                [0, 0, -1, -1],
                [0, 1, -1, -1],
                [0, 0, 1, 1],
                [-1, -1, -1, -1],
                [0, -1, -1, -1],
                [-1, -1, -1, -1],
            ],
        ]).rechunk(chunks),
    )
    ds.call_genotype_mask.values = ds.call_genotype < 0
    ds["sample_cohort"] = (
        ["samples"],
        da.asarray([0, 0, 1, 1, 2, 2]).rechunk(chunks[1]),
    )
    ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
    np.testing.assert_almost_equal(
        ho,
        np.array([
            [0, 0, 0],
            [1 / 4, 2 / 3, 2 / 3],
            [0, 1, 1],
            [1 / 2, 4 / 6, np.nan],
        ]),
    )
Пример #3
0
def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
    ds = simulate_genotype_call_dataset(
        n_variant=4,
        n_sample=6,
        n_ploidy=4,
    )
    ds["call_genotype"] = (
        ["variants", "samples", "ploidy"],
        da.asarray([
            [
                [0, 0, 0, 0],
                [0, 0, 0, 0],
                [1, 1, 1, 1],
                [1, 1, 1, 1],
                [0, 0, 0, 0],
                [1, 1, 1, 1],
            ],
            [
                [0, 0, 0, 0],
                [0, 0, 0, 1],
                [0, 0, 1, 1],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
            ],
            [
                [0, 0, 0, 0],
                [0, 0, 0, 0],
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [4, 5, 6, 7],
            ],
            [
                [0, 0, -1, -1],
                [0, 1, -1, -1],
                [0, 0, 1, 1],
                [-1, -1, -1, -1],
                [0, -1, -1, -1],
                [-1, -1, -1, -1],
            ],
        ]).rechunk(chunks),
    )
    ds.call_genotype_mask.values = ds.call_genotype < 0
    ds["sample_cohort"] = (
        ["samples"],
        da.asarray(cohorts).rechunk(chunks[1]),
    )
    ds = window(ds, size=2)
    ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
    np.testing.assert_almost_equal(
        ho,
        np.array(expectation),
    )