Пример #1
0
def test_identity_by_state__chunked_sample_dimension():
    """Expect NotImplementedError when the genotype calls are chunked by sample.

    The simulated dataset has 20 variants, 10 samples and ploidy 2; its
    ``call_genotype`` array is re-wrapped as a dask array whose second axis
    is split into two chunks of 5 before ``identity_by_state`` is invoked.
    """
    ds = simulate_genotype_call_dataset(n_variant=20, n_sample=10, n_ploidy=2)
    genotype_dims = ds.call_genotype.dims
    # Single chunk on the first and last axes, two chunks on the middle one.
    chunk_layout = ((20, ), (5, 5), (2, ))
    rechunked_calls = da.asarray(ds.call_genotype.data, chunks=chunk_layout)
    ds["call_genotype"] = (genotype_dims, rechunked_calls)

    expected_message = (
        "identity_by_state does not support chunking in the samples dimension"
    )
    with pytest.raises(NotImplementedError, match=expected_message):
        identity_by_state(ds)
Пример #2
0
def test_identity_by_state__tetraploid_multiallelic(chunks):
    """Compare ``stat_identity_by_state`` with hand-written expected values.

    Uses a simulated dataset of 2 variants, 3 samples, ploidy 4 and 3 alleles
    (seed 0) in which the call at index ``[0, 2]`` is overwritten with -1.

    Parameters
    ----------
    chunks
        Chunking applied to ``call_allele_count`` via ``rechunk``; when
        ``None`` the allele counts are left as produced.
    """
    ds = simulate_genotype_call_dataset(
        n_variant=2, n_sample=3, n_ploidy=4, n_allele=3, seed=0
    )
    ds = count_call_alleles(ds)
    # NOTE(review): the null call is written *after* count_call_alleles; the
    # NaN entries expected below suggest the counts are evaluated lazily and
    # therefore see this mutation -- confirm before reordering.
    ds.call_genotype.data[0, 2] = -1  # null call
    if chunks is not None:
        allele_counts = ds.call_allele_count
        ds["call_allele_count"] = (
            allele_counts.dims,
            allele_counts.data.rechunk(chunks),
        )
    ds = identity_by_state(ds)
    actual = ds.stat_identity_by_state.values

    # One 3x3 matrix per entry along the first axis; NaN marks the pairs
    # involving the nulled call in the first matrix.
    nan = np.nan
    first_matrix = [
        [0.5, 0.375, nan],
        [0.375, 0.375, nan],
        [nan, nan, nan],
    ]
    second_matrix = [
        [1.0, 0.25, 0.0],
        [0.25, 0.625, 0.1875],
        [0.0, 0.1875, 0.625],
    ]
    stacked = np.array([first_matrix, second_matrix])
    # Average over the first axis while skipping NaN entries.
    expect = np.nanmean(stacked, axis=0)
    np.testing.assert_array_equal(expect, actual)
Пример #3
0
def test_identity_by_state__reference_implementation(ploidy, chunks, seed):
    """Check ``identity_by_state`` against a direct broadcasting computation.

    Parameters
    ----------
    ploidy
        Passed to the simulator as ``n_ploidy``.
    chunks
        Per-axis chunk sizes for ``call_allele_count``; the sums of the first
        three entries also set the number of variants, samples and alleles.
    seed
        Seed forwarded to the dataset simulator.
    """
    n_variant, n_sample, n_allele = (sum(chunks[axis]) for axis in range(3))
    ds = simulate_genotype_call_dataset(
        n_variant=n_variant,
        n_sample=n_sample,
        n_ploidy=ploidy,
        n_allele=n_allele,
        missing_pct=0.2,
        seed=seed,
    )
    ds = count_call_alleles(ds)
    allele_counts = ds.call_allele_count
    ds["call_allele_count"] = (
        allele_counts.dims,
        allele_counts.data.rechunk(chunks),
    )
    ds = identity_by_state(ds)
    actual = ds.stat_identity_by_state.values

    # Reference implementation: broadcast the frequencies against themselves
    # over a new pairwise axis, sum the products over the last axis, then
    # take the NaN-skipping mean over the first axis.
    freq = ds.call_allele_frequency.data
    pairwise_products = freq[..., None, :, :] * freq[..., :, None, :]
    per_variant = pairwise_products.sum(axis=-1)
    expect = np.nanmean(per_variant, axis=0).compute()
    np.testing.assert_array_almost_equal(expect, actual)
Пример #4
0
def test_identity_by_state__diploid_biallelic(chunks):
    """Compare ``stat_identity_by_state`` with hand-written expected values.

    Uses a simulated dataset of 2 variants, 3 samples, ploidy 2 and 2 alleles
    (seed 2).

    Parameters
    ----------
    chunks
        Chunking applied to ``call_allele_count`` via ``rechunk``; when
        ``None`` the allele counts are left as produced.
    """
    ds = simulate_genotype_call_dataset(
        n_variant=2, n_sample=3, n_ploidy=2, n_allele=2, seed=2
    )
    ds = count_call_alleles(ds)
    if chunks is not None:
        allele_counts = ds.call_allele_count
        ds["call_allele_count"] = (
            allele_counts.dims,
            allele_counts.data.rechunk(chunks),
        )
    ds = identity_by_state(ds)
    actual = ds.stat_identity_by_state.values

    # One 3x3 matrix per entry along the first axis.
    first_matrix = [
        [1.0, 0.0, 0.5],
        [0.0, 1.0, 0.5],
        [0.5, 0.5, 0.5],
    ]
    second_matrix = [
        [1.0, 1.0, 0.5],
        [1.0, 1.0, 0.5],
        [0.5, 0.5, 0.5],
    ]
    stacked = np.array([first_matrix, second_matrix])
    # Average over the first axis while skipping NaN entries.
    expect = np.nanmean(stacked, axis=0)
    np.testing.assert_array_equal(expect, actual)