Пример #1
0
def test_count_cohort_alleles__chunked():
    """Cohort allele counts must match for eager and chunked genotype calls.

    The counts are computed once on the dataset as returned by
    ``get_dataset`` and once after ``call_genotype`` has been split into
    several chunks along the variants dimension; both results must be equal
    and the second one must be backed by a dask array.
    """
    rs = np.random.RandomState(0)
    # ``randint`` excludes the upper bound: ``randint(0, 1)`` would only ever
    # produce allele 0, making the comparison below trivial. Use 2 so that
    # both alleles 0 and 1 occur in the random calls.
    calls = rs.randint(0, 2, size=(50, 10, 2))
    ds = get_dataset(calls)
    # First half of the samples goes to cohort 0, second half to cohort 1.
    # ``sizes`` is the mapping-style accessor for dimension lengths.
    sample_cohort = np.repeat([0, 1], ds.sizes["samples"] // 2)
    ds["sample_cohort"] = xr.DataArray(sample_cohort, dims="samples")
    ac1 = count_cohort_alleles(ds)
    # Coerce from numpy to multiple chunks in variants dimension only
    ds["call_genotype"] = ds["call_genotype"].chunk(chunks=(5, -1, -1))
    ac2 = count_cohort_alleles(ds)
    assert isinstance(ac2["cohort_allele_count"].data, da.Array)
    xr.testing.assert_equal(ac1, ac2)
Пример #2
0
def test_cohort_allele_frequencies__polyploid(chunks):
    """Cohort allele frequencies for tetraploid calls with three alleles.

    ``chunks`` is the chunking applied to ``cohort_allele_count`` before the
    frequencies are derived from it. As the expected values show, negative
    call entries (-1 and -2) do not contribute to any frequency.
    """
    # Layout: variants x samples x ploidy.
    genotype_calls = [
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, -2, -2]],
        [[0, 0, 0, 0], [0, 1, 0, 0], [0, 1, -2, -2]],
        [[1, 1, 0, 1], [1, 0, 0, 0], [2, 2, -2, -2]],
        [[1, -1, 1, 1], [-1, -1, 0, 0], [0, 0, -2, -2]],
    ]
    # Layout: variants x cohorts x alleles.
    expected = np.array([
        [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
        [[5 / 6, 1 / 6, 0.0], [0.75, 0.25, 0.0]],
        [[1 / 6, 0.5, 1 / 3], [0.75, 0.25, 0.0]],
        [[0.4, 0.6, 0.0], [1.0, 0.0, 0.0]],
    ])

    dataset = get_dataset(genotype_calls, n_ploidy=4, n_allele=3)
    # Samples 0 and 2 form cohort 0; sample 1 is cohort 1.
    dataset["sample_cohort"] = ("samples", [0, 1, 0])

    counted = count_cohort_alleles(dataset).compute()
    # Re-chunk the already computed counts so the frequency step sees the
    # requested chunk layout.
    rechunked_counts = counted["cohort_allele_count"].chunk(chunks)
    counted["cohort_allele_count"] = rechunked_counts

    result = cohort_allele_frequencies(counted)
    np.testing.assert_equal(result["cohort_allele_frequency"], expected)
Пример #3
0
def test_count_cohort_alleles__multi_variant_multi_sample():
    """Cohort allele counts over four variants and four diploid samples.

    Sample 0 is in cohort 0, samples 1 and 2 are in cohort 1, and sample 3
    is assigned -1, i.e. it belongs to no cohort and is absent from the
    expected counts.
    """
    # Layout: variants x samples x ploidy.
    genotype_calls = [
        [[0, 0], [0, 0], [0, 0], [0, 0]],
        [[0, 0], [0, 0], [0, 1], [0, 1]],
        [[1, 1], [0, 1], [1, 0], [1, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1]],
    ]
    # Layout: variants x cohorts x alleles.
    expected_counts = np.array([
        [[2, 0], [4, 0]],
        [[2, 0], [3, 1]],
        [[0, 2], [2, 2]],
        [[0, 2], [0, 4]],
    ])

    dataset = get_dataset(genotype_calls)
    # -1 means that the sample is not in any cohort
    cohort_ids = np.array([0, 1, 1, -1])
    dataset["sample_cohort"] = xr.DataArray(cohort_ids, dims="samples")

    counted = count_cohort_alleles(dataset)
    np.testing.assert_equal(counted["cohort_allele_count"], expected_counts)
Пример #4
0
def test_cohort_allele_frequencies__diploid(chunks):
    """Cohort allele frequencies for diploid calls across two cohorts.

    ``chunks`` is the chunking applied to ``cohort_allele_count`` before the
    frequencies are derived from it. As the expected values show, call
    entries of -1 and the sample assigned to cohort -1 do not contribute to
    any frequency.
    """
    # Layout: variants x samples x ploidy.
    genotype_calls = [
        [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[0, 0], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0]],
        [[1, 1], [0, 1], [1, 0], [0, 0], [0, 0], [1, 1]],
        [[1, -1], [1, 1], [-1, -1], [0, 0], [0, 0], [-1, -1]],
    ]
    # Layout: variants x cohorts x alleles.
    expected = np.array([
        [[1.0, 0.0], [1.0, 0.0]],
        [[0.75, 0.25], [1.0, 0.0]],
        [[0.5, 0.5], [1 / 3, 2 / 3]],
        [[0.0, 1.0], [2 / 3, 1 / 3]],
    ])

    dataset = get_dataset(genotype_calls)
    # Samples 1 and 2 form cohort 0; samples 0, 3 and 5 form cohort 1;
    # sample 4 is assigned -1.
    dataset["sample_cohort"] = ("samples", [1, 0, 0, 1, -1, 1])

    counted = count_cohort_alleles(dataset).compute()
    # Re-chunk the already computed counts so the frequency step sees the
    # requested chunk layout.
    rechunked_counts = counted["cohort_allele_count"].chunk(chunks)
    counted["cohort_allele_count"] = rechunked_counts

    result = cohort_allele_frequencies(counted)
    np.testing.assert_equal(result["cohort_allele_frequency"], expected)