Exemplo n.º 1
0
def test_read_bgen_fancy_index(shared_datadir, chunks):
    """Select several entries in one indexing operation and compare them.

    Reads ``example.bgen`` with the supplied ``chunks``, indexes the first
    axis with ``INDEXES`` (second axis fixed at 0) and checks the result
    against the expected probabilities and dosages to 3 decimal places.
    """
    bgen_path = shared_datadir / "example.bgen"
    dataset = read_bgen(bgen_path, chunks=chunks)

    probabilities = dataset["call_genotype_probability"][INDEXES, 0]
    npt.assert_almost_equal(probabilities, EXPECTED_PROBABILITIES, decimal=3)

    dosages = dataset["call_dosage"][INDEXES, 0]
    npt.assert_almost_equal(dosages, EXPECTED_DOSAGES, decimal=3)
Exemplo n.º 2
0
def _rechunk_bgen(
    shared_datadir: Path, tmp_path: Path, **kwargs: Any
) -> Tuple[xr.Dataset, xr.Dataset, str]:
    """Read the example BGEN file and rechunk it into a store under ``tmp_path``.

    Args:
        shared_datadir: Directory containing ``example.bgen``.
        tmp_path: Directory in which the ``example.zarr`` store is placed.
        **kwargs: Forwarded unchanged to ``rechunk_bgen``.

    Returns:
        A tuple of the dataset as read, the dataset returned by
        ``rechunk_bgen``, and the store path as a string.
    """
    source = read_bgen(shared_datadir / "example.bgen", chunks=(10, -1, -1))
    zarr_store = tmp_path / "example.zarr"
    rechunked = rechunk_bgen(source, zarr_store, **kwargs)
    return source, rechunked, str(zarr_store)
Exemplo n.º 3
0
def test_read_bgen_with_sample_file(shared_datadir):
    """Check that sample IDs match those in the separate ``.sample`` file."""
    # Provenance of the fixture: it was produced with
    #   qctool -g sgkit_bgen/tests/data/example.bgen
    #          -og sgkit_bgen/tests/data/example-separate-samples.bgen
    #          -os sgkit_bgen/tests/data/example-separate-samples.sample
    #          -incl-samples sgkit_bgen/tests/data/samples
    # after which example-separate-samples.sample was edited by hand to
    # change the sample IDs.
    bgen_path = shared_datadir / "example-separate-samples.bgen"
    dataset = read_bgen(bgen_path)

    # The IDs must be the edited ones from the .sample file.
    expected_ids = ["s1", "s2", "s3", "s4", "s5"]
    actual_ids = dataset["sample_id"].values.tolist()
    assert actual_ids == expected_ids
Exemplo n.º 4
0
def test_read_bgen_with_no_samples(shared_datadir):
    """Check that sample IDs are generated when the file provides none."""
    # Provenance of the fixture: it was produced with
    #   qctool -g sgkit_bgen/tests/data/example.bgen
    #          -og sgkit_bgen/tests/data/example-no-samples.bgen
    #          -os sgkit_bgen/tests/data/example-no-samples.sample
    #          -bgen-omit-sample-identifier-block
    #          -incl-samples sgkit_bgen/tests/data/samples
    # after which example-no-samples.sample was deleted.
    bgen_path = shared_datadir / "example-no-samples.bgen"
    dataset = read_bgen(bgen_path)

    # With no identifiers available, generated IDs are expected.
    expected_ids = ["sample_0", "sample_1", "sample_2", "sample_3", "sample_4"]
    actual_ids = dataset["sample_id"].values.tolist()
    assert actual_ids == expected_ids
Exemplo n.º 5
0
def test_read_bgen_scalar_index(shared_datadir, chunks):
    """Index one entry at a time and compare against the expected values.

    For each index in ``INDEXES`` (second axis fixed at 0) the probabilities
    and dosage are checked to 3 decimal places, then each of the three
    probability components is checked individually via a fully scalar index.
    """
    dataset = read_bgen(shared_datadir / "example.bgen", chunks=chunks)
    probabilities = dataset["call_genotype_probability"]
    dosages = dataset["call_dosage"]

    for position, variant_index in enumerate(INDEXES):
        npt.assert_almost_equal(
            probabilities[variant_index, 0],
            EXPECTED_PROBABILITIES[position],
            decimal=3,
        )
        npt.assert_almost_equal(
            dosages[variant_index, 0],
            EXPECTED_DOSAGES[position],
            decimal=3,
        )
        # Same check again, but one probability component at a time.
        for genotype in range(3):
            npt.assert_almost_equal(
                probabilities[variant_index, 0, genotype],
                EXPECTED_PROBABILITIES[position, genotype],
                decimal=3,
            )
Exemplo n.º 6
0
def test_read_bgen(shared_datadir, chunks):
    """Spot-check shapes, values and masks of the dataset read from BGEN.

    Reads ``example.bgen`` with the supplied ``chunks`` and verifies the
    dosage and genotype-probability arrays, together with their masks, at a
    few fixed positions.
    """
    dataset = read_bgen(shared_datadir / "example.bgen", chunks=chunks)

    # Spot-check a handful of entries (located in different chunks).
    dosage = dataset["call_dosage"]
    assert dosage.shape == _shape("variants", "samples")
    npt.assert_almost_equal(dosage.values[1, 0], 1.987, decimal=3)
    npt.assert_almost_equal(dosage.values[100, 0], 0.160, decimal=3)

    dosage_mask = dataset["call_dosage_mask"]
    npt.assert_array_equal(dosage_mask.values[0, 0], [True])
    npt.assert_array_equal(dosage_mask.values[0, 1], [False])

    probability = dataset["call_genotype_probability"]
    assert probability.shape == _shape("variants", "samples", "genotypes")
    npt.assert_almost_equal(
        probability.values[1, 0], [0.005, 0.002, 0.992], decimal=3
    )
    npt.assert_almost_equal(
        probability.values[100, 0], [0.916, 0.007, 0.076], decimal=3
    )

    probability_mask = dataset["call_genotype_probability_mask"]
    npt.assert_array_equal(probability_mask.values[0, 0], [True] * 3)
    npt.assert_array_equal(probability_mask.values[0, 1], [False] * 3)