def test_display_genotypes__duplicate_variant_ids():
    """Check the rendered table when two variants share the same ID.

    The expected table below is labelled with the positional indices
    0, 1, 2 rather than with the (duplicated) ``variant_id`` values.
    """
    ds = simulate_genotype_call_dataset(n_variant=3, n_sample=3, seed=0)
    # set some variant IDs
    ds["variant_id"] = (["variants"], np.array(["V0", "V1", "V1"]))
    ds["variant_id_mask"] = (["variants"], np.array([False, False, False]))
    disp = display_genotypes(ds)
    # The "variants" header line is padded to the full table width, hence
    # the trailing whitespace and the W291 suppression.
    expected = """\
        samples    S0   S1   S2
        variants               
        0         0/0  1/0  1/0
        1         0/1  1/0  0/1
        2         0/0  1/0  1/1"""  # noqa: W291
    assert str(disp) == dedent(expected)
def test_display_genotypes__truncated_rows():
    """Check the text rendering when only the rows are truncated.

    With ``max_variants=4`` on a 10-variant dataset the expected table keeps
    the first two and last two variants, separated by a ``...`` row, and ends
    with a footer giving the full dimensions. All 10 samples are shown.
    """
    ds = simulate_genotype_call_dataset(n_variant=10, n_sample=10, seed=0)
    disp = display_genotypes(ds, max_variants=4, max_samples=10)
    # The "variants" header line is padded to the full table width, hence
    # the trailing whitespace and the W291 suppression.
    expected = """\
        samples    S0   S1   S2   S3   S4   S5   S6   S7   S8   S9
        variants                                                  
        0         0/0  1/0  1/0  0/1  1/0  0/1  0/0  1/0  1/1  0/0
        1         1/0  0/1  1/0  1/1  1/1  1/0  1/0  0/0  1/0  1/1
        ...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...
        8         0/1  0/0  1/0  0/1  0/1  1/0  1/0  0/1  1/0  1/0
        9         1/1  0/1  1/0  0/1  1/0  1/1  0/1  1/0  1/1  1/0

        [10 rows x 10 columns]"""  # noqa: W291
    assert str(disp) == dedent(expected)
def test_display_genotypes():
    """Check the text and HTML renderings of a small, untruncated dataset."""
    ds = simulate_genotype_call_dataset(n_variant=3, n_sample=3, seed=0)
    disp = display_genotypes(ds)
    # The "variants" header line is padded to the full table width, hence
    # the trailing whitespace and the W291 suppression.
    expected = """\
        samples    S0   S1   S2
        variants               
        0         0/0  1/0  1/0
        1         0/1  1/0  0/1
        2         0/0  1/0  1/1"""  # noqa: W291
    assert str(disp) == dedent(expected)

    # Only containment is asserted for the HTML, so any wrapper markup
    # emitted around the table is ignored.
    expected_html = """<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>samples</th>
      <th>S0</th>
      <th>S1</th>
      <th>S2</th>
    </tr>
    <tr>
      <th>variants</th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>0/0</td>
      <td>1/0</td>
      <td>1/0</td>
    </tr>
    <tr>
      <th>1</th>
      <td>0/1</td>
      <td>1/0</td>
      <td>0/1</td>
    </tr>
    <tr>
      <th>2</th>
      <td>0/0</td>
      <td>1/0</td>
      <td>1/1</td>
    </tr>
  </tbody>
</table>""".strip()
    assert expected_html in disp._repr_html_()
def test_display_genotypes__large():
    """Check the text rendering of a 100,000 x 1,000 dataset.

    Both axes are limited to 4 entries, so the expected table shows the first
    and last two variants and samples around ``...`` separators, while the
    footer still reports the full dimensions.
    """
    ds = simulate_genotype_call_dataset(n_variant=100_000, n_sample=1000, seed=0)
    disp = display_genotypes(ds, max_variants=4, max_samples=4)
    # The "variants" header line is padded to the full table width, hence
    # the trailing whitespace and the W291 suppression.
    expected = """\
        samples    S0   S1  ... S998 S999
        variants            ...          
        0         0/0  1/0  ...  0/1  1/1
        1         1/1  1/1  ...  0/1  1/1
        ...       ...  ...  ...  ...  ...
        99998     0/1  1/1  ...  1/0  0/1
        99999     1/0  1/0  ...  1/0  1/0

        [100000 rows x 1000 columns]"""  # noqa: W291
    assert str(disp) == dedent(expected)
def test_create_genotype_call_dataset():
    """Build a dataset from explicit arrays and check its contents and display.

    Verifies that the expected dimensions and attributes are present, that
    each input array round-trips into the dataset unchanged, that
    ``call_genotype_mask`` flags the negative allele values, and that the
    rendered table matches the expected layout.
    """
    variant_contig_names = ["chr1"]
    variant_contig = np.array([0, 0], dtype="i1")
    variant_position = np.array([1000, 2000], dtype="i4")
    variant_allele = np.array([["A", "C"], ["G", "A"]], dtype="S1")
    variant_id = np.array(["rs1", "rs2"], dtype=str)
    sample_id = np.array(["sample_1", "sample_2", "sample_3"], dtype=str)
    # -1 entries are the ones expected to be flagged by call_genotype_mask.
    call_genotype = np.array(
        [[[0, 0], [0, 1], [1, 0]], [[-1, 0], [0, -1], [-1, -1]]], dtype="i1"
    )
    call_genotype_phased = np.array(
        [[True, True, False], [True, False, False]], dtype=bool
    )
    ds = create_genotype_call_dataset(
        variant_contig_names=variant_contig_names,
        variant_contig=variant_contig,
        variant_position=variant_position,
        variant_allele=variant_allele,
        sample_id=sample_id,
        call_genotype=call_genotype,
        call_genotype_phased=call_genotype_phased,
        variant_id=variant_id,
    )

    assert DIM_VARIANT in ds.dims
    assert DIM_SAMPLE in ds.dims
    assert DIM_PLOIDY in ds.dims
    assert DIM_ALLELE in ds.dims

    assert ds.attrs["source"] == f"sgkit-{__version__}"
    assert ds.attrs["contigs"] == variant_contig_names

    assert_array_equal(ds["variant_contig"], variant_contig)
    assert_array_equal(ds["variant_position"], variant_position)
    assert_array_equal(ds["variant_allele"], variant_allele)
    assert_array_equal(ds["variant_id"], variant_id)
    assert_array_equal(ds["sample_id"], sample_id)
    assert_array_equal(ds["call_genotype"], call_genotype)
    assert_array_equal(ds["call_genotype_mask"], call_genotype < 0)
    assert_array_equal(ds["call_genotype_phased"], call_genotype_phased)

    # In the expected table, calls marked phased are joined with "|",
    # unphased calls with "/", and -1 alleles appear as ".". The "variants"
    # header line carries trailing padding, hence the W291 suppression.
    disp = display_genotypes(ds)
    assert (
        str(disp)
        == """
samples  sample_1 sample_2 sample_3
variants                           
rs1           0|0      0|1      1/0
rs2           .|0      0/.      ./.
""".strip()  # noqa: W291
    )
def test_display_genotypes__truncated_columns():
    """Check the text rendering when only the columns are truncated.

    With ``max_samples=4`` on a 10-sample dataset the expected table keeps the
    first two and last two samples around a ``...`` column, shows all 10
    variants, and ends with a footer giving the full dimensions.
    """
    ds = simulate_genotype_call_dataset(n_variant=10, n_sample=10, seed=0)
    disp = display_genotypes(ds, max_variants=10, max_samples=4)
    # The "variants" header line is padded to the full table width, hence
    # the trailing whitespace and the W291 suppression.
    expected = """\
        samples    S0   S1  ...   S8   S9
        variants            ...          
        0         0/0  1/0  ...  1/1  0/0
        1         1/0  0/1  ...  1/0  1/1
        2         1/1  1/1  ...  0/0  1/0
        3         0/1  0/0  ...  1/0  0/0
        4         0/1  0/0  ...  0/0  1/1
        5         1/1  1/0  ...  0/0  1/0
        6         1/1  0/0  ...  1/0  0/1
        7         1/0  0/1  ...  0/1  0/0
        8         0/1  0/0  ...  1/0  1/0
        9         1/1  0/1  ...  1/1  1/0

        [10 rows x 10 columns]"""  # noqa: W291
    assert str(disp) == dedent(expected)
def test_display_genotypes__truncated_rows_and_columns():
    """Check text and HTML renderings when rows and columns are both truncated.

    Both axes of a 10 x 10 dataset are limited to 4 entries, so the expected
    output keeps the first and last two variants and samples around ``...``
    separators and reports the full dimensions in a footer.
    """
    ds = simulate_genotype_call_dataset(n_variant=10, n_sample=10, seed=0)
    disp = display_genotypes(ds, max_variants=4, max_samples=4)
    # The "variants" header line is padded to the full table width, hence
    # the trailing whitespace and the W291 suppression.
    expected = """\
        samples    S0   S1  ...   S8   S9
        variants            ...          
        0         0/0  1/0  ...  1/1  0/0
        1         1/0  0/1  ...  1/0  1/1
        ...       ...  ...  ...  ...  ...
        8         0/1  0/0  ...  1/0  1/0
        9         1/1  0/1  ...  1/1  1/0

        [10 rows x 10 columns]"""  # noqa: W291
    assert str(disp) == dedent(expected)

    # Only containment is asserted for the HTML, so any wrapper markup
    # emitted around the table and its footer is ignored.
    expected_html = """<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>samples</th>
      <th>S0</th>
      <th>S1</th>
      <th>...</th>
      <th>S8</th>
      <th>S9</th>
    </tr>
    <tr>
      <th>variants</th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>0/0</td>
      <td>1/0</td>
      <td>...</td>
      <td>1/1</td>
      <td>0/0</td>
    </tr>
    <tr>
      <th>1</th>
      <td>1/0</td>
      <td>0/1</td>
      <td>...</td>
      <td>1/0</td>
      <td>1/1</td>
    </tr>
    <tr>
      <th>...</th>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
    </tr>
    <tr>
      <th>8</th>
      <td>0/1</td>
      <td>0/0</td>
      <td>...</td>
      <td>1/0</td>
      <td>1/0</td>
    </tr>
    <tr>
      <th>9</th>
      <td>1/1</td>
      <td>0/1</td>
      <td>...</td>
      <td>1/1</td>
      <td>1/0</td>
    </tr>
  </tbody>
</table>
<p>10 rows x 10 columns</p>""".strip()
    assert expected_html in disp._repr_html_()