def test_parquet_family_bin(fam1, fam2, gt):
    """Check family-bin evaluation and the resulting partition filename.

    Two family variants are built from the same summary variant, one per
    family fixture.  Walking their alleles in lockstep, the alleles of the
    first family must land in family bin 9 and those of the second family
    in family bin 6, and the variant filename must embed that bin.
    """
    summary_variant = SummaryVariant(summary_alleles_chr1)
    first_family_variant = FamilyVariant(summary_variant, fam1, gt, None)
    second_family_variant = FamilyVariant(summary_variant, fam2, gt, None)

    bin_size = 10
    descriptor = ParquetPartitionDescriptor(["1"], 1000, bin_size)

    # Expected relative paths for each family, spelled out once.
    first_path = (
        "region_bin=1_11/family_bin=9/"
        "variants_region_bin_1_11_family_bin_9.parquet"
    )
    second_path = (
        "region_bin=1_11/family_bin=6/"
        "variants_region_bin_1_11_family_bin_6.parquet"
    )

    allele_pairs = zip(
        first_family_variant.alleles, second_family_variant.alleles
    )
    for first_allele, second_allele in allele_pairs:
        assert descriptor._evaluate_family_bin(first_allele) == 9
        assert descriptor._evaluate_family_bin(second_allele) == 6
        assert descriptor.variant_filename(first_allele) == first_path
        assert descriptor.variant_filename(second_allele) == second_path
def test_parquet_region_bin(fam1, gt, chromosomes, region_length,
                            summary_alleles, expected):
    """Check region-bin evaluation and the resulting partition filename.

    The region bin is evaluated once on the family variant itself; for each
    of its alleles the bin must equal ``expected`` and the variant filename
    must be built from that bin.
    """
    family_variant = FamilyVariant(
        SummaryVariant(summary_alleles), fam1, gt, None
    )
    descriptor = ParquetPartitionDescriptor(chromosomes, region_length)

    actual_bin = descriptor._evaluate_region_bin(family_variant)
    expected_path = (
        f"region_bin={actual_bin}/variants_region_bin_{actual_bin}"
        f".parquet"
    )

    for allele in family_variant.alleles:
        # Kept inside the loop so it only fires when there are alleles.
        assert actual_bin == expected
        assert descriptor.variant_filename(allele) == expected_path
def test_parquet_frequency_bin(fam1, gt, attributes, rare_boundary, expected):
    """Check frequency-bin evaluation and the resulting partition filename.

    A summary variant is assembled from one summary allele (carrying the
    given ``attributes``) repeated three times.  Every family allele must
    evaluate to the ``expected`` frequency bin for a descriptor created with
    ``rare_boundary``, and the variant filename must embed that bin.
    """
    allele = SummaryAllele(
        "1", 11539, "T", None, 0, 0, attributes=attributes
    )
    # The very same allele object is referenced three times (no copies).
    summary_variant = SummaryVariant([allele] * 3)
    family_variant = FamilyVariant(summary_variant, fam1, gt, None)
    descriptor = ParquetPartitionDescriptor(
        ["1"], 1000, rare_boundary=rare_boundary
    )

    expected_path = (
        f"region_bin=1_11/frequency_bin={expected}/"
        f"variants_region_bin_1_11_frequency_bin_{expected}.parquet"
    )

    for family_allele in family_variant.alleles:
        assert descriptor._evaluate_frequency_bin(family_allele) == expected
        assert descriptor.variant_filename(family_allele) == expected_path
def test_parquet_coding_bin(fam1, gt, eff1, eff2, eff3, coding_effect_types,
                            expected):
    """Check coding-bin evaluation and the resulting partition filename.

    The summary variant has a reference allele plus three alternative
    alleles, each alternative carrying one of the ``eff1``/``eff2``/``eff3``
    effects.  Family alleles are paired with the entries of ``expected``;
    each must evaluate to its paired coding bin and produce a variant
    filename embedding that bin.

    The incoming ``gt`` argument is not used: a local genotype matrix
    referencing alleles 0-3 is used instead.
    """
    reference = SummaryAllele("1", 11539, "T", None, 0, 0)
    alternatives = [
        SummaryAllele(
            "1", 11539, "T", "G", 0, 1, attributes={"effects": eff1}
        ),
        SummaryAllele(
            "1", 11539, "T", "C", 0, 2, attributes={"effects": eff2}
        ),
        SummaryAllele(
            "1", 11539, "T", "A", 0, 3, attributes={"effects": eff3}
        ),
    ]
    summary_variant = SummaryVariant([reference, *alternatives])

    genotype = np.array([[0, 1, 0], [2, 0, 3]], dtype="int8")
    family_variant = FamilyVariant(summary_variant, fam1, genotype, None)
    descriptor = ParquetPartitionDescriptor(
        ["1"], 1000, coding_effect_types=coding_effect_types
    )

    for family_allele, coding_bin in zip(family_variant.alleles, expected):
        assert descriptor._evaluate_coding_bin(family_allele) == coding_bin
        expected_path = (
            f"region_bin=1_11/coding_bin={coding_bin}/"
            f"variants_region_bin_1_11_coding_bin_{coding_bin}.parquet"
        )
        assert descriptor.variant_filename(family_allele) == expected_path