def test_partition_into_regions__num_parts(shared_datadir, vcf_file, is_path):
    """Regions from a 4-way partition must jointly cover every variant.

    The per-region variant counts are summed and compared against the
    count obtained for the whole file with no region restriction.
    """
    # NOTE(review): vcf_file / is_path are presumably supplied by a
    # parametrize decorator that is not visible here — confirm.
    source = path_for_test(shared_datadir, vcf_file, is_path)

    parts = partition_into_regions(source, num_parts=4)
    assert parts is not None

    # Count region by region first, then the whole file, and compare.
    counted_by_region = sum(count_variants(source, part) for part in parts)
    assert counted_by_region == count_variants(source)
def test_partition_into_regions__num_parts_large(shared_datadir, is_path):
    """Requesting 100 parts of the CEUTrio gVCF yields 18 covering regions.

    Checks both the number of regions returned and that the per-region
    variant counts add up to the count for the whole file.
    """
    source = path_for_test(
        shared_datadir, "CEUTrio.20.21.gatk3.4.g.vcf.bgz", is_path
    )

    parts = partition_into_regions(source, num_parts=100)
    assert parts is not None
    # Fewer regions than requested come back for this file.
    # NOTE(review): presumably limited by the index granularity — confirm.
    assert len(parts) == 18

    counted_by_region = sum(count_variants(source, part) for part in parts)
    assert counted_by_region == count_variants(source)
def test_record_counts_csi(shared_datadir, vcf_file, is_path):
    """Check record counts in the CSI index against actual counts in the VCF.

    For each sequence name reported by the VCF, the record count stored
    at the same position in the CSI index must equal the number of
    variants counted for that contig.
    """
    source = path_for_test(shared_datadir, vcf_file, is_path)

    index_path = get_csi_path(source)
    assert index_path is not None
    index = read_csi(index_path)

    contig_names = VCF(source).seqnames
    for position, name in enumerate(contig_names):
        indexed = index.record_counts[position]
        actual = count_variants(source, name)
        assert indexed == actual
def test_record_counts_tbi(shared_datadir, vcf_file, is_path):
    """Check record counts in the tabix index against actual counts in the VCF.

    For each sequence name listed in the tabix index, the record count
    stored at the same position must equal the number of variants
    counted for that contig.
    """
    source = path_for_test(shared_datadir, vcf_file, is_path)

    index_path = get_tabix_path(source)
    assert index_path is not None
    index = read_tabix(index_path)

    for position, name in enumerate(index.sequence_names):
        indexed = index.record_counts[position]
        actual = count_variants(source, name)
        assert indexed == actual