def test_vcf_variant_hashability():
    """Check that VcfVariant objects can be stored in a set and deduplicated.

    Six variants are built, two of which repeat the arguments of an earlier
    entry, so the resulting set is expected to hold four elements.
    """
    variants = [
        VcfVariant(10, 'A', 'TC'),
        VcfVariant(10, 'A', 'TCA'),
        VcfVariant(10, 'C', 'TC'),
        VcfVariant(20, 'A', 'TC'),
        # The next two repeat the arguments of entries above; they are
        # separate instances on purpose.
        VcfVariant(10, 'A', 'TCA'),
        VcfVariant(20, 'A', 'TC'),
    ]
    unique_variants = set(variants)
    assert len(unique_variants) == 4
def test_vcf_variant_hashability():
    """Check that equal-argument VcfVariant instances collapse inside a set."""
    # Constructor arguments for each variant; the last two rows duplicate
    # earlier rows, so only four distinct variants are expected.
    constructor_args = [
        (10, "A", "TC"),
        (10, "A", "TCA"),
        (10, "C", "TC"),
        (20, "A", "TC"),
        (10, "A", "TCA"),
        (20, "A", "TC"),
    ]
    # Every row yields a fresh instance, so deduplication cannot rely on
    # object identity.
    v = [VcfVariant(*args) for args in constructor_args]
    assert len(set(v)) == 4
def test_normalize():
    """Check VcfVariant.normalized() against a table of expected results.

    Each row holds the constructor arguments of the input variant and of the
    variant that normalized() is expected to return.
    """
    cases = [
        # Nothing to trim: the result equals the input.
        ((100, 'A', 'C'), (100, 'A', 'C')),
        ((100, '', 'A'), (100, '', 'A')),
        ((100, 'A', ''), (100, 'A', '')),
        # Shared leading base is removed and the position moves forward.
        ((100, 'A', 'AC'), (101, '', 'C')),
        ((100, 'AC', 'A'), (101, 'C', '')),
        ((100, 'ACAGACC', 'ACAGACT'), (106, 'C', 'T')),
        ((100, 'GCTG', 'GCTAAA'), (103, 'G', 'AAA')),
        # NOTE(review): these expectations look consistent with the shared
        # suffix being trimmed before the shared prefix - confirm against
        # the implementation of normalized().
        ((100, 'ATTA', 'ATA'), (101, 'T', '')),
        ((100, 'ATTTC', 'ATTTTTTC'), (101, '', 'TTT')),
        ((100, 'GCTGTT', 'GCTAAATT'), (103, 'G', 'AAA')),
    ]
    for given, expected in cases:
        assert VcfVariant(*given).normalized() == VcfVariant(*expected)
def test_phasing_to_reads():
    """Check the reads produced by phased_blocks_as_reads() for two VCF files.

    The same expectations are applied to both input files. For each file the
    first table is queried for 'sample1', for 'sample2', and for 'sample2'
    again with an explicit variant list.
    """

    def check_read(read, name, source_id, mapq, expected_entries):
        # Compare the read's metadata, then each entry's
        # (position, allele, quality) in order.
        assert read.name == name
        assert read.source_id == source_id
        assert read.mapqs == (mapq,)
        for index, (position, allele, quality) in enumerate(expected_entries):
            entry = read[index]
            assert entry.position == position
            assert entry.allele == allele
            assert entry.quality == quality

    vcf_paths = ('tests/data/phased-via-HP.vcf', 'tests/data/phased-via-PS.vcf')
    for filename in vcf_paths:
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, _ = tables

        # sample1: a single read with two entries is expected.
        sample1_reads = list(
            table_a.phased_blocks_as_reads(
                'sample1', table_a.variants, 17, 18, default_quality=90, mapq=101
            )
        )
        print(sample1_reads)
        assert len(sample1_reads) == 1
        read = sample1_reads[0]
        assert len(read) == 2
        # Positions are written as "N - 1"; presumably N is the 1-based
        # coordinate from the VCF file - TODO confirm.
        check_read(
            read,
            'sample1_block_300',
            17,
            101,
            [(300 - 1, 1, 23), (350 - 1, 0, 42)],
        )

        # sample2: two reads with two entries each are expected.
        sample2_reads = list(
            table_a.phased_blocks_as_reads(
                'sample2', table_a.variants, 11, 12, default_quality=91, mapq=102
            )
        )
        print(sample2_reads)
        assert len(sample2_reads) == 2
        read1, read2 = sample2_reads
        assert len(read1) == len(read2) == 2
        # The two reads may arrive in either order; put the one that starts
        # at the smaller position first.
        if read1[0].position > read2[0].position:
            read1, read2 = read2, read1
        check_read(
            read1,
            'sample2_block_100',
            11,
            102,
            [(100 - 1, 0, 10), (150 - 1, 1, 20)],
        )
        # The last expected quality (91) equals the default_quality passed
        # in above.
        check_read(
            read2,
            'sample2_block_300',
            11,
            102,
            [(300 - 1, 0, 30), (350 - 1, 0, 91)],
        )

        # sample2 again, this time with an explicit variant list; only the
        # read for the block at 300 is expected.
        variants = [
            VcfVariant(350 - 1, 'G', 'T'),
            VcfVariant(300 - 1, 'G', 'T'),
            VcfVariant(17, 'A', 'TTC'),
            VcfVariant(1000, 'C', 'G'),
        ]
        sample2_reads = list(
            table_a.phased_blocks_as_reads(
                'sample2', variants, 11, 12, default_quality=91, mapq=102
            )
        )
        print(sample2_reads)
        assert len(sample2_reads) == 1
        read = sample2_reads[0]
        assert len(read) == 2
        check_read(
            read,
            'sample2_block_300',
            11,
            102,
            [(300 - 1, 0, 30), (350 - 1, 0, 91)],
        )
def test_normalize():
    """Check the result of VcfVariant.normalized() for a series of variants."""

    def normalized(position, reference, alternative):
        # Build a variant and return its normalized form.
        return VcfVariant(position, reference, alternative).normalized()

    # Inputs that are expected to come back unchanged.
    assert normalized(100, "A", "C") == VcfVariant(100, "A", "C")
    assert normalized(100, "", "A") == VcfVariant(100, "", "A")
    assert normalized(100, "A", "") == VcfVariant(100, "A", "")

    # Insertion and deletion that share their first base.
    assert normalized(100, "A", "AC") == VcfVariant(101, "", "C")
    assert normalized(100, "AC", "A") == VcfVariant(101, "C", "")

    # Longer shared prefixes: the position advances past the common bases.
    assert normalized(100, "ACAGACC", "ACAGACT") == VcfVariant(106, "C", "T")
    assert normalized(100, "GCTG", "GCTAAA") == VcfVariant(103, "G", "AAA")

    # Alleles sharing bases at both ends.
    assert normalized(100, "ATTA", "ATA") == VcfVariant(101, "T", "")
    assert normalized(100, "ATTTC", "ATTTTTTC") == VcfVariant(101, "", "TTT")
    assert normalized(100, "GCTGTT", "GCTAAATT") == VcfVariant(103, "G", "AAA")