def test_find_multiple_single_base_deletion(self):
    """Expect two single-base deletions from a read with two ``*`` marks."""
    reference = ReferenceChromosome("TTAAAAAGAAAAT")
    sequence = Sequence(reference, "..*.....*....")

    found = sequence.variants
    expected = {
        Variant(reference.chrom, 1, "TA", "T"),
        Variant(reference.chrom, 7, "GA", "G"),
    }
    self.assertEqual(found, expected)
def test_should_find_multiple_snps(self):
    """Expect one SNP per substituted base in a read with two substitutions."""
    reference = ReferenceChromosome("AAAAAAAAAAAAA")
    sequence = Sequence(reference, ".C.........T.")

    found = sequence.variants
    expected = {
        Variant(reference.chrom, 1, "A", "C"),
        Variant(reference.chrom, 11, "A", "T"),
    }
    self.assertEqual(found, expected)
def test_phase_not_aligns_for_hom_snp_in_first_cluster(self):
    """Expect separate phase sets when the first SNP is homozygous.

    Both reads carry the C>A SNP at position 7; only the first read also
    carries the G>A SNP at position 29.  With MNP calls disabled and a
    maximum cluster distance of 10, the test expects two records: ``1|1``
    in phase set "7" and ``1|0`` in phase set "28".
    """
    sample_name = "a_sample"
    chrom = "1"

    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
        .with_read(
            ".......A.....................A...........",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_read(
            ".......A.................................",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
        .with_max_cluster_distance(10)
    )

    vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

    hom_snp = vcf_expect.has_record_for_variants(Variant(chrom, 7, "C", "A"))
    (hom_snp
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|1")
        .has_phase_set_id("7"))

    het_snp = vcf_expect.has_record_for_variants(Variant(chrom, 29, "G", "A"))
    (het_snp
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|0")
        .has_phase_set_id("28"))
def test_find_adjacent_snp_and_deletion(self):
    """Expect a SNP and a deletion anchored at the same position (1)."""
    reference = ReferenceChromosome("TTAAAAAAAAAT")
    sequence = Sequence(reference, ".G*.........")

    found = sequence.variants
    expected = {
        Variant(reference.chrom, 1, "T", "G"),
        Variant(reference.chrom, 1, "TA", "T"),
    }
    self.assertEqual(found, expected)
def test_should_handle_complex_variant_input(self):
    """Expect a trimming warning for an untrimmed genotyping allele.

    The genotyping VCF holds ``CAA>CA`` at position 10.  The test expects
    the log to report it as trimmed to ``A>""`` at position 11 and the
    output VCF to contain exactly one record.
    """
    chrom = "22"
    variant = Variant(chrom, 10, "CAA", "CA")

    # Write and index the VCF that supplies the alleles to genotype.
    gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    gv_builder.with_record_from_variant(variant)
    gv_builder.build().index()

    driver = SVCDriver(self)
    dodgy_sample = "bobs_your_uncle"
    (driver
        .with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        .with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=dodgy_sample)
        .with_genotype_alleles(gv_builder.compressed_filename))

    expect = driver.call()

    log_expect = expect.with_log()
    log_expect.input_variant_trimmed_warning(
        variant, Variant(chrom, 11, "A", ""))

    expect.with_output_vcf().record_count(1)
def test_phase_alignment_for_two_snps_in_different_clusters_on_different_strands(
        self):
    """Expect two SNPs on different reads to share one phase set.

    The first read carries only the C>A SNP at position 7 and the second
    read carries only the G>A SNP at position 29.  With a maximum cluster
    distance of 10 the test expects two records, ``1|0`` and ``0|1``, both
    in phase set "7".
    """
    sample_name = "a_sample"
    chrom = "1"

    # Parenthesised chain: no line-continuation backslashes to leave dangling.
    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
        .with_read(
            ".......A.................................",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_read(
            ".............................A...........",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_output_phased_genotypes(True)
        .with_max_cluster_distance(10)
    )

    vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 7, "C", "A"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|0")
        .has_phase_set_id("7"))

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 29, "G", "A"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("0|1")
        .has_phase_set_id("7"))
def test_min_depth_computation_with_mixed_depth_of_reads(self):
    """Check depth and min depth of reference calls under overlapping reads.

    Ten reads (5 fwd + 5 rev) cover the first 30 bases of the 40-base
    reference and six reads (3 fwd + 3 rev) cover the last 30 bases.  The
    expected depth is therefore 10 for the record at position 0, 16 for the
    record at position 10 (the overlap) and 6 for the record at position 30.
    """
    sample_name = "bah"
    chrom = "1"

    driver = SVCDriver(self)
    # Each read is padded with spaces to the reference length so that its
    # column position is its alignment position; the padding is what makes
    # the two reads overlap on [10, 30) as the asserted depths require.
    (driver
        .with_ref_sequence(
            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        .with_read(
            "..............................          ",
            n_rev=5, n_fwd=5, sample_name=sample_name)
        .with_read(
            "          ..............................",
            n_rev=3, n_fwd=3, sample_name=sample_name)
        .with_output_ref_calls(True))

    expect = driver.call()

    # (record start position, expected read depth == expected min depth)
    expected_depths = ((0, 10), (10, 16), (30, 6))
    for position, depth in expected_depths:
        (expect
            .with_output_vcf()
            .has_record_for_variant(Variant(chrom, position, "A", ref_alt))
            .with_sample(sample_name)
            .has_read_depth(depth)
            .has_min_read_depth(depth))
def test_depth_computation_all_reads_spanning_reference_with_insertion(
        self):
    """Check depths around an insertion when every read spans the reference.

    Ten reads (5 fwd + 5 rev) all carry a ``T`` insertion after the ``C`` at
    position 15.  The test expects depth 10 on the reference records at
    positions 0 and 16 (with min depth 10) and on the ``C>CT`` record.
    """
    sample_name = "bah"
    chrom = "1"

    driver = SVCDriver(self)
    (driver
        .with_ref_sequence(
            "AAAAAAAAAAAAAAAC*AAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        .with_read(
            "................T.......................",
            n_rev=5, n_fwd=5, sample_name=sample_name)
        .with_output_ref_calls(True)
        .with_allow_MNP_calls(False))

    vcf_expect = driver.call().with_output_vcf()

    leading_ref = vcf_expect.has_record_for_variant(
        Variant(chrom, 0, "A", ref_alt))
    leading_ref.with_sample(sample_name) \
        .has_read_depth(10).has_min_read_depth(10)

    insertion = vcf_expect.has_record_for_variant(
        Variant(chrom, 15, "C", "CT"))
    insertion.with_sample(sample_name).has_read_depth(10)

    trailing_ref = vcf_expect.has_record_for_variant(
        Variant(chrom, 16, "A", ref_alt))
    trailing_ref.with_sample(sample_name) \
        .has_read_depth(10).has_min_read_depth(10)
def test_should_give_correct_output_for_different_sample_names(self):
    """Check per-sample genotypes from a two-job parallel run.

    After running the parallel caller with ``numberOfJobs`` set to "2", the
    test expects one deletion per sample: each is called ``1/1`` in the
    sample it belongs to and ``./.`` in the other sample.
    """
    self.sample_name1 = "SAMPLE_A"
    self.sample_name2 = "SAMPLE_B"
    n_copies1 = 1
    n_copies2 = 5
    self.setParallelAndSerialVariantCallers(n_copies1, n_copies2)

    self.vc_wrapper_parallel.add_additional_command("numberOfJobs", "2")
    self.vc_wrapper_parallel.add_additional_command(
        "workDir", self.vc_work_dir)
    self.vc_wrapper_parallel.run()

    expected_var_A_1 = Variant(self.chrom1, 3, "CTT", "C")
    expected_var_B_1 = Variant(self.chrom2, 7, "AT", "A")

    genotypes_by_variant = (
        self.vc_wrapper_parallel
        .get_variant_callset(self)
        .get_variants_with_genotypes()
    )

    # assertIn reports the missing variant and the container on failure,
    # which a bare assertTrue on a boolean cannot do.
    self.assertIn(expected_var_A_1, genotypes_by_variant)
    self.assertIn(expected_var_B_1, genotypes_by_variant)

    # (variant, sample, expected genotype string)
    expected_calls = (
        (expected_var_A_1, self.sample_name1, "1/1"),
        (expected_var_A_1, self.sample_name2, "./."),
        (expected_var_B_1, self.sample_name1, "./."),
        (expected_var_B_1, self.sample_name2, "1/1"),
    )
    for variant, sample_name, genotype in expected_calls:
        self.assertEqual(
            GenotypeCall(genotype),
            genotypes_by_variant[variant][sample_name])
def test_should_call_variants(self):
    """Check triploid genotypes for SNPs and indels across three read groups.

    With ploidy 3 and three equally deep read groups, the test expects the
    SNP present in two groups to be ``0/1/1``, the SNP present in one group
    to be ``0/0/1``, and the insertion and deletion present in all groups
    to be ``1/1/1``.
    """
    chrom = 'chr1'
    sample_name = 'sample'
    svc = SVCDriver(self).with_ploidy(3)

    read_options = dict(
        n_rev=10, n_fwd=10, chrom=chrom, sample_name=sample_name)
    (svc
        .with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTC***AACCCGTTACGTATGCATG", chrom=chrom)
        .with_read(
            "......C.........G.....ATG.......***.........", **read_options)
        .with_read(
            "......C...............ATG.......***.........", **read_options)
        .with_read(
            "......................ATG.......***.........", **read_options))

    expect = svc.call()
    vcf = expect.with_output_vcf().record_count(4)

    vcf.has_record_for_variant(Variant(chrom, 6, 'T', 'C')) \
        .with_sample(sample_name).has_genotype('0/1/1')
    vcf.has_record_for_variant(Variant(chrom, 16, 'T', 'G')) \
        .with_sample(sample_name).has_genotype('0/0/1')
    vcf.has_record_for_variant(Variant(chrom, 21, 'C', 'CATG')) \
        .with_sample(sample_name).has_genotype('1/1/1')
    vcf.has_record_for_variant(Variant(chrom, 28, 'TTAC', 'T')) \
        .with_sample(sample_name).has_genotype('1/1/1')
def test_calls_correct_reference_between_clusters_with_uncalled_indel_between(
        self):
    """Check reference blocks around two calls with a weak indel in between.

    With reference calls enabled and a maximum cluster distance of 5, the
    test expects reference calls on [0, 8), the ``TA>T`` deletion at 8,
    reference calls on [10, 23), the ``A>T`` SNP at 23 and reference calls
    on [24, 41).
    """
    chrom = "1"
    driver = SVCDriver(self)

    # NOTE(review): these reads are shorter than the reference and their
    # space padding looks collapsed -- confirm the intended alignment.
    (driver
        .with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        .with_read(
            " ..*....................... ", n_fwd=10, n_rev=10)
        .with_read(
            ".......................*............ ", n_fwd=1, n_rev=1)
        .with_read(
            ".......................T.. ", n_fwd=10, n_rev=10)
        .with_output_ref_calls(True)
        .with_max_cluster_distance(5))

    vcf_expect = driver.call().with_output_vcf()

    vcf_expect.has_reference_calls(ChromInterval(chrom, 0, 8))
    vcf_expect.has_record_for_variant(Variant(chrom, 8, "TA", "T"))
    vcf_expect.has_reference_calls(ChromInterval(chrom, 10, 23))
    vcf_expect.has_record_for_variant(Variant(chrom, 23, "A", "T"))
    vcf_expect.has_reference_calls(ChromInterval(chrom, 24, 41))
def test_calls_reference_on_location_with_low_quality_variant_support(
        self):
    """Check that a poorly supported substitution stays inside a ref block.

    One read carries substitutions at positions 17 and 23 and is given a
    quality string.  The test expects reference calls on [0, 8), the
    ``TA>T`` deletion at 8, reference calls on [10, 23) -- i.e. nothing
    called at 17 -- the ``A>T`` SNP at 23 and reference calls on [24, 41).
    """
    chrom = "1"
    driver = SVCDriver(self)

    # NOTE(review): these read and quality strings are shorter than the
    # reference and their space padding looks collapsed -- confirm the
    # intended alignment (in particular which base the quality "1" marks).
    (driver
        .with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        .with_read(
            " ..*....................... ", n_fwd=2, n_rev=1)
        .with_read(
            ".................T.....T......... ",
            " 1 ",
            n_fwd=1, n_rev=1)
        .with_read(
            ".......................T.. ", n_fwd=1, n_rev=0)
        .with_output_ref_calls(True))

    vcf_expect = driver.call().with_output_vcf()

    vcf_expect.has_reference_calls(ChromInterval(chrom, 0, 8))
    vcf_expect.has_record_for_variant(Variant(chrom, 8, "TA", "T"))
    vcf_expect.has_reference_calls(ChromInterval(chrom, 10, 23))
    vcf_expect.has_record_for_variant(Variant(chrom, 23, "A", "T"))
    vcf_expect.has_reference_calls(ChromInterval(chrom, 24, 41))
def test_phasing_for_two_heterozygous_variants_ocrn_same_strand(self):
    """Expect two het SNPs carried by the same read to be phased together.

    One read group carries SNPs at positions 8 and 12 and the other matches
    the reference.  The test expects two records, both ``0|1`` in phase set
    "8".
    """
    sample_name = "a_sample"
    chrom = "1"

    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
        .with_read(
            "........T...T........",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_read(
            ".....................",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
    )

    vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

    records_expect = vcf_expect.has_record_for_variants(
        Variant(chrom, 8, "G", "T"),
        Variant(chrom, 12, "A", "T"),
    )
    (records_expect
        .with_sample(sample_name)
        .has_phased_genotypes("0|1", "0|1")
        .has_phase_set_id("8"))
def test_phase_quality_for_phase_with_2_out_of_3_support(self):
    """Check the phase-set quality reported for three SNPs in one phase set.

    Four read groups carry different combinations of SNPs at positions 8,
    12 and 15.  The test expects genotypes ``0|1``, ``1|1`` and ``1|0`` in
    phase set "8", with a phase-set quality derived from a 2/3 ratio.
    """
    sample_name = "a_sample"
    chrom = "1"

    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
        .with_read(
            "........T...T..T.....",
            n_fwd=5, n_rev=5, sample_name=sample_name)
        .with_read(
            "............T........",
            n_fwd=5, n_rev=5, sample_name=sample_name)
        .with_read(
            "........T...T........",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_read(
            "............T..T.....",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
    )

    vcf_expect = svc_driver.call().with_output_vcf().record_count(3)

    ratio_of_phase_to_total = 2.0 / 3.0
    # actual value needs to be figured out from equations
    unknown_phase_quality = int(
        round(log10(1.0 - ratio_of_phase_to_total) * -10.0))

    records_expect = vcf_expect.has_record_for_variants(
        Variant(chrom, 8, "G", "T"),
        Variant(chrom, 12, "A", "T"),
        Variant(chrom, 15, "A", "T"),
    )
    (records_expect
        .with_sample(sample_name)
        .has_phased_genotypes("0|1", "1|1", "1|0")
        .has_phase_set_id("8")
        .has_phase_set_quality(unknown_phase_quality))
def test_phase_alignment_for_het_variants_for_three_clusters_when_first_cluster_is_homozygous(
        self):
    """Expect later het SNPs to share a phase set after a homozygous SNP.

    Both read groups carry the C>A SNP at position 4; only the second also
    carries the SNPs at positions 18 and 34.  With minimum and maximum
    cluster distance set to 5, the test expects ``1|1`` in phase set "4"
    and ``1|0`` for both later SNPs in phase set "13".
    """
    sample_name = "sample1"
    chrom = "1"

    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
        .with_read(
            "....A....................................",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_read(
            "....A.............C...............T......",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
        .with_max_cluster_distance(5)
        .with_min_cluster_distance(5)
    )

    vcf_expect = svc_driver.call().with_output_vcf().record_count(3)

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 4, "C", "A"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|1")
        .has_phase_set_id("4"))

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 18, "T", "C"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|0")
        .has_phase_set_id("13"))

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 34, "A", "T"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|0")
        .has_phase_set_id("13"))
def test_find_adjacent_insertion_and_snp(self):
    """Expect an insertion at 0 and a SNP at 1 from neighbouring columns."""
    reference = ReferenceChromosome("T*ATAAAAAAAT")
    sequence = Sequence(reference, ".CG.........")

    found = sequence.variants
    expected = {
        Variant(reference.chrom, 0, "T", "TC"),
        Variant(reference.chrom, 1, "A", "G"),
    }
    self.assertEqual(found, expected)
def test_calls_deletion_and_snp_at_same_location_in_repeat_region_with_few_reads_as_anchors(
        self):
    """Expect a SNP and an overlapping deletion to both be called.

    Five reverse reads carry a two-base deletion and five carry a ``G`` at
    position 21.  The test expects records for ``T>G`` at 21 and
    ``GAT>G`` at 19 with phased genotypes ``.|1`` and ``1|.``.
    """
    chrom = "1"
    sample = "sample"

    svc_driver = SVCDriver(self)
    (svc_driver
        .with_ref_sequence(
            'CGAGAGAGAGAGAGAGAGAGATAGAGAGAGAGAGAGAGAGTC', chrom=chrom)
        .with_read(
            '....................**....................',
            n_rev=5, n_fwd=0, chrom=chrom, sample_name=sample)
        .with_read(
            '.....................G....................',
            n_rev=5, n_fwd=0, chrom=chrom, sample_name=sample))

    vcf_expect = svc_driver.call().with_output_vcf()

    records_expect = vcf_expect.has_record_for_variants(
        Variant(chrom, 21, "T", "G"),
        Variant(chrom, 19, "GAT", "G"),
    )
    records_expect.with_sample(sample).has_phased_genotypes(".|1", "1|.")
def test_symmetry_in_repetitive_reference(self):
    """Expect two SNPs in a poly-A reference to receive the same genotype.

    Three equally deep read groups are used: one with a SNP at position 8,
    one matching the reference and one with a SNP at position 17.  The test
    expects two records, each checked against ``0/1``.
    """
    sample_name = "a_sample"
    chrom = "1"
    m = 2  # forward and reverse read count for every read group

    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "TAAAAAAAAAAAAAAAAAAAAAAAAAT", chrom=chrom)
        .with_read(
            "........T..................",
            n_fwd=m, n_rev=m, sample_name=sample_name)
        .with_read(
            "...........................",
            n_fwd=m, n_rev=m, sample_name=sample_name)
        .with_read(
            ".................T.........",
            n_fwd=m, n_rev=m, sample_name=sample_name)
        .with_allow_MNP_calls(False)
    )

    vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 17, "A", "T"))
        .with_sample(sample_name)
        .has_phased_genotypes("0/1"))

    (vcf_expect
        .has_record_for_variants(Variant(chrom, 8, "A", "T"))
        .with_sample(sample_name)
        .has_phased_genotypes("0/1"))
def test_find_multiple_variants(self):
    """Expect a SNP, an insertion, another SNP and a deletion from one read."""
    reference = ReferenceChromosome("TA*AAAGCTAACT")
    sequence = Sequence(reference, ".GC...T...**.")

    found = sequence.variants
    expected = {
        Variant(reference.chrom, 1, "A", "G"),
        Variant(reference.chrom, 1, "A", "AC"),
        Variant(reference.chrom, 5, "G", "T"),
        Variant(reference.chrom, 8, "AAC", "A"),
    }
    self.assertEqual(found, expected)
def test_eq(self):
    """Check that ``Record`` equality is sensitive to each constructor field.

    A baseline record is compared with an identically built record (must be
    equal) and then with records that differ from it in exactly one
    constructor argument (must not be equal).
    """

    def build(chrom="1", ids=(), qual=0.0, filters=(), info=None,
              sample_names=(), last_flag=False):
        """Build a Record that matches the baseline unless overridden."""
        # NOTE(review): the meaning of Record's first and last positional
        # arguments is not visible here; they are passed through unchanged.
        return Record(
            None,
            Variant(chrom, 20, "A", "G"),
            set(ids),
            qual,
            set(filters),
            InfoData(None, dict(info or {})),
            SampleData([], list(sample_names)),
            last_flag,
        )

    reference = build()

    # "==" is spelled out on purpose: the operator itself is under test.
    self.assertTrue(reference == build())

    self.assertFalse(reference == build(chrom="2"))
    # One-element collections: set("rs0") would have produced the three
    # characters {"r", "s", "0"} rather than a single identifier.
    self.assertFalse(reference == build(ids=("rs0",)))
    self.assertFalse(reference == build(qual=5.0))
    self.assertFalse(reference == build(filters=("CV",)))
    self.assertFalse(reference == build(info={'AF': []}))
    self.assertFalse(reference == build(sample_names=['NA12787']))
    self.assertFalse(reference == build(last_flag=True))
def test_should_return_all_variants(self):
    """Expect per-sample variants and their union on the sample bank."""
    sample_bank = SampleBank("AAATTTTGGGAG")
    for name in ("SAMPLE1", "SAMPLE2"):
        sample_bank.add_sample_name(name)

    sample_bank["SAMPLE1"].add_sequence(".....G......")
    sample_bank["SAMPLE2"].add_sequence("..........*.")

    chrom = sample_bank.reference.chrom
    exp_variant1 = Variant(chrom, 5, "T", "G")
    exp_variant2 = Variant(sample_bank.reference.chrom, 9, "GA", "G")

    self.assertEqual(sample_bank["SAMPLE1"].variants, {exp_variant1})
    self.assertEqual(sample_bank["SAMPLE2"].variants, {exp_variant2})
    self.assertEqual(sample_bank.variants, {exp_variant1, exp_variant2})
def test_should_place_variants_at_custom_position(self):
    """Expect variant positions to be offset by the bank's start (100)."""
    sample_bank = SampleBank("AAATTTTGGGAG", 100)
    for name in ("SAMPLE1", "SAMPLE2"):
        sample_bank.add_sample_name(name)

    sample_bank["SAMPLE1"].add_sequence(".....G......")
    sample_bank["SAMPLE2"].add_sequence("..........*.")

    chrom = sample_bank.reference.chrom
    exp_variant1 = Variant(chrom, 105, "T", "G")
    exp_variant2 = Variant(sample_bank.reference.chrom, 109, "GA", "G")

    self.assertEqual(sample_bank["SAMPLE1"].variants, {exp_variant1})
    self.assertEqual(sample_bank["SAMPLE2"].variants, {exp_variant2})
    self.assertEqual(sample_bank.variants, {exp_variant1, exp_variant2})
def test_should_find_correct_variants(self):
    """Check the full variant set from a two-job parallel run.

    The expected set repeats a fixed list of variants once per copy on each
    chromosome, shifting positions by that chromosome's repeat length.
    """
    n_copies1 = 1
    n_copies2 = 5
    self.setParallelAndSerialVariantCallers(n_copies1, n_copies2)

    wrapper = self.vc_wrapper_parallel
    wrapper.add_additional_command("numberOfJobs", "2")
    wrapper.add_additional_command("workDir", self.vc_work_dir)
    wrapper.add_additional_command("allowMNPCalls", False)
    wrapper.run()

    # (position within one repeat unit, ref allele, alt allele)
    chrom1_unit = (
        (3, "CTT", "C"),
        (11, "T", "TCTG"),
        (18, "GT", "G"),
        (25, "C", "T"),
        (37, "G", "A"),
        (40, "G", "T"),
    )
    chrom2_unit = (
        (7, "AT", "A"),
        (22, "C", "T"),
    )

    expected_vars = set()
    for copy_index in range(n_copies1):
        for pos, ref, alt in chrom1_unit:
            expected_vars.add(Variant(
                self.chrom1, pos + copy_index * self.repeat_length1,
                ref, alt))
    for copy_index in range(n_copies2):
        for pos, ref, alt in chrom2_unit:
            expected_vars.add(Variant(
                self.chrom2, pos + copy_index * self.repeat_length2,
                ref, alt))

    callset = self.vc_wrapper_parallel.get_variant_callset(self)
    actual_parallel_variants = callset.get_variants()
    self.assertEqual(expected_vars, actual_parallel_variants)
def test_bad_reads_filter_not_applied_when_median_read_is_good(self):
    """Expect no filter on a SNP supported by two read groups.

    The "BR" filter is enabled with window size 7 and minimum score 20.
    Both read groups carry the T>G SNP at position 16 and each is given a
    quality string; the test expects a single unfiltered record.
    """
    svc = (
        SVCDriver(self)
        .with_var_filters("BR")
        .with_bad_reads_window_size(7)
        .with_min_bad_reads_score(20)
    )

    # NOTE(review): the quality strings are shorter than the reads they
    # annotate and their space padding looks collapsed -- confirm the
    # intended alignment.  The ruler comment below came from the original
    # layout and may no longer line up with the reference.
    (svc
        .with_ref_sequence(
            # 1234567890123456789
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG")
        .with_read(
            "................G...........................",
            " 1 1 ",
            n_rev=10, n_fwd=10)
        .with_read(
            "................G...........................",
            " 4444444 4444444 ",
            n_rev=11, n_fwd=10))

    vcf_expectation = svc.call().with_output_vcf()
    vcf_expectation.record_count(1)

    record = vcf_expectation.has_record_for_variant(
        Variant(DEFAULT_CHROM, 16, "T", "G"))
    record.with_no_filters()
def test_should_have_near_zero_RR_genotype_likelihood_for_hom_alt_call(
        self):
    """Check genotype and RR likelihood of the all-reference sample.

    ``sample_1`` matches the reference and ``sample_2`` carries an A>C SNP
    at position 12.  For that record the test expects ``sample_1`` to have
    genotype ``0|0`` and an RR genotype likelihood of 0.0.
    """
    chr1 = 'chr1'
    sample_bank = SampleBank("TTTTTAAAAAAAAAAAAAAAAAAAA", chrom=chr1)

    reference_only = sample_bank.add_sample_name('sample_1')
    reference_only.add_sequence(
        ".........................", n_fwd=20, n_rev=20)

    snp_carrier = sample_bank.add_sample_name('sample_2')
    snp_carrier.add_sequence(
        "............C............", n_fwd=20, n_rev=20)

    vc_wrapper_builder = VariantCallerBuilderFromSampleBank(
        sample_bank, self.work_dir)
    vc_wrapper = vc_wrapper_builder.build()
    variant_output = vc_wrapper.run().output_vcf

    vcf_expectation = VCFExpectation(self, variant_output)
    (vcf_expectation
        .has_record_for_variant(Variant(chr1, 12, "A", "C"))
        .with_sample("sample_1")
        .has_genotype("0|0")
        .has_RR_genotype_likelihood(0.0))
def test_should_not_apply_bad_reads_to_insertion_if_all_supporting_reads_have_high_base_qualities(
        self):
    """Expect no filter on an insertion whose supporting reads have no
    quality string.

    The "BR" filter is enabled with window size 3 and minimum score 15.
    One read group carries a substitution and is given a quality string;
    the other carries the ``T`` insertion.  The test expects a single
    unfiltered ``G>GT`` record at position 13.
    """
    svc = (
        SVCDriver(self)
        .with_var_filters("BR")
        .with_bad_reads_window_size(3)
        .with_min_bad_reads_score(15)
    )

    # NOTE(review): the quality string is shorter than the read it
    # annotates and its space padding looks collapsed -- confirm the
    # intended alignment.  The ruler comment below came from the original
    # layout and may no longer line up with the reference.
    (svc
        .with_ref_sequence(
            # 1234567890123 456789
            "AAAGCGTACAACCG*GGTTAGTCACAAACCCGTTACGTATGCATG")
        .with_read(
            "..............*..G...........................",
            " 1 ",
            n_rev=11, n_fwd=10))
    svc.with_read(
        "..............T..............................",
        n_rev=10, n_fwd=10)

    vcf_expectation = svc.call().with_output_vcf()
    vcf_expectation.record_count(1)

    record = vcf_expectation.has_record_for_variant(
        Variant(DEFAULT_CHROM, 13, "G", "GT"))
    record.with_no_filters()
def test_bad_reads_filter_not_applied_if_one_sample_is_not_naughty(self):
    """Expect no filter when one of three samples has a different quality
    string.

    The "BR" filter is enabled with window size 7 and minimum score 13.
    Samples "GOOD", "BAD" and "UGLY" all carry the T>G SNP at position 16,
    each with its own quality string; the test expects a single unfiltered
    record.
    """
    svc = SVCDriver(self)
    svc.with_var_filters("BR")
    svc.with_bad_reads_window_size(7)
    svc.with_min_bad_reads_score(13)

    # NOTE(review): the quality strings are shorter than the reads they
    # annotate and their space padding looks collapsed -- confirm the
    # intended alignment.  The ruler comment below came from the original
    # layout and may no longer line up with the reference.
    (svc
        .with_ref_sequence(
            # 1234567890123456789
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG")
        .with_read(
            "................G...........................",
            " 3333333 3333333 ",
            sample_name="GOOD", n_rev=2, n_fwd=2)
        .with_read(
            "................G...........................",
            " 0000000 0000000 ",
            sample_name="BAD", n_rev=10, n_fwd=10)
        .with_read(
            "................G...........................",
            " 00000 0000 ",
            sample_name="UGLY", n_rev=10, n_fwd=10))

    vcf_expectation = svc.call().with_output_vcf()
    vcf_expectation.record_count(1)

    record = vcf_expectation.has_record_for_variant(
        Variant(DEFAULT_CHROM, 16, "T", "G"))
    record.with_no_filters()
def test_should_generate_variant_from_ascii_text(self):
    """Expect the union of variants from two ASCII alternative sequences."""
    ref = "ATAAAAAAAAAT"
    alt_1 = ".A........*."
    alt_2 = ".C.........."

    variant_generator = AsciiVariantGenerator(ReferenceChromosome(ref))
    gen_vars = variant_generator.get_variants([alt_1, alt_2])

    expected = {
        Variant(variant_generator.reference.chrom, 1, "T", "A"),
        Variant(variant_generator.reference.chrom, 1, "T", "C"),
        Variant(variant_generator.reference.chrom, 9, "AA", "A"),
    }
    self.assertEqual(gen_vars, expected)
def test_doesnt_give_a_flying_damn_about_spurious_filter_header(self):
    """Expect genotyping to succeed with an odd filter declared in the header.

    The genotyping VCF declares, and applies to its record, a filter whose
    name is made of punctuation characters.  The test expects the call to
    succeed and the sample to be genotyped ``1/1`` for that variant.
    """
    chrom = "22"
    variant = Variant(chrom, 11, "A", "C")

    schema = Schema()
    complex_filter_name = '.+-*\\/~@?!%^&><=\"\'(){}[]_|'
    schema.set_filter(complex_filter_name, 'unusual characters')

    # Write and index the VCF that supplies the alleles to genotype.
    gv_builder = VCFBuilder(
        join(self.work_dir, "genotype.vcf"), schema=schema)
    gv_builder.with_record_from_variant(
        variant, filters={complex_filter_name})
    gv_builder.build().index()

    driver = SVCDriver(self)
    dodgy_sample = "bobs_your_uncle"
    (driver
        .with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        .with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=dodgy_sample)
        .with_genotype_alleles(gv_builder.compressed_filename))

    expect = driver.call(expected_success=True)
    (expect
        .with_output_vcf()
        .has_record_for_variant(variant)
        .with_sample(dodgy_sample)
        .has_genotype("1/1"))
def test_doesnt_give_a_flying_damn_about_spurious_filters(self):
    """Expect genotyping to succeed when the input record has an odd filter.

    The genotyping VCF record carries a filter made of punctuation
    characters and built with the default schema.  The test expects the
    call to succeed and the sample to be genotyped ``1/1`` for that variant.
    """
    chrom = "22"
    variant = Variant(chrom, 11, "A", "C")

    # Write and index the VCF that supplies the alleles to genotype.
    gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    gv_builder.with_record_from_variant(
        variant, filters={"#$.:@$%$%^&**()7!"})
    gv_builder.build().index()

    driver = SVCDriver(self)
    dodgy_sample = "bobs_your_uncle"
    (driver
        .with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        .with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=dodgy_sample)
        .with_genotype_alleles(gv_builder.compressed_filename))

    expect = driver.call(expected_success=True)
    (expect
        .with_output_vcf()
        .has_record_for_variant(variant)
        .with_sample(dodgy_sample)
        .has_genotype("1/1"))