예제 #1
0
 def test_find_multiple_single_base_deletion(self):
     """Two '*' marks in the read yield two single-base deletion variants."""
     reference = ReferenceChromosome("TTAAAAAGAAAAT")
     sequence = Sequence(reference, "..*.....*....")
     found = sequence.variants
     # Each expected deletion starts on the reference base preceding the '*'.
     expected = {
         Variant(reference.chrom, 1, "TA", "T"),
         Variant(reference.chrom, 7, "GA", "G"),
     }
     self.assertEqual(found, expected)
예제 #2
0
 def test_should_find_multiple_snps(self):
     """Two substituted bases in the read yield two SNP variants."""
     reference = ReferenceChromosome("AAAAAAAAAAAAA")
     sequence = Sequence(reference, ".C.........T.")
     found = sequence.variants
     snp_near_start = Variant(reference.chrom, 1, "A", "C")
     snp_near_end = Variant(reference.chrom, 11, "A", "T")
     self.assertEqual(found, {snp_near_start, snp_near_end})
    def test_phase_not_aligns_for_hom_snp_in_first_cluster(self):
        """Check phasing when the first of two SNPs is present in all reads.

        Both read groups carry the C>A SNP at position 7 while only one
        carries the G>A SNP at position 29; the two expected records carry
        different phase set ids ("7" and "28").
        """
        chrom = "1"
        sample_name = "a_sample"
        read_options = {"n_fwd": 10, "n_rev": 10, "sample_name": sample_name}

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
            .with_read(
                ".......A.....................A...........", **read_options)
            .with_read(
                ".......A.................................", **read_options)
            .with_output_phased_genotypes(True)
            .with_allow_MNP_calls(False)
            .with_max_cluster_distance(10)
        )

        vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

        # SNP seen in every read: expected genotype "1|1".
        first_snp = vcf_expect.has_record_for_variants(
            Variant(chrom, 7, "C", "A"))
        first_snp.with_sample(sample_name) \
            .has_exact_phased_genotypes("1|1") \
            .has_phase_set_id("7")

        # SNP seen in one read group only: expected genotype "1|0".
        second_snp = vcf_expect.has_record_for_variants(
            Variant(chrom, 29, "G", "A"))
        second_snp.with_sample(sample_name) \
            .has_exact_phased_genotypes("1|0") \
            .has_phase_set_id("28")
예제 #4
0
 def test_find_adjacent_snp_and_deletion(self):
     """A SNP directly followed by a deleted base yields both variants.

     Both expected variants are reported at position 1.
     """
     reference = ReferenceChromosome("TTAAAAAAAAAT")
     sequence = Sequence(reference, ".G*.........")
     found = sequence.variants
     snp = Variant(reference.chrom, 1, "T", "G")
     deletion = Variant(reference.chrom, 1, "TA", "T")
     self.assertEqual(found, {snp, deletion})
예제 #5
0
    def test_should_handle_complex_variant_input(self):
        """Feed an untrimmed candidate variant and expect a trimming warning.

        The genotyping VCF holds "CAA" -> "CA" at position 10; the log is
        expected to report it trimmed to "A" -> "" at position 11, and the
        output VCF is expected to contain exactly one record.
        """
        chrom = "22"
        untrimmed = Variant(chrom, 10, "CAA", "CA")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(untrimmed)
        genotype_vcf.build().index()

        driver = SVCDriver(self)
        dodgy_sample = "bobs_your_uncle"

        with_reference = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        with_reads = with_reference.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=dodgy_sample)
        with_reads.with_genotype_alleles(genotype_vcf.compressed_filename)

        expect = driver.call()

        log_expect = expect.with_log()
        log_expect.input_variant_trimmed_warning(
            untrimmed, Variant(chrom, 11, "A", ""))

        vcf_expect = expect.with_output_vcf()
        vcf_expect.record_count(1)
    def test_phase_alignment_for_two_snps_in_different_clusters_on_different_strands(
            self):
        """Check phasing of two SNPs that never occur in the same read.

        One read group carries the C>A SNP at position 7 and the other the
        G>A SNP at position 29; the expected genotypes are "1|0" and "0|1",
        both in phase set "7".
        """
        sample_name = "a_sample"
        chrom = "1"

        # The builder chain is parenthesised rather than joined with
        # backslashes: the original ended with a dangling "\" that continued
        # onto the following blank line, so any statement later added there
        # would have been silently appended to the chain.
        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
            .with_read(
                ".......A.................................", n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                ".............................A...........", n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_output_phased_genotypes(True)
            .with_max_cluster_distance(10)
        )

        vcf_expect = (
            svc_driver.call()
            .with_output_vcf()
            .record_count(2)
        )

        (
            vcf_expect.has_record_for_variants(Variant(chrom, 7, "C", "A"))
            .with_sample(sample_name)
            .has_exact_phased_genotypes("1|0")
            .has_phase_set_id("7")
        )

        (
            vcf_expect.has_record_for_variants(Variant(chrom, 29, "G", "A"))
            .with_sample(sample_name)
            .has_exact_phased_genotypes("0|1")
            .has_phase_set_id("7")
        )
예제 #7
0
    def test_min_depth_computation_with_mixed_depth_of_reads(self):
        """Check read depth and minimum read depth under partial read overlap.

        One read group (5 forward + 5 reverse) covers the first 30 bases and
        another (3 forward + 3 reverse) covers the last 30 bases of a 40-base
        reference. The records at positions 0, 10 and 30 are expected to
        report depths of 10, 16 and 6 respectively.
        """
        chrom = "1"
        sample_name = "bah"

        driver = SVCDriver(self)
        with_reference = driver.with_ref_sequence(
            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        with_left_reads = with_reference.with_read(
            "..............................          ",
            n_rev=5, n_fwd=5, sample_name=sample_name)
        with_both_reads = with_left_reads.with_read(
            "          ..............................",
            n_rev=3, n_fwd=3, sample_name=sample_name)
        with_both_reads.with_output_ref_calls(True)

        expect = driver.call()

        # (record position, expected read depth == expected min read depth)
        expected_depths = ((0, 10), (10, 16), (30, 6))
        for position, depth in expected_depths:
            record = expect.with_output_vcf().has_record_for_variant(
                Variant(chrom, position, "A", ref_alt))
            record.with_sample(sample_name) \
                .has_read_depth(depth) \
                .has_min_read_depth(depth)
예제 #8
0
    def test_depth_computation_all_reads_spanning_reference_with_insertion(
            self):
        """Check depths around an insertion carried by every read.

        All ten reads span the reference and insert a "T" after position 15.
        The reference-call records at positions 0 and 16 and the insertion
        record are all expected to report a read depth of 10.
        """
        chrom = "1"
        sample_name = "bah"

        driver = SVCDriver(self)
        with_reference = driver.with_ref_sequence(
            "AAAAAAAAAAAAAAAC*AAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        with_reads = with_reference.with_read(
            "................T.......................",
            n_rev=5, n_fwd=5, sample_name=sample_name)
        with_reads.with_output_ref_calls(True).with_allow_MNP_calls(False)

        vcf_expect = driver.call().with_output_vcf()

        before_insertion = vcf_expect.has_record_for_variant(
            Variant(chrom, 0, "A", ref_alt))
        before_insertion.with_sample(sample_name) \
            .has_read_depth(10) \
            .has_min_read_depth(10)

        # Only the plain read depth is checked on the insertion record.
        insertion = vcf_expect.has_record_for_variant(
            Variant(chrom, 15, "C", "CT"))
        insertion.with_sample(sample_name).has_read_depth(10)

        after_insertion = vcf_expect.has_record_for_variant(
            Variant(chrom, 16, "A", ref_alt))
        after_insertion.with_sample(sample_name) \
            .has_read_depth(10) \
            .has_min_read_depth(10)
예제 #9
0
    def test_should_give_correct_output_for_different_sample_names(self):
        """Check per-sample genotypes from a two-job parallel run.

        Each expected variant should be called "1/1" in one sample and
        reported as "./." in the other.
        """
        self.sample_name1 = "SAMPLE_A"
        self.sample_name2 = "SAMPLE_B"

        n_copies1 = 1
        n_copies2 = 5
        self.setParallelAndSerialVariantCallers(n_copies1, n_copies2)
        self.vc_wrapper_parallel.add_additional_command("numberOfJobs", "2")
        self.vc_wrapper_parallel.add_additional_command("workDir", self.vc_work_dir)
        self.vc_wrapper_parallel.run()

        expected_var_A_1 = Variant(self.chrom1, 3, "CTT", "C")
        expected_var_B_1 = Variant(self.chrom2, 7, "AT", "A")

        parallel_variants_with_genotypes = self.vc_wrapper_parallel \
            .get_variant_callset(self) \
            .get_variants_with_genotypes()

        # assertIn reports the missing variant and the available keys on
        # failure, and avoids copying the keys into a throwaway list.
        self.assertIn(expected_var_A_1, parallel_variants_with_genotypes.keys())
        self.assertIn(expected_var_B_1, parallel_variants_with_genotypes.keys())

        genotypes_for_A = parallel_variants_with_genotypes[expected_var_A_1]
        genotypes_for_B = parallel_variants_with_genotypes[expected_var_B_1]

        self.assertEqual(GenotypeCall("1/1"), genotypes_for_A[self.sample_name1])
        self.assertEqual(GenotypeCall("./."), genotypes_for_A[self.sample_name2])
        self.assertEqual(GenotypeCall("./."), genotypes_for_B[self.sample_name1])
        self.assertEqual(GenotypeCall("1/1"), genotypes_for_B[self.sample_name2])
예제 #10
0
    def test_should_call_variants(self):
        """Call two SNPs, an insertion and a deletion with ploidy set to 3.

        Three equally sized read groups carry the variants in 2, 1, 3 and 3
        of the groups respectively; the expected genotypes are "0/1/1",
        "0/0/1", "1/1/1" and "1/1/1".
        """
        chrom = 'chr1'
        sample_name = 'sample'
        svc = SVCDriver(self).with_ploidy(3)

        read_options = {
            'n_rev': 10, 'n_fwd': 10, 'chrom': chrom, 'sample_name': sample_name,
        }
        with_reference = svc.with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTC***AACCCGTTACGTATGCATG", chrom=chrom)
        with_reference.with_read(
            "......C.........G.....ATG.......***.........", **read_options
        ).with_read(
            "......C...............ATG.......***.........", **read_options
        ).with_read(
            "......................ATG.......***.........", **read_options)

        vcf = svc.call().with_output_vcf().record_count(4)

        # Checked in the same order as the original: SNP, SNP, insertion,
        # deletion.
        expected_genotypes = (
            ((6, 'T', 'C'), '0/1/1'),
            ((16, 'T', 'G'), '0/0/1'),
            ((21, 'C', 'CATG'), '1/1/1'),
            ((28, 'TTAC', 'T'), '1/1/1'),
        )
        for (position, ref_allele, alt_allele), genotype in expected_genotypes:
            record = vcf.has_record_for_variant(
                Variant(chrom, position, ref_allele, alt_allele))
            record.with_sample(sample_name).has_genotype(genotype)
예제 #11
0
    def test_calls_correct_reference_between_clusters_with_uncalled_indel_between(
            self):
        """Check reference calls around two variants and a weakly seen indel.

        A deletion at position 8 and a SNP at position 23 each have 20
        supporting reads, while a deletion at the SNP position is seen in only
        2 reads. The expected output is the two well-supported variants with
        reference calls over 0-8, 10-23 and 24-41.
        """
        chrom = "1"

        driver = SVCDriver(self)
        with_reference = driver.with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        with_deletion_reads = with_reference.with_read(
            "       ..*.......................        ", n_fwd=10, n_rev=10)
        with_weak_indel_reads = with_deletion_reads.with_read(
            ".......................*............     ", n_fwd=1, n_rev=1)
        with_snp_reads = with_weak_indel_reads.with_read(
            ".......................T..               ", n_fwd=10, n_rev=10)
        with_snp_reads.with_output_ref_calls(True).with_max_cluster_distance(5)

        vcf_expect = driver.call().with_output_vcf()

        deletion = Variant(chrom, 8, "TA", "T")
        snp = Variant(chrom, 23, "A", "T")

        vcf_expect.has_reference_calls(ChromInterval(chrom, 0, 8))
        vcf_expect.has_record_for_variant(deletion)
        vcf_expect.has_reference_calls(ChromInterval(chrom, 10, 23))
        vcf_expect.has_record_for_variant(snp)
        vcf_expect.has_reference_calls(ChromInterval(chrom, 24, 41))
예제 #12
0
    def test_calls_reference_on_location_with_low_quality_variant_support(
            self):
        """Check that a SNP backed only by a low-quality base is not called.

        The second read group carries a "T" at position 17 whose quality
        string marks that base with "1". The expected output contains the
        deletion at 8 and the SNP at 23, with position 17 inside the 10-23
        reference-call interval.
        """
        chrom = "1"

        driver = SVCDriver(self)
        with_reference = driver.with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        with_deletion_reads = with_reference.with_read(
            "       ..*.......................        ", n_fwd=2, n_rev=1)
        with_low_quality_reads = with_deletion_reads.with_read(
            ".................T.....T.........        ",
            "                 1                       ",
            n_fwd=1, n_rev=1)
        with_snp_reads = with_low_quality_reads.with_read(
            ".......................T..               ", n_fwd=1, n_rev=0)
        with_snp_reads.with_output_ref_calls(True)

        vcf_expect = driver.call().with_output_vcf()

        deletion = Variant(chrom, 8, "TA", "T")
        snp = Variant(chrom, 23, "A", "T")

        vcf_expect.has_reference_calls(ChromInterval(chrom, 0, 8))
        vcf_expect.has_record_for_variant(deletion)
        vcf_expect.has_reference_calls(ChromInterval(chrom, 10, 23))
        vcf_expect.has_record_for_variant(snp)
        vcf_expect.has_reference_calls(ChromInterval(chrom, 24, 41))
예제 #13
0
    def test_phasing_for_two_heterozygous_variants_ocrn_same_strand(self):
        """Check phasing of two SNPs that always occur in the same reads.

        Half of the reads carry both SNPs and half carry neither; both
        records are expected to be phased "0|1" in phase set "8".
        """
        chrom = "1"
        sample_name = "a_sample"
        read_options = {"n_fwd": 10, "n_rev": 10, "sample_name": sample_name}

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
            .with_read("........T...T........", **read_options)
            .with_read(".....................", **read_options)
            .with_output_phased_genotypes(True)
            .with_allow_MNP_calls(False)
        )

        vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

        first_snp = Variant(chrom, 8, "G", "T")
        second_snp = Variant(chrom, 12, "A", "T")
        records_expect = vcf_expect.has_record_for_variants(first_snp, second_snp)

        sample_expect = records_expect.with_sample(sample_name)
        sample_expect.has_phased_genotypes("0|1", "0|1").has_phase_set_id("8")
예제 #14
0
    def test_phase_quality_for_phase_with_2_out_of_3_support(self):
        """Check the phase set quality reported for three phased SNPs.

        The expected quality is computed here as
        ``round(-10 * log10(1 - 2/3))``; the original author notes that the
        real value still needs to be derived from the caller's equations.
        """
        chrom = "1"
        sample_name = "a_sample"

        # (read pattern, number of forward reads == number of reverse reads)
        read_groups = (
            ("........T...T..T.....", 5),
            ("............T........", 5),
            ("........T...T........", 10),
            ("............T..T.....", 10),
        )

        svc_driver = SVCDriver(self).with_ref_sequence(
            "ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
        for pattern, per_strand in read_groups:
            svc_driver = svc_driver.with_read(
                pattern, n_fwd=per_strand, n_rev=per_strand,
                sample_name=sample_name)
        svc_driver = svc_driver \
            .with_output_phased_genotypes(True) \
            .with_allow_MNP_calls(False)

        vcf_expect = svc_driver.call().with_output_vcf().record_count(3)

        phase_fraction = 2.0 / 3.0
        # actual value needs to be figured out from equations
        expected_phase_quality = int(
            round(log10(1.0 - phase_fraction) * -10.0))

        records_expect = vcf_expect.has_record_for_variants(
            Variant(chrom, 8, "G", "T"),
            Variant(chrom, 12, "A", "T"),
            Variant(chrom, 15, "A", "T"),
        )
        sample_expect = records_expect.with_sample(sample_name)
        sample_expect \
            .has_phased_genotypes("0|1", "1|1", "1|0") \
            .has_phase_set_id("8") \
            .has_phase_set_quality(expected_phase_quality)
예제 #15
0
    def test_phase_alignment_for_het_variants_for_three_clusters_when_first_cluster_is_homozygous(
            self):
        """Check phasing over three SNPs when the first is in every read.

        The SNP at position 4 is expected as "1|1" in phase set "4"; the SNPs
        at positions 18 and 34, which occur together in one read group, are
        both expected as "1|0" in phase set "13".
        """
        chrom = "1"
        sample_name = "sample1"
        read_options = {"n_fwd": 10, "n_rev": 10, "sample_name": sample_name}

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
            .with_read(
                "....A....................................", **read_options)
            .with_read(
                "....A.............C...............T......", **read_options)
            .with_output_phased_genotypes(True)
            .with_allow_MNP_calls(False)
            .with_max_cluster_distance(5)
            .with_min_cluster_distance(5)
        )

        vcf_expect = svc_driver.call().with_output_vcf().record_count(3)

        # (position, ref, alt, expected phased genotype, expected phase set)
        expectations = (
            (4, "C", "A", "1|1", "4"),
            (18, "T", "C", "1|0", "13"),
            (34, "A", "T", "1|0", "13"),
        )
        for position, ref_allele, alt_allele, genotype, phase_set in expectations:
            record = vcf_expect.has_record_for_variants(
                Variant(chrom, position, ref_allele, alt_allele))
            record.with_sample(sample_name) \
                .has_exact_phased_genotypes(genotype) \
                .has_phase_set_id(phase_set)
예제 #16
0
 def test_find_adjacent_insertion_and_snp(self):
     """An inserted base directly followed by a SNP yields both variants.

     The '*' in the reference string marks the insertion slot filled by the
     read.
     """
     reference = ReferenceChromosome("T*ATAAAAAAAT")
     sequence = Sequence(reference, ".CG.........")
     found = sequence.variants
     insertion = Variant(reference.chrom, 0, "T", "TC")
     snp = Variant(reference.chrom, 1, "A", "G")
     self.assertEqual(found, {insertion, snp})
예제 #17
0
    def test_calls_deletion_and_snp_at_same_location_in_repeat_region_with_few_reads_as_anchors(
            self):
        """Check a deletion and a SNP that overlap inside a GA repeat.

        Five reverse-only reads carry a two-base deletion and five carry a
        T>G SNP at the same place; the SNP and deletion records are expected
        to have phased genotypes ".|1" and "1|." respectively.
        """
        chrom = "1"
        sample = "sample"
        read_options = {
            "n_rev": 5, "n_fwd": 0, "chrom": chrom, "sample_name": sample,
        }

        svc_driver = SVCDriver(self)
        with_reference = svc_driver.with_ref_sequence(
            'CGAGAGAGAGAGAGAGAGAGATAGAGAGAGAGAGAGAGAGTC', chrom=chrom)
        with_deletion_reads = with_reference.with_read(
            '....................**....................', **read_options)
        with_deletion_reads.with_read(
            '.....................G....................', **read_options)

        vcf_expect = svc_driver.call().with_output_vcf()

        snp = Variant(chrom, 21, "T", "G")
        deletion = Variant(chrom, 19, "GAT", "G")
        records_expect = vcf_expect.has_record_for_variants(snp, deletion)
        records_expect.with_sample(sample).has_phased_genotypes(".|1", "1|.")
예제 #18
0
    def test_symmetry_in_repetitive_reference(self):
        """Check two SNPs in a poly-A reference get the same genotype.

        Three equally sized read groups carry the SNP at position 8, no
        variant, and the SNP at position 17; both records are expected to
        have genotype "0/1".
        """
        chrom = "1"
        sample_name = "a_sample"

        m = 2
        read_options = {"n_fwd": m, "n_rev": m, "sample_name": sample_name}

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence("TAAAAAAAAAAAAAAAAAAAAAAAAAT", chrom=chrom)
            .with_read("........T..................", **read_options)
            .with_read("...........................", **read_options)
            .with_read(".................T.........", **read_options)
            .with_allow_MNP_calls(False)
        )

        vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

        # The later SNP is checked first, as in the original.
        for position in (17, 8):
            record = vcf_expect.has_record_for_variants(
                Variant(chrom, position, "A", "T"))
            record.with_sample(sample_name).has_phased_genotypes("0/1")
예제 #19
0
 def test_find_multiple_variants(self):
     """A read mixing two SNPs, an insertion and a deletion yields all four."""
     reference = ReferenceChromosome("TA*AAAGCTAACT")
     sequence = Sequence(reference, ".GC...T...**.")
     found = sequence.variants
     expected = {
         Variant(reference.chrom, 1, "A", "G"),     # SNP
         Variant(reference.chrom, 1, "A", "AC"),    # insertion
         Variant(reference.chrom, 5, "G", "T"),     # SNP
         Variant(reference.chrom, 8, "AAC", "A"),   # two-base deletion
     }
     self.assertEqual(found, expected)
예제 #20
0
    def test_eq(self):
        """Check ``Record.__eq__`` against records differing in one argument.

        The first comparison uses identical arguments and must be equal; every
        following comparison changes exactly one constructor argument relative
        to the baseline and must be unequal.
        """
        reference = Record(None, Variant("1", 20, "A", "G"), set(), 0.0, set(),
                           InfoData(None, {}), SampleData([], []), False)

        # Identical arguments.
        self.assertTrue(
            reference == Record(None, Variant("1", 20, "A", "G"), set(), 0.0,
                                set(), InfoData(None, {}), SampleData([], []),
                                False))

        # Different variant (chromosome "2").
        self.assertFalse(
            reference == Record(None, Variant("2", 20, "A", "G"), set(), 0.0,
                                set(), InfoData(None, {}), SampleData([], []),
                                False))

        # Non-empty third argument. A set literal is used because
        # set("rs0") would build {"r", "s", "0"} rather than {"rs0"}.
        self.assertFalse(
            reference == Record(None, Variant("1", 20, "A", "G"), {"rs0"}, 0.0,
                                set(), InfoData(None, {}), SampleData([], []),
                                False))

        # Different fourth argument (5.0 instead of 0.0).
        self.assertFalse(
            reference == Record(None, Variant("1", 20, "A", "G"), set(), 5.0,
                                set(), InfoData(None, {}), SampleData([], []),
                                False))

        # Non-empty fifth argument; set literal for the same reason as above
        # (set("CV") would build {"C", "V"}).
        self.assertFalse(
            reference == Record(None, Variant("1", 20, "A", "G"), set(), 0.0,
                                {"CV"}, InfoData(None, {}), SampleData([], []),
                                False))

        # Different info data.
        self.assertFalse(
            reference == Record(None, Variant("1", 20, "A", "G"), set(), 0.0,
                                set(), InfoData(None, {'AF': []}),
                                SampleData([], []), False))

        # Different sample data.
        self.assertFalse(
            reference == Record(None, Variant("1", 20, "A", "G"), set(), 0.0,
                                set(), InfoData(None, {}),
                                SampleData([], ['NA12787']), False))

        # Different final boolean argument.
        self.assertFalse(
            reference == Record(None, Variant("1", 20, "A", "G"), set(), 0.0,
                                set(), InfoData(None, {}), SampleData([], []),
                                True))
예제 #21
0
    def test_should_return_all_variants(self):
        """Check per-sample variants and their union on the sample bank."""
        bank = SampleBank("AAATTTTGGGAG")
        for name in ("SAMPLE1", "SAMPLE2"):
            bank.add_sample_name(name)

        bank["SAMPLE1"].add_sequence(".....G......")
        bank["SAMPLE2"].add_sequence("..........*.")

        snp = Variant(bank.reference.chrom, 5, "T", "G")
        deletion = Variant(bank.reference.chrom, 9, "GA", "G")

        self.assertEqual(bank["SAMPLE1"].variants, {snp})
        self.assertEqual(bank["SAMPLE2"].variants, {deletion})
        # The bank itself reports the variants of every sample.
        self.assertEqual(bank.variants, {snp, deletion})
예제 #22
0
    def test_should_place_variants_at_custom_position(self):
        """Check variant positions when the bank is built with 100.

        The expected positions (105 and 109) are the offsets within the
        sequence plus the 100 passed as second argument to ``SampleBank``.
        """
        bank = SampleBank("AAATTTTGGGAG", 100)
        for name in ("SAMPLE1", "SAMPLE2"):
            bank.add_sample_name(name)

        bank["SAMPLE1"].add_sequence(".....G......")
        bank["SAMPLE2"].add_sequence("..........*.")

        snp = Variant(bank.reference.chrom, 105, "T", "G")
        deletion = Variant(bank.reference.chrom, 109, "GA", "G")

        self.assertEqual(bank["SAMPLE1"].variants, {snp})
        self.assertEqual(bank["SAMPLE2"].variants, {deletion})
        self.assertEqual(bank.variants, {snp, deletion})
예제 #23
0
    def test_should_find_correct_variants(self):
        """Check the variant set produced by a two-job parallel run.

        The expected set repeats a fixed group of variants once per copy on
        each chromosome, shifted by that chromosome's repeat length.
        """
        n_copies1 = 1
        n_copies2 = 5
        self.setParallelAndSerialVariantCallers(n_copies1, n_copies2)
        self.vc_wrapper_parallel.add_additional_command("numberOfJobs", "2")
        self.vc_wrapper_parallel.add_additional_command("workDir", self.vc_work_dir)
        self.vc_wrapper_parallel.add_additional_command("allowMNPCalls", False)
        self.vc_wrapper_parallel.run()

        # (position within one copy, ref allele, alt allele)
        chrom1_pattern = (
            (3, "CTT", "C"),
            (11, "T", "TCTG"),
            (18, "GT", "G"),
            (25, "C", "T"),
            (37, "G", "A"),
            (40, "G", "T"),
        )
        chrom2_pattern = (
            (7, "AT", "A"),
            (22, "C", "T"),
        )

        expected_vars = set()
        for copy_index in range(n_copies1):
            shift = copy_index * self.repeat_length1
            for position, ref_allele, alt_allele in chrom1_pattern:
                expected_vars.add(
                    Variant(self.chrom1, position + shift, ref_allele, alt_allele))

        for copy_index in range(n_copies2):
            shift = copy_index * self.repeat_length2
            for position, ref_allele, alt_allele in chrom2_pattern:
                expected_vars.add(
                    Variant(self.chrom2, position + shift, ref_allele, alt_allele))

        callset = self.vc_wrapper_parallel.get_variant_callset(self)
        actual_parallel_variants = callset.get_variants()
        self.assertEqual(expected_vars, actual_parallel_variants)
예제 #24
0
    def test_bad_reads_filter_not_applied_when_median_read_is_good(self):
        """Check the "BR" filter is absent when most reads have good bases.

        20 reads carry low base-quality marks ("1") near the SNP and 21 reads
        carry higher marks ("4"); the single expected record must have no
        filters.
        """
        svc = SVCDriver(self)
        svc = svc.with_var_filters("BR")
        svc = svc.with_bad_reads_window_size(7)
        svc = svc.with_min_bad_reads_score(20)

        with_reference = svc.with_ref_sequence(
            # 1234567890123456789
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG")
        with_low_quality_reads = with_reference.with_read(
            "................G...........................",
            "           1      1                         ",
            n_rev=10, n_fwd=10)
        with_low_quality_reads.with_read(
            "................G...........................",
            "         4444444 4444444                    ",
            n_rev=11, n_fwd=10)

        vcf_expectation = svc.call().with_output_vcf()
        vcf_expectation.record_count(1)

        record = vcf_expectation.has_record_for_variant(
            Variant(DEFAULT_CHROM, 16, "T", "G"))
        record.with_no_filters()
예제 #25
0
    def test_should_have_near_zero_RR_genotype_likelihood_for_hom_alt_call(
            self):
        """Check the RR genotype likelihood of a reference-only sample.

        sample_1 matches the reference in all reads while sample_2 carries an
        A>C SNP at position 12. In the record for that SNP, sample_1 is
        expected to have genotype "0|0" and an RR genotype likelihood of 0.0.
        """
        chr1 = 'chr1'
        bank = SampleBank("TTTTTAAAAAAAAAAAAAAAAAAAA", chrom=chr1)

        # (sample name, read pattern); every sample gets 20 + 20 reads.
        samples = (
            ('sample_1', "........................."),
            ('sample_2', "............C............"),
        )
        for name, pattern in samples:
            sequences = bank.add_sample_name(name)
            sequences.add_sequence(pattern, n_fwd=20, n_rev=20)

        builder = VariantCallerBuilderFromSampleBank(bank, self.work_dir)
        variant_output = builder.build().run().output_vcf

        snp = Variant(chr1, 12, "A", "C")
        record_expectation = VCFExpectation(
            self, variant_output).has_record_for_variant(snp)
        sample_expectation = record_expectation.with_sample("sample_1")

        sample_expectation.has_genotype("0|0").has_RR_genotype_likelihood(0.0)
예제 #26
0
    def test_should_not_apply_bad_reads_to_insertion_if_all_supporting_reads_have_high_base_qualities(
            self):
        """Check the "BR" filter is absent on a well-supported insertion.

        Reads without the insertion carry a low-quality ("1") SNP base
        nearby, while the reads carrying the inserted "T" have no quality
        string. The single expected record, the insertion, must have no
        filters.
        """
        svc = SVCDriver(self)
        svc = svc.with_var_filters("BR")
        svc = svc.with_bad_reads_window_size(3)
        svc = svc.with_min_bad_reads_score(15)

        with_reference = svc.with_ref_sequence(
            # 1234567890123 456789
            "AAAGCGTACAACCG*GGTTAGTCACAAACCCGTTACGTATGCATG")
        with_reference.with_read(
            "..............*..G...........................",
            "                 1                           ",
            n_rev=11, n_fwd=10)
        svc.with_read(
            "..............T..............................",
            n_rev=10, n_fwd=10)

        vcf_expectation = svc.call().with_output_vcf()
        vcf_expectation.record_count(1)

        insertion = Variant(DEFAULT_CHROM, 13, "G", "GT")
        vcf_expectation.has_record_for_variant(insertion).with_no_filters()
예제 #27
0
    def test_bad_reads_filter_not_applied_if_one_sample_is_not_naughty(self):
        """Check the "BR" filter is absent when one sample has good bases.

        Samples "BAD" and "UGLY" carry base-quality marks of "0" around the
        SNP while sample "GOOD" carries marks of "3"; the single expected
        record must have no filters.
        """
        svc = SVCDriver(self)
        svc.with_var_filters("BR")
        svc.with_bad_reads_window_size(7)
        svc.with_min_bad_reads_score(13)

        snp_read = "................G..........................."
        # (sample name, base-quality string, reads per strand)
        sample_reads = (
            ("GOOD", "         3333333 3333333                    ", 2),
            ("BAD", "         0000000 0000000                    ", 10),
            ("UGLY", "         00000      0000                    ", 10),
        )

        builder = svc.with_ref_sequence(
            # 1234567890123456789
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG")
        for sample_name, qualities, per_strand in sample_reads:
            builder = builder.with_read(
                snp_read,
                qualities,
                sample_name=sample_name,
                n_rev=per_strand,
                n_fwd=per_strand)

        vcf_expectation = svc.call().with_output_vcf()
        vcf_expectation.record_count(1)

        record = vcf_expectation.has_record_for_variant(
            Variant(DEFAULT_CHROM, 16, "T", "G"))
        record.with_no_filters()
예제 #28
0
    def test_should_generate_variant_from_ascii_text(self):
        """Check variants generated from two ASCII read patterns.

        The first pattern holds a SNP and a deletion, the second a different
        SNP at the same position; all three variants are expected.
        """
        generator = AsciiVariantGenerator(ReferenceChromosome("ATAAAAAAAAAT"))
        patterns = [
            ".A........*.",
            ".C..........",
        ]

        gen_vars = generator.get_variants(patterns)

        expected = {
            Variant(generator.reference.chrom, 1, "T", "A"),
            Variant(generator.reference.chrom, 1, "T", "C"),
            Variant(generator.reference.chrom, 9, "AA", "A"),
        }
        self.assertEqual(gen_vars, expected)
예제 #29
0
    def test_doesnt_give_a_flying_damn_about_spurious_filter_header(self):
        """Check a candidate VCF with an odd filter name is still accepted.

        The filter, made of punctuation characters, is declared in the VCF
        schema and applied to the candidate record; the call is expected to
        succeed and genotype the variant "1/1".
        """
        chrom = "22"
        dodgy_sample = "bobs_your_uncle"
        variant = Variant(chrom, 11, "A", "C")

        complex_filter_name = '.+-*\\/~@?!%^&><=\"\'(){}[]_|'
        schema = Schema()
        schema.set_filter(complex_filter_name, 'unusual characters')

        genotype_vcf = VCFBuilder(
            join(self.work_dir, "genotype.vcf"), schema=schema)
        genotype_vcf.with_record_from_variant(
            variant, filters={complex_filter_name})
        genotype_vcf.build().index()

        driver = SVCDriver(self)
        with_reference = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        with_reads = with_reference.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=dodgy_sample)
        with_reads.with_genotype_alleles(genotype_vcf.compressed_filename)

        expect = driver.call(expected_success=True)
        record = expect.with_output_vcf().has_record_for_variant(variant)
        record.with_sample(dodgy_sample).has_genotype("1/1")
예제 #30
0
    def test_doesnt_give_a_flying_damn_about_spurious_filters(self):
        """Check a candidate record with an odd filter value is still accepted.

        Unlike the filter-header variant of this test, no schema entry is set
        for the filter here; the call is expected to succeed and genotype the
        variant "1/1".
        """
        chrom = "22"
        dodgy_sample = "bobs_your_uncle"
        variant = Variant(chrom, 11, "A", "C")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(
            variant, filters={"#$.:@$%$%^&**()7!"})
        genotype_vcf.build().index()

        driver = SVCDriver(self)
        with_reference = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        with_reads = with_reference.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=dodgy_sample)
        with_reads.with_genotype_alleles(genotype_vcf.compressed_filename)

        expect = driver.call(expected_success=True)
        record = expect.with_output_vcf().has_record_for_variant(variant)
        record.with_sample(dodgy_sample).has_genotype("1/1")