Exemplo n.º 1
0
    def test_raises_if_genotyping_file_is_not_expected_format(self):
        """Expect the call to fail when the genotyping VCF is only built.

        The VCF is written with ``build()`` alone (no compression, no index)
        and its plain filename is handed to the driver as the genotyping
        alleles file.
        """
        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.build()  # deliberately neither compressed nor indexed

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA")
            .with_read("....G................", n_fwd=5, n_rev=5)
            .with_genotype_alleles(genotype_vcf.filename)
            .with_verbosity(0)
        )

        failed_call = svc.call(expected_success=False)
        failed_call.unexpected_genotype_file_format(genotype_vcf.filename)
Exemplo n.º 2
0
    def test_should_handle_complex_variant_input(self):
        """Genotype a "CAA" -> "CA" input variant.

        Expects the log to carry a trimmed-variant warning pairing the input
        variant with its "A" -> "" form at position 11, and the output VCF to
        hold exactly one record.
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        untrimmed = Variant(contig, 10, "CAA", "CA")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(untrimmed)
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "...........C.........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call()

        log_checks = outcome.with_log()
        trimmed = Variant(contig, 11, "A", "")
        log_checks.input_variant_trimmed_warning(untrimmed, trimmed)

        outcome.with_output_vcf().record_count(1)
Exemplo n.º 3
0
    def test_doesnt_give_a_flying_damn_about_spurious_filters(self):
        """A genotyping record with an odd filter string is still genotyped.

        The input record carries a filter made of punctuation characters; the
        call is expected to succeed and report genotype "1/1" for the sample.
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        snp = Variant(contig, 11, "A", "C")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(
            snp,
            filters={"#$.:@$%$%^&**()7!"},
        )
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "...........C.........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call(expected_success=True)
        record = outcome.with_output_vcf().has_record_for_variant(snp)
        record.with_sample(sample_id).has_genotype("1/1")
Exemplo n.º 4
0
    def test_doesnt_give_a_flying_damn_about_spurious_filter_header(self):
        """A filter with unusual characters, declared in the schema, is tolerated.

        The filter name is registered on a fresh ``Schema`` and attached to the
        genotyping record; the call is expected to succeed and report genotype
        "1/1" for the sample.
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        snp = Variant(contig, 11, "A", "C")

        odd_filter = '.+-*\\/~@?!%^&><=\"\'(){}[]_|'
        header_schema = Schema()
        header_schema.set_filter(odd_filter, 'unusual characters')

        genotype_vcf = VCFBuilder(
            join(self.work_dir, "genotype.vcf"),
            schema=header_schema,
        )
        genotype_vcf.with_record_from_variant(snp, filters={odd_filter})
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "...........C.........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call(expected_success=True)
        record = outcome.with_output_vcf().has_record_for_variant(snp)
        record.with_sample(sample_id).has_genotype("1/1")
Exemplo n.º 5
0
    def test_genotypes_mnp_correctly_with_supporting_reads(self):
        """Genotype an "AAA" -> "CAC" variant from two groups of reads.

        One group of reads carries both substitutions ("C.C"), the other only
        the first one. The expected genotype for the sample is "./1".
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        mnp = Variant(contig, 11, "AAA", "CAC")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(mnp)
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "...........C.C.......",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_read(
                "...........C.........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call()
        record = outcome.with_output_vcf().has_record_for_variant(mnp)
        record.with_sample(sample_id).has_genotype("./1")
Exemplo n.º 6
0
    def test_genotypes_variant_correctly_complex_indel_which_is_snp_and_deletion(
            self):
        """Genotype a "CA" -> "T" variant against reads showing "T*".

        The reads show a substitution immediately followed by a deleted base;
        the expected genotype for the sample is "1/1".
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        complex_indel = Variant(contig, 10, "CA", "T")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(complex_indel)
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "..........T*..........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call()
        record = outcome.with_output_vcf().has_record_for_variant(complex_indel)
        record.with_sample(sample_id).has_genotype("1/1")
Exemplo n.º 7
0
    def test_gets_correct_genotype_if_not_fully_left_aligned(self):
        """Genotype an "AA" -> "A" deletion supplied at position 12.

        Per the test name, the input deletion is not at its left-most
        position within the run of A bases. The output VCF is expected to
        hold exactly one record, for that variant, with genotype "1/1".
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        deletion = Variant(contig, 12, "AA", "A")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(deletion)
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence(
                "ACGCCCCCTGCAAAAAAAAATACGCCCCCTACGCCCCCT",
                chrom=contig,
                pos_from=0,
            )
            .with_read(
                "...................*...................",
                n_fwd=10,
                n_rev=10,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call()
        vcf_checks = outcome.with_output_vcf().record_count(1)
        record = vcf_checks.has_record_for_variant(deletion)
        record.with_sample(sample_id).has_genotype("1/1")
Exemplo n.º 8
0
    def calls_variants(self, ref, sequence_list, candidate_ascii_haplotypes, prior, expected_ascii_haplotypes):
        """Run the caller with candidate variants and assert on the callset.

        Args:
            ref: Reference passed to ``SampleBank``.
            sequence_list: Sequences added to the bank under sample "TEST".
            candidate_ascii_haplotypes: Haplotypes converted to the candidate
                variants that are written to the candidate VCF.
            prior: Value stored under the "AF" info key of every candidate
                record.
            expected_ascii_haplotypes: Haplotypes converted to the variants
                the resulting callset must equal (compared as a set).
        """
        bank = SampleBank(ref)
        bank.add_sample_with_seqs_and_quals("TEST", sequence_list, 1, 0)

        ascii_generator = AsciiVariantGenerator(bank.reference)
        candidates = ascii_generator.get_variants(candidate_ascii_haplotypes)
        expected = ascii_generator.get_variants(expected_ascii_haplotypes)

        candidates_vcf = VCFBuilder(path.join(self.work_dir, "candiate_variants.vcf"))
        candidates_vcf.schema.set_info_data('AF', 'A', 'Float', 'Allele Frequency')
        for candidate in candidates:
            # A fresh InfoData per record, each carrying the same prior.
            af_info = InfoData(candidates_vcf.schema, {"AF": prior})
            candidates_vcf.with_record_from_variant(candidate, info=af_info)
        candidates_vcf.build().index()

        caller_builder = VariantCallerBuilderFromSampleBank(bank, self.work_dir)
        caller_builder.configuration[CANDIDATE_VARIANTS_FILE_KEY] = candidates_vcf.compressed_filename

        finished_run = caller_builder.build().run()
        called = finished_run.get_variant_callset(self).get_variants()
        self.assertEqual(called, set(expected))
Exemplo n.º 9
0
    def test_raises_if_genotyping_file_not_indexed(self):
        """Expect the call to fail when the genotyping VCF has no index.

        The VCF is built and bgzipped but never indexed; the failed call is
        checked against the path of the missing index file.
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        snp = Variant(contig, 11, "A", "C")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(snp)
        genotype_vcf.build().bgzip()  # compressed only; the index is left out on purpose

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "...........C.........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
            .with_verbosity(0)
        )

        failed_call = svc.call(expected_success=False)
        failed_call.missing_genotype_index_file(genotype_vcf.compressed_filename_index)
Exemplo n.º 10
0
    def test_should_have_zero_bad_reads_for_candidate_variant_with_no_reads_covering_variant(self):
        """A candidate variant must not appear in the output VCF here.

        A single "T" -> "C" candidate with AF 0.72 is supplied via a candidate
        variants file; the output VCF is expected to have no record for it.
        NOTE(review): presumably the gap in sample_1's read and the blank
        sample_2 read leave the candidate position uncovered -- confirm
        against the driver's default reference start position.
        """
        contig = "1"
        candidates_vcf = VCFBuilder(path.join(self.work_dir, "candiate_variants.vcf"))
        candidates_vcf.schema.set_info_data('AF', 'A', 'Float', 'Allele Frequency')

        uncovered_snp = Variant(contig, 30, 'T', 'C')
        af_info = InfoData(candidates_vcf.schema, {"AF": [0.72]})
        candidates_vcf.with_record_from_variant(uncovered_snp, info=af_info)
        candidates_vcf.build().index()

        svc = (
            SVCDriver(self)
            .with_allow_MNP_calls(True)
            .with_ref_sequence(
                "TGTTATTAATCCCTTGTCAGATGTTATTAATCCCTTGTCAGTCCCTTGTCAGT", chrom=contig)
            .with_read(
                "...........................C.. ......................", n_fwd=10, n_rev=10, sample_name='sample_1')
            .with_read(
                "                                                     ", n_fwd=10, n_rev=10, sample_name='sample_2')
            .with_candidate_variants_file(candidates_vcf.compressed_filename)
        )

        svc.call().with_output_vcf().missing_record_for_variant(uncovered_snp)
Exemplo n.º 11
0
    def test_should_raise_if_output_ref_calls_is_switched_on(self):
        """Expect failure when genotyping is combined with reference-call output.

        The driver is given a genotyping alleles file and has output of
        reference calls switched on; the failed call is checked for the
        corresponding incompatibility error.
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        deletion = Variant(contig, 10, "CAA", "CA")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(deletion)
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "...........C.........",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
            .with_output_ref_calls(True)
        )

        failed_call = svc.call(False)
        failed_call.genotyping_is_incompatible_with_outputting_reference_calls_error()
Exemplo n.º 12
0
    def test_doesnt_call_extra_variants(self):
        """A mismatch seen only in the reads must not be emitted when genotyping.

        The reads carry a "G" that is not part of the genotyping file; the
        output VCF is expected to contain no records.
        NOTE(review): the genotyping variant is placed on contig "1" while the
        reference and reads use "22" -- confirm this mismatch is intentional.
        """
        contig = "22"
        sample_id = "bobs_your_uncle"
        genotyped_snp = Variant("1", 11, "A", "C")

        genotype_vcf = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        genotype_vcf.with_record_from_variant(genotyped_snp)
        genotype_vcf.build().index()

        svc = SVCDriver(self)
        (
            svc.with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
            .with_read(
                "....G................",
                n_fwd=5,
                n_rev=5,
                chrom=contig,
                sample_name=sample_id,
            )
            .with_genotype_alleles(genotype_vcf.compressed_filename)
        )

        outcome = svc.call()
        outcome.with_output_vcf().record_count(0)