示例#1
0
 def test_getitem_method_returns_expected_value(self):
     """Check item access (``[]``) on the genotype data of both fixture samples."""
     first_sample = self.sample_data.get_genotype_data("sample_name1")
     self.assertEqual(first_sample["GT"], GenotypeCall("./."))
     self.assertEqual(first_sample["key"], [1, 2])

     second_sample = self.sample_data.get_genotype_data("sample_name2")
     self.assertEqual(second_sample["GT"], GenotypeCall("0/1"))
     self.assertEqual(second_sample["key"], [])
示例#2
0
 def test_should_get_homozygous_alt_if_one_is_homozyzgous_ref_and_other_is_homozygous_alt(
         self):
     """Merging "0/0" with "1/1" must give back a call equal to "1/1"."""
     hom_ref = GenotypeCall("0/0")
     hom_alt = GenotypeCall("1/1")
     merged = merge_genotype_calls(hom_ref, hom_alt)
     self.assertEqual(merged, hom_alt)
示例#3
0
 def test_should_combine_two_unphased_heterozygous_genotypes_to_homozygous_alt(
         self):
     """Merging two unphased "0/1" calls is expected to yield "1/1"."""
     first_het = GenotypeCall("0/1")
     second_het = GenotypeCall("0/1")
     expected = GenotypeCall("1/1")
     merged = merge_genotype_calls(first_het, second_het)
     self.assertEqual(expected, merged)
示例#4
0
    def test_read_sample_data(self):
        """Populate a single sample field by field and read the values back.

        The field keys come from the sample-data entries of the schema built
        for ``vcf_example.vcf``; values are read back through ``genotypes()``,
        ``get_field()`` and the object returned by ``get_genotype_data()``.
        """
        schema = self.__get_example_schema("vcf_example.vcf")
        field_keys = [key for key, _ in schema.iter_sample_data()]
        sample_name = "sample1"

        sample_data = SampleData(field_keys, [sample_name])

        stored_fields = (
            ("GT", GenotypeCall("1|0")),
            ("PL", [3000, 0, 3000]),
            ("GQ", [1000]),
            ("PQ", [2000]),
            ("PS", [60000]),
            ("AD", [140, 110]),
            ("DP", [250]),
            ("VAF", [0.4]),
        )
        for key, value in stored_fields:
            sample_data.add_sample_data(sample_name, key, value)

        # Access through the SampleData object itself.
        self.assertTrue(sample_data.has_sample(sample_name))
        self.assertEqual(sample_data.genotypes(),
                         {sample_name: GenotypeCall("1|0")})
        self.assertEqual(sample_data.get_field(sample_name, 'GT'),
                         GenotypeCall("1|0"))
        self.assertEqual(sample_data.get_field(sample_name, 'PL'),
                         [3000, 0, 3000])

        # Access through the per-sample genotype data.
        per_sample = sample_data.get_genotype_data(sample_name)
        self.assertEqual(per_sample.genotype(), GenotypeCall("1|0"))
        self.assertEqual(per_sample['GT'], GenotypeCall("1|0"))
        self.assertEqual(per_sample['PL'], [3000, 0, 3000])
示例#5
0
 def test_should_get_homozygous_ref_if_combining_two_homozygous_ref_genotypes(
         self):
     """Merging "0/0" with "0/0" is expected to stay "0/0"."""
     left = GenotypeCall("0/0")
     right = GenotypeCall("0/0")
     merged = merge_genotype_calls(left, right)
     self.assertEqual(merged, GenotypeCall("0/0"))
示例#6
0
    def test_should_give_correct_output_for_different_sample_names(self):
        """Run the parallel caller with two differently named samples.

        Both expected variants must be present in the call set; each one is
        expected to be ``1/1`` in one sample and ``./.`` in the other.
        """
        self.sample_name1 = "SAMPLE_A"
        self.sample_name2 = "SAMPLE_B"

        n_copies1 = 1
        n_copies2 = 5
        self.setParallelAndSerialVariantCallers(n_copies1, n_copies2)
        self.vc_wrapper_parallel.add_additional_command("numberOfJobs", "2")
        self.vc_wrapper_parallel.add_additional_command("workDir", self.vc_work_dir)
        self.vc_wrapper_parallel.run()

        expected_var_A_1 = Variant(self.chrom1, 3, "CTT", "C")
        expected_var_B_1 = Variant(self.chrom2, 7, "AT", "A")

        parallel_variants_with_genotypes = self.vc_wrapper_parallel \
            .get_variant_callset(self) \
            .get_variants_with_genotypes()

        # assertIn reports both the missing variant and the variants that
        # were called, which assertTrue(x in y) does not.
        called_variants = list(parallel_variants_with_genotypes.keys())
        self.assertIn(expected_var_A_1, called_variants)
        self.assertIn(expected_var_B_1, called_variants)

        self.assertEqual(GenotypeCall("1/1"), parallel_variants_with_genotypes[expected_var_A_1][self.sample_name1])
        self.assertEqual(GenotypeCall("./."), parallel_variants_with_genotypes[expected_var_A_1][self.sample_name2])
        self.assertEqual(GenotypeCall("./."), parallel_variants_with_genotypes[expected_var_B_1][self.sample_name1])
        self.assertEqual(GenotypeCall("1/1"), parallel_variants_with_genotypes[expected_var_B_1][self.sample_name2])
示例#7
0
 def test_values_method_returns_expected_data(self):
     """Check the sequence returned by ``values()`` for both fixture samples."""
     first_sample = self.sample_data.get_genotype_data("sample_name1")
     first_values = list(first_sample.values())
     self.assertEqual(first_values, [GenotypeCall("./."), [1, 2]])

     second_sample = self.sample_data.get_genotype_data("sample_name2")
     second_values = list(second_sample.values())
     self.assertEqual(second_values, [GenotypeCall("0/1"), []])
示例#8
0
    def test_should_merge_genotype_call_object_in_sample_data(self):
        """Merging another SampleData's genotypes updates the stored GT field."""
        target = SampleData(['GT'], ['sample_name'])
        target.add_sample_data('sample_name', 'GT', GenotypeCall('0/1'))
        other = SampleData(['GT'], ['sample_name'])
        other.add_sample_data('sample_name', 'GT', GenotypeCall('0/1'))

        incoming_genotypes = other.genotypes()
        target.merge_genotype_calls(incoming_genotypes)

        merged_genotype = target.get_field("sample_name", "GT")
        self.assertEqual(merged_genotype, GenotypeCall("1/1"))
示例#9
0
    def test_should_fail_if_sample_data_objects_have_different_sample(self):
        """Merging genotypes that belong to a different sample name must raise."""
        left = SampleData(['GT'], ['sample_name_1'])
        left.add_sample_data('sample_name_1', 'GT', GenotypeCall('0/0'))
        right = SampleData(['GT'], ['sample_name_2'])
        right.add_sample_data('sample_name_2', 'GT', GenotypeCall('0/0'))

        # Both operands are evaluated outside the ``with`` block so that only
        # the merge call itself is allowed to raise.
        merge = left.merge_genotype_calls
        foreign_genotypes = right.genotypes()
        with self.assertRaises(Exception):
            merge(foreign_genotypes)
示例#10
0
 def test_default_values_are_assigned_when_sample_data_is_constructed(self):
     """A fresh SampleData reports "./." for GT and ``[]`` for the other keys."""
     sample_names = ('sample_name1', 'sample_name2')
     sample_data = SampleData(['GT', 'key1', 'key2'], list(sample_names))

     for sample_name in sample_names:
         self.assertEqual(sample_data.get_field(sample_name, 'GT'),
                          GenotypeCall("./."))

     for key in ('key1', 'key2'):
         for sample_name in sample_names:
             self.assertEqual(sample_data.get_field(sample_name, key), [])
示例#11
0
 def test_homozygous_unphased_genotypes(self):
     """Check ``normalized_allele_count`` for homozygous unphased calls.

     Reference-only calls are expected to give ``(1, )`` and calls made of a
     single alternate allele ``(0, 1)``, for one, two and three alleles.
     """
     expected_counts = (
         ((1, ), ('0', '0/0', '0/0/0')),
         ((0, 1), ('1', '1/1', '1/1/1')),
         ((0, 1), ('2', '2/2', '2/2/2')),
     )
     for expected_count, genotype_strings in expected_counts:
         for genotype_string in genotype_strings:
             # subTest names the failing genotype and keeps checking the rest.
             with self.subTest(genotype=genotype_string):
                 self.assertEqual(
                     expected_count,
                     GenotypeCall(genotype_string).normalized_allele_count)
示例#12
0
 def test_should_not_mark_following_as_called(self):
     """Genotypes without any alternate allele must not count as called.

     Covers missing, partially missing and reference-only strings, phased
     and unphased.
     """
     not_called = (
         "./.", "./0", "0/.", "0|.", ".|0", ".", "0", "././.", ".|0|.",
     )
     for genotype_string in not_called:
         # subTest names the failing genotype and keeps checking the rest.
         with self.subTest(genotype=genotype_string):
             self.assertFalse(GenotypeCall(genotype_string).is_called())
示例#13
0
 def test_homozygous_phased_genotypes(self):
     """Check ``normalized_allele_count`` for homozygous phased calls.

     Reference-only calls are expected to give ``(1, )`` and calls made of a
     single alternate allele ``(0, 1)``, for one, two and three alleles.
     """
     expected_counts = (
         ((1, ), ('0', '0|0', '0|0|0')),
         ((0, 1), ('1', '1|1', '1|1|1')),
         ((0, 1), ('2', '2|2', '2|2|2')),
     )
     for expected_count, genotype_strings in expected_counts:
         for genotype_string in genotype_strings:
             # subTest names the failing genotype and keeps checking the rest.
             with self.subTest(genotype=genotype_string):
                 self.assertEqual(
                     expected_count,
                     GenotypeCall(genotype_string).normalized_allele_count)
示例#14
0
 def test_should_mark_following_as_heterozygous(self):
     """Genotypes mixing an alternate allele with another value are heterozygous.

     Covers alt/ref, alt/missing and alt/other-alt combinations, phased and
     unphased.
     """
     heterozygous = (
         "0/1", "1/0", "1/.", "./1", "0|1", "1|0", "1|.", ".|1", "1|2",
     )
     for genotype_string in heterozygous:
         # subTest names the failing genotype and keeps checking the rest.
         with self.subTest(genotype=genotype_string):
             self.assertTrue(GenotypeCall(genotype_string).is_heterozygous())
示例#15
0
    def calls_variants_with_genotype(
            self,
            ref,
            sequence_list,
            expected_haplotypes=None,
            expected_variants_with_genotypes=None,
            config_dict=None):
        """Run weCall on the given input and assert the genotyped calls.

        The expectation is built for the first sample of the sample bank,
        either from explicit variant stubs or from ASCII haplotypes.

        :param ref: reference handed to the default sample bank builder.
        :param sequence_list: sequences handed to the default sample bank
            builder.
        :param expected_haplotypes: haplotypes used to derive the expected
            calls when ``expected_variants_with_genotypes`` is ``None``.
        :param expected_variants_with_genotypes: iterable of stubs; each stub
            is turned into a variant and its item at index 3 is used as the
            genotype string.
        :param config_dict: configuration forwarded to the weCall run.
        """
        self.__validate_expected_calls(
            expected_haplotypes, expected_variants_with_genotypes)
        sample_bank = self.__build_default_sample_bank(ref, sequence_list)
        callset = self.__run_wecall(sample_bank, config_dict)
        observed_calls = callset.get_variants_with_genotypes()

        default_sample = sample_bank.sample_names[0]

        if expected_variants_with_genotypes is not None:
            expected_calls = {}
            for stub in expected_variants_with_genotypes:
                variant = self._variant_from_stub(
                    sample_bank.reference.chrom, stub)
                expected_calls[variant] = {
                    default_sample: GenotypeCall(stub[3])}
        else:
            haplotypes_by_sample = {default_sample: expected_haplotypes}
            expected_calls = self.__get_expected_calls_from_sample_ascii_haplotypes(
                haplotypes_by_sample, sample_bank.reference)

        self.assertDictEqual(expected_calls, observed_calls)
示例#16
0
    def has_genotype(self, genotype_string):
        """Assert that the genotype under test equals ``genotype_string``.

        :param genotype_string: genotype text passed to ``GenotypeCall``.
        :return: ``self``, so that further checks can be chained.
        """
        observed = self.get_genotype()
        wanted = GenotypeCall(genotype_string)

        self.__test_case.assertEqual(wanted, observed)
        return self
示例#17
0
 def test_should_mark_following_as_called(self):
     """Genotypes containing at least one alternate allele count as called.

     Covers diploid and triploid strings, phased and unphased, including
     partially missing ones.
     """
     called = (
         "0/1", "0|1", "./1", ".|1", "0/2", "1/2", "././1", "0/0/1",
     )
     for genotype_string in called:
         # subTest names the failing genotype and keeps checking the rest.
         with self.subTest(genotype=genotype_string):
             self.assertTrue(GenotypeCall(genotype_string).is_called())
示例#18
0
    def test_read_VCF_line(self):
        """Read ``vcf_example.vcf`` and check the header and both records.

        The first record is checked field by field (INFO values, INFO key
        order and per-sample fields); the last one only as a variant.
        """
        vcf_path = os.path.join(self.data_dir, "vcf_example.vcf")
        with open(vcf_path, "r") as vcf_file:
            reader = VCFReader(vcf_file)
            reader.read_header()
            self.assertEqual(len(reader.header.file_metadata), 7)
            self.assertEqual(len(reader.header.samples), 2)

            records = list(reader.read_records())
            self.assertEqual(len(records), 2)

            # The first record is tested fully.
            first = records[0]
            self.variant_is_equal(first, ("20", 9, set(), "CT", "C"))  # zero-based representation
            self.assertEqual(first.filters, set())
            self.assertEqual(first.passes_filter, True)

            # Listed in the order in which the keys are expected to be
            # stored; the same order is used for the ordering check below.
            expected_info = [
                ("PP", [3000]),
                ("DP", [250]),
                ("DPR", [140]),
                ("DPF", [110]),
                ("VC", [100]),
                ("VCR", [49]),
                ("VCF", [51]),
                ("ABPV", [0.2]),
                ("SBPV", [0.3]),
                ("MQ", [70]),
                ("BR", [31]),
                ("QD", [None]),
            ]
            self.assertEqual(len(first.info), 12)
            for key, expected_value in expected_info:
                self.assertEqual(first.info[key], expected_value)

            self.assertEqual(first.samples, ['sample1', 'sample2'])
            expected_sample_fields = [
                ("GT", GenotypeCall("0/1"), GenotypeCall("1/1")),
                ('PL', [3000, 0, 3000], [114, 0, 0]),
                ('GQ', [1000], [None]),
            ]
            for key, sample1_value, sample2_value in expected_sample_fields:
                self.assertEqual(
                    first.sample_info.get_field('sample1', key), sample1_value)
                self.assertEqual(
                    first.sample_info.get_field('sample2', key), sample2_value)

            # Check that ordering in the dictionaries is preserved.
            expected_keys = [key for key, _ in expected_info]
            self.assertEqual(list(first.info.keys()), expected_keys)

            # Ensure the last record is still being read correctly.
            self.variant_is_equal(records[-1], ("20", 10, set(), "T", "G"))
示例#19
0
 def test_split_genotype_likelihood_with_missing_genotype_likelihood_diploid(self, log):
     """Three 'G' values for a diploid call with two alts split to ``None``s.

     A warning about the unexpected number of values must be logged.
     """
     split_func = make_split_sample_alt_func("G", lambda value: value)
     split_values = split_func([1.0, 2.0, 3.0], 2, GenotypeCall("0/1"))
     self.assertEqual(
         [[None, None, None], [None, None, None]], split_values)

     expected_warning = (
         'wecall.vcfutils.fieldmetadata', 'WARNING',
         "Incorrect number of values 'G' cardinality, expected 6, got 3")
     log.check(expected_warning)
示例#20
0
 def test_split_genotype_likelihood_warns_for_non_haploid_diploid(self, log):
     """A triploid call gets the input values repeated per alt, plus a warning."""
     split_func = make_split_sample_alt_func("G", lambda value: value)
     split_values = split_func([1.0, 2.0], 2, GenotypeCall("0/1/2"))
     self.assertEqual([[1.0, 2.0], [1.0, 2.0]], split_values)

     expected_warning = (
         'wecall.vcfutils.fieldmetadata', 'WARNING',
         "Unable to handle ploidy other than haploid or diploid.")
     log.check(expected_warning)
示例#21
0
    def test_should_drop_genotype_likelihood_with_mismatch_ploidy(self):
        """Four GL values on a diploid two-alt line are replaced by ``None``s.

        Both generated records are checked for their GT and GL fields.
        """
        schema = Schema()
        schema.set_sample_data('GT', '1', 'String', '')
        schema.set_sample_data('GL', 'G', 'Float', '')
        schema.samples = ['foo']
        vcf_columns = [
            'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GT:GL',
            '0/1:1,2,3,4'
        ]
        records = list(generate_records(schema, vcf_columns))

        # Index explicitly so that a missing record still fails the test.
        for index, genotype_string in enumerate(("0/1", "0/0")):
            sample_info = records[index].sample_info
            self.assertEqual(GenotypeCall(genotype_string),
                             sample_info.get_field('foo', 'GT'))
            self.assertEqual([None, None, None],
                             sample_info.get_field('foo', 'GL'))
示例#22
0
    def test_should_split_genotype_likelihood_properly(self):
        """Six GL values on a diploid two-alt line are split per record.

        The first record is expected to carry ``[1.0, 2.0, 3.0]`` and the
        second ``[1.0, 4.0, 6.0]``.
        """
        schema = Schema()
        schema.set_sample_data('GT', '1', 'String', '')
        schema.set_sample_data('GL', 'G', 'Float', '')
        schema.samples = ['foo']
        vcf_columns = [
            'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GT:GL',
            '0/1:1,2,3,4,5,6'
        ]
        records = list(generate_records(schema, vcf_columns))

        expected_per_record = (
            ("0/1", [1.0, 2.0, 3.0]),
            ("0/0", [1.0, 4.0, 6.0]),
        )
        # Index explicitly so that a missing record still fails the test.
        for index, (genotype_string, likelihoods) in enumerate(expected_per_record):
            sample_info = records[index].sample_info
            self.assertEqual(GenotypeCall(genotype_string),
                             sample_info.get_field('foo', 'GT'))
            self.assertEqual(likelihoods,
                             sample_info.get_field('foo', 'GL'))
示例#23
0
    def __get_expected_calls_from_haplotypes(ascii_strings, reference):
        """Derive expected genotype calls from two ASCII haplotypes.

        Variants found on both haplotypes map to ``GenotypeCall("1/1")``;
        variants found on exactly one of them map to ``GenotypeCall("0/1")``.

        :param ascii_strings: the two haplotype strings, each as long as
            ``reference.length_with_deletions()``.
        :param reference: reference the haplotypes are compared against.
        :return: dict mapping each variant to its expected genotype call.
        :raises weCallException: if not exactly two haplotypes are given or
            a haplotype length differs from the reference length.
        """
        if len(ascii_strings) != 2:
            raise weCallException(
                "Expected calls have to be defined as a diploid.")
        if any(len(haplotype) != reference.length_with_deletions()
               for haplotype in ascii_strings):
            raise weCallException(
                "Ascii haplotypes have to be of the same length as the reference")

        first_variants = Sequence(reference, ascii_strings[0]).variants
        second_variants = Sequence(reference, ascii_strings[1]).variants

        # Shared by both haplotypes -> homozygous alt.
        calls = {
            shared: GenotypeCall("1/1")
            for shared in first_variants.intersection(second_variants)
        }
        # Present on a single haplotype only -> heterozygous.
        calls.update(
            (single, GenotypeCall("0/1"))
            for single in first_variants.symmetric_difference(second_variants))

        return calls
示例#24
0
 def test_should_mark_following_as_not_heterozygous(self):
     """Fully missing and homozygous alternate calls are not heterozygous.

     Each case is checked in its unphased and its phased spelling.
     """
     not_heterozygous = ("./.", ".|.", "1/1", "1|1", "2/2", "2|2")
     for genotype_string in not_heterozygous:
         # subTest names the failing genotype and keeps checking the rest.
         with self.subTest(genotype=genotype_string):
             self.assertFalse(GenotypeCall(genotype_string).is_heterozygous())
示例#25
0
    def test_should_write_missing_values_in_sample_data(self):
        """A sample that never received data is written with missing values.

        The first record of ``vcf_example.vcf`` gets new sample data for
        three samples, of which only ``sample1`` and ``sample2`` are
        populated; the row written for it must end with ``./.:.:.`` for
        ``sample3``.
        """
        with VCFReaderContextManager(
                os.path.join(self.data_dir, "vcf_example.vcf")) as vcf_handler:
            first_record = next(vcf_handler.read_records())

        sample_data = SampleData(['GT', 'PL', 'GQ'],
                                 ['sample1', 'sample2', 'sample3'])

        sample_data.add_sample_data("sample1", "GT", GenotypeCall("1|0"))
        sample_data.add_sample_data("sample1", "PL", [3000, 0, 3000])
        sample_data.add_sample_data("sample1", "GQ", [1000])

        sample_data.add_sample_data("sample2", "GT", GenotypeCall("1|1"))
        sample_data.add_sample_data("sample2", "PL", [2000, 0, 1000])
        sample_data.add_sample_data("sample2", "GQ", [3])

        first_record.sample_info = sample_data

        # No debug print here: the assertion below already shows both rows
        # when they differ.
        vcf_string = vcf_row_from_record(first_record)
        expected_vcf_string = "20	10	.	CT	C	3000	PASS	PP=3000;DP=250;DPR=140;DPF=110;VC=100;VCR=49;VCF=51;ABPV=0.2;SBPV=0.3;MQ=70.0;BR=31.0;QD=None	GT:PL:GQ	1|0:3000,0,3000:1000	1|1:2000,0,1000:3	./.:.:."  # noqa
        self.assertEqual(expected_vcf_string, vcf_string)
示例#26
0
    def test_read_record_line(self):
        """Read the first record of ``vcf_example.vcf`` and check its fields.

        Covers the variant columns, a selection of INFO values and the
        per-sample data of ``sample1``.
        """
        vcf_path = os.path.join(self.data_dir, "vcf_example.vcf")
        with VCFReaderContextManager(vcf_path) as vcf_handler:

            record = next(vcf_handler.read_records())

            # Fixed columns of the record.
            self.assertEqual(record.chrom, "20")
            self.assertEqual(record.pos_from, 9)
            self.assertEqual(record.ids, set())
            self.assertEqual(record.ref, "CT")
            self.assertEqual(record.alt, "C")
            self.assertEqual(record.quality, 3000)
            self.assertEqual(record.filters, set())
            self.assertEqual(record.passes_filter, True)
            self.assertEqual(record.from_multi_alt, False)
            self.assertEqual(record.type, variant.TYPE_DEL)

            # Selected INFO entries.
            expected_info = [
                ('PP', [3000]),
                ('DP', [250]),
                ('VC', [100]),
                ('ABPV', [0.2]),
                ('SBPV', [0.3]),
                ('MQ', [70]),
                ('QD', [None]),
            ]
            for key, expected_value in expected_info:
                self.assertEqual(record.info[key], expected_value)

            # Per-sample data.
            self.assertTrue(record.sample_info.has_sample("sample1"))
            expected_genotypes = {
                "sample1": GenotypeCall("1|0"),
                "sample2": GenotypeCall("1|1")
            }
            self.assertEqual(record.genotypes, expected_genotypes)
            self.assertEqual(
                record.sample_info.get_field("sample1", 'GT'),
                GenotypeCall("1|0"))
            self.assertEqual(
                record.sample_info.get_field("sample1", 'PL'),
                [3000, 0, 3000])
            self.assertEqual(
                record.sample_info.get_field("sample1", "GQ"), [1000])
示例#27
0
    def __init__(self, key_names, sample_names):
        """Create per-key storage holding a default value for every sample.

        The key equal to ``GENOTYPE_KEY`` defaults to ``GenotypeCall('./.')``;
        every other key defaults to an empty list.

        :param key_names: field keys to store; kept in the given order.
        :param sample_names: sample names; each key gets one value per name.
        """
        self.__sample_names = sample_names
        self.__key_to_sample_values = OrderedDict()
        self.__merged_genotypes = False

        for key_name in key_names:
            # Build a separate default object for every sample. Re-using one
            # list for all of them would let an in-place change to one
            # sample's value show up in every other sample.
            if key_name == GENOTYPE_KEY:
                defaults = [GenotypeCall('./.') for _ in self.__sample_names]
            else:
                defaults = [[] for _ in self.__sample_names]

            self.__key_to_sample_values[key_name] = defaults
示例#28
0
 def test_binary_heterozygous_phased_genotypes(self):
     """Check ``normalized_allele_count`` for phased heterozygous calls.

     Ref/alt calls are expected to give ``(1, 1)`` whatever the alt index or
     allele order, and calls with two different alts ``(0, 1, 1)``.
     """
     expected_counts = (
         ((1, 1), ('0|1', '1|0', '0|2', '2|0')),
         ((0, 1, 1), ('1|2', '2|1')),
     )
     for expected_count, genotype_strings in expected_counts:
         for genotype_string in genotype_strings:
             # subTest names the failing genotype and keeps checking the rest.
             with self.subTest(genotype=genotype_string):
                 self.assertEqual(
                     expected_count,
                     GenotypeCall(genotype_string).normalized_allele_count)
示例#29
0
    def test_should_return_phased_heterozygous_genotype_when_merging_two_phased_identical_heterozygous_genotypes(
            self):
        """Merging a phased het call with an identical call keeps it unchanged.

        Checked for both phasings, "1|0" and "0|1".
        """
        for genotype_string in ("1|0", "0|1"):
            left = GenotypeCall(genotype_string)
            right = GenotypeCall(genotype_string)
            merged = merge_genotype_calls(left, right)
            self.assertEqual(merged, GenotypeCall(genotype_string))
示例#30
0
    def test_should_return_default_diploid_genotype(self):
        """A sample without added data reports "./." for GT and ``[]`` for GL."""
        sample_name = "NA12878"
        sample_data = SampleData(['GT', 'GL'], [sample_name])

        # Two separately built unknown calls must compare equal.
        self.assertEqual(GenotypeCall("./."), GenotypeCall("./."))

        # Access through the SampleData object itself.
        self.assertTrue(sample_data.has_sample(sample_name))
        self.assertEqual(sample_data.genotypes(),
                         {sample_name: GenotypeCall("./.")})
        self.assertEqual(sample_data.get_field(sample_name, 'GT'),
                         GenotypeCall("./."))
        self.assertEqual(sample_data.get_field(sample_name, 'GL'), [])

        # Access through the per-sample genotype data.
        per_sample = sample_data.get_genotype_data(sample_name)
        self.assertEqual(per_sample.genotype(), GenotypeCall("./."))
        self.assertEqual(per_sample['GT'], GenotypeCall("./."))
        self.assertEqual(per_sample['GL'], [])