示例#1
0
    def test_split_empty_sample_data_string(self):
        """Check records generated from a two-alt line with all-missing INFO.

        The ALT column holds ``C,A``. The first generated record is expected
        to carry ``C`` and be flagged as coming from a multi-alt line, and the
        following record is expected to expose INFO equal to the first one.
        """
        schema = self.__get_example_schema("vcf_example.vcf")
        cols = """1\t11082325\tRS1\tG\tC,A\t.\t.\tPP=.;DP=.;DPR=.;DPF=.;VC=.;VCR=.;VCF=.;ABPV=.;SBPV=.;MQ=.;BR=.;QD=.\tGT:PL:GQ\t1|0:3000,0,3000:1000\t1|1:2000,0,1000:3""".split()  # noqa
        # Drive ONE iterator for both records. Calling generate_records() a
        # second time would start over and hand back the first record again,
        # which makes the INFO comparison below trivially true.
        records = iter(record.generate_records(schema, cols))

        first_record = next(records)
        self.assertEqual(first_record.alt, 'C')
        self.assertTrue(first_record.from_multi_alt)

        second_record = next(records)
        self.assertEqual(first_record.info, second_record.info)
示例#2
0
 def test_should_return_true_if_no_GL_or_PL_present(self):
     """Expect ``has_no_likelihoods()`` to be true when FORMAT is only GT."""
     gt_only_schema = Schema()
     gt_only_schema.set_sample_data('GT', '1', 'String', '')
     gt_only_schema.samples = ['foo']

     # A single-alt line whose FORMAT column declares GT and nothing else.
     vcf_columns = [
         'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT', '0/1',
     ]
     parsed = list(generate_records(gt_only_schema, vcf_columns))

     first_sample_info = parsed[0].sample_info
     self.assertTrue(first_sample_info.has_no_likelihoods())
示例#3
0
 def test_should_return_false_if_one_sample_okay_for_GL(self):
     """Expect ``has_no_likelihoods()`` to be false when a GL is populated."""
     gl_schema = Schema()
     gl_schema.set_sample_data('GT', '1', 'String', '')
     gl_schema.set_sample_data('GL', 'G', 'Float', '')
     gl_schema.samples = ['foo']

     # One sample column has numeric GL values, the other has only '.'.
     # NOTE(review): two sample columns are supplied but only 'foo' is
     # declared in schema.samples -- confirm this is intentional.
     vcf_columns = [
         'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT:GL',
         '0/1:90,1,120', '0/1:.,.,.',
     ]
     parsed = list(generate_records(gl_schema, vcf_columns))

     first_sample_info = parsed[0].sample_info
     self.assertFalse(first_sample_info.has_no_likelihoods())
示例#4
0
 def test_should_return_true_if_all_likelihoods_are_none_for_PL(self):
     """Expect ``has_no_likelihoods()`` to be true when every PL is '.'."""
     pl_schema = Schema()
     pl_schema.set_sample_data('GT', '1', 'String', '')
     pl_schema.set_sample_data('PL', 'G', 'Float', '')
     pl_schema.samples = ['foo']

     # The only sample column carries a PL entry made entirely of '.'.
     vcf_columns = [
         'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT:PL',
         '0/1:.,.,.',
     ]
     parsed = list(generate_records(pl_schema, vcf_columns))

     first_sample_info = parsed[0].sample_info
     self.assertTrue(first_sample_info.has_no_likelihoods())
示例#5
0
 def test_should_warn_about_unrecognised_key_in_monoallelic_line(self, log):
     """Check an INFO key missing from the schema is parsed and warned about.

     ``log`` is supplied from outside this block (presumably a log-capture
     fixture injected by a decorator -- confirm); its ``check`` method is
     given the single log entry expected after the records are read.
     """
     vcf_columns = [
         'chrZ', '200', '.', 'C', 'T', '.', 'PASS', 'NEW_KEY=value',
     ]
     parsed = list(generate_records(Schema(), vcf_columns))

     # The position is carried in both tuples so that a failure message
     # shows which record mismatched.
     for position, parsed_record in enumerate(parsed):
         observed = (position, parsed_record.info['NEW_KEY'])
         self.assertEqual((position, ['value']), observed)

     expected_entry = (
         'root',
         'WARNING',
         'info field {!r} not defined in schema'.format('NEW_KEY'),
     )
     log.check(expected_entry)
示例#6
0
 def test_should_warn_about_too_many_alts_in_field_of_allelic_cardinality(
         self,
         log):
     """Check a per-alt INFO field with more values than alts.

     The line has two alts (``A,T``) but ``key`` lists three values. Each
     generated record is expected to expose its own value (``a`` then ``b``)
     and one warning entry is expected on ``log``.

     ``log`` is supplied from outside this block (presumably a log-capture
     fixture injected by a decorator -- confirm).
     """
     allelic_schema = Schema()
     allelic_schema.set_info_data('key', 'A', 'String', '')

     vcf_columns = [
         'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', 'key=a,b,c',
     ]
     parsed = list(generate_records(allelic_schema, vcf_columns))

     per_alt_values = [['a'], ['b']]
     for position, parsed_record in enumerate(parsed):
         self.assertEqual(
             per_alt_values[position], parsed_record.info['key'])

     warning_text = 'expected 2 items in {!r}'.format([['a'], ['b'], ['c']])
     log.check(('wecall.vcfutils.fieldmetadata', 'WARNING', warning_text))
示例#7
0
 def test_should_warn_when_GT_is_not_present(self, log):
     """Check GL handling when the FORMAT column has no GT entry.

     Every generated record is expected to return the GL values unchanged as
     strings, and one warning entry is expected on ``log``.

     ``log`` is supplied from outside this block (presumably a log-capture
     fixture injected by a decorator -- confirm).
     """
     gl_only_schema = Schema()
     gl_only_schema.set_sample_data('GL', 'G', 'Float', '')
     gl_only_schema.samples = ['foo']

     vcf_columns = [
         'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GL', '1,2,3',
     ]
     parsed = list(generate_records(gl_only_schema, vcf_columns))

     # The position is carried in both tuples so that a failure message
     # shows which record mismatched.
     for position, parsed_record in enumerate(parsed):
         wanted = (position, ['1', '2', '3'])
         observed = (
             position, parsed_record.sample_info.get_field('foo', 'GL'))
         self.assertEqual(wanted, observed)

     expected_entry = (
         'wecall.vcfutils.fieldmetadata',
         'WARNING',
         'Unknown ploidy when parsing genotype likelihood',
     )
     log.check(expected_entry)
示例#8
0
    def test_should_drop_genotype_likelihood_with_mismatch_ploidy(self):
        """Check GL is blanked when its length does not fit the genotype.

        The sample supplies four GL values on a two-alt line. Both generated
        records are expected to report ``[None, None, None]`` for GL, with GT
        ``0/1`` on the first record and ``0/0`` on the second.
        """
        gl_schema = Schema()
        gl_schema.set_sample_data('GT', '1', 'String', '')
        gl_schema.set_sample_data('GL', 'G', 'Float', '')
        gl_schema.samples = ['foo']

        vcf_columns = [
            'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GT:GL',
            '0/1:1,2,3,4',
        ]
        parsed = list(generate_records(gl_schema, vcf_columns))

        # (record position, expected genotype string), checked in order.
        for position, genotype in ((0, "0/1"), (1, "0/0")):
            self.assertEqual(
                GenotypeCall(genotype),
                parsed[position].sample_info.get_field('foo', 'GT'))
            self.assertEqual(
                [None, None, None],
                parsed[position].sample_info.get_field('foo', 'GL'))
示例#9
0
    def test_should_split_genotype_likelihood_properly(self):
        """Check how six GL values are divided across two per-alt records.

        For the input ``1,2,3,4,5,6`` on a line with alts ``A,T``, the first
        record is expected to hold ``[1.0, 2.0, 3.0]`` with GT ``0/1`` and the
        second ``[1.0, 4.0, 6.0]`` with GT ``0/0``.
        """
        gl_schema = Schema()
        gl_schema.set_sample_data('GT', '1', 'String', '')
        gl_schema.set_sample_data('GL', 'G', 'Float', '')
        gl_schema.samples = ['foo']

        vcf_columns = [
            'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GT:GL',
            '0/1:1,2,3,4,5,6',
        ]
        parsed = list(generate_records(gl_schema, vcf_columns))

        # (record position, expected genotype string, expected GL values),
        # checked in order.
        expectations = (
            (0, "0/1", [1.0, 2.0, 3.0]),
            (1, "0/0", [1.0, 4.0, 6.0]),
        )
        for position, genotype, likelihoods in expectations:
            self.assertEqual(
                GenotypeCall(genotype),
                parsed[position].sample_info.get_field('foo', 'GT'))
            self.assertEqual(
                likelihoods,
                parsed[position].sample_info.get_field('foo', 'GL'))
示例#10
0
    def test_should_add_default_parsing_rule_for_unknown_key_in_multiallelic_line(self):
        """Check an unknown INFO key gains an inferred schema entry.

        The schema starts with no INFO definitions. It is asserted to still
        have none after the records are generated, and exactly one after each
        record's ``NEW_KEY`` value has been read. The attributes of that
        inferred entry are then checked.
        """
        empty_schema = Schema()
        vcf_columns = [
            'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', 'NEW_KEY=value',
        ]
        parsed = list(generate_records(empty_schema, vcf_columns))

        # Statement order matters here: the definition count is taken before
        # any record's info is accessed, and again afterwards.
        definitions_before = list(empty_schema.iter_info_data())
        self.assertEqual(0, len(definitions_before))

        for position, parsed_record in enumerate(parsed):
            observed = (position, parsed_record.info['NEW_KEY'])
            self.assertEqual((position, ['value']), observed)

        definitions_after = list(empty_schema.iter_info_data())
        self.assertEqual(1, len(definitions_after))

        inferred = empty_schema.get_info_data('NEW_KEY')
        self.assertEqual('.', inferred.number)
        self.assertEqual('String', inferred.data_type)
        self.assertEqual(
            'Inferred from file content during parsing',
            inferred.description)
        self.assertEqual('vcfutils', inferred.source)
        self.assertEqual('undefined', inferred.version)