示例#1
0
  def test_create_processed_variant_move_alt_info_extra_values(self):
    """Checks handling of an INFO field holding more values than alternates.

    `A2` is given three values below. The factory is expected to raise
    `ValueError` by default; with `allow_alternate_allele_info_mismatch=True`
    it is expected to produce one record per alternate ('A' and 'TT') that
    carries only 'data1' and 'data2' respectively.
    """
    header_fields = vcf_header_util.make_header({'A1': '1', 'A2': 'A'})
    variant = self._get_sample_variant()
    # Add a value to `A2` (it only has two alternate bases, so this is invalid).
    variant.info['A2'] = ['data1', 'data2', 'data3']

    # Ensure error is raised by default.
    strict_factory = processed_variant.ProcessedVariantFactory(
        header_fields,
        split_alternate_allele_info_fields=True)
    with self.assertRaises(ValueError):
      strict_factory.create_processed_variant(variant)

    # Try again with allow_alternate_allele_info_mismatch=True.
    lenient_factory = processed_variant.ProcessedVariantFactory(
        header_fields,
        split_alternate_allele_info_fields=True,
        allow_alternate_allele_info_mismatch=True)
    proc_var = lenient_factory.create_processed_variant(variant)
    alt1 = processed_variant.AlternateBaseData('A')
    alt1._info = {'A2': 'data1'}
    alt2 = processed_variant.AlternateBaseData('TT')
    alt2._info = {'A2': 'data2'}
    self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
    # Membership assertion instead of dict.has_key(), which Python 3 removed.
    self.assertNotIn('A2', proc_var.non_alt_info)
示例#2
0
 def test_create_processed_variant_move_alt_info_and_annotation(self):
   """Checks that `A2` and `CSQ` data end up on the per-alternate records.

   With `split_alternate_allele_info_fields=True` and
   `annotation_fields=['CSQ']`, both fields are expected to be absent from
   `non_alt_info` and present on the matching `AlternateBaseData` entries.
   The spy counters are expected to report one variant, three annotation
   ALT matches and no mismatches.
   """
   variant, header_fields = self._get_sample_variant_and_header_with_csq()
   counter_factory = _CounterSpyFactory()
   factory = processed_variant.ProcessedVariantFactory(
       header_fields,
       split_alternate_allele_info_fields=True,
       annotation_fields=['CSQ'],
       counter_factory=counter_factory)
   proc_var = factory.create_processed_variant(variant)

   alt1 = processed_variant.AlternateBaseData('A')
   alt1._info = {
       'A2': 'data1',
       'CSQ': [
           {annotation_parser.ANNOTATION_ALT: 'A', 'Consequence': 'C1',
            'IMPACT': 'I1', 'SYMBOL': 'S1', 'Gene': 'G1'},
           {annotation_parser.ANNOTATION_ALT: 'A', 'Consequence': 'C3',
            'IMPACT': 'I3', 'SYMBOL': 'S3', 'Gene': 'G3'}]
   }
   alt2 = processed_variant.AlternateBaseData('TT')
   alt2._info = {
       'A2': 'data2',
       'CSQ': [
           {annotation_parser.ANNOTATION_ALT: 'TT', 'Consequence': 'C2',
            'IMPACT': 'I2', 'SYMBOL': 'S2', 'Gene': 'G2'}]
   }
   self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
   # Membership assertions instead of dict.has_key(), which Python 3 removed.
   self.assertNotIn('A2', proc_var.non_alt_info)
   self.assertNotIn('CSQ', proc_var.non_alt_info)

   counters = counter_factory.counter_map
   self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 3)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 0)
示例#3
0
 def test_create_processed_variant_annotation_alt_prefix(self):
     """Checks matching of annotation ALTs that omit the leading base.

     The variant has reference 'C' and alternates 'CT', 'CC' and 'CCC', while
     the `CSQ` entries name the alleles 'T', 'C' and 'CC'. Each annotation is
     expected to be attached to the alternate whose bases are 'C' followed by
     the annotation allele, giving three matches and no mismatches.
     """
     # The returned variant is ignored as we create a custom one next.
     _, header_fields = self._get_sample_variant_and_header_with_csq()
     variant = vcfio.Variant(
         reference_name='19',
         start=11,
         end=12,
         reference_bases='C',
         alternate_bases=['CT', 'CC', 'CCC'],
         names=['rs1'],
         quality=2,
         filters=['PASS'],
         info={'CSQ': ['T|C1|I1|S1|G1', 'C|C2|I2|S2|G2', 'CC|C3|I3|S3|G3']})
     counter_factory = _CounterSpyFactory()
     factory = processed_variant.ProcessedVariantFactory(
         header_fields,
         split_alternate_allele_info_fields=True,
         annotation_fields=['CSQ'],
         counter_factory=counter_factory)
     proc_var = factory.create_processed_variant(variant)

     alt1 = processed_variant.AlternateBaseData('CT')
     alt1._info = {
         'CSQ': [{
             annotation_parser.ANNOTATION_ALT: 'T',
             'Consequence': 'C1',
             'IMPACT': 'I1',
             'SYMBOL': 'S1',
             'Gene': 'G1'
         }]
     }
     alt2 = processed_variant.AlternateBaseData('CC')
     alt2._info = {
         'CSQ': [{
             annotation_parser.ANNOTATION_ALT: 'C',
             'Consequence': 'C2',
             'IMPACT': 'I2',
             'SYMBOL': 'S2',
             'Gene': 'G2'
         }]
     }
     alt3 = processed_variant.AlternateBaseData('CCC')
     alt3._info = {
         'CSQ': [{
             annotation_parser.ANNOTATION_ALT: 'CC',
             'Consequence': 'C3',
             'IMPACT': 'I3',
             'SYMBOL': 'S3',
             'Gene': 'G3'
         }]
     }
     self.assertEqual(proc_var.alternate_data_list, [alt1, alt2, alt3])
     # Membership assertion instead of dict.has_key(), which Python 3 removed.
     self.assertNotIn('CSQ', proc_var.non_alt_info)

     counters = counter_factory.counter_map
     self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 3)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 0)
 def test_create_processed_variant_mismatched_annotation_alt(self):
     """Checks that an annotation whose ALT matches no alternate is dropped.

     The `CSQ` field carries an extra 'ATAT' entry. The expected per-alternate
     records contain only the 'A' and 'TT' annotations, and the spy counters
     are expected to report one variant, two matches and one mismatch.
     """
     # This is like `test_create_processed_variant_move_alt_info_and_annotation`
     # with the difference that it has an extra alt annotation which does not
     # match any alts.
     variant, header_fields = self._get_sample_variant_and_header_with_csq()
     variant.info['CSQ'] = vcfio.VariantInfo(
         data=[
             'A|C1|I1|S1|G1', 'TT|C2|I2|S2|G2', 'A|C3|I3|S3|G3',
             'ATAT|C3|I3|S3|G3'
         ],
         field_count='.')
     counter_factory = _CounterSpyFactory()
     factory = processed_variant.ProcessedVariantFactory(
         header_fields,
         split_alternate_allele_info_fields=True,
         annotation_fields=['CSQ'],
         counter_factory=counter_factory)
     proc_var = factory.create_processed_variant(variant)

     alt1 = processed_variant.AlternateBaseData('A')
     alt1._info = {
         'A2': 'data1',
         'CSQ': [{
             processed_variant._ANNOTATION_ALT: 'A',
             'Consequence': 'C1',
             'IMPACT': 'I1',
             'SYMBOL': 'S1',
             'Gene': 'G1'
         }, {
             processed_variant._ANNOTATION_ALT: 'A',
             'Consequence': 'C3',
             'IMPACT': 'I3',
             'SYMBOL': 'S3',
             'Gene': 'G3'
         }]
     }
     alt2 = processed_variant.AlternateBaseData('TT')
     alt2._info = {
         'A2': 'data2',
         'CSQ': [{
             processed_variant._ANNOTATION_ALT: 'TT',
             'Consequence': 'C2',
             'IMPACT': 'I2',
             'SYMBOL': 'S2',
             'Gene': 'G2'
         }]
     }
     self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
     # Membership assertions instead of dict.has_key(), which Python 3 removed.
     self.assertNotIn('A2', proc_var.non_alt_info)
     self.assertNotIn('CSQ', proc_var.non_alt_info)

     counters = counter_factory.counter_map
     self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 2)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 1)
示例#5
0
 def test_create_processed_variant_annotation_alt_allele_num(self):
   """Checks annotation-to-alternate matching driven by ALLELE_NUM.

   Every `CSQ` entry names the allele 'T'; only the ALLELE_NUM column
   differs. With `use_allele_num=True` the entries numbered '1' and '2' are
   expected on the first and second alternate respectively, and the four
   entries with ALLELE_NUM '0', '3', 'TEST' and '' are expected to be
   counted under `ALLELE_NUM_INCORRECT`.
   """
   csq_info = parser._Info(
       id=None, num='.', type=None,
       desc='some desc Allele|Consequence|IMPACT|ALLELE_NUM',
       source=None, version=None)
   header_fields = vcf_header_io.VcfHeader(infos={'CSQ': csq_info})
   variant = vcfio.Variant(
       reference_name='19', start=11, end=12, reference_bases='C',
       # The following represent a SNV and an insertion, resp.
       alternate_bases=['T', 'CT'],
       names=['rs1'], quality=2,
       filters=['PASS'],
       # Note that in the minimal mode of VEP, 'T' is an ambiguous annotation
       # ALT because it can map to either the 'T' SNV or the 'CT' insertion.
       # But because there is ALLELE_NUM there should be no ambiguity.
       # The last four annotations have incorrect ALLELE_NUMs.
       info={'CSQ': ['T|C1|I1|1', 'T|C2|I2|2', 'T|C3|I3|0', 'T|C4|I4|3',
                     'T|C5|I5|TEST', 'T|C6|I6|']})
   counter_factory = _CounterSpyFactory()
   factory = processed_variant.ProcessedVariantFactory(
       header_fields,
       split_alternate_allele_info_fields=True,
       annotation_fields=['CSQ'],
       use_allele_num=True,
       minimal_match=True,  # This should be ignored by the factory method.
       counter_factory=counter_factory)
   proc_var = factory.create_processed_variant(variant)

   alt1 = processed_variant.AlternateBaseData('T')
   alt1._info = {
       'CSQ': [
           {annotation_parser.ANNOTATION_ALT: 'T',
            'Consequence': 'C1', 'IMPACT': 'I1', 'ALLELE_NUM': '1'}]
   }
   alt2 = processed_variant.AlternateBaseData('CT')
   alt2._info = {
       'CSQ': [
           {annotation_parser.ANNOTATION_ALT: 'T',
            'Consequence': 'C2', 'IMPACT': 'I2', 'ALLELE_NUM': '2'}]
   }
   self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
   # Membership assertion instead of dict.has_key(), which Python 3 removed.
   self.assertNotIn('CSQ', proc_var.non_alt_info)

   counters = counter_factory.counter_map
   self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 2)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 0)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MINIMAL_AMBIGUOUS.value].get_value(), 0)
   self.assertEqual(
       counters[CEnum.ALLELE_NUM_INCORRECT.value].get_value(), 4)
 def test_create_processed_variant_move_alt_info(self):
     """Checks that `A2` values are moved onto the per-alternate records.

     With `split_alternate_allele_info_fields=True`, the sample variant's
     `A2` values are expected to appear as 'data1' on alternate 'A' and
     'data2' on alternate 'TT', and `A2` is expected to be absent from
     `non_alt_info`.
     """
     variant = self._get_sample_variant()
     header_fields = vcf_header_parser.HeaderFields({}, {})
     factory = processed_variant.ProcessedVariantFactory(
         header_fields, split_alternate_allele_info_fields=True)
     proc_var = factory.create_processed_variant(variant)

     alt1 = processed_variant.AlternateBaseData('A')
     alt1._info = {'A2': 'data1'}
     alt2 = processed_variant.AlternateBaseData('TT')
     alt2._info = {'A2': 'data2'}
     self.assertEqual(proc_var.alternate_data_list, [alt1, alt2])
     # Membership assertion instead of dict.has_key(), which Python 3 removed.
     self.assertNotIn('A2', proc_var.non_alt_info)
示例#7
0
 def test_create_processed_variant_annotation_alt_minimal(self):
     """Checks annotation matching when `minimal_match=True`.

     The 'T' annotation is expected to be attached to neither 'CT' nor 'CCT'
     and to be counted as `ANNOTATION_ALT_MINIMAL_AMBIGUOUS`, while the '-'
     annotation is expected on the 'C' alternate and counted as the single
     match.
     """
     # The returned variant is ignored as we create a custom one next.
     _, header_fields = self._get_sample_variant_and_header_with_csq()
     variant = vcfio.Variant(
         reference_name='19',
         start=11,
         end=12,
         reference_bases='CC',
         # The following represent a SNV, an insertion, and a deletion, resp.
         alternate_bases=['CT', 'CCT', 'C'],
         names=['rs1'],
         quality=2,
         filters=['PASS'],
         # Note that in the minimal mode, 'T' is an ambiguous annotation ALT
         # because it can map to either the 'CT' SNV or the 'CCT' insertion.
         # It is not ambiguous in the non-minimal mode (it only maps to `CT`).
         info={'CSQ': ['T|C1|I1|S1|G1', '-|C2|I2|S2|G2']})
     counter_factory = _CounterSpyFactory()
     factory = processed_variant.ProcessedVariantFactory(
         header_fields,
         split_alternate_allele_info_fields=True,
         annotation_fields=['CSQ'],
         minimal_match=True,
         counter_factory=counter_factory)
     proc_var = factory.create_processed_variant(variant)

     alt1 = processed_variant.AlternateBaseData('CT')
     alt1._info = {}
     alt2 = processed_variant.AlternateBaseData('CCT')
     alt2._info = {}
     alt3 = processed_variant.AlternateBaseData('C')
     alt3._info = {
         'CSQ': [{
             annotation_parser.ANNOTATION_ALT: '-',
             'Consequence': 'C2',
             'IMPACT': 'I2',
             'SYMBOL': 'S2',
             'Gene': 'G2'
         }]
     }
     self.assertEqual(proc_var.alternate_data_list, [alt1, alt2, alt3])
     # Membership assertion instead of dict.has_key(), which Python 3 removed.
     self.assertNotIn('CSQ', proc_var.non_alt_info)

     counters = counter_factory.counter_map
     self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 1)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 0)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MINIMAL_AMBIGUOUS.value].get_value(),
         1)
示例#8
0
 def test_create_processed_variant_no_change(self):
   """Checks the factory output when alternate-allele splitting is disabled.

   With `split_alternate_allele_info_fields=False`, the processed variant is
   expected to equal one built directly from the sample variant whose INFO
   values sit in `_non_alt_info` and whose alternates carry no extra data.
   """
   counter_factory = _CounterSpyFactory()
   header_fields = vcf_header_util.make_header({'A1': '1', 'A2': 'A'})
   variant = self._get_sample_variant()
   factory = processed_variant.ProcessedVariantFactory(
       header_fields,
       split_alternate_allele_info_fields=False,
       counter_factory=counter_factory)
   actual = factory.create_processed_variant(variant)

   # In this mode, the only difference between the original `variant` and
   # the factory output should be that INFO fields are copied to the
   # `_non_alt_info` map and `_alternate_datas` holds alternate bases only.
   expected = processed_variant.ProcessedVariant(variant)
   expected._non_alt_info = {
       'A1': 'some data',
       'A2': ['data1', 'data2'],
   }
   expected._alternate_datas = [
       processed_variant.AlternateBaseData('A'),
       processed_variant.AlternateBaseData('TT'),
   ]
   self.assertEqual([expected], [actual])

   counters = counter_factory.counter_map
   self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 0)
   self.assertEqual(
       counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 0)
 def test_create_processed_variant_symbolic_and_breakend_annotation_alt(
         self):
     """Checks annotation matching for symbolic and breakend alternates.

     The alternates are '<SYMBOLIC>', '[13:123457[.' and 'C[10:10357[.', and
     the `CSQ` entries name them as 'SYMBOLIC', '[13' and 'C[10'. Those three
     are expected on the corresponding alternates; the extra 'C[1' entry is
     expected to be counted as a mismatch.
     """
     # The returned variant is ignored as we create a custom one next.
     _, header_fields = self._get_sample_variant_and_header_with_csq()
     variant = vcfio.Variant(
         reference_name='19',
         start=11,
         end=12,
         reference_bases='C',
         alternate_bases=['<SYMBOLIC>', '[13:123457[.', 'C[10:10357[.'],
         names=['rs1'],
         quality=2,
         filters=['PASS'],
         info={
             'CSQ':
             vcfio.VariantInfo(
                 data=[
                     'SYMBOLIC|C1|I1|S1|G1', '[13|C2|I2|S2|G2',
                     'C[10|C3|I3|S3|G3', 'C[1|C3|I3|S3|G3'
                 ],  # The last one does not match any alts.
                 field_count='.')
         })
     counter_factory = _CounterSpyFactory()
     factory = processed_variant.ProcessedVariantFactory(
         header_fields,
         split_alternate_allele_info_fields=True,
         annotation_fields=['CSQ'],
         counter_factory=counter_factory)
     proc_var = factory.create_processed_variant(variant)

     alt1 = processed_variant.AlternateBaseData('<SYMBOLIC>')
     alt1._info = {
         'CSQ': [{
             processed_variant._ANNOTATION_ALT: 'SYMBOLIC',
             'Consequence': 'C1',
             'IMPACT': 'I1',
             'SYMBOL': 'S1',
             'Gene': 'G1'
         }]
     }
     alt2 = processed_variant.AlternateBaseData('[13:123457[.')
     alt2._info = {
         'CSQ': [{
             processed_variant._ANNOTATION_ALT: '[13',
             'Consequence': 'C2',
             'IMPACT': 'I2',
             'SYMBOL': 'S2',
             'Gene': 'G2'
         }]
     }
     alt3 = processed_variant.AlternateBaseData('C[10:10357[.')
     alt3._info = {
         'CSQ': [{
             processed_variant._ANNOTATION_ALT: 'C[10',
             'Consequence': 'C3',
             'IMPACT': 'I3',
             'SYMBOL': 'S3',
             'Gene': 'G3'
         }]
     }
     self.assertEqual(proc_var.alternate_data_list, [alt1, alt2, alt3])
     # Membership assertion instead of dict.has_key(), which Python 3 removed.
     self.assertNotIn('CSQ', proc_var.non_alt_info)

     counters = counter_factory.counter_map
     self.assertEqual(counters[CEnum.VARIANT.value].get_value(), 1)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MATCH.value].get_value(), 3)
     self.assertEqual(
         counters[CEnum.ANNOTATION_ALT_MISMATCH.value].get_value(), 1)