def test_infer_annotation_types_with_multiple_annotation_fields(self):
    """Checks type inference when two annotation fields are requested.

    `CSQ` and `CSQ_VT` are given the same annotation values, and each is
    expected to yield its own Gene/Position/Score `*_TYPE` entries.
    """
    header_infos = self._get_annotation_infos()
    header_infos['CSQ_VT'] = createInfo(
        'CSQ_VT',
        'A',
        'String',
        'Annotations from VEP. Format: Allele|Gene|Position|Score',
        'source',
        'v')

    variant = self._get_sample_variant_1()
    # Separate list objects per field, in the same insertion order as before.
    for field in ('CSQ_VT', 'CSQ'):
        variant.info[field] = ['A|1|100|1.2', 'A|2|101|1.3']

    header = vcf_header_io.VcfHeader(infos=header_infos)
    actual = infer_headers_util.infer_info_fields(
        variant, header, False, ['CSQ', 'CSQ_VT'])

    expected_types = (
        ('Gene', 'Integer'),
        ('Position', 'Integer'),
        ('Score', 'Float'),
    )
    expected = {}
    for field in ('CSQ', 'CSQ_VT'):
        for name, inferred_type in expected_types:
            key = '%s_%s_TYPE' % (field, name)
            expected[key] = self._get_inferred_info(field, name, inferred_type)

    self.assertDictEqual(expected, actual)
def test_infer_annotation_empty_info(self):
    """Expects no inferred fields when the `CSQ` annotation list is empty."""
    header_infos = self._get_annotation_infos()
    variant = self._get_sample_variant_1()
    variant.info['CSQ'] = []

    header = vcf_header_io.VcfHeader(infos=header_infos)
    actual = infer_headers_util.infer_info_fields(
        variant, header, False, ['CSQ'])

    self.assertEqual({}, actual)
def test_infer_info_fields_no_conflicts(self):
    """Expects an empty result for sample variant 1 with these definitions.

    The header declares IS, ISI, ISF, IF, IB and IA, and header inference is
    switched on.
    """
    variant = self._get_sample_variant_1()

    # (field id, number, type) for each info definition, in header order.
    definitions = (
        ('IS', 1, 'String'),
        ('ISI', 1, 'Integer'),
        ('ISF', 1, 'Float'),
        ('IF', 1, 'Float'),
        ('IB', 0, 'Flag'),
        ('IA', 'A', 'Float'),
    )
    header_infos = {}
    for field_id, number, field_type in definitions:
        header_infos[field_id] = createInfo(field_id, number, field_type, '')

    header = vcf_header_io.VcfHeader(infos=header_infos)
    actual = infer_headers_util.infer_info_fields(
        variant, header, infer_headers=True)

    self.assertEqual({}, actual)
def test_infer_annotation_types_with_missing(self):
    """Checks type inference when annotation sub-fields are left empty.

    Expects Gene to be reported with type '.', Position as Integer and
    Score as Float.
    """
    header_infos = self._get_annotation_infos()
    variant = self._get_sample_variant_1()
    variant.info['CSQ'] = ['A||100|', 'A||101|1.3', 'A|||1.4', 'TT|||']

    header = vcf_header_io.VcfHeader(infos=header_infos)
    actual = infer_headers_util.infer_info_fields(
        variant, header, False, ['CSQ'])

    expected = {}
    expected['CSQ_Gene_TYPE'] = self._get_inferred_info('CSQ', 'Gene', '.')
    expected['CSQ_Position_TYPE'] = self._get_inferred_info(
        'CSQ', 'Position', 'Integer')
    expected['CSQ_Score_TYPE'] = self._get_inferred_info(
        'CSQ', 'Score', 'Float')

    self.assertDictEqual(expected, actual)
def test_infer_annotation_types_no_conflicts(self):
    """Checks annotation type inference with header inference switched on.

    Expects Gene to be inferred as String, Position as Integer and Score as
    Float for the two `CSQ` annotations set on the variant.
    """
    header_infos = self._get_annotation_infos()
    variant = self._get_sample_variant_1()
    variant.info['CSQ'] = ['A|GENE1|100|1.2', 'TT|GENE1|101|1.3']

    header = vcf_header_io.VcfHeader(infos=header_infos)
    actual = infer_headers_util.infer_info_fields(
        variant, header, True, ['CSQ'])

    expected = {}
    expected['CSQ_Gene_TYPE'] = self._get_inferred_info(
        'CSQ', 'Gene', 'String')
    expected['CSQ_Position_TYPE'] = self._get_inferred_info(
        'CSQ', 'Position', 'Integer')
    expected['CSQ_Score_TYPE'] = self._get_inferred_info(
        'CSQ', 'Score', 'Float')

    self.assertDictEqual(expected, actual)
def test_infer_info_fields_combined_conflicts(self):
    """Checks inference when several header definitions disagree with data.

    The header does not declare IF and declares IA as Integer with number
    'A'. The result is expected to contain IF (number 1, Float) and IA
    (number '.', Float).
    """
    variant = self._get_sample_variant_info_ia_cardinality_mismatch()

    # (field id, number, type) for each info definition, in header order.
    definitions = (
        ('IS', 1, 'String'),
        ('ISI', 1, 'Integer'),
        ('ISF', 1, 'Float'),
        ('IB', 0, 'Flag'),
        ('IA', 'A', 'Integer'),
    )
    header_infos = {}
    for field_id, number, field_type in definitions:
        header_infos[field_id] = createInfo(field_id, number, field_type, '')

    header = vcf_header_io.VcfHeader(infos=header_infos)
    actual = infer_headers_util.infer_info_fields(
        variant, header, infer_headers=True)

    expected = {}
    expected['IF'] = createInfo('IF', 1, 'Float', '')
    expected['IA'] = createInfo('IA', '.', 'Float', '')

    self.assertEqual(expected, actual)
def process(self,
            variant,  # type: vcfio.Variant
            defined_headers  # type: vcf_header_io.VcfHeader
           ):
    # type: (...) -> Iterable[vcf_header_io.VcfHeader]
    """Yields one header holding the fields inferred for `variant`.

    Info fields always go through `infer_headers_util.infer_info_fields`.
    Format fields are inferred only when `self._infer_headers` is truthy;
    otherwise the yielded header carries an empty formats dict.

    Args:
      variant: the variant passed on to the inference helpers.
      defined_headers: header fields defined in header section of VCF files.
    """
    inferred_infos = infer_headers_util.infer_info_fields(
        variant,
        defined_headers,
        self._infer_headers,
        self._annotation_fields_to_infer)

    # Info inference runs first, matching the original call order.
    inferred_formats = (
        infer_headers_util.infer_format_fields(variant, defined_headers)
        if self._infer_headers else {})

    yield vcf_header_io.VcfHeader(
        infos=inferred_infos, formats=inferred_formats)