def test_infer_annotation_types_with_multiple_annotation_fields(self):
    """Infer annotation sub-field types for two annotation fields at once.

    `CSQ` and `CSQ_VT` are both requested for annotation type inference and
    are given the same `Allele|Gene|Position|Score`-shaped values. The test
    expects a `<field>_<name>_TYPE` entry for Gene (Integer), Position
    (Integer) and Score (Float) under each of the two prefixes, and nothing
    else.
    """
    anno_fields = ['CSQ', 'CSQ_VT']
    csq_vt = [
        ('CSQ_VT',
         createInfo(
             'CSQ_VT', 'A', 'String',
             'Annotations from VEP. Format: Allele|Gene|Position|Score',
             'source', 'v'))
    ]
    header = self._get_sample_header_fields(with_annotation=csq_vt)
    variant = self._get_sample_variant_1()
    variant.info['CSQ_VT'] = ['A|1|100|1.2', 'A|2|101|1.3']
    variant.info['CSQ'] = ['A|1|100|1.2', 'A|2|101|1.3']
    # Keyword arguments, matching the other annotation tests in this file.
    infer_header_fields = infer_headers._InferHeaderFields(
        infer_headers=False, annotation_fields_to_infer=anno_fields)
    inferred_headers = next(infer_header_fields.process(variant, header))
    expected_types = {
        'CSQ_Gene_TYPE': 'Integer',
        'CSQ_Position_TYPE': 'Integer',
        'CSQ_Score_TYPE': 'Float',
        'CSQ_VT_Gene_TYPE': 'Integer',
        'CSQ_VT_Position_TYPE': 'Integer',
        'CSQ_VT_Score_TYPE': 'Float'
    }
    # Compare the whole mapping in one assertion: an unexpected or missing
    # key is reported as a readable failure (with a diff) instead of a
    # KeyError, and every mismatching type is shown, not just the first.
    actual_types = {
        key: item['type'] for key, item in inferred_headers.infos.items()
    }
    self.assertEqual(expected_types, actual_types)
def test_infer_format_fields_no_conflicts(self):
    """`_infer_format_fields` reports nothing when no fix is expected.

    Sample variant 1 is checked against a header that declares the FS, FI,
    FU, GT and PS FORMAT fields; the expected result is an empty dict.
    """
    sample_variant = self._get_sample_variant_1()

    declared_formats = OrderedDict()
    declared_formats['FS'] = Format('FS', 1, 'String', 'desc')
    declared_formats['FI'] = Format('FI', 2, 'Integer', 'desc')
    declared_formats['FU'] = Format('FU', field_counts['.'], 'Float', 'desc')
    declared_formats['GT'] = Format('GT', 2, 'Integer', 'Special GT key')
    declared_formats['PS'] = Format('PS', 1, 'Integer', 'Special PS key')

    inferrer = infer_headers._InferHeaderFields()
    declared_header = vcf_header_io.VcfHeader(formats=declared_formats)
    inferred_formats = inferrer._infer_format_fields(
        sample_variant, declared_header)

    self.assertEqual({}, inferred_formats)
def test_infer_info_fields_no_conflicts(self):
    """`_infer_info_fields` reports nothing when no fix is expected.

    Sample variant 1 is checked against a header that declares the IS, ISI,
    ISF, IF, IB and IA INFO fields; the expected result is an empty dict.
    """
    sample_variant = self._get_sample_variant_1()

    # (id, num, type) for each declared INFO field; the remaining Info
    # arguments are empty strings for every field.
    declared_specs = [
        ('IS', 1, 'String'),
        ('ISI', 1, 'Integer'),
        ('ISF', 1, 'Float'),
        ('IF', 1, 'Float'),
        ('IB', 0, 'Flag'),
        ('IA', -1, 'Float'),
    ]
    declared_infos = {
        field_id: Info(field_id, num, field_type, '', '', '')
        for field_id, num, field_type in declared_specs
    }

    inferrer = infer_headers._InferHeaderFields()
    declared_header = vcf_header_io.VcfHeader(infos=declared_infos)
    result = inferrer._infer_info_fields(sample_variant, declared_header)

    self.assertEqual({}, result)
def test_infer_format_fields_combined_conflicts(self):
    """`_infer_format_fields` handles a type fix and an undeclared field.

    The header declares FS, FI (Integer), GT and PS but not FU. The test
    expects FI to come back re-typed as Float (description kept) and FU to
    come back as a new Float field with the `'.'` count and an empty
    description.
    """
    sample_variant = self._get_sample_variant_format_fi_float_value()

    declared_formats = OrderedDict()
    declared_formats['FS'] = Format('FS', 1, 'String', 'desc')
    declared_formats['FI'] = Format('FI', 2, 'Integer', 'desc')
    declared_formats['GT'] = Format('GT', 2, 'Integer', 'Special GT key')
    declared_formats['PS'] = Format('PS', 1, 'Integer', 'Special PS key')

    inferrer = infer_headers._InferHeaderFields()
    declared_header = vcf_header_io.VcfHeader(formats=declared_formats)
    actual = inferrer._infer_format_fields(sample_variant, declared_header)

    corrected_fi = Format('FI', 2, 'Float', 'desc')
    inferred_fu = Format('FU', field_counts['.'], 'Float', '')
    wanted = {'FI': corrected_fi, 'FU': inferred_fu}
    self.assertEqual(wanted, actual)
def test_infer_info_fields_combined_conflicts(self):
    """`_infer_info_fields` handles an undeclared field and a corrected one.

    The header declares IS, ISI, ISF, IB and IA (num -1, Integer) but not
    IF. The test expects IF to come back as a new Float field with num 1 and
    IA to come back as Float with num None.
    """
    sample_variant = self._get_sample_variant_info_ia_cardinality_mismatch()

    # (id, num, type) for each declared INFO field; the remaining Info
    # arguments are empty strings for every field.
    declared_specs = [
        ('IS', 1, 'String'),
        ('ISI', 1, 'Integer'),
        ('ISF', 1, 'Float'),
        ('IB', 0, 'Flag'),
        ('IA', -1, 'Integer'),
    ]
    declared_infos = {
        field_id: Info(field_id, num, field_type, '', '', '')
        for field_id, num, field_type in declared_specs
    }

    inferrer = infer_headers._InferHeaderFields()
    declared_header = vcf_header_io.VcfHeader(infos=declared_infos)
    actual = inferrer._infer_info_fields(sample_variant, declared_header)

    inferred_if = Info('IF', 1, 'Float', '', '', '')
    corrected_ia = Info('IA', None, 'Float', '', '', '')
    wanted = {'IF': inferred_if, 'IA': corrected_ia}
    self.assertEqual(wanted, actual)
def test_infer_mismatched_format_field(self):
    """`_infer_mismatched_format_field` re-types FI from Integer to Float.

    The FI value of the fixture variant's first call is checked against an
    FI definition declared as Integer; the expected correction keeps the id,
    count and description and changes only the type to Float.
    """
    sample_variant = self._get_sample_variant_format_fi_float_value()

    declared_formats = OrderedDict()
    declared_formats['FS'] = Format('FS', 1, 'String', 'desc')
    declared_formats['FI'] = Format('FI', 2, 'Integer', 'desc')
    declared_formats['FU'] = Format('FU', field_counts['.'], 'Float', 'desc')
    declared_formats['GT'] = Format('GT', 2, 'Integer', 'Special GT key')
    declared_formats['PS'] = Format('PS', 1, 'Integer', 'Special PS key')

    inferrer = infer_headers._InferHeaderFields()
    infer_mismatch = inferrer._infer_mismatched_format_field
    fi_value = sample_variant.calls[0].info.get('FI')
    declared_header = vcf_header_io.VcfHeader(formats=declared_formats)
    fi_definition = declared_header.formats.get('FI')
    corrected = infer_mismatch('FI', fi_value, fi_definition)

    expected_format = Format('FI', 2, 'Float', 'desc')
    self.assertEqual(expected_format, corrected)
def test_infer_mismatched_info_field_no_mismatches(self):
    """`_infer_mismatched_info_field` returns None when nothing is corrected.

    The IA value of the fixture variant is checked against an IA definition
    declared with num 'A' and type Integer, together with the number of
    alternate bases. The test expects no corrected definition.
    NOTE(review): judging by the fixture name, IA presumably holds a float
    such as 2.0 that is still integral -- confirm against the fixture.
    """
    variant = self._get_sample_variant_info_ia_float_2_0_in_list()
    infos = {
        'IS': Info('IS', 1, 'String', '', '', ''),
        'ISI': Info('ISI', 1, 'Integer', '', '', ''),
        'ISF': Info('ISF', 1, 'Float', '', '', ''),
        'IF': Info('IF', 1, 'Float', '', '', ''),
        'IB': Info('IB', 0, 'Flag', '', '', ''),
        'IA': Info('IA', 'A', 'Integer', '', '', '')
    }
    infer_header_fields = infer_headers._InferHeaderFields()
    corrected_info = infer_header_fields._infer_mismatched_info_field(
        'IA',
        variant.info.get('IA'),
        vcf_header_io.VcfHeader(infos=infos).infos.get('IA'),
        len(variant.alternate_bases))
    # Identity check: stricter than `== None` and yields a clearer message.
    self.assertIsNone(corrected_info)
def test_infer_mismatched_info_field_correct_num(self):
    """`_infer_mismatched_info_field` corrects the declared num of IA.

    The IA value of the fixture variant is checked against an IA definition
    declared with num -1 and type Float, together with the number of
    alternate bases. The expected correction keeps the type and sets num to
    None.
    """
    sample_variant = self._get_sample_variant_info_ia_cardinality_mismatch()

    # (id, num, type) for each declared INFO field; the remaining Info
    # arguments are empty strings for every field.
    declared_specs = [
        ('IS', 1, 'String'),
        ('ISI', 1, 'Integer'),
        ('ISF', 1, 'Float'),
        ('IF', 1, 'Float'),
        ('IB', 0, 'Flag'),
        ('IA', -1, 'Float'),
    ]
    declared_infos = {
        field_id: Info(field_id, num, field_type, '', '', '')
        for field_id, num, field_type in declared_specs
    }

    inferrer = infer_headers._InferHeaderFields()
    infer_mismatch = inferrer._infer_mismatched_info_field
    ia_value = sample_variant.info.get('IA')
    declared_header = vcf_header_io.VcfHeader(infos=declared_infos)
    ia_definition = declared_header.infos.get('IA')
    alt_count = len(sample_variant.alternate_bases)
    corrected = infer_mismatch('IA', ia_value, ia_definition, alt_count)

    wanted = Info('IA', None, 'Float', '', '', '')
    self.assertEqual(wanted, corrected)
def test_infer_annotation_types_no_conflicts(self):
    """Infer annotation sub-field types for a single `CSQ` field.

    `CSQ` is requested for annotation type inference and is given two
    four-part, pipe-separated values. The test expects exactly three
    inferred entries: Gene as String, Position as Integer and Score as
    Float.
    """
    anno_fields = ['CSQ']
    header = self._get_sample_header_fields(with_annotation=True)
    variant = self._get_sample_variant_1()
    variant.info['CSQ'] = ['A|GENE1|100|1.2', 'TT|GENE1|101|1.3']
    infer_header_fields = infer_headers._InferHeaderFields(
        infer_headers=False, annotation_fields_to_infer=anno_fields)
    inferred_headers = next(infer_header_fields.process(variant, header))
    expected_types = {
        'CSQ_Gene_TYPE': 'String',
        'CSQ_Position_TYPE': 'Integer',
        'CSQ_Score_TYPE': 'Float'
    }
    # Compare the whole mapping in one assertion: an unexpected or missing
    # key is reported as a readable failure (with a diff) instead of a
    # KeyError, and every mismatching type is shown, not just the first.
    actual_types = {
        key: item['type'] for key, item in inferred_headers.infos.items()
    }
    self.assertEqual(expected_types, actual_types)
def test_infer_annotation_types_with_missing(self):
    """Infer annotation sub-field types when some sub-values are empty.

    In the `CSQ` values used here Gene is always empty, while Position and
    Score are each empty in some entries only. The test expects Gene to be
    typed '.', Position Integer and Score Float. It then sets `CSQ` to an
    empty list and expects `process` to yield a default-constructed
    `VcfHeader`.
    """
    anno_fields = ['CSQ']
    header = self._get_sample_header_fields(with_annotation=True)
    variant = self._get_sample_variant_1()
    variant.info['CSQ'] = ['A||100|', 'A||101|1.3', 'A|||1.4', 'TT|||']
    # Keyword arguments, matching the other annotation tests in this file.
    infer_header_fields = infer_headers._InferHeaderFields(
        infer_headers=False, annotation_fields_to_infer=anno_fields)
    inferred_headers = next(infer_header_fields.process(variant, header))
    expected_types = {
        'CSQ_Gene_TYPE': '.',
        'CSQ_Position_TYPE': 'Integer',
        'CSQ_Score_TYPE': 'Float'
    }
    # Compare the whole mapping in one assertion: an unexpected or missing
    # key is reported as a readable failure (with a diff) instead of a
    # KeyError, and every mismatching type is shown, not just the first.
    actual_types = {
        key: item['type'] for key, item in inferred_headers.infos.items()
    }
    self.assertEqual(expected_types, actual_types)

    # With no annotation values at all, nothing should be inferred.
    variant.info['CSQ'] = []
    inferred_headers = next(infer_header_fields.process(variant, header))
    expected = vcf_header_io.VcfHeader()
    self.assertEqual(expected, inferred_headers)