def _add_info_fields(field, infos, allow_incompatible_schema=False):
  # type: (bigquery.TableFieldSchema, Dict[str, _Info], bool) -> None
  """Adds the INFO header entry (or entries) derived from `field` to `infos`.

  Three cases are handled, in this order:
    1. The alternate bases column is delegated to
       `_add_info_fields_from_alternate_bases`.
    2. A field whose name is a key of `vcf_reserved_fields.INFO_FIELDS` is
       checked with `_validate_reserved_field` and takes its number and type
       from the reserved definition. This case is skipped when
       `allow_incompatible_schema` is True.
    3. Any other field gets its number and type from the BigQuery mode and
       type of the field.

  Args:
    field: The BigQuery schema field to convert.
    infos: Mapping from field name to INFO header entry. Updated in place.
    allow_incompatible_schema: If True, reserved field names are not validated
      and are handled like any other field (case 3).
  """
  if field.name == bigquery_util.ColumnKeyConstants.ALTERNATE_BASES:
    _add_info_fields_from_alternate_bases(
        field, infos, allow_incompatible_schema)
    return

  reserved_fields = vcf_reserved_fields.INFO_FIELDS
  # Test membership on the mapping itself rather than on a freshly built list
  # of its keys; the cheap boolean is checked first.
  if not allow_incompatible_schema and field.name in reserved_fields:
    reserved_definition = reserved_fields.get(field.name)
    # NOTE(review): `_validate_reserved_field` is defined elsewhere; it is
    # presumably expected to raise when the schema disagrees with the spec.
    _validate_reserved_field(field, reserved_definition)
    num = reserved_definition.num
    vcf_type = reserved_definition.type
    # Fall back to the reserved description when the schema has none.
    description = field.description or reserved_definition.desc
  else:
    num = bigquery_util.get_vcf_num_from_bigquery_schema(
        field.mode, field.type)
    vcf_type = bigquery_util.get_vcf_type_from_bigquery_type(field.type)
    description = field.description

  infos[field.name] = vcf_header_io.CreateInfoField(
      field.name, num, vcf_type, _remove_special_characters(description))
def _add_format_fields(schema, formats, allow_incompatible_schema=False):
  # type: (bigquery.TableFieldSchema, Dict[str, _Format], bool) -> None
  """Adds a FORMAT header entry to `formats` for each sub-field of `schema`.

  Sub-fields named in `_CONSTANT_CALL_FIELDS` are skipped. A sub-field whose
  name is a key of `vcf_reserved_fields.FORMAT_FIELDS` is checked with
  `_validate_reserved_field` and takes its number and type from the reserved
  definition, unless `allow_incompatible_schema` is True. Every other
  sub-field gets its number and type from its BigQuery mode and type.

  Args:
    schema: The BigQuery schema field whose `fields` are converted.
    formats: Mapping from field name to FORMAT header entry. Updated in place.
    allow_incompatible_schema: If True, reserved field names are not validated
      and are handled like any other field.
  """
  # Looked up once; membership is tested on the mapping itself instead of on
  # a list of keys rebuilt for every sub-field.
  reserved_fields = vcf_reserved_fields.FORMAT_FIELDS
  for field in schema.fields:
    if field.name in _CONSTANT_CALL_FIELDS:
      continue

    if not allow_incompatible_schema and field.name in reserved_fields:
      reserved_definition = reserved_fields.get(field.name)
      # NOTE(review): `_validate_reserved_field` is defined elsewhere; it is
      # presumably expected to raise when the schema disagrees with the spec.
      _validate_reserved_field(field, reserved_definition)
      num = reserved_definition.num
      vcf_type = reserved_definition.type
      # Fall back to the reserved description when the schema has none.
      description = field.description or reserved_definition.desc
    else:
      num = bigquery_util.get_vcf_num_from_bigquery_schema(
          field.mode, field.type)
      vcf_type = bigquery_util.get_vcf_type_from_bigquery_type(field.type)
      description = field.description

    formats[field.name] = vcf_header_io.CreateFormatField(
        field.name, num, vcf_type, _remove_special_characters(description))
def test_get_vcf_type_from_bigquery_type(self):
  """Checks the BigQuery type -> VCF header type conversion.

  Each known BigQuery type must convert to its VCF header type, and an
  unknown type name must raise `ValueError`.
  """
  header_types = vcf_header_io.VcfHeaderFieldTypeConstants
  bigquery_types = bigquery_util.TableFieldConstants
  convert = bigquery_util.get_vcf_type_from_bigquery_type

  # (BigQuery type, expected VCF header type), checked in this order.
  conversions = (
      (bigquery_types.TYPE_INTEGER, header_types.INTEGER),
      (bigquery_types.TYPE_FLOAT, header_types.FLOAT),
      (bigquery_types.TYPE_BOOLEAN, header_types.FLAG),
      (bigquery_types.TYPE_STRING, header_types.STRING),
  )
  for bigquery_type, expected_vcf_type in conversions:
    self.assertEqual(expected_vcf_type, convert(bigquery_type))

  with self.assertRaises(ValueError):
    convert('DUMMY')
def _validate_reserved_field_type(field_schema, reserved_definition):
  """Ensures the schema field's VCF type equals the reserved field's type.

  Args:
    field_schema: The BigQuery schema field; its `type` is converted to a VCF
      type and its `name` is used in the error message.
    reserved_definition: The reserved field definition providing the expected
      `type`.

  Raises:
    ValueError: If the converted schema type differs from the reserved type.
  """
  actual_type = bigquery_util.get_vcf_type_from_bigquery_type(
      field_schema.type)
  expected_type = reserved_definition.type
  if actual_type == expected_type:
    return

  message = 'The type of field {} is different from the VCF spec: {} vs {}.'
  raise ValueError(
      message.format(field_schema.name, actual_type, expected_type))
def _add_info_fields_from_alternate_bases(schema,
                                          infos,
                                          allow_incompatible_schema=False):
  # type: (bigquery.TableFieldSchema, Dict[str, _Info], bool) -> None
  """Adds schema nested fields in alternate bases to `infos`.

  Notice that the validation of field mode is skipped for reserved fields since
  the mode (NULLABLE) of field in alternate bases is expected to be different
  from the mode (REPEATED) in reserved field definition; only the type is
  validated, through `_validate_reserved_field_type`.

  Any `Record` field within alternate bases is considered as an annotation
  field.

  Args:
    schema: The alternate bases schema field whose `fields` are converted.
    infos: Mapping from field name to INFO header entry. Updated in place.
    allow_incompatible_schema: If True, reserved field names are not validated
      and are handled like any other non-record field.
  """
  # Looked up once; membership is tested on the mapping itself, which avoids
  # the redundant `.keys()` call for every sub-field.
  reserved_fields = vcf_reserved_fields.INFO_FIELDS
  for field in schema.fields:
    if field.name in _CONSTANT_ALTERNATE_BASES_FIELDS:
      continue

    if field.type == bigquery_util.TableFieldConstants.TYPE_RECORD:
      # Record sub-fields are treated as annotation fields and are always
      # declared as strings.
      num = parser.field_counts[vcfio.MISSING_FIELD_VALUE]
      vcf_type = bigquery_util._VcfHeaderTypeConstants.STRING
      description = _get_annotation_description(field)
    elif not allow_incompatible_schema and field.name in reserved_fields:
      reserved_definition = reserved_fields.get(field.name)
      _validate_reserved_field_type(field, reserved_definition)
      num = reserved_definition.num
      vcf_type = reserved_definition.type
      # Fall back to the reserved description when the schema has none.
      description = field.description or reserved_definition.desc
    else:
      num = parser.field_counts[vcfio.FIELD_COUNT_ALTERNATE_ALLELE]
      vcf_type = bigquery_util.get_vcf_type_from_bigquery_type(field.type)
      description = field.description

    infos[field.name] = _Info(id=field.name,
                              num=num,
                              type=vcf_type,
                              desc=_remove_special_characters(description),
                              source=None,
                              version=None)