def test_generate_header_fields_from_schema_none_mode(self):
        """Checks header generation for fields whose mode is left unset.

        Two single-field schemas are converted: one named 'field' and one
        named 'AA' (the case this test labels as a reserved field). Each is
        expected to yield a single String INFO entry and no FORMAT entries.
        """
        string_type = bigquery_util.TableFieldConstants.TYPE_STRING

        # Case 1: non-reserved field name; no `mode` is passed.
        non_reserved_schema = bigquery.TableSchema()
        non_reserved_schema.fields.append(
            bigquery.TableFieldSchema(
                name='field', type=string_type, description='desc'))
        actual_header = schema_converter.generate_header_fields_from_schema(
            non_reserved_schema)
        expected_infos = OrderedDict()
        expected_infos['field'] = Info(
            'field', 1, 'String', 'desc', None, None)
        self.assertEqual(
            actual_header,
            vcf_header_io.VcfHeader(infos=expected_infos,
                                    formats=OrderedDict()))

        # Case 2: reserved field name 'AA'; again no `mode` is passed.
        reserved_schema = bigquery.TableSchema()
        reserved_schema.fields.append(
            bigquery.TableFieldSchema(
                name='AA', type=string_type, description='desc'))
        actual_header = schema_converter.generate_header_fields_from_schema(
            reserved_schema)
        expected_infos = OrderedDict()
        expected_infos['AA'] = Info('AA', 1, 'String', 'desc', None, None)
        self.assertEqual(
            actual_header,
            vcf_header_io.VcfHeader(infos=expected_infos,
                                    formats=OrderedDict()))
    def test_add_format_fields_reserved_field_schema_compatibility(self):
        """Checks handling of a String-typed 'GQ' field under the calls record.

        Converting the whole schema must raise ValueError. Calling
        `_add_format_fields` directly with allow_incompatible_schema=True
        must instead record 'GQ' as a String FORMAT entry.
        """
        field_constants = bigquery_util.TableFieldConstants

        # Build the nested 'GQ' field first, then the repeated calls record
        # that owns it.
        gq_field = bigquery.TableFieldSchema(
            name='GQ',
            type=field_constants.TYPE_STRING,
            mode=field_constants.MODE_NULLABLE,
            description='desc')
        calls_record = bigquery.TableFieldSchema(
            name=bigquery_util.ColumnKeyConstants.CALLS,
            type=field_constants.TYPE_RECORD,
            mode=field_constants.MODE_REPEATED,
            description='One record for each call.')
        calls_record.fields.append(gq_field)

        conflicting_schema = bigquery.TableSchema()
        conflicting_schema.fields.append(calls_record)

        # Default behaviour: the conflicting schema is rejected.
        self.assertRaises(
            ValueError,
            schema_converter.generate_header_fields_from_schema,
            conflicting_schema)

        # With the override, the field is accepted as declared.
        collected_formats = OrderedDict()
        schema_converter._add_format_fields(
            calls_record, collected_formats, allow_incompatible_schema=True)
        wanted_formats = OrderedDict()
        wanted_formats['GQ'] = Format('GQ', 1, 'String', 'desc')
        self.assertEqual(collected_formats, wanted_formats)
def _write_vcf_meta_info(input_table, representative_header_file,
                         allow_incompatible_schema):
    # type: (str, str, bool) -> None
    """Writes VCF meta information derived from a BigQuery table schema.

    Args:
      input_table: Table reference handed to `_get_schema` to obtain the
        schema.
      representative_header_file: Passed to `vcf_header_io.WriteVcfHeaderFn`
        (presumably the destination of the header; confirm against that
        class).
      allow_incompatible_schema: Forwarded unchanged to
        `schema_converter.generate_header_fields_from_schema`.
    """
    table_schema = _get_schema(input_table)
    vcf_header = schema_converter.generate_header_fields_from_schema(
        table_schema, allow_incompatible_schema)
    header_writer = vcf_header_io.WriteVcfHeaderFn(representative_header_file)
    header_writer.process(vcf_header, _VCF_VERSION_LINE)
    def test_schema_to_vcf_header_to_schema(self):
        """Round-trips the sample schema: schema -> VCF header -> schema.

        The fields extracted from the reconstructed schema must equal those
        extracted from the original sample schema.
        """
        source_schema = bigquery_schema_util.get_sample_table_schema()
        vcf_header = schema_converter.generate_header_fields_from_schema(
            source_schema)

        variant_factory = processed_variant.ProcessedVariantFactory(
            vcf_header)
        round_tripped_schema = (
            schema_converter.generate_schema_from_header_fields(
                vcf_header, variant_factory))

        round_tripped_fields = _get_fields_from_schema(round_tripped_schema)
        source_fields = _get_fields_from_schema(source_schema)
        self.assertEqual(round_tripped_fields, source_fields)
    def test_generate_header_fields_from_schema_schema_compatibility(self):
        """Checks handling of an Integer-typed 'AA' field.

        By default the conversion must raise ValueError; with
        allow_incompatible_schema=True it must produce an Integer INFO entry
        for 'AA' and no FORMAT entries.
        """
        field_constants = bigquery_util.TableFieldConstants
        conflicting_field = bigquery.TableFieldSchema(
            name='AA',
            type=field_constants.TYPE_INTEGER,
            mode=field_constants.MODE_NULLABLE,
            description='desc')
        conflicting_schema = bigquery.TableSchema()
        conflicting_schema.fields.append(conflicting_field)

        # Default behaviour: the conflicting schema is rejected.
        self.assertRaises(
            ValueError,
            schema_converter.generate_header_fields_from_schema,
            conflicting_schema)

        # With the override, the declared type is carried into the header.
        actual_header = schema_converter.generate_header_fields_from_schema(
            conflicting_schema, allow_incompatible_schema=True)
        wanted_infos = OrderedDict()
        wanted_infos['AA'] = Info('AA', 1, 'Integer', 'desc', None, None)
        wanted_header = vcf_header_io.VcfHeader(
            infos=wanted_infos, formats=OrderedDict())
        self.assertEqual(actual_header, wanted_header)
    def test_generate_header_fields_from_schema_date_type(self):
        """Checks that a 'Date'-typed column adds nothing to the header.

        The generated header must have empty INFO and FORMAT collections.
        """
        date_column = bigquery.TableFieldSchema(
            name='partition_date_please_ignore',
            type='Date',
            mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
            description='Column required by BigQuery partitioning logic.')
        date_only_schema = bigquery.TableSchema()
        date_only_schema.fields.append(date_column)

        actual_header = schema_converter.generate_header_fields_from_schema(
            date_only_schema)

        empty_header = vcf_header_io.VcfHeader(
            infos=OrderedDict(), formats=OrderedDict())
        self.assertEqual(actual_header, empty_header)
Пример #7
0
  def test_generate_header_fields_from_schema(self):
    """Converts the sample table schema and checks the resulting header.

    Expects four INFO entries (AF, AA, IFR, IS) and two FORMAT entries
    (FB, GQ), in that order.
    """
    actual_header = schema_converter.generate_header_fields_from_schema(
        bigquery_schema_util.get_sample_table_schema())

    # Entries are inserted one by one so the expected ordering is explicit.
    wanted_infos = OrderedDict()
    wanted_infos['AF'] = createInfo('AF', 'A', 'Float', 'desc', None, None)
    wanted_infos['AA'] = createInfo('AA', 1, 'String', 'desc', None, None)
    wanted_infos['IFR'] = createInfo('IFR', '.', 'Float', 'desc', None, None)
    wanted_infos['IS'] = createInfo('IS', 1, 'String', 'desc', None, None)

    wanted_formats = OrderedDict()
    wanted_formats['FB'] = createFormat('FB', 1, 'String', 'desc')
    wanted_formats['GQ'] = createFormat('GQ', 1, 'Integer', 'desc')

    wanted_header = vcf_header_io.VcfHeader(
        infos=wanted_infos, formats=wanted_formats)
    self.assertEqual(actual_header, wanted_header)
    def test_generate_header_fields_from_schema_invalid_description(self):
        """Checks that a newline in a field description becomes a space.

        The input description is 'Desc\\nThis is added intentionally.' and the
        expected INFO description is 'Desc This is added intentionally.'.
        """
        field_constants = bigquery_util.TableFieldConstants
        multiline_field = bigquery.TableFieldSchema(
            name='invalid_description',
            type=field_constants.TYPE_STRING,
            mode=field_constants.MODE_NULLABLE,
            description='Desc\nThis is added intentionally.')
        schema_under_test = bigquery.TableSchema()
        schema_under_test.fields.append(multiline_field)

        actual_header = schema_converter.generate_header_fields_from_schema(
            schema_under_test)

        wanted_infos = OrderedDict()
        wanted_infos['invalid_description'] = Info(
            'invalid_description', 1, 'String',
            'Desc This is added intentionally.', None, None)
        wanted_header = vcf_header_io.VcfHeader(
            infos=wanted_infos, formats=OrderedDict())
        self.assertEqual(actual_header, wanted_header)
Пример #9
0
  def test_vcf_header_to_schema_to_vcf_header(self):
    """Round-trips a header: VCF header -> schema -> VCF header.

    The header rebuilt from the generated schema must equal the original.
    """
    # Entries are inserted one by one so the ordering is explicit.
    source_infos = OrderedDict()
    source_infos['I1'] = createInfo('I1', '.', 'String', 'desc', None, None)
    source_infos['IA'] = createInfo('IA', '.', 'Integer', 'desc', None, None)

    source_formats = OrderedDict()
    source_formats['F1'] = createFormat('F1', '.', 'String', 'desc')
    source_formats['F2'] = createFormat('F2', '.', 'Integer', 'desc')
    source_formats['FU'] = createFormat('FU', '.', 'Float', 'desc')

    source_header = vcf_header_io.VcfHeader(
        infos=source_infos, formats=source_formats)

    variant_factory = processed_variant.ProcessedVariantFactory(source_header)
    intermediate_schema = schema_converter.generate_schema_from_header_fields(
        source_header, variant_factory)
    rebuilt_header = schema_converter.generate_header_fields_from_schema(
        intermediate_schema)

    self.assertEqual(source_header, rebuilt_header)
    def test_generate_header_fields_from_schema_with_annotation(self):
        """Converts the sample schema built with annotation fields enabled.

        Besides the AF, AA, IFR and IS INFO entries, the header is expected
        to contain a 'CSQ' INFO entry whose description is
        'desc Format: Consequence|IMPACT'. FORMAT entries are FB and GQ.
        """
        annotated_schema = bigquery_schema_util.get_sample_table_schema(
            with_annotation_fields=True)
        actual_header = schema_converter.generate_header_fields_from_schema(
            annotated_schema)

        # Entries are inserted one by one so the expected ordering is
        # explicit.
        wanted_infos = OrderedDict()
        wanted_infos['AF'] = Info(
            'AF', field_counts['A'], 'Float', 'desc', None, None)
        wanted_infos['CSQ'] = Info(
            'CSQ', field_counts['.'], 'String',
            'desc Format: Consequence|IMPACT', None, None)
        wanted_infos['AA'] = Info('AA', 1, 'String', 'desc', None, None)
        wanted_infos['IFR'] = Info(
            'IFR', field_counts['.'], 'Float', 'desc', None, None)
        wanted_infos['IS'] = Info('IS', 1, 'String', 'desc', None, None)

        wanted_formats = OrderedDict()
        wanted_formats['FB'] = parser._Format('FB', 0, 'Flag', 'desc')
        wanted_formats['GQ'] = parser._Format('GQ', 1, 'Integer', 'desc')

        wanted_header = vcf_header_io.VcfHeader(
            infos=wanted_infos, formats=wanted_formats)
        self.assertEqual(actual_header, wanted_header)