示例#1
0
def test_is_nullable():
    """Check `json_schema.is_nullable` on nullable and non-nullable schemas."""
    nullable_array = {
        'type': ['array', 'null'],
        'items': {
            'type': ['boolean']
        }
    }
    nullable_integer = {'type': ['integer', 'null']}

    # Schemas that list 'null' among their types are reported as nullable.
    for candidate in (nullable_array, nullable_integer):
        assert json_schema.is_nullable(candidate)

    # A schema without 'null', and the empty schema, are not nullable.
    for candidate in ({'type': ['string']}, {}):
        assert not json_schema.is_nullable(candidate)
示例#2
0
def _denest_schema(table_path,
                   table_json_schema,
                   key_prop_schemas,
                   subtables,
                   level=-1):
    """
    Replace `table_json_schema['properties']` with a flattened mapping.

    Each `(prop, schema)` pair produced by `_denest_schema__singular_schemas`
    is dispatched on its kind:
    - objects are handed to `_denest_schema_helper`, which writes into the
      same flattened mapping
    - iterables are handed to `_create_subtable` one level deeper
    - literals are collected under the key `(prop, )` in an `anyOf` list

    :param table_path: tuple, path of the table being denested
    :param table_json_schema: dict, JSON Schema; its 'properties' is overwritten
    :param key_prop_schemas: passed through to the helpers unchanged
    :param subtables: passed through to the helpers unchanged
    :param level: nesting level; subtables are created at `level + 1`
    :return: None
    """
    flattened = {}
    singular_schemas = _denest_schema__singular_schemas(table_json_schema)

    for prop, item_json_schema in singular_schemas:
        column_key = (prop, )

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(table_path + column_key, column_key,
                                  item_json_schema,
                                  json_schema.is_nullable(item_json_schema),
                                  flattened, key_prop_schemas, subtables,
                                  level)
            continue

        if json_schema.is_iterable(item_json_schema):
            _create_subtable(table_path + column_key, item_json_schema,
                             key_prop_schemas, subtables, level + 1)
            continue

        if json_schema.is_literal(item_json_schema):
            # Several singular schemas may share a property name; gather
            # them all in one `anyOf` list.
            any_of = flattened.setdefault(column_key, {'anyOf': []})['anyOf']
            any_of.append(item_json_schema)

    table_json_schema['properties'] = flattened
def test_complex_objects__logical_statements():
    """A schema listing every JSON type satisfies several predicates at once."""
    all_types = [
        'null', 'integer', 'number', 'boolean', 'string', 'array', 'object'
    ]
    member_schemas = {
        'a': {'type': 'integer'},
        'b': {'type': 'number'},
        'c': {'type': 'boolean'},
    }
    every_type = {
        'type': all_types,
        'items': {'type': 'integer'},
        'format': 'date-time',
        'properties': member_schemas,
    }

    assert json_schema.is_iterable(every_type)
    assert json_schema.is_nullable(every_type)
    # NOTE(review): `is_iterable` is asserted a second time, exactly as in the
    # original; this may have been meant for another predicate - confirm.
    assert json_schema.is_iterable(every_type)
    assert json_schema.is_object(every_type)
示例#4
0
def test__anyOf__schema__implicit_any_of():
    """A property listing many types is denested into an `anyOf` of literals."""
    every_type = {
        'type': ['integer', 'null', 'number', 'boolean', 'string', 'array', 'object'],
        'items': {'type': 'integer'},
        'format': 'date-time',
        'properties': {
            'i': {'type': 'integer'},
            'n': {'type': 'number'},
            'b': {'type': 'boolean'}
        }
    }

    denested = error_check_denest({'properties': {'every_type': every_type}},
                                  [], [])
    assert len(denested) == 2

    # The root table is the batch whose path is the empty tuple.
    root_batch = _get_table_batch_with_path(denested, ())
    denested_props = root_batch['streamed_schema']['schema']['properties']
    assert len(denested_props) == 4

    anyof_schemas = denested_props[('every_type',)]['anyOf']
    assert len(anyof_schemas) == 4

    def matching(predicate):
        """Count the anyOf member schemas for which `predicate` holds."""
        return sum(1 for candidate in anyof_schemas if predicate(candidate))

    assert matching(json_schema.is_literal) == 4
    assert matching(json_schema.is_nullable) == 4
    assert matching(json_schema.is_datetime) == 1
示例#5
0
def test_simplify__allOf__nullable():
    """Simplifying an `allOf` with a nullable member yields a nullable schema."""
    all_of_schema = {
        'allOf': [
            {'type': ['integer']},
            {'type': ['string', 'null']},
        ]
    }

    simplified = json_schema.simplify(all_of_schema)

    assert json_schema.is_nullable(simplified)
示例#6
0
 def denest_schema_helper(self, table_name, table_json_schema, not_null,
                          top_level_schema, current_path, key_prop_schemas,
                          subtables, level):
     """
     Flatten the properties of a nested object schema into `top_level_schema`.

     Nested objects are walked recursively, iterables are handed to
     `create_subtable`, and every other property is stored in
     `top_level_schema` under its separator-joined path.

     :param table_name: name of the table the properties belong to
     :param table_json_schema: dict, JSON Schema with a 'properties' entry
     :param not_null: flag forwarded unchanged to nested objects; when truthy,
         'null' is stripped from nullable leaf properties
     :param top_level_schema: dict receiving the flattened leaf schemas
     :param current_path: path prefix of the properties being visited
     :param key_prop_schemas: passed through to `create_subtable`
     :param subtables: passed through to `create_subtable`
     :param level: nesting level; subtables are created at `level + 1`
     """
     for prop, item_json_schema in table_json_schema['properties'].items():
         next_path = current_path + self.NESTED_SEPARATOR + prop

         if json_schema.is_object(item_json_schema):
             self.denest_schema_helper(table_name, item_json_schema,
                                       not_null, top_level_schema,
                                       next_path, key_prop_schemas,
                                       subtables, level)
             continue

         if json_schema.is_iterable(item_json_schema):
             subtable_name = table_name + self.NESTED_SEPARATOR + prop
             self.create_subtable(subtable_name, item_json_schema,
                                  key_prop_schemas, subtables, level + 1)
             continue

         # Leaf property: adjust its nullability in place, then record it.
         # NOTE(review): a non-nullable leaf gets 'null' appended even when
         # `not_null` is truthy - confirm this is intended.
         has_null = json_schema.is_nullable(item_json_schema)
         if not_null and has_null:
             item_json_schema['type'].remove('null')
         elif not has_null:
             item_json_schema['type'].append('null')
         top_level_schema[next_path] = item_json_schema
示例#7
0
def _literal_only_schema(schema):
    """
    Build an `anyOf` schema holding one single-typed copy of `schema` per
    literal type.

    The object, array and null types are left out of the result list. When
    `schema` is nullable, every produced copy is passed through
    `json_schema.make_nullable`.

    :param schema: dict, JSON Schema; it is not modified by this function
    :return: dict of the form `{'anyOf': [schema, ...]}`
    """
    # Decide nullability before touching any type list, and work on a copy of
    # that list: `get_type` may hand back the very list stored on `schema`,
    # in which case removing entries in place would corrupt the caller's
    # schema and hide 'null' from the nullability check below.
    nullable = json_schema.is_nullable(schema)
    literal_types = list(json_schema.get_type(schema))

    if json_schema.is_object(schema):
        literal_types.remove(json_schema.OBJECT)
    if json_schema.is_iterable(schema):
        literal_types.remove(json_schema.ARRAY)
    if nullable:
        literal_types.remove(json_schema.NULL)

    literal_schemas = []
    for literal_type in literal_types:
        # Each entry keeps every other keyword of the original schema
        # (e.g. 'format') but is narrowed to exactly one type.
        single_type_schema = deepcopy(schema)
        single_type_schema['type'] = [literal_type]

        if nullable:
            single_type_schema = json_schema.make_nullable(single_type_schema)

        literal_schemas.append(single_type_schema)

    return {'anyOf': literal_schemas}
示例#8
0
def _denest_schema_helper(table_path, table_json_schema, nullable,
                          top_level_schema, key_prop_schemas, subtables,
                          level):
    """
    Flatten the properties of a nested object into `top_level_schema`.

    The three kind checks are independent (not mutually exclusive): a single
    property may be recursed into as an object, turned into a subtable as an
    iterable, and recorded as a literal column.

    :param table_path: tuple, path leading to `table_json_schema`
    :param table_json_schema: dict, JSON Schema with a 'properties' entry
    :param nullable: when truthy, literal properties lacking 'null' get it
        appended to their type list in place
    :param top_level_schema: dict receiving literal schemas keyed by path tuple
    :param key_prop_schemas: passed through to `_create_subtable`
    :param subtables: passed through to `_create_subtable`
    :param level: nesting level; subtables are created at `level + 1`
    """
    for prop, item_json_schema in table_json_schema['properties'].items():
        child_key = (prop, )

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(table_path + child_key, item_json_schema,
                                  nullable, top_level_schema, key_prop_schemas,
                                  subtables, level)

        if json_schema.is_iterable(item_json_schema):
            _create_subtable(table_path + child_key, item_json_schema,
                             key_prop_schemas, subtables, level + 1)

        if not json_schema.is_literal(item_json_schema):
            continue

        # A nullable parent makes each of its literal children nullable.
        needs_null = nullable and not json_schema.is_nullable(item_json_schema)
        if needs_null:
            item_json_schema['type'].append('null')

        top_level_schema[table_path + child_key] = _literal_only_schema(
            item_json_schema)
示例#9
0
    def merge_put_schemas(self, cur, table_schema, table_name, existing_schema,
                          new_schema):
        """
        Reconcile each property of `new_schema` with the existing table.

        For every incoming column the first matching case applies: a mapping
        already exists (nothing to do), the column is new (added), the
        existing column must become nullable, the SQL types are compatible
        (nothing to do), or the column is split. Anything else raises.

        :param cur: database cursor handed to the column operations
        :param table_schema: name of the schema containing the table
        :param table_name: name of the table being updated
        :param existing_schema: dict whose ['schema']['properties'] is updated
            in place
        :param new_schema: dict whose 'properties' are merged in
        :raises PostgresError: when a type change cannot be handled
        """
        incoming_columns = new_schema['properties']
        known_columns = existing_schema['schema']['properties']

        for name, schema in incoming_columns.items():
            ## Mapping exists
            if self.get_mapping(existing_schema, name, schema) is not None:
                continue

            ## New column
            if name not in known_columns:
                known_columns[name] = schema
                self.add_column(cur, table_schema, table_name, name, schema)
                continue

            ## Existing column non-nullable, new column is nullable
            widens_to_nullable = (
                not json_schema.is_nullable(known_columns[name])
                and json_schema.get_type(schema) == json_schema.get_type(
                    json_schema.make_nullable(known_columns[name])))
            if widens_to_nullable:
                known_columns[name] = json_schema.make_nullable(
                    known_columns[name])
                self.make_column_nullable(cur, table_schema, table_name, name)
                continue

            ## Existing column, types compatible
            incoming_sql = json_schema.to_sql(
                json_schema.make_nullable(schema))
            current_sql = json_schema.to_sql(
                json_schema.make_nullable(known_columns[name]))
            if incoming_sql == current_sql:
                continue

            ## Column type change
            if self.mapping_name(name, schema) not in known_columns \
                    and self.mapping_name(name, known_columns[name]) not in known_columns:
                self.split_column(cur, table_schema, table_name, name, schema,
                                  known_columns)
                continue

            ## Error
            raise PostgresError(
                'Cannot handle column type change for: {}.{} columns {} and {}. Name collision likely.'
                .format(table_schema, table_name, name,
                        self.mapping_name(name, schema)))
示例#10
0
def test__anyOf__schema__stitch_date_times():
    """An `anyOf` of a date-time string and a nullable string stays two schemas."""
    date_time_string = {
        "type": "string",
        "format": "date-time"
    }
    nullable_string = {"type": ["string", "null"]}

    denested = error_check_denest(
        {'properties': {'a': {"anyOf": [date_time_string, nullable_string]}}},
        [],
        [])

    # The root table is the batch whose path is the empty tuple.
    root_batch = _get_table_batch_with_path(denested, ())
    root_props = root_batch['streamed_schema']['schema']['properties']
    anyof_schemas = root_props[('a',)]['anyOf']

    def matching(predicate):
        """Count the anyOf member schemas for which `predicate` holds."""
        return sum(1 for candidate in anyof_schemas if predicate(candidate))

    assert len(anyof_schemas) == 2
    assert matching(json_schema.is_literal) == 2
    assert matching(json_schema.is_nullable) == 2
    assert matching(json_schema.is_datetime) == 1
示例#11
0
    def add_column(self, cur, table_schema, table_name, column_name,
                   column_schema):
        """
        Issue an `ALTER TABLE ... ADD COLUMN` for `column_name`.

        The SQL type comes from `json_schema.to_sql`. If the schema is not
        nullable and the table is not empty, a warning is logged and the
        nullable variant of the type is used instead.

        :param cur: database cursor used to execute the statement
        :param table_schema: name of the schema containing the table
        :param table_name: name of the table to alter
        :param column_name: name of the column to add
        :param column_schema: dict, JSON Schema describing the column
        """
        sql_type = json_schema.to_sql(column_schema)

        # The emptiness check only runs for non-nullable schemas.
        force_nullable = not (
            json_schema.is_nullable(column_schema)
            or self.is_table_empty(cur, table_schema, table_name))

        if force_nullable:
            self.logger.warning(
                'Forcing new column `{}.{}.{}` to be nullable due to table not empty.'
                .format(table_schema, table_name, column_name))
            nullable_schema = json_schema.make_nullable(column_schema)
            sql_type = json_schema.to_sql(nullable_schema)

        statement = sql.SQL(
            'ALTER TABLE {table_schema}.{table_name} '
            'ADD COLUMN {column_name} {data_type};')

        cur.execute(
            statement.format(table_schema=sql.Identifier(table_schema),
                             table_name=sql.Identifier(table_name),
                             column_name=sql.Identifier(column_name),
                             data_type=sql.SQL(sql_type)))
示例#12
0
def _denest_schema(table_path,
                   table_json_schema,
                   key_prop_schemas,
                   subtables,
                   level=-1):
    """
    Replace `table_json_schema['properties']` with a flattened mapping.

    The kind checks are independent (not mutually exclusive): a property may
    be flattened as an object via `_denest_schema_helper`, turned into a
    subtable via `_create_subtable`, and recorded as a literal column under
    the key `(prop, )`.

    :param table_path: tuple, path of the table being denested
    :param table_json_schema: dict, JSON Schema; its 'properties' is overwritten
    :param key_prop_schemas: passed through to the helpers unchanged
    :param subtables: passed through to the helpers unchanged
    :param level: nesting level; subtables are created at `level + 1`
    :return: None
    """
    flattened = {}

    for prop, item_json_schema in table_json_schema['properties'].items():
        column_key = (prop, )

        if json_schema.is_object(item_json_schema):
            _denest_schema_helper(table_path + column_key, item_json_schema,
                                  json_schema.is_nullable(item_json_schema),
                                  flattened, key_prop_schemas, subtables,
                                  level)

        if json_schema.is_iterable(item_json_schema):
            _create_subtable(table_path + column_key, item_json_schema,
                             key_prop_schemas, subtables, level + 1)

        if not json_schema.is_literal(item_json_schema):
            continue

        flattened[column_key] = _literal_only_schema(item_json_schema)

    table_json_schema['properties'] = flattened
示例#13
0
    def upsert_table_helper(self,
                            connection,
                            schema,
                            metadata,
                            log_schema_changes=True):
        """
        Upserts the `schema` to remote by:
        - creating table if necessary
        - adding columns
        - adding column mappings
        - migrating data from old columns to new, etc.

        Each property of the local schema is expanded into one candidate
        column per non-null type. Every candidate is then compared with the
        column mappings already recorded for the table and is either added,
        skipped, made nullable, or split into per-type columns.

        :param connection: remote connection, type left to be determined by implementing class
        :param schema: TABLE_SCHEMA(local)
        :param metadata: additional information necessary for downstream operations
        :param log_schema_changes: defaults to True, set to false to disable logging of table level schema changes
        :return: TABLE_SCHEMA(remote)
        :raises Exception: when a column has existing mappings that fit none of the handled cases
        """
        table_path = schema['path']

        # Stamp the schema version on a copy so the caller's `metadata`
        # is left untouched.
        _metadata = deepcopy(metadata)
        _metadata['schema_version'] = CURRENT_SCHEMA_VERSION

        table_name = self.add_table_mapping(connection, table_path, _metadata)

        existing_schema = self._get_table_schema(connection, table_path,
                                                 table_name)

        # No remote schema yet: create the table, then read its schema back.
        if existing_schema is None:
            self.add_table(connection, table_name, _metadata)
            existing_schema = self._get_table_schema(connection, table_path,
                                                     table_name)

        self.add_key_properties(connection, table_name,
                                schema.get('key_properties', None))

        ## Only process columns which have single, nullable, types
        single_type_columns = []
        for column_name__or__path, column_schema in schema['schema'][
                'properties'].items():
            # Property keys may be plain strings or path tuples; normalize
            # strings to 1-tuples.
            column_path = column_name__or__path
            if isinstance(column_name__or__path, str):
                column_path = (column_name__or__path, )

            single_type_column_schema = deepcopy(column_schema)
            column_types = json_schema.get_type(single_type_column_schema)
            make_nullable = json_schema.is_nullable(column_schema)

            # One (path, schema) candidate per non-null type.
            for type in column_types:
                if type == json_schema.NULL:
                    continue

                single_type_column_schema['type'] = [type]

                if make_nullable:
                    single_type_columns.append(
                        (column_path,
                         json_schema.make_nullable(single_type_column_schema)))
                else:
                    # Copy, because the working schema is overwritten on the
                    # next iteration of this loop.
                    single_type_columns.append(
                        (column_path, deepcopy(single_type_column_schema)))

        ## Process new columns against existing
        raw_mappings = existing_schema.get('mappings', {})

        # Working list of mappings, kept in sync with every change made
        # below. Each entry carries 'from' (path tuple) and 'to' (column name)
        # on top of whatever `json_schema.simple_type` returns.
        mappings = []

        for to, m in raw_mappings.items():
            mapping = json_schema.simple_type(m)
            mapping['from'] = tuple(m['from'])
            mapping['to'] = to
            mappings.append(mapping)

        # Checked once, before any column is processed.
        table_empty = self.is_table_empty(connection, table_name)

        for column_path, column_schema in single_type_columns:
            # Start of the per-column timer reported by `log_message`.
            upsert_table_helper__start__column = time.monotonic()

            canonicalized_column_name = self._canonicalize_column_identifier(
                column_path, column_schema, mappings)
            nullable_column_schema = json_schema.make_nullable(column_schema)

            # Redefined on every iteration so it closes over this column's
            # name, path and start time.
            def log_message(msg):
                if log_schema_changes:
                    self.LOGGER.info(
                        'Table Schema Change [`{}`.`{}`:`{}`] {} (took {} millis)'
                        .format(
                            table_name, column_path, canonicalized_column_name,
                            msg,
                            _duration_millis(
                                upsert_table_helper__start__column)))

            ## NEW COLUMN
            if not column_path in [m['from'] for m in mappings]:
                upsert_table_helper__column = "New column"
                ### NON EMPTY TABLE
                if not table_empty:
                    upsert_table_helper__column += ", non empty table"
                    self.LOGGER.warning(
                        'NOT EMPTY: Forcing new column `{}` in table `{}` to be nullable due to table not empty.'
                        .format(column_path, table_name))
                    column_schema = nullable_column_schema

                self.add_column(connection, table_name,
                                canonicalized_column_name, column_schema)
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        column_schema)

                mapping = json_schema.simple_type(column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                log_message(upsert_table_helper__column)

                continue

            ## EXISTING COLUMNS
            ### SCHEMAS MATCH
            # A non-empty list means some mapping for this path already has
            # the same SQL type; nothing to do.
            if [
                    True for m in mappings if m['from'] == column_path
                    and self.json_schema_to_sql_type(
                        m) == self.json_schema_to_sql_type(column_schema)
            ]:
                continue
            ### NULLABLE SCHEMAS MATCH
            ###  New column _is not_ nullable, existing column _is_
            if [
                    True for m in mappings if m['from'] == column_path
                    and self.json_schema_to_sql_type(m) ==
                    self.json_schema_to_sql_type(nullable_column_schema)
            ]:
                continue

            ### NULL COMPATIBILITY
            ###  New column _is_ nullable, existing column is _not_
            non_null_original_column = [
                m for m in mappings
                if m['from'] == column_path and json_schema.shorthand(m) ==
                json_schema.shorthand(column_schema)
            ]
            if non_null_original_column:
                ## MAKE NULLABLE
                self.make_column_nullable(connection, table_name,
                                          canonicalized_column_name)
                self.drop_column_mapping(connection, table_name,
                                         canonicalized_column_name)
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)

                # Swap the old mapping for its nullable replacement in the
                # working list.
                mappings = [
                    m for m in mappings
                    if not (m['from'] == column_path and json_schema.shorthand(
                        m) == json_schema.shorthand(column_schema))
                ]

                mapping = json_schema.simple_type(nullable_column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                log_message(
                    "Made existing column nullable. New column is nullable, existing column is not"
                )

                continue

            ### FIRST MULTI TYPE
            ###  New column matches existing column path, but the types are incompatible
            duplicate_paths = [m for m in mappings if m['from'] == column_path]

            if 1 == len(duplicate_paths):
                existing_mapping = duplicate_paths[0]
                existing_column_name = existing_mapping['to']

                if existing_column_name:
                    self.drop_column_mapping(connection, table_name,
                                             existing_column_name)

                ## Update existing properties
                mappings = [m for m in mappings if m['from'] != column_path]

                mapping = json_schema.simple_type(nullable_column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                # The new name for the existing column is computed after the
                # new column's mapping has been added to `mappings`.
                existing_column_new_normalized_name = self._canonicalize_column_identifier(
                    column_path, existing_mapping, mappings)

                mapping = json_schema.simple_type(
                    json_schema.make_nullable(existing_mapping))
                mapping['from'] = column_path
                mapping['to'] = existing_column_new_normalized_name
                mappings.append(mapping)

                ## Add new columns
                ### NOTE: all migrated columns will be nullable and remain that way

                #### Table Metadata
                self.add_column_mapping(
                    connection, table_name, column_path,
                    existing_column_new_normalized_name,
                    json_schema.make_nullable(existing_mapping))
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)

                #### Columns
                self.add_column(connection, table_name,
                                existing_column_new_normalized_name,
                                json_schema.make_nullable(existing_mapping))

                self.add_column(connection, table_name,
                                canonicalized_column_name,
                                nullable_column_schema)

                ## Migrate existing data
                self.migrate_column(connection, table_name,
                                    existing_mapping['to'],
                                    existing_column_new_normalized_name)

                ## Drop existing column
                self.drop_column(connection, table_name,
                                 existing_mapping['to'])

                upsert_table_helper__column = "Splitting `{}` into `{}` and `{}`. New column matches existing column path, but the types are incompatible.".format(
                    existing_column_name, existing_column_new_normalized_name,
                    canonicalized_column_name)

            ## REST MULTI TYPE
            elif 1 < len(duplicate_paths):
                # More than one mapping already exists for this path, so only
                # one more column is added here.
                ## Add new column
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)
                self.add_column(connection, table_name,
                                canonicalized_column_name,
                                nullable_column_schema)

                mapping = json_schema.simple_type(nullable_column_schema)
                mapping['from'] = column_path
                mapping['to'] = canonicalized_column_name
                mappings.append(mapping)

                upsert_table_helper__column = "Adding new column to split column `{}`. New column matches existing column's path, but no types were compatible.".format(
                    column_path)

            ## UNKNOWN
            else:
                raise Exception(
                    'UNKNOWN: Cannot handle merging column `{}` (canonicalized as: `{}`) in table `{}`.'
                    .format(column_path, canonicalized_column_name,
                            table_name))

            log_message(upsert_table_helper__column)

        # Re-read the remote schema so the result reflects every change made.
        return self._get_table_schema(connection, table_path, table_name)
示例#14
0
    def upsert_table_helper(self, connection, schema, metadata):
        """
        Upserts the `schema` to remote by:
        - creating table if necessary
        - adding columns
        - adding column mappings
        - migrating data from old columns to new, etc.

        Each property of the local schema is expanded into one candidate
        column per non-null type. Every candidate is then compared with the
        column mappings already recorded for the table and is either added,
        skipped, made nullable, or split into per-type columns.

        :param connection: remote connection, type left to be determined by implementing class
        :param schema: TABLE_SCHEMA(local)
        :param metadata: additional information necessary for downstream operations
        :return: TABLE_SCHEMA(remote)
        :raises Exception: when a column has existing mappings that fit none of the handled cases
        """
        table_path = schema['path']

        table_name = self.add_table_mapping(connection, table_path, metadata)

        existing_schema = self.get_table_schema(connection, table_path,
                                                table_name)

        # No remote schema yet: create the table, then read its schema back.
        if existing_schema is None:
            self.add_table(connection, table_name, metadata)
            existing_schema = self.get_table_schema(connection, table_path,
                                                    table_name)

        self.add_key_properties(connection, table_name,
                                schema.get('key_properties', None))

        ## Only process columns which have single, nullable, types
        single_type_columns = []
        for column_name__or__path, column_schema in schema['schema'][
                'properties'].items():
            # Property keys may be plain strings or path tuples; normalize
            # strings to 1-tuples.
            column_path = column_name__or__path
            if isinstance(column_name__or__path, str):
                column_path = (column_name__or__path, )

            single_type_column_schema = deepcopy(column_schema)
            column_types = json_schema.get_type(single_type_column_schema)
            make_nullable = json_schema.is_nullable(column_schema)

            # One (path, schema) candidate per non-null type.
            for column_type in column_types:
                if column_type == json_schema.NULL:
                    continue

                single_type_column_schema['type'] = [column_type]

                if make_nullable:
                    single_type_columns.append(
                        (column_path,
                         json_schema.make_nullable(single_type_column_schema)))
                else:
                    # Append a copy: the working schema is overwritten on the
                    # next iteration, and appending it directly would make
                    # every candidate of a multi-type column share one dict
                    # and end up with the last type.
                    single_type_columns.append(
                        (column_path, deepcopy(single_type_column_schema)))

        ## Process new columns against existing
        raw_mappings = existing_schema.get('mappings', {})

        # Working list of {'from', 'to', 'type'} mappings, kept in sync with
        # every change made below.
        mappings = []

        for to, m in raw_mappings.items():
            mappings.append({
                'from': tuple(m['from']),
                'to': to,
                'type': m['type']
            })

        # Checked once, before any column is processed.
        table_empty = self.is_table_empty(connection, table_name)

        for column_path, column_schema in single_type_columns:
            canonicalized_column_name = self._canonicalize_column_identifier(
                column_path, column_schema, mappings)
            nullable_column_schema = json_schema.make_nullable(column_schema)

            ## NEW COLUMN
            if column_path not in [m['from'] for m in mappings]:
                ### NON EMPTY TABLE
                if not table_empty:
                    self.LOGGER.warning(
                        'NOT EMPTY: Forcing new column `{}` in table `{}` to be nullable due to table not empty.'
                        .format(column_path, table_name))
                    column_schema = nullable_column_schema

                self.add_column(connection, table_name,
                                canonicalized_column_name, column_schema)
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        column_schema)
                mappings.append({
                    'from': column_path,
                    'to': canonicalized_column_name,
                    'type': json_schema.get_type(column_schema)
                })

                continue

            ## EXISTING COLUMNS
            ### SCHEMAS MATCH
            # A non-empty list means some mapping for this path already has
            # the same SQL type; nothing to do.
            if [
                    True for m in mappings if m['from'] == column_path and
                    json_schema.to_sql(m) == json_schema.to_sql(column_schema)
            ]:
                continue
            ### NULLABLE SCHEMAS MATCH
            ###  New column _is not_ nullable, existing column _is_
            if [
                    True for m in mappings
                    if m['from'] == column_path and json_schema.to_sql(m) ==
                    json_schema.to_sql(nullable_column_schema)
            ]:
                continue

            ### NULL COMPATIBILITY
            ###  New column _is_ nullable, existing column is _not_
            non_null_original_column = [
                m for m in mappings
                if m['from'] == column_path and json_schema.sql_shorthand(m) ==
                json_schema.sql_shorthand(column_schema)
            ]
            if non_null_original_column:
                ## MAKE NULLABLE
                self.make_column_nullable(connection, table_name,
                                          canonicalized_column_name)
                self.drop_column_mapping(connection, table_name,
                                         canonicalized_column_name)
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)

                # Swap the old mapping for its nullable replacement in the
                # working list.
                mappings = [
                    m for m in mappings if not (
                        m['from'] == column_path and json_schema.sql_shorthand(
                            m) == json_schema.sql_shorthand(column_schema))
                ]
                mappings.append({
                    'from':
                    column_path,
                    'to':
                    canonicalized_column_name,
                    'type':
                    json_schema.get_type(nullable_column_schema)
                })

                continue

            ### FIRST MULTI TYPE
            ###  New column matches existing column path, but the types are incompatible
            duplicate_paths = [m for m in mappings if m['from'] == column_path]

            if 1 == len(duplicate_paths):

                existing_mapping = duplicate_paths[0]
                existing_column_name = existing_mapping['to']

                if existing_column_name:
                    self.drop_column_mapping(connection, table_name,
                                             existing_column_name)

                ## Update existing properties
                mappings = [m for m in mappings if m['from'] != column_path]
                mappings.append({
                    'from':
                    column_path,
                    'to':
                    canonicalized_column_name,
                    'type':
                    json_schema.get_type(nullable_column_schema)
                })

                # The new name for the existing column is computed after the
                # new column's mapping has been added to `mappings`.
                existing_column_new_normalized_name = self._canonicalize_column_identifier(
                    column_path, existing_mapping, mappings)
                mappings.append({
                    'from':
                    column_path,
                    'to':
                    existing_column_new_normalized_name,
                    'type':
                    json_schema.get_type(
                        json_schema.make_nullable(existing_mapping))
                })

                ## Add new columns
                ### NOTE: all migrated columns will be nullable and remain that way

                #### Table Metadata
                self.add_column_mapping(
                    connection, table_name, column_path,
                    existing_column_new_normalized_name,
                    json_schema.make_nullable(existing_mapping))
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)

                #### Columns
                self.add_column(connection, table_name,
                                existing_column_new_normalized_name,
                                json_schema.make_nullable(existing_mapping))

                self.add_column(connection, table_name,
                                canonicalized_column_name,
                                nullable_column_schema)

                ## Migrate existing data
                self.migrate_column(connection, table_name,
                                    existing_mapping['to'],
                                    existing_column_new_normalized_name)

                ## Drop existing column
                self.drop_column(connection, table_name,
                                 existing_mapping['to'])

            ## REST MULTI TYPE
            elif 1 < len(duplicate_paths):
                # More than one mapping already exists for this path, so only
                # one more column is added here.

                ## Add new column
                self.add_column_mapping(connection, table_name, column_path,
                                        canonicalized_column_name,
                                        nullable_column_schema)
                self.add_column(connection, table_name,
                                canonicalized_column_name,
                                nullable_column_schema)

                mappings.append({
                    'from':
                    column_path,
                    'to':
                    canonicalized_column_name,
                    'type':
                    json_schema.get_type(nullable_column_schema)
                })

            ## UNKNOWN
            else:
                raise Exception(
                    'UNKNOWN: Cannot handle merging column `{}` (canonicalized as: `{}`) in table `{}`.'
                    .format(column_path, canonicalized_column_name,
                            table_name))

        # Re-read the remote schema so the result reflects every change made.
        return self.get_table_schema(connection, table_path, table_name)