Exemplo n.º 1
0
    def json_schema_to_sql_type(self, schema):
        """
        Translate a JSONSchema column definition into a SQL column type string.

        The type list comes from `json_schema.get_type`. A single entry is used
        directly; a pair that contains `json_schema.NULL` marks the column as
        nullable and the other entry becomes the type. Whatever is not matched
        below (including a type list that could not be reduced to one entry) is
        stored as `text`.
        :param schema: JSONSchema
        :return: string, SQL type with ` NOT NULL` appended unless nullable
        :raises PostgresError: when more than two types are declared
        """
        declared_types = json_schema.get_type(schema)
        type_count = len(declared_types)

        if type_count > 2:
            raise PostgresError('Multiple types per column not supported')

        nullable = type_count == 2 and json_schema.NULL in declared_types

        if type_count == 1:
            column_type = declared_types[0]
        elif nullable:
            # Keep whichever entry of the pair is not the leading null.
            null_first = declared_types.index(json_schema.NULL) == 0
            column_type = declared_types[1] if null_first else declared_types[0]
        else:
            # No entry, or two entries without a null: the type list is left
            # as is and falls through to the `text` default below.
            column_type = declared_types

        is_date_time = 'format' in schema and \
            schema['format'] == 'date-time' and \
            column_type == 'string'

        if is_date_time:
            sql_type = 'timestamp with time zone'
        else:
            sql_type = 'text'
            for json_name, sql_name in (('boolean', 'boolean'),
                                        ('integer', 'bigint'),
                                        ('number', 'double precision')):
                if column_type == json_name:
                    sql_type = sql_name
                    break

        return sql_type if nullable else sql_type + ' NOT NULL'
Exemplo n.º 2
0
    def sql_type_to_json_schema(self, sql_type, is_nullable):
        """
        Build the JSONSchema structure equivalent to a SQL column type.

        Only the column types listed in the table below are understood. Nullable
        columns get `json_schema.NULL` appended to their type list, and timestamp
        columns additionally carry a `date-time` format.
        :param sql_type: string, SQL column type
        :param is_nullable: boolean, whether the column is nullable
        :return: JSONSchema
        :raises PostgresError: when `sql_type` is not one of the supported types
        """
        # (SQL type, JSON type, JSON format)
        supported = (
            ('timestamp with time zone', 'string', 'date-time'),
            ('bigint', 'integer', None),
            ('double precision', 'number', None),
            ('boolean', 'boolean', None),
            ('text', 'string', None),
        )

        for known_sql_type, json_type, _format in supported:
            if sql_type == known_sql_type:
                break
        else:
            raise PostgresError(
                'Unsupported type `{}` in existing target table'.format(
                    sql_type))

        if is_nullable:
            type_list = [json_type, json_schema.NULL]
        else:
            type_list = [json_type]

        ret_json_schema = {'type': type_list}
        if _format:
            ret_json_schema['format'] = _format

        return ret_json_schema
Exemplo n.º 3
0
    def _validate_identifier(self, identifier):
        """
        Check that `identifier` satisfies this target's identifier rules.

        :param identifier: string
        :return: True when every check passes
        :raises PostgresError: when the identifier is empty, is longer than
            `self.IDENTIFIER_FIELD_LENGTH`, does not start with a lower case
            letter or underscore, or contains anything other than lower case
            letters, digits, underscores and dollar signs
        """
        if not identifier:
            raise PostgresError('Identifier must be non empty.')

        if self.IDENTIFIER_FIELD_LENGTH < len(identifier):
            raise PostgresError(
                'Length of identifier must be less than or equal to {}. Got {} for `{}`'
                .format(self.IDENTIFIER_FIELD_LENGTH, len(identifier),
                        identifier))

        if not re.match(r'^[a-z_].*', identifier):
            raise PostgresError(
                'Identifier must start with a lower case letter, or underscore. Got `{}` for `{}`'
                .format(identifier[0], identifier))

        # Anchor with `\Z` instead of `$`: `$` also matches just before a
        # trailing newline, which would let an identifier ending in '\n' pass.
        if not re.match(r'^[a-z0-9_$]+\Z', identifier):
            # Report the first character that is actually outside the allowed
            # set; the identifier is non-empty and failed the match above, so
            # such a character always exists.
            offending = re.search(r'[^a-z0-9_$]', identifier).group(0)
            raise PostgresError(
                'Identifier must only contain lower case letters, numbers, underscores, or dollar signs. Got `{}` for `{}`'
                .format(offending, identifier))

        return True
Exemplo n.º 4
0
    def activate_version(self, stream_buffer, version):
        """
        Swap the tables written for `version` of a stream in as the live tables.

        Every table in `self.postgres_schema` whose name matches the LIKE pattern
        `<stream><SEPARATOR><version>%` is renamed over its unversioned
        counterpart, and the table it replaces is dropped. Nothing is renamed
        when no metadata is found for the stream's table, or when the recorded
        version is not older than `version`; those cases are only logged.
        :param stream_buffer: buffer whose `stream` attribute names the stream
        :param version: version to activate
        :raises PostgresError: wrapping any exception raised along the way,
            after a `ROLLBACK;` has been issued
        """
        with self.conn.cursor() as cur:
            try:
                cur.execute('BEGIN;')

                metadata = self._get_table_metadata(cur, stream_buffer.stream)
                live_version = metadata.get('version') if metadata else None

                if not metadata:
                    self.LOGGER.error(
                        '{} - Table for stream does not exist'.format(
                            stream_buffer.stream))
                elif live_version is not None and live_version >= version:
                    self.LOGGER.warning(
                        '{} - Table version {} already active'.format(
                            stream_buffer.stream, version))
                else:
                    versioned_prefix = stream_buffer.stream + SEPARATOR + str(version)
                    # Whatever follows the prefix in a versioned table name is
                    # carried over onto the unversioned name.
                    suffix_start = len(versioned_prefix)

                    find_versioned_tables = sql.SQL('''
                        SELECT tablename FROM pg_tables
                        WHERE schemaname = {} AND tablename like {};
                        ''')
                    swap_tables = sql.SQL('''
                            ALTER TABLE {table_schema}.{stream_table} RENAME TO {stream_table_old};
                            ALTER TABLE {table_schema}.{version_table} RENAME TO {stream_table};
                            DROP TABLE {table_schema}.{stream_table_old};
                            COMMIT;''')

                    cur.execute(
                        find_versioned_tables.format(
                            sql.Literal(self.postgres_schema),
                            sql.Literal(versioned_prefix + '%')))

                    for row in cur.fetchall():
                        versioned_table_name = row[0]
                        table_name = stream_buffer.stream + \
                            versioned_table_name[suffix_start:]
                        retired_name = table_name + SEPARATOR + 'old'

                        cur.execute(
                            swap_tables.format(
                                table_schema=sql.Identifier(self.postgres_schema),
                                stream_table=sql.Identifier(table_name),
                                stream_table_old=sql.Identifier(retired_name),
                                version_table=sql.Identifier(versioned_table_name)))
            except Exception as error:
                cur.execute('ROLLBACK;')
                message = '{} - Exception activating table version {}'.format(
                    stream_buffer.stream, version)
                self.LOGGER.exception(message)
                raise PostgresError(message, error)
Exemplo n.º 5
0
    def activate_version(self, stream_buffer, version):
        """
        Swap the tables written for `version` of a stream in as the live tables.

        The root table name is obtained from `self.add_table_mapping` for the
        stream's path. Every table in `self.postgres_schema` whose name matches
        the LIKE pattern `<root table><SEPARATOR><version>%` is renamed over its
        unversioned counterpart, the table it replaces is dropped, and the
        activated table's metadata gets its `path` entry set from
        `self.table_mapping_cache`. Nothing is renamed when no schema is found
        for the root table, or when its recorded version is not older than
        `version`; those cases are only logged.
        :param stream_buffer: buffer whose `stream` attribute names the stream
        :param version: version to activate
        :raises PostgresError: wrapping any exception raised along the way,
            after a `ROLLBACK;` has been issued
        """
        with self.conn.cursor() as cur:
            try:
                cur.execute('BEGIN;')

                self.setup_table_mapping_cache(cur)
                root_table_name = self.add_table_mapping(
                    cur, (stream_buffer.stream, ), {})
                existing_schema = self.get_table_schema(cur, root_table_name)
                live_version = existing_schema.get('version') \
                    if existing_schema else None

                if not existing_schema:
                    self.LOGGER.error(
                        '{} - Table for stream does not exist'.format(
                            stream_buffer.stream))
                elif live_version is not None and live_version >= version:
                    self.LOGGER.warning(
                        '{} - Table version {} already active'.format(
                            stream_buffer.stream, version))
                else:
                    versioned_prefix = root_table_name + SEPARATOR + str(version)
                    # Whatever follows the prefix in a versioned table name is
                    # carried over onto the unversioned name.
                    suffix_start = len(versioned_prefix)

                    # Reverse view of the mapping cache: table name -> path.
                    names_to_paths = {
                        name: path
                        for path, name in self.table_mapping_cache.items()
                    }

                    find_versioned_tables = sql.SQL('''
                        SELECT tablename FROM pg_tables
                        WHERE schemaname = {} AND tablename like {};
                    ''')
                    swap_tables = sql.SQL('''
                            ALTER TABLE {table_schema}.{stream_table} RENAME TO {stream_table_old};
                            ALTER TABLE {table_schema}.{version_table} RENAME TO {stream_table};
                            DROP TABLE {table_schema}.{stream_table_old};
                            COMMIT;
                        ''')

                    cur.execute(
                        find_versioned_tables.format(
                            sql.Literal(self.postgres_schema),
                            sql.Literal(versioned_prefix + '%')))

                    for row in cur.fetchall():
                        versioned_table_name = row[0]
                        table_name = root_table_name + \
                            versioned_table_name[suffix_start:]
                        table_path = names_to_paths[table_name]
                        retired_name = table_name + SEPARATOR + 'old'

                        cur.execute(
                            swap_tables.format(
                                table_schema=sql.Identifier(self.postgres_schema),
                                stream_table=sql.Identifier(table_name),
                                stream_table_old=sql.Identifier(retired_name),
                                version_table=sql.Identifier(versioned_table_name)))

                        # Logged before the path is written so the message shows
                        # the metadata as it was read back.
                        metadata = self._get_table_metadata(cur, table_name)
                        self.LOGGER.info(
                            'Activated {}, setting path to {}'.format(
                                metadata, table_path))

                        metadata['path'] = table_path
                        self._set_table_metadata(cur, table_name, metadata)
            except Exception as error:
                cur.execute('ROLLBACK;')
                message = '{} - Exception activating table version {}'.format(
                    stream_buffer.stream, version)
                self.LOGGER.exception(message)
                raise PostgresError(message, error)
Exemplo n.º 6
0
    def write_batch(self, stream_buffer):
        """
        Write the buffered records of a stream inside a single transaction.

        Returns None without touching the database when the buffer is empty and
        `self.persist_empty_tables` is falsy. Otherwise the existing table schema
        (if any) is checked against the streamed `key_properties`, the target
        table version is chosen, and the batch is handed to
        `self.write_batch_helper`.
        :param stream_buffer: buffer exposing `stream`, `count`, `key_properties`,
            `schema`, `max_version` and `get_batch()`
        :return: the value returned by `self.write_batch_helper`, or None when
            the batch is skipped (empty buffer, or records from an earlier
            table version)
        :raises PostgresError: 'Exception writing records', wrapping any
            exception raised inside the transaction (including the
            `key_properties` errors raised below), after a `ROLLBACK;`
        """
        if not self.persist_empty_tables and stream_buffer.count == 0:
            return None

        with self.conn.cursor() as cur:
            try:
                cur.execute('BEGIN;')

                self.setup_table_mapping_cache(cur)

                # Table name taken from the 'to' entry of the mapping returned
                # for the stream's path.
                root_table_name = self.add_table_mapping_helper(
                    (stream_buffer.stream, ), self.table_mapping_cache)['to']
                current_table_schema = self.get_table_schema(
                    cur, root_table_name)

                current_table_version = None

                if current_table_schema:
                    current_table_version = current_table_schema.get(
                        'version', None)

                    # Compared as sets, so ordering of the key properties is
                    # ignored.
                    if set(stream_buffer.key_properties) \
                            != set(current_table_schema.get('key_properties')):
                        raise PostgresError(
                            '`key_properties` change detected. Existing values are: {}. Streamed values are: {}'
                            .format(current_table_schema.get('key_properties'),
                                    stream_buffer.key_properties))

                    # Each key column must map to the same SQL type in the
                    # existing table and in the streamed schema.
                    for key_property in stream_buffer.key_properties:
                        canonicalized_key, remote_column_schema = self.fetch_column_from_path(
                            (key_property, ), current_table_schema)
                        if self.json_schema_to_sql_type(remote_column_schema) \
                                != self.json_schema_to_sql_type(stream_buffer.schema['properties'][key_property]):
                            # The five values are, in order: key name, existing
                            # JSON type, streamed JSON type, existing SQL type,
                            # streamed SQL type. The text after "Streamed
                            # values are:" therefore also lists both SQL types.
                            # NOTE(review): the message reads the existing
                            # column via ['schema']['properties'][key_property]
                            # rather than `remote_column_schema` - confirm both
                            # always refer to the same column.
                            raise PostgresError((
                                '`key_properties` type change detected for "{}". '
                                + 'Existing values are: {}. ' +
                                'Streamed values are: {}, {}, {}').format(
                                    key_property,
                                    json_schema.get_type(
                                        current_table_schema['schema']
                                        ['properties'][key_property]),
                                    json_schema.get_type(
                                        stream_buffer.schema['properties']
                                        [key_property]),
                                    self.json_schema_to_sql_type(
                                        current_table_schema['schema']
                                        ['properties'][key_property]),
                                    self.json_schema_to_sql_type(
                                        stream_buffer.schema['properties']
                                        [key_property])))

                # `or` means a falsy existing version (None, and also 0) falls
                # back to the buffer's max_version.
                target_table_version = current_table_version or stream_buffer.max_version

                self.LOGGER.info(
                    'Stream {} ({}) with max_version {} targetting {}'.format(
                        stream_buffer.stream, root_table_name,
                        stream_buffer.max_version, target_table_version))

                # NOTE(review): this discards the mapped table name computed
                # above and continues with the raw stream name; presumably
                # `write_batch_helper` resolves the mapping itself - confirm.
                root_table_name = stream_buffer.stream
                if current_table_version is not None and \
                        stream_buffer.max_version is not None:
                    if stream_buffer.max_version < current_table_version:
                        # Records older than the existing table version are
                        # dropped: the transaction is rolled back and nothing
                        # is written.
                        self.LOGGER.warning(
                            '{} - Records from an earlier table version detected.'
                            .format(stream_buffer.stream))
                        cur.execute('ROLLBACK;')
                        return None

                    elif stream_buffer.max_version > current_table_version:
                        # Newer records are written under a name suffixed with
                        # the separator and the new version.
                        root_table_name += SEPARATOR + str(
                            stream_buffer.max_version)
                        target_table_version = stream_buffer.max_version

                self.LOGGER.info('Root table name {}'.format(root_table_name))

                written_batches_details = self.write_batch_helper(
                    cur, root_table_name, stream_buffer.schema,
                    stream_buffer.key_properties, stream_buffer.get_batch(),
                    {'version': target_table_version})

                cur.execute('COMMIT;')

                return written_batches_details
            except Exception as ex:
                # Any failure, including the PostgresErrors raised above, is
                # rolled back, logged and re-raised under a generic message.
                cur.execute('ROLLBACK;')
                message = 'Exception writing records'
                self.LOGGER.exception(message)
                raise PostgresError(message, ex)