Exemplo n.º 1
0
    def json_schema_to_sql_type(self, schema):
        """
        Translate a JSONSchema fragment into a SQL column type string.

        The type list is obtained from `json_schema.get_type(schema)`. A single
        type is used as-is; a pair containing `json_schema.NULL` marks the column
        nullable and the other entry is used. Zero types, or two types without
        NULL, match none of the known names and therefore yield 'text NOT NULL'.

        :param schema: JSONSchema mapping (its optional 'format' key is consulted)
        :return: String, one of 'TIMESTAMP_TZ', 'BOOLEAN', 'NUMBER', 'FLOAT' or
                 'text', suffixed with ' NOT NULL' unless the column is nullable
        :raises SnowflakeError: when more than two types are declared
        """
        declared = json_schema.get_type(schema)
        count = len(declared)

        if count > 2:
            raise SnowflakeError('Multiple types per column not supported')

        nullable = count == 2 and json_schema.NULL in declared
        if nullable:
            # Keep the entry that does not sit at the first NULL's position.
            null_first = declared.index(json_schema.NULL) == 0
            column_kind = declared[1] if null_first else declared[0]
        elif count == 1:
            column_kind = declared[0]
        else:
            # Left as a list on purpose: it compares unequal to every type name
            # below, so the column falls back to 'text'.
            column_kind = declared

        is_timestamp = ('format' in schema
                        and schema['format'] == 'date-time'
                        and column_kind == 'string')

        sql_type = 'text'
        if is_timestamp:
            sql_type = 'TIMESTAMP_TZ'
        else:
            known_types = (
                ('boolean', 'BOOLEAN'),
                ('integer', 'NUMBER'),
                ('number', 'FLOAT'),
            )
            for json_name, sql_name in known_types:
                if column_kind == json_name:
                    sql_type = sql_name
                    break

        return sql_type if nullable else sql_type + ' NOT NULL'
Exemplo n.º 2
0
    def _get_table_metadata(self, cur, table_name):
        """
        Look a table up with `SHOW TABLES` and return the JSON stored in its comment.

        :param cur: database cursor used to run the query
        :param table_name: name formatted into the `SHOW TABLES LIKE` pattern
        :return: the decoded comment, or None when no table matches or the
                 matching table has an empty comment
        :raises SnowflakeError: when more than one table matches
        :raises Exception: any error raised while decoding the comment is logged
                           and re-raised unchanged
        """
        # NOTE(review): `table_name` is formatted straight into a LIKE pattern; if
        # names can contain `_` or `%` they may act as wildcards and match more
        # than one table - confirm against the callers.
        cur.execute('''
            SHOW TABLES LIKE '{}' IN SCHEMA {}.{}
            '''.format(
            table_name,
            sql.identifier(self.connection.configured_database),
            sql.identifier(self.connection.configured_schema),
        ))
        tables = cur.fetchall()

        if not tables:
            return None

        if len(tables) != 1:
            raise SnowflakeError(
                '{} tables returned while searching for: {}.{}.{}'.format(
                    len(tables), self.connection.configured_database,
                    self.connection.configured_schema, table_name))

        # Index 5 of the returned row is treated as the table comment.
        comment = tables[0][5]

        if not comment:
            return None

        try:
            return json.loads(comment)
        except Exception:
            # Log for context, then let the original decoding error propagate.
            # `Exception` (not a bare except) keeps KeyboardInterrupt/SystemExit
            # from being reported as metadata failures.
            self.LOGGER.exception('Could not load table comment metadata')
            raise
Exemplo n.º 3
0
    def sql_type_to_json_schema(self, sql_type, is_nullable):
        """
        Build the JSONSchema structure that corresponds to a SnowflakeSQL column type.

        :param sql_type: String, one of 'TIMESTAMP_TZ', 'NUMBER', 'FLOAT',
                         'BOOLEAN' or 'TEXT'
        :param is_nullable: boolean, when truthy `json_schema.NULL` is appended
                            to the returned type list
        :return: JSONSchema dict with a 'type' list and, for timestamps only,
                 a 'format' of 'date-time'
        :raises SnowflakeError: when `sql_type` is not one of the supported names
        """
        # (SQL type, JSONSchema type, JSONSchema format or None)
        supported = (
            ('TIMESTAMP_TZ', 'string', 'date-time'),
            ('NUMBER', 'integer', None),
            ('FLOAT', 'number', None),
            ('BOOLEAN', 'boolean', None),
            ('TEXT', 'string', None),
        )

        entry = next((row for row in supported if sql_type == row[0]), None)
        if entry is None:
            raise SnowflakeError(
                'Unsupported type `{}` in existing target table'.format(
                    sql_type))

        _, json_name, schema_format = entry

        type_list = [json_name]
        if is_nullable:
            type_list.append(json_schema.NULL)

        result = {'type': type_list}
        if schema_format:
            result['format'] = schema_format

        return result
Exemplo n.º 4
0
    def write_batch(self, stream_buffer):
        """
        Write the records buffered for one stream and commit them.

        When a schema already exists for the stream's mapped table, the streamed
        `key_properties` must be the same set as the stored ones and every key
        column must map to the same SQL type; otherwise the write is aborted.
        A buffer whose `max_version` is newer than the stored version is written
        to a table named `<stream><SEPARATOR><max_version>`.

        :param stream_buffer: buffer exposing `stream`, `schema`,
                              `key_properties`, `count`, `max_version` and
                              `get_batch()`
        :return: the value returned by `write_batch_helper`, or None when an
                 empty buffer is skipped or the buffer holds records from an
                 earlier table version
        :raises SnowflakeError: wraps any exception raised while writing; the
                                connection is rolled back before it is raised
        """
        skip_when_empty = not self.persist_empty_tables
        if skip_when_empty and stream_buffer.count == 0:
            return None

        with self.connection.cursor() as cur:
            try:
                self.setup_table_mapping_cache(cur)

                mapping = self.add_table_mapping_helper(
                    (stream_buffer.stream, ), self.table_mapping_cache)
                root_table_name = mapping['to']
                existing_schema = self.get_table_schema(cur, root_table_name)

                existing_version = None

                if existing_schema:
                    existing_version = existing_schema.get('version', None)

                    streamed_keys = set(stream_buffer.key_properties)
                    stored_keys = set(existing_schema.get('key_properties'))
                    if streamed_keys != stored_keys:
                        raise SnowflakeError(
                            '`key_properties` change detected. Existing values are: {}. Streamed values are: {}'
                            .format(existing_schema.get('key_properties'),
                                    stream_buffer.key_properties))

                    for key_property in stream_buffer.key_properties:
                        _, stored_column = self.fetch_column_from_path(
                            (key_property, ), existing_schema)
                        stored_sql = self.json_schema_to_sql_type(
                            stored_column)
                        streamed_sql = self.json_schema_to_sql_type(
                            stream_buffer.schema['properties'][key_property])
                        if stored_sql == streamed_sql:
                            continue

                        template = (
                            '`key_properties` type change detected for "{}". '
                            'Existing values are: {}. '
                            'Streamed values are: {}, {}, {}')
                        raise SnowflakeError(template.format(
                            key_property,
                            json_schema.get_type(
                                existing_schema['schema']['properties'][key_property]),
                            json_schema.get_type(
                                stream_buffer.schema['properties'][key_property]),
                            self.json_schema_to_sql_type(
                                existing_schema['schema']['properties'][key_property]),
                            self.json_schema_to_sql_type(
                                stream_buffer.schema['properties'][key_property])))

                target_version = existing_version or stream_buffer.max_version

                self.LOGGER.info(
                    'Stream {} ({}) with max_version {} targetting {}'.format(
                        stream_buffer.stream, root_table_name,
                        stream_buffer.max_version, target_version))

                # From here on the destination is derived from the raw stream
                # name rather than the mapped table name looked up above.
                destination = stream_buffer.stream
                streamed_max = stream_buffer.max_version
                comparable = (existing_version is not None
                              and streamed_max is not None)

                if comparable and streamed_max < existing_version:
                    self.LOGGER.warning(
                        '{} - Records from an earlier table version detected.'
                        .format(stream_buffer.stream))
                    self.connection.rollback()
                    return None

                if comparable and streamed_max > existing_version:
                    # Newer version: write to a separate versioned table.
                    destination += SEPARATOR + str(streamed_max)
                    target_version = streamed_max

                self.LOGGER.info('Root table name {}'.format(destination))

                batch_details = self.write_batch_helper(
                    cur, destination, stream_buffer.schema,
                    stream_buffer.key_properties, stream_buffer.get_batch(),
                    {'version': target_version})

                self.connection.commit()

                return batch_details
            except Exception as ex:
                self.connection.rollback()
                message = 'Exception writing records'
                self.LOGGER.exception(message)
                raise SnowflakeError(message, ex)
Exemplo n.º 5
0
    def activate_version(self, stream_buffer, version):
        """
        Swap the tables staged for `version` of a stream into place.

        Tables are found with `SHOW TABLES LIKE '<root><SEPARATOR><version>%'`.
        For each one the current table is renamed to `<name><SEPARATOR>OLD`, the
        versioned table is renamed to `<name>`, the old table is dropped and the
        connection is committed; the table's metadata is then re-read and stored
        again with its 'path' set from the table mapping cache.

        Nothing is changed (only a log line is emitted) when no schema exists
        for the stream's table or when the stored version is already at or
        beyond `version`.

        :param stream_buffer: buffer whose `stream` names the stream to activate
        :param version: version to activate
        :return: None
        :raises SnowflakeError: wraps any exception raised during activation;
                                the connection is rolled back before it is raised
        """
        with self.connection.cursor() as cur:
            try:
                self.setup_table_mapping_cache(cur)
                root_table_name = self.add_table_mapping(
                    cur, (stream_buffer.stream, ), {})
                current_table_schema = self.get_table_schema(
                    cur, root_table_name)

                if not current_table_schema:
                    self.LOGGER.error(
                        '{} - Table for stream does not exist'.format(
                            stream_buffer.stream))
                    return

                active_version = current_table_schema.get('version')
                if active_version is not None and active_version >= version:
                    self.LOGGER.warning(
                        '{} - Table version {} already active'.format(
                            stream_buffer.stream, version))
                    return

                versioned_root_table = root_table_name + SEPARATOR + str(
                    version)

                # Reverse lookup: mapped table name -> path.
                names_to_paths = {
                    name: path
                    for path, name in self.table_mapping_cache.items()
                }

                cur.execute('''
                        SHOW TABLES LIKE '{}%' IN SCHEMA {}.{}
                        '''.format(
                    versioned_root_table,
                    sql.identifier(self.connection.configured_database),
                    sql.identifier(self.connection.configured_schema)))

                # Collect the names up front: the cursor is reused in the loop.
                staged_table_names = [row[1] for row in cur.fetchall()]
                prefix_length = len(versioned_root_table)

                for versioned_table_name in staged_table_names:
                    # Strip the version marker to recover the live table name.
                    table_name = (root_table_name
                                  + versioned_table_name[prefix_length:])
                    table_path = names_to_paths[table_name]

                    qualified_schema = '{}.{}'.format(
                        sql.identifier(self.connection.configured_database),
                        sql.identifier(self.connection.configured_schema))
                    names = dict(
                        db_schema=qualified_schema,
                        stream_table_old=sql.identifier(
                            table_name + SEPARATOR + 'OLD'),
                        stream_table=sql.identifier(table_name),
                        version_table=sql.identifier(versioned_table_name))

                    # Order matters: move the live table aside, promote the
                    # versioned one, then drop the table that was moved aside.
                    cur.execute('''
                            ALTER TABLE {db_schema}.{stream_table} RENAME TO {db_schema}.{stream_table_old}
                            '''.format(**names))

                    cur.execute('''
                            ALTER TABLE {db_schema}.{version_table} RENAME TO {db_schema}.{stream_table}
                            '''.format(**names))

                    cur.execute('''
                            DROP TABLE {db_schema}.{stream_table_old}
                            '''.format(**names))

                    self.connection.commit()

                    metadata = self._get_table_metadata(cur, table_name)

                    self.LOGGER.info(
                        'Activated {}, setting path to {}'.format(
                            metadata, table_path))

                    metadata['path'] = table_path
                    self._set_table_metadata(cur, table_name, metadata)
            except Exception as ex:
                self.connection.rollback()
                message = '{} - Exception activating table version {}'.format(
                    stream_buffer.stream, version)
                self.LOGGER.exception(message)
                raise SnowflakeError(message, ex)