Пример #1
0
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry),
                                   catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = True if 'version' in bookmark else False

    initial_full_table_complete = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete')

    state_version = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists
                                                and state_version is None):
        singer.write_message(activate_version_message)

    perform_resumable_sync = sync_is_resumable(mysql_conn, catalog_entry)

    pk_clause = ""

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)

            if perform_resumable_sync:
                LOGGER.info(
                    "Full table sync is resumable based on primary key definition, will replicate incrementally"
                )

                state = update_incremental_full_table_state(
                    catalog_entry, state, cur)
                pk_clause = generate_pk_clause(catalog_entry, state)

            select_sql += pk_clause

            try:
                select_sql = _create_temp_table(mysql_conn, catalog_entry,
                                                columns, pk_clause)
            except Exception as ex:
                logging.warning("creating temp table failed: {}".format(
                    str(ex)))

            params = {}

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                          'last_pk_fetched')

    singer.write_message(activate_version_message)
Пример #2
0
def sync_table(mysql_conn,
               catalog_entry,
               state,
               columns,
               original_state_file=''):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get('replication-key')
    replication_key_state = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key')

    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(
            state, catalog_entry.tap_stream_id, 'replication_key_value')
    else:
        state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key',
                                      replication_key_metadata)
        state = singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key_value')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                               state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                  'version', stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream='%s_%s' %
        (common.get_database_name(catalog_entry), catalog_entry.stream),
        version=stream_version)

    singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            if replication_key_value is not None:
                if catalog_entry.schema.properties[
                        replication_key_metadata].format == 'date-time':
                    replication_key_value = pendulum.parse(
                        replication_key_value)

                select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                    replication_key_metadata, replication_key_metadata)

                params['replication_key_value'] = replication_key_value

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params, original_state_file)
Пример #3
0
def sync_table(mysql_conn, catalog_entry, state, columns):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get("replication-key")
    replication_key_state = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, "replication_key"
    )

    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(
            state, catalog_entry.tap_stream_id, "replication_key_value"
        )
    else:
        state = singer.write_bookmark(
            state, catalog_entry.tap_stream_id, "replication_key", replication_key_metadata
        )
        state = singer.clear_bookmark(state, catalog_entry.tap_stream_id, "replication_key_value")

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id, "version", stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version
    )

    singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            if replication_key_value is not None:
                if catalog_entry.schema.properties[replication_key_metadata].format == "date-time":
                    replication_key_value = pendulum.parse(replication_key_value)

                select_sql += " WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC".format(
                    replication_key_metadata, replication_key_metadata
                )

                params["replication_key_value"] = replication_key_value
            elif replication_key_metadata is not None:
                select_sql += " ORDER BY `{}` ASC".format(replication_key_metadata)

            common.sync_query(
                cur, catalog_entry, state, select_sql, columns, stream_version, params
            )
Пример #4
0
def sync_table(connection, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = True if 'version' in bookmark else False

    initial_full_table_complete = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete')

    state_version = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists
                                                and state_version is None):
        yield activate_version_message

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)

        params = {}

        for message in common.sync_query(cursor, catalog_entry, state,
                                         select_sql, columns, stream_version,
                                         params):
            yield message

    yield activate_version_message
Пример #5
0
def sync_table(mysql_conn, config, catalog_entry, state, columns,
               stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry),
                                   catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = True if 'version' in bookmark else False

    initial_full_table_complete = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete')

    state_version = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream='%s_%s' %
        (common.get_database_name(catalog_entry), catalog_entry.stream),
        version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists
                                                and state_version is None):
        singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            # common.sync_query(cur, catalog_entry, state, select_sql, columns, stream_version, params)
            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                          'last_pk_fetched')

    singer.write_message(activate_version_message)
Пример #6
0
def sync_table(connection, catalog_entry, state, columns):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get('replication-key')
    replication_key_state = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key')

    replication_key = replication_key_state or replication_key_metadata
    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(
            state, catalog_entry.tap_stream_id, 'replication_key_value')
    else:
        state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key', replication_key)
        state = singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key_value')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                               state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                  'version', stream_version)

    yield singer.ActivateVersionMessage(stream=catalog_entry.stream,
                                        version=stream_version)

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if replication_key_value is not None:
            if catalog_entry.schema.properties[
                    replication_key].format == 'date-time':
                replication_key_value = pendulum.parse(replication_key_value)

            select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                replication_key, replication_key)

            params['replication_key_value'] = replication_key_value
        elif replication_key is not None:
            select_sql += ' ORDER BY `{}` ASC'.format(replication_key)

        for message in common.sync_query(cursor, catalog_entry, state,
                                         select_sql, columns, stream_version,
                                         params):
            yield message
Пример #7
0
def sync_table(connection, catalog_entry, state):
    columns = common.generate_column_list(catalog_entry)

    if not columns:
        LOGGER.warning(
            'There are no columns selected for table %s, skipping it',
            catalog_entry.table)
        return

    bookmark_is_empty = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id) is None


    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
    state = singer.write_bookmark(state,
                                  catalog_entry.tap_stream_id,
                                  'version',
                                  stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version
    )

    # If there is no bookmark at all for this stream, assume it is the
    # very first replication. Emity an ACTIVATE_VERSION message at the
    # beginning so the recors show up right away.
    if bookmark_is_empty:
       yield activate_version_message

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)

        params = {}

        for message in common.sync_query(cursor,
                                         catalog_entry,
                                         state,
                                         select_sql,
                                         columns,
                                         stream_version,
                                         params):
            yield message

    # Clear the stream's version from the state so that subsequent invocations will
    # emit a distinct stream version.
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id, 'version', None)

    yield activate_version_message
    yield singer.StateMessage(value=copy.deepcopy(state))
Пример #8
0
def sync_table(connection, catalog_entry, state):
    columns = common.generate_column_list(catalog_entry)

    if not columns:
        LOGGER.warning(
            'There are no columns selected for table %s, skipping it',
            catalog_entry.table)
        return

    replication_key_value = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key_value')

    replication_key = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                          'replication_key')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                               state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                  'version', stream_version)

    yield singer.ActivateVersionMessage(stream=catalog_entry.stream,
                                        version=stream_version)

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if replication_key_value is not None:
            if catalog_entry.schema.properties[
                    replication_key].format == 'date-time':
                replication_key_value = pendulum.parse(replication_key_value)

            select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                replication_key, replication_key)

            params['replication_key_value'] = replication_key_value
        elif replication_key is not None:
            select_sql += ' ORDER BY `{}` ASC'.format(replication_key)

        for message in common.sync_query(cursor, catalog_entry, state,
                                         select_sql, columns, stream_version,
                                         params):
            yield message
Пример #9
0
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = True if 'version' in bookmark else False

    initial_full_table_complete = singer.get_bookmark(state,
                                                      catalog_entry.tap_stream_id,
                                                      'initial_full_table_complete')

    state_version = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version
    )

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists and state_version is None):
        singer.write_message(activate_version_message)

    key_props_are_auto_incrementing = pks_are_auto_incrementing(mysql_conn, catalog_entry)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)

            if key_props_are_auto_incrementing:
                LOGGER.info("Detected auto-incrementing primary key(s) - will replicate incrementally")
                max_pk_values = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'max_pk_values') or get_max_pk_values(cur, catalog_entry)


                if not max_pk_values:
                    LOGGER.info("No max value for auto-incrementing PK found for table {}".format(catalog_entry.table))
                else:
                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'max_pk_values',
                                                  max_pk_values)

                    pk_clause = generate_pk_clause(catalog_entry, state)

                    select_sql += pk_clause

            params = {}

            common.sync_query(cur,
                              catalog_entry,
                              state,
                              select_sql,
                              columns,
                              stream_version,
                              params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'last_pk_fetched')

    singer.write_message(activate_version_message)
Пример #10
0
def sync_table(mysql_conn, catalog_entry, state, columns, limit=None):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    iterate_limit = True
    while iterate_limit:

        replication_key_metadata = stream_metadata.get('replication-key')
        replication_key_state = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'replication_key')

        replication_key_value = None

        if replication_key_metadata == replication_key_state:
            replication_key_value = singer.get_bookmark(state,
                                                        catalog_entry.tap_stream_id,
                                                        'replication_key_value')
        else:
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'replication_key',
                                          replication_key_metadata)
            state = singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'replication_key_value')

        stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'version',
                                      stream_version)

        activate_version_message = singer.ActivateVersionMessage(
            stream=catalog_entry.stream,
            version=stream_version
        )

        singer.write_message(activate_version_message)

        with connect_with_backoff(mysql_conn) as open_conn:
            with open_conn.cursor() as cur:
                select_sql = common.generate_select_sql(catalog_entry, columns)
                params = {}

                if replication_key_value is not None:
                    if catalog_entry.schema.properties[replication_key_metadata].format == 'date-time':
                        replication_key_value = pendulum.parse(replication_key_value)

                    select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                        replication_key_metadata,
                        replication_key_metadata)

                    params['replication_key_value'] = replication_key_value
                elif replication_key_metadata is not None:
                    select_sql += ' ORDER BY `{}` ASC'.format(replication_key_metadata)

                if limit:
                    select_sql += ' LIMIT {}'.format(limit)

                num_rows = common.sync_query(cur,
                                             catalog_entry,
                                             state,
                                             select_sql,
                                             columns,
                                             stream_version,
                                             params)
                if limit is None or num_rows < limit:
                    iterate_limit = False