Example #1
def sync_table(connection, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = 'version' in bookmark

    initial_full_table_complete = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete')

    state_version = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists
                                                and state_version is None):
        yield activate_version_message

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)

        params = {}

        for message in common.sync_query(cursor, catalog_entry, state,
                                         select_sql, columns, stream_version,
                                         params):
            yield message

    yield activate_version_message
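
Examples #1, #2, #8, and #10 all share the same full-table bookkeeping: an ACTIVATE_VERSION message is emitted at the start of the very first sync so records show up in the target right away, and again at the end so the target can prune rows belonging to older versions. A minimal, self-contained sketch of the bookmark check that gates the initial message (the state shape follows the Singer convention; the function name is illustrative, not part of singer-python):

def should_emit_initial_activate(state, tap_stream_id):
    bookmark = state.get('bookmarks', {}).get(tap_stream_id, {})
    version_exists = 'version' in bookmark
    initial_complete = bookmark.get('initial_full_table_complete')
    # Emit up front on a fresh sync, but not when a previous run left a
    # 'version' bookmark explicitly set to None.
    return not initial_complete and not (version_exists
                                         and bookmark.get('version') is None)

# A fresh state emits immediately; a completed full-table sync does not.
assert should_emit_initial_activate({}, 'db-users')
assert not should_emit_initial_activate(
    {'bookmarks': {'db-users': {'version': 1,
                                'initial_full_table_complete': True}}},
    'db-users')
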
Example #2
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry),
                                   catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = 'version' in bookmark

    initial_full_table_complete = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete')

    state_version = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists
                                                and state_version is None):
        singer.write_message(activate_version_message)

    perform_resumable_sync = sync_is_resumable(mysql_conn, catalog_entry)

    pk_clause = ""

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)

            if perform_resumable_sync:
                LOGGER.info(
                    "Full table sync is resumable based on primary key definition, will replicate incrementally"
                )

                state = update_incremental_full_table_state(
                    catalog_entry, state, cur)
                pk_clause = generate_pk_clause(catalog_entry, state)

            select_sql += pk_clause

            try:
                select_sql = _create_temp_table(mysql_conn, catalog_entry,
                                                columns, pk_clause)
            except Exception as ex:
                LOGGER.warning("creating temp table failed: %s", ex)

            params = {}

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                          'last_pk_fetched')

    singer.write_message(activate_version_message)
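
Example #2 resumes an interrupted full-table sync by appending a primary-key range to the SELECT. `generate_pk_clause` itself is not shown in this excerpt; a plausible single-column sketch, assuming the bookmarks hold `last_pk_fetched` and `max_pk_values` (production code should bind these as query parameters instead of interpolating them):

def generate_pk_clause_sketch(pk_column, last_pk_fetched, max_pk_value):
    # Resume strictly after the last key already emitted, and never read
    # past the max captured when the sync began, so rows inserted mid-sync
    # are left for the next run.
    if last_pk_fetched is not None:
        return ' WHERE `{0}` > {1} AND `{0}` <= {2} ORDER BY `{0}` ASC'.format(
            pk_column, last_pk_fetched, max_pk_value)
    return ' WHERE `{0}` <= {1} ORDER BY `{0}` ASC'.format(
        pk_column, max_pk_value)

print(generate_pk_clause_sketch('id', 42, 10000))
# -> ' WHERE `id` > 42 AND `id` <= 10000 ORDER BY `id` ASC'
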
Example #3
def sync_table(mysql_conn,
               catalog_entry,
               state,
               columns,
               original_state_file=''):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get('replication-key')
    replication_key_state = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key')

    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(
            state, catalog_entry.tap_stream_id, 'replication_key_value')
    else:
        state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key',
                                      replication_key_metadata)
        state = singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key_value')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                               state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                  'version', stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream='%s_%s' %
        (common.get_database_name(catalog_entry), catalog_entry.stream),
        version=stream_version)

    singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            if replication_key_value is not None:
                if catalog_entry.schema.properties[
                        replication_key_metadata].format == 'date-time':
                    replication_key_value = pendulum.parse(
                        replication_key_value)

                select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                    replication_key_metadata, replication_key_metadata)

                params['replication_key_value'] = replication_key_value

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params, original_state_file)
Example #4
def sync_table(connection, catalog_entry, state, columns):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get('replication-key')
    replication_key_state = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key')

    replication_key = replication_key_state or replication_key_metadata
    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(
            state, catalog_entry.tap_stream_id, 'replication_key_value')
    else:
        state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key', replication_key)
        state = singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                                      'replication_key_value')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                               state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                  'version', stream_version)

    yield singer.ActivateVersionMessage(stream=catalog_entry.stream,
                                        version=stream_version)

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if replication_key_value is not None:
            if catalog_entry.schema.properties[
                    replication_key].format == 'date-time':
                replication_key_value = pendulum.parse(replication_key_value)

            select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                replication_key, replication_key)

            params['replication_key_value'] = replication_key_value
        elif replication_key is not None:
            select_sql += ' ORDER BY `{}` ASC'.format(replication_key)

        for message in common.sync_query(cursor, catalog_entry, state,
                                         select_sql, columns, stream_version,
                                         params):
            yield message
Example #5
def sync_table(mysql_conn, catalog_entry, state, columns):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get("replication-key")
    replication_key_state = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, "replication_key"
    )

    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(
            state, catalog_entry.tap_stream_id, "replication_key_value"
        )
    else:
        state = singer.write_bookmark(
            state, catalog_entry.tap_stream_id, "replication_key", replication_key_metadata
        )
        state = singer.clear_bookmark(state, catalog_entry.tap_stream_id, "replication_key_value")

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id, "version", stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version
    )

    singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            if replication_key_value is not None:
                if catalog_entry.schema.properties[replication_key_metadata].format == "date-time":
                    replication_key_value = pendulum.parse(replication_key_value)

                select_sql += " WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC".format(
                    replication_key_metadata, replication_key_metadata
                )

                params["replication_key_value"] = replication_key_value
            elif replication_key_metadata is not None:
                select_sql += " ORDER BY `{}` ASC".format(replication_key_metadata)

            common.sync_query(
                cur, catalog_entry, state, select_sql, columns, stream_version, params
            )
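
Examples #3, #4, and #5 build the same incremental predicate: a `>=` comparison on the replication key, parsed with pendulum when the schema marks the column as date-time, and bound through pymysql-style %(name)s placeholders. For a hypothetical `users` table, the assembled query and parameters would look like this:

import pendulum

select_sql = 'SELECT `id`, `name`, `updated_at` FROM `mydb`.`users`'
replication_key = 'updated_at'
# The bookmarked value, normalized to a datetime because the column's
# JSON schema format is date-time.
replication_key_value = pendulum.parse('2021-06-01T00:00:00Z')

select_sql += ' WHERE `{0}` >= %(replication_key_value)s ORDER BY `{0}` ASC'.format(
    replication_key)
params = {'replication_key_value': replication_key_value}
# cursor.execute(select_sql, params)  # pymysql substitutes %(name)s from params
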
Example #6
def sync_binlog_stream(mysql_conn: MySQLConnection, config: Dict,
                       binlog_streams_map: Dict[str,
                                                Any], state: Dict) -> None:
    """
    Capture the binlog events created between the pos in the state and current Master position and creates Singer
    streams to be flushed to stdout
    Args:
        mysql_conn: mysql connection instance
        config: tap config
        binlog_streams_map: tables to stream using binlog
        state: the current state
    """
    for tap_stream_id in binlog_streams_map:
        common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    log_file = log_pos = gtid = None

    if config['use_gtid']:
        gtid = calculate_gtid_bookmark(mysql_conn, binlog_streams_map, state,
                                       config['engine'])
    else:
        log_file, log_pos = calculate_bookmark(mysql_conn, binlog_streams_map,
                                               state)

    reader = None

    try:
        reader = create_binlog_stream_reader(config, log_file, log_pos, gtid)

        end_log_file, end_log_pos = fetch_current_log_file_and_pos(mysql_conn)
        LOGGER.info('Current Master binlog file and pos: %s %s', end_log_file,
                    end_log_pos)

        _run_binlog_sync(mysql_conn, reader, binlog_streams_map, state, config,
                         end_log_file, end_log_pos)

    except pymysql.err.OperationalError as ex:
        if ex.args[0] == 1236:
            LOGGER.error(
                'Cannot resume logical replication from the given GTID %s! It may date back to '
                'before the new primary was set up; connect to the old primary, consume all '
                'binlog events to obtain a newer GTID, then switch back.', gtid)

        raise

    finally:
        # BinLogStreamReader doesn't implement the context manager
        # protocol, so close it explicitly in a finally block
        if reader:
            reader.close()

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
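
The try/finally in Examples #6 and #7 exists because BinLogStreamReader does not implement the context manager protocol. Since closing is all that's needed, contextlib.closing is an equivalent, shorter alternative; a sketch with a stand-in reader (the real constructor takes connection settings, a server_id, and event filters, as shown above):

from contextlib import closing

class StubReader:
    """Stand-in exposing the one method contextlib.closing requires."""
    def fetchone(self):
        return None
    def close(self):
        print('reader closed')

with closing(StubReader()) as reader:
    event = reader.fetchone()  # close() runs even if iteration raises
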
Example #7
def sync_binlog_stream(mysql_conn, config, binlog_streams, state):
    binlog_streams_map = generate_streams_map(binlog_streams)

    for tap_stream_id in binlog_streams_map:
        common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    log_file, log_pos = calculate_bookmark(mysql_conn, binlog_streams_map,
                                           state)

    verify_log_file_exists(mysql_conn, log_file, log_pos)

    if config.get('server_id'):
        server_id = int(config.get('server_id'))
        LOGGER.info("Using provided server_id=%s", server_id)
    else:
        server_id = fetch_server_id(mysql_conn)
        LOGGER.info("No server_id provided, will use global server_id=%s",
                    server_id)

    connection_wrapper = make_connection_wrapper(config)
    reader = None

    try:
        slave_uuid = f"bi-reader-%04x" % random.getrandbits(64)

        reader = BinLogStreamReader(
            connection_settings={},
            server_id=server_id,
            slave_uuid=slave_uuid,
            log_file=log_file,
            log_pos=log_pos,
            resume_stream=True,
            only_events=[
                RotateEvent, WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent
            ],
            pymysql_wrapper=connection_wrapper,
        )
        LOGGER.info("Starting binlog replication with log_file=%s, log_pos=%s",
                    log_file, log_pos)
        _run_binlog_sync(mysql_conn, reader, binlog_streams_map, state, config)
    finally:
        # BinLogStreamReader doesn't implement the context manager
        # protocol, so close it explicitly in a finally block
        if reader:
            reader.close()

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
Example #8
def sync_table(mysql_conn, config, catalog_entry, state, columns,
               stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry),
                                   catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = 'version' in bookmark

    initial_full_table_complete = singer.get_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete')

    state_version = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream='%s_%s' %
        (common.get_database_name(catalog_entry), catalog_entry.stream),
        version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists
                                                and state_version is None):
        singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                          'last_pk_fetched')

    singer.write_message(activate_version_message)
Example #9
def sync_table(connection, config, catalog_entry, state, columns):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id,
                                   state)

    log_file = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                   'log_file')

    log_pos = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                  'log_pos')

    verify_binlog_config(connection, catalog_entry)
    verify_log_file_exists(connection, catalog_entry, log_file, log_pos)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                               state)
    state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                  'version', stream_version)

    server_id = fetch_server_id(connection)

    connection_wrapper = make_connection_wrapper(config)

    reader = BinLogStreamReader(connection_settings={},
                                server_id=server_id,
                                log_file=log_file,
                                log_pos=log_pos,
                                resume_stream=True,
                                only_events=[
                                    RotateEvent, WriteRowsEvent,
                                    UpdateRowsEvent, DeleteRowsEvent
                                ],
                                pymysql_wrapper=connection_wrapper)

    table_path = (catalog_entry.database, catalog_entry.stream)

    time_extracted = utils.now()

    LOGGER.info("Starting binlog replication with log_file=%s, log_pos=%s",
                log_file, log_pos)

    rows_saved = 0

    for binlog_event in reader:
        if reader.log_file == log_file and reader.log_pos == log_pos:
            LOGGER.info(
                "Skipping event for log_file=%s and log_pos=%s as it was processed last sync",
                reader.log_file, reader.log_pos)
            continue

        if isinstance(binlog_event, RotateEvent):
            state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                          'log_file', binlog_event.next_binlog)
            state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                          'log_pos', binlog_event.position)

        elif (binlog_event.schema, binlog_event.table) == table_path:
            db_column_types = {c.name: c.type for c in binlog_event.columns}

            if isinstance(binlog_event, WriteRowsEvent):
                for row in binlog_event.rows:
                    filtered_vals = {
                        k: v
                        for k, v in row['values'].items() if k in columns
                    }

                    yield row_to_singer_record(catalog_entry, stream_version,
                                               db_column_types, filtered_vals,
                                               time_extracted)

                    rows_saved += 1

            elif isinstance(binlog_event, UpdateRowsEvent):
                for row in binlog_event.rows:
                    filtered_vals = {
                        k: v
                        for k, v in row['after_values'].items() if k in columns
                    }

                    yield row_to_singer_record(catalog_entry, stream_version,
                                               db_column_types, filtered_vals,
                                               time_extracted)

                    rows_saved += 1
            elif isinstance(binlog_event, DeleteRowsEvent):
                for row in binlog_event.rows:
                    event_ts = datetime.datetime.utcfromtimestamp(
                        binlog_event.timestamp).replace(tzinfo=pytz.UTC)

                    vals = row['values']
                    vals[SDC_DELETED_AT] = event_ts

                    filtered_vals = {
                        k: v
                        for k, v in vals.items() if k in columns
                    }

                    yield row_to_singer_record(catalog_entry, stream_version,
                                               db_column_types, filtered_vals,
                                               time_extracted)

                    rows_saved += 1

            state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                          'log_file', reader.log_file)

            state = singer.write_bookmark(state, catalog_entry.tap_stream_id,
                                          'log_pos', reader.log_pos)

            if rows_saved % UPDATE_BOOKMARK_PERIOD == 0:
                yield singer.StateMessage(value=copy.deepcopy(state))

    yield singer.StateMessage(value=copy.deepcopy(state))
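
Example #9 converts each binlog row through `row_to_singer_record`, which is not shown in this excerpt. A plausible sketch of its shape using singer-python's RecordMessage (the real helper in tap-mysql also coerces MySQL-specific column types such as dates, decimals, and bit fields into JSON-friendly values):

import singer

def row_to_singer_record_sketch(stream_name, version, row, time_extracted):
    # Assumed minimal form: wrap the filtered column values in a versioned
    # RECORD message; type coercion is deliberately omitted here.
    return singer.RecordMessage(stream=stream_name,
                                record=row,
                                version=version,
                                time_extracted=time_extracted)
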
Example #10
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = 'version' in bookmark

    initial_full_table_complete = singer.get_bookmark(state,
                                                      catalog_entry.tap_stream_id,
                                                      'initial_full_table_complete')

    state_version = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version
    )

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists and state_version is None):
        singer.write_message(activate_version_message)

    key_props_are_auto_incrementing = pks_are_auto_incrementing(mysql_conn, catalog_entry)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)

            if key_props_are_auto_incrementing:
                LOGGER.info("Detected auto-incrementing primary key(s) - will replicate incrementally")
                max_pk_values = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'max_pk_values') or get_max_pk_values(cur, catalog_entry)

                if not max_pk_values:
                    LOGGER.info("No max value for auto-incrementing PK found for table {}".format(catalog_entry.table))
                else:
                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'max_pk_values',
                                                  max_pk_values)

                    pk_clause = generate_pk_clause(catalog_entry, state)

                    select_sql += pk_clause

            params = {}

            common.sync_query(cur,
                              catalog_entry,
                              state,
                              select_sql,
                              columns,
                              stream_version,
                              params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'last_pk_fetched')

    singer.write_message(activate_version_message)
Example #11
def sync_binlog_stream(mysql_conn, config, binlog_streams, state):
    binlog_streams_map = generate_streams_map(binlog_streams)

    for tap_stream_id in binlog_streams_map:
        common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    log_file, log_pos = calculate_bookmark(mysql_conn, binlog_streams_map,
                                           state)

    verify_log_file_exists(mysql_conn, log_file, log_pos)

    if config.get('server_id'):
        server_id = int(config.get('server_id'))
        LOGGER.info("Using provided server_id=%s", server_id)
    else:
        server_id = fetch_server_id(mysql_conn)
        LOGGER.info("No server_id provided, will use global server_id=%s",
                    server_id)

    connection_wrapper = make_connection_wrapper(config)

    reader = BinLogStreamReader(connection_settings={},
                                server_id=server_id,
                                log_file=log_file,
                                log_pos=log_pos,
                                resume_stream=True,
                                only_events=[
                                    RotateEvent, WriteRowsEvent,
                                    UpdateRowsEvent, DeleteRowsEvent
                                ],
                                pymysql_wrapper=connection_wrapper)

    time_extracted = utils.now()

    LOGGER.info("Starting binlog replication with log_file=%s, log_pos=%s",
                log_file, log_pos)

    rows_saved = 0
    events_skipped = 0

    current_log_file, current_log_pos = fetch_current_log_file_and_pos(
        mysql_conn)

    for binlog_event in reader:
        if isinstance(binlog_event, RotateEvent):
            state = update_bookmarks(state, binlog_streams_map,
                                     binlog_event.next_binlog,
                                     binlog_event.position)
        else:
            tap_stream_id = common.generate_tap_stream_id(
                binlog_event.schema, binlog_event.table)
            streams_map_entry = binlog_streams_map.get(tap_stream_id, {})
            catalog_entry = streams_map_entry.get('catalog_entry')
            desired_columns = streams_map_entry.get('desired_columns')

            if not catalog_entry:
                events_skipped += 1

                if events_skipped % UPDATE_BOOKMARK_PERIOD == 0:
                    LOGGER.info(
                        "Skipped %s events so far as they were not for selected tables; %s rows extracted",
                        events_skipped, rows_saved)

            elif catalog_entry:
                if isinstance(binlog_event, WriteRowsEvent):
                    rows_saved = handle_write_rows_event(
                        binlog_event, catalog_entry, state, desired_columns,
                        rows_saved, time_extracted)

                elif isinstance(binlog_event, UpdateRowsEvent):
                    rows_saved = handle_update_rows_event(
                        binlog_event, catalog_entry, state, desired_columns,
                        rows_saved, time_extracted)

                elif isinstance(binlog_event, DeleteRowsEvent):
                    rows_saved = handle_delete_rows_event(
                        binlog_event, catalog_entry, state, desired_columns,
                        rows_saved, time_extracted)
                else:
                    LOGGER.info(
                        "Skipping event for table %s.%s as it is not an INSERT, UPDATE, or DELETE",
                        binlog_event.schema, binlog_event.table)

        state = update_bookmarks(state, binlog_streams_map, reader.log_file,
                                 reader.log_pos)

        # The iterator across python-mysql-replication's fetchone method should ultimately terminate
        # upon receiving an EOF packet. There seem to be some cases when a MySQL server will not send
        # one causing binlog replication to hang.
        if current_log_file == reader.log_file and reader.log_pos >= current_log_pos:
            break

        if ((rows_saved and rows_saved % UPDATE_BOOKMARK_PERIOD == 0) or
            (events_skipped and events_skipped % UPDATE_BOOKMARK_PERIOD == 0)):
            singer.write_message(
                singer.StateMessage(value=copy.deepcopy(state)))

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
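
Example #11 guards against servers that never send the EOF packet by snapshotting the master position before iterating and breaking out once the reader catches up. That stop condition in isolation (a pure function whose names mirror the variables above):

def caught_up(current_log_file, current_log_pos, reader_log_file, reader_log_pos):
    # Same file and an offset at or past the snapshot means every event that
    # existed when the sync started has been consumed.
    return (reader_log_file == current_log_file
            and reader_log_pos >= current_log_pos)

assert caught_up('mysql-bin.000003', 1547, 'mysql-bin.000003', 1547)
assert not caught_up('mysql-bin.000003', 1547, 'mysql-bin.000002', 9999)
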
Example #12
def sync_table(mysql_conn, catalog_entry, state, columns, limit=None):
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    iterate_limit = True
    while iterate_limit:

        replication_key_metadata = stream_metadata.get('replication-key')
        replication_key_state = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'replication_key')

        replication_key_value = None

        if replication_key_metadata == replication_key_state:
            replication_key_value = singer.get_bookmark(state,
                                                        catalog_entry.tap_stream_id,
                                                        'replication_key_value')
        else:
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'replication_key',
                                          replication_key_metadata)
            state = singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'replication_key_value')

        stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'version',
                                      stream_version)

        activate_version_message = singer.ActivateVersionMessage(
            stream=catalog_entry.stream,
            version=stream_version
        )

        singer.write_message(activate_version_message)

        with connect_with_backoff(mysql_conn) as open_conn:
            with open_conn.cursor() as cur:
                select_sql = common.generate_select_sql(catalog_entry, columns)
                params = {}

                if replication_key_value is not None:
                    if catalog_entry.schema.properties[replication_key_metadata].format == 'date-time':
                        replication_key_value = pendulum.parse(replication_key_value)

                    select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                        replication_key_metadata,
                        replication_key_metadata)

                    params['replication_key_value'] = replication_key_value
                elif replication_key_metadata is not None:
                    select_sql += ' ORDER BY `{}` ASC'.format(replication_key_metadata)

                if limit:
                    select_sql += ' LIMIT {}'.format(limit)

                num_rows = common.sync_query(cur,
                                             catalog_entry,
                                             state,
                                             select_sql,
                                             columns,
                                             stream_version,
                                             params)
                if limit is None or num_rows < limit:
                    iterate_limit = False
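
Example #12 turns the incremental sync into batches: each pass re-reads the bookmark, so the WHERE clause advances past rows already emitted, and the loop ends when a batch comes back short. The control flow in isolation, with a stand-in for common.sync_query:

def paged_sync(run_batch, limit=None):
    """Sketch of Example #12's loop; run_batch stands in for
    common.sync_query and returns the number of rows it emitted."""
    while True:
        num_rows = run_batch(limit)
        # An unlimited pass, or one that returned fewer rows than the
        # limit, means the table is drained.
        if limit is None or num_rows < limit:
            break

paged_sync(lambda limit: 0, limit=1000)  # stops after one short batch
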