def do_sync_full_table(mysql_conn, catalog_entry, state, columns):
    """Run a FULL_TABLE replication pass for one stream.

    Emits the stream's SCHEMA message, syncs every row at a fresh stream
    version, then marks the stream complete in state and emits a STATE
    message so downstream targets can persist the bookmark.
    """
    LOGGER.info("Stream %s is using full table replication", catalog_entry.stream)

    write_schema_message(catalog_entry)

    version = common.get_stream_version(catalog_entry.tap_stream_id, state)
    full_table.sync_table(mysql_conn, catalog_entry, state, columns, version)

    # Prefer initial_full_table_complete going forward
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, "version")
    state = singer.write_bookmark(
        state,
        catalog_entry.tap_stream_id,
        "initial_full_table_complete",
        True,
    )

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
def generate_messages(con, catalog, state):
    """Yield the Singer message stream for every selected stream in *catalog*.

    For each stream: marks it as currently syncing in *state*, emits a STATE
    message, emits its SCHEMA message, then delegates record generation to the
    incremental or full-table sync engine based on the stream's
    'replication-method' metadata. Finishes by clearing currently_syncing and
    emitting a final STATE message.

    :param con: open MySQL connection
    :param catalog: singer Catalog of discovered/selected streams
    :param state: mutable singer state dict (bookmarks)
    :raises Exception: for any replication-method other than INCREMENTAL or
        FULL_TABLE
    """
    catalog = resolve_catalog(con, catalog, state)

    for catalog_entry in catalog.streams:
        state = singer.set_currently_syncing(state, catalog_entry.tap_stream_id)

        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        md_map = metadata.to_map(catalog_entry.metadata)
        replication_method = md_map.get((), {}).get('replication-method')
        # NOTE(review): replication_key is read from the state bookmark, not
        # from stream metadata — presumably set by a prior incremental run.
        replication_key = singer.get_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'replication_key')

        if catalog_entry.is_view:
            key_properties = md_map.get((), {}).get('view-key-properties')
        else:
            key_properties = md_map.get((), {}).get('table-key-properties')

        # Emit a SCHEMA message before we sync any records
        yield singer.SchemaMessage(
            stream=catalog_entry.stream,
            schema=catalog_entry.schema.to_dict(),
            key_properties=key_properties,
            bookmark_properties=replication_key
        )

        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table

            log_engine(con, catalog_entry)

            if replication_method == 'INCREMENTAL':
                # Delegate record/state generation to the incremental engine
                yield from incremental.sync_table(con, catalog_entry, state)
            elif replication_method == 'FULL_TABLE':
                yield from full_table.sync_table(con, catalog_entry, state)
            else:
                raise Exception("only INCREMENTAL and FULL TABLE replication methods are supported")

    # if we get here, we've finished processing all the streams, so clear
    # currently_syncing from the state and emit a state message.
    state = singer.set_currently_syncing(state, None)
    yield singer.StateMessage(value=copy.deepcopy(state))
def do_sync_historical_binlog(mysql_conn, catalog_entry, state, columns, use_gtid: bool, engine: str):
    """Perform (or resume) the initial full-table sync that precedes LOG_BASED
    (binlog) replication for one stream.

    Verifies binlog (and, when use_gtid is set for the MySQL engine, GTID)
    configuration, refuses views, then either resumes a previously interrupted
    full-table sync (when max_pk_values plus a binlog/gtid position are already
    bookmarked) or starts a fresh one, capturing the current binlog position so
    the follow-up binlog sync knows where to start.

    NOTE: the placement of the log_file/log_pos/gtid bookmark writes relative
    to full_table.sync_table differs between the two branches below and is
    intentional — do not reorder.
    """
    binlog.verify_binlog_config(mysql_conn)

    # GTID bookmarks are only meaningful on the MySQL engine (e.g. not MariaDB
    # in the same mode) — presumably MYSQL_ENGINE is a module constant; the
    # check is skipped otherwise.
    if use_gtid and engine == MYSQL_ENGINE:
        binlog.verify_gtid_config(mysql_conn)

    is_view = common.get_is_view(catalog_entry)
    if is_view:
        # Binlog events reference base tables, so views cannot be replicated.
        raise Exception(f"Unable to replicate stream({catalog_entry.stream}) with binlog because it is a view.")

    # Existing bookmarks from a prior (possibly interrupted) run.
    log_file = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'log_file')
    log_pos = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'log_pos')

    gtid = None
    if use_gtid:
        gtid = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'gtid')

    # Presence of max_pk_values indicates an in-progress resumable full sync.
    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')

    write_schema_message(catalog_entry)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)

    # Resume only when we both have partial-sync progress (max_pk_values) and
    # a replication position to continue from (gtid, or log_file+log_pos).
    if max_pk_values and ((use_gtid and gtid) or (log_file and log_pos)):
        LOGGER.info("Resuming initial full table sync for LOG_BASED stream %s", catalog_entry.tap_stream_id)
        full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)
    else:
        LOGGER.info("Performing initial full table sync for LOG_BASED stream %s", catalog_entry.tap_stream_id)

        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'initial_binlog_complete',
                                      False)

        # Capture the server's current binlog position BEFORE the full sync so
        # binlog replication can later pick up every change made during it.
        current_log_file, current_log_pos = binlog.fetch_current_log_file_and_pos(mysql_conn)

        current_gtid = None
        if use_gtid:
            current_gtid = binlog.fetch_current_gtid_pos(mysql_conn, engine)

        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'version',
                                      stream_version)

        if full_table.pks_are_auto_incrementing(mysql_conn, catalog_entry):
            # We must save log_file, log_pos, gtid across FULL_TABLE syncs when using
            # an incrementing PK
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_file',
                                          current_log_file)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_pos',
                                          current_log_pos)

            if current_gtid:
                state = singer.write_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'gtid',
                                              current_gtid)

            full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)
        else:
            # Non-resumable sync: only record the binlog position once the full
            # sync has completed successfully.
            full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_file',
                                          current_log_file)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_pos',
                                          current_log_pos)

            if current_gtid:
                state = singer.write_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'gtid',
                                              current_gtid)
def do_sync_historical_binlog(mysql_conn, config, catalog_entry, state, columns):
    """Perform (or resume) the initial full-table sync that precedes LOG_BASED
    (binlog) replication for one stream.

    Refuses views, then either resumes a previously interrupted full-table
    sync (when log_file/log_pos/max_pk_values bookmarks all exist) or starts a
    fresh one, first capturing the server's current binlog position so the
    follow-up binlog sync knows where to start.

    :param mysql_conn: open MySQL connection
    :param config: tap config dict (unchanged; currently unused here, kept for
        interface compatibility with callers)
    :param catalog_entry: singer CatalogEntry for the stream
    :param state: mutable singer state dict (bookmarks)
    :param columns: list of selected column names
    :raises Exception: if the stream is a view

    NOTE: the placement of the log_file/log_pos bookmark writes relative to
    full_table.sync_table differs between the two branches and is intentional.
    """
    binlog.verify_binlog_config(mysql_conn)

    is_view = common.get_is_view(catalog_entry)
    if is_view:
        # Binlog events reference base tables, so views cannot be replicated.
        raise Exception(
            f"Unable to replicate stream({catalog_entry.stream}) with binlog because it is a view.")

    # Bookmarks from a prior (possibly interrupted) run; their combined
    # presence signals a resumable in-progress full sync.
    log_file = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'log_file')
    log_pos = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'log_pos')
    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')

    write_schema_message(catalog_entry)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)

    if log_file and log_pos and max_pk_values:
        LOGGER.info("Resuming initial full table sync for LOG_BASED stream %s",
                    catalog_entry.tap_stream_id)
        full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)
    else:
        LOGGER.info("Performing initial full table sync for LOG_BASED stream %s",
                    catalog_entry.tap_stream_id)

        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'initial_binlog_complete',
                                      False)

        # Capture the binlog position BEFORE the full sync so binlog
        # replication can later pick up every change made during it.
        current_log_file, current_log_pos = binlog.fetch_current_log_file_and_pos(mysql_conn)
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'version',
                                      stream_version)

        if full_table.sync_is_resumable(mysql_conn, catalog_entry):
            # We must save log_file and log_pos across FULL_TABLE syncs when performing
            # a resumable full table sync
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_file',
                                          current_log_file)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_pos',
                                          current_log_pos)

            full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)
        else:
            # Non-resumable sync: only record the binlog position once the
            # full sync has completed successfully.
            full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_file',
                                          current_log_file)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_pos',
                                          current_log_pos)
def do_sync_historical_binlog(mysql_conn, catalog_entry, state, columns):
    """Run (or resume) the initial full-table sync required before LOG_BASED
    (binlog) replication can begin for a stream.

    A resumable in-progress sync is detected by the combined presence of the
    log_file, log_pos and max_pk_values bookmarks; otherwise a fresh full sync
    is started after recording the server's current binlog position.

    NOTE: whether the log_file/log_pos bookmarks are written before or after
    full_table.sync_table depends on PK auto-increment resumability and is
    intentional — do not reorder.
    """
    binlog.verify_binlog_config(mysql_conn)

    if common.get_is_view(catalog_entry):
        # Binlog events reference base tables only; views cannot be replicated.
        raise Exception(
            f"Unable to replicate stream({catalog_entry.stream}) with binlog because it is a view."
        )

    tap_stream_id = catalog_entry.tap_stream_id

    # Bookmarks left behind by a prior, possibly interrupted, run.
    log_file = singer.get_bookmark(state, tap_stream_id, "log_file")
    log_pos = singer.get_bookmark(state, tap_stream_id, "log_pos")
    max_pk_values = singer.get_bookmark(state, tap_stream_id, "max_pk_values")

    write_schema_message(catalog_entry)

    version = common.get_stream_version(tap_stream_id, state)

    if log_file and log_pos and max_pk_values:
        # All three bookmarks present -> pick up the interrupted full sync.
        LOGGER.info(
            "Resuming initial full table sync for LOG_BASED stream %s",
            tap_stream_id,
        )
        full_table.sync_table(mysql_conn, catalog_entry, state, columns, version)
        return

    LOGGER.info(
        "Performing initial full table sync for LOG_BASED stream %s",
        tap_stream_id,
    )

    state = singer.write_bookmark(state, tap_stream_id, "initial_binlog_complete", False)

    # Capture the binlog position BEFORE syncing so the follow-up binlog
    # replication catches every change made during the full sync.
    current_log_file, current_log_pos = binlog.fetch_current_log_file_and_pos(mysql_conn)

    state = singer.write_bookmark(state, tap_stream_id, "version", version)

    if full_table.pks_are_auto_incrementing(mysql_conn, catalog_entry):
        # We must save log_file and log_pos across FULL_TABLE syncs when using
        # an incrementing PK
        state = singer.write_bookmark(state, tap_stream_id, "log_file", current_log_file)
        state = singer.write_bookmark(state, tap_stream_id, "log_pos", current_log_pos)
        full_table.sync_table(mysql_conn, catalog_entry, state, columns, version)
    else:
        # Non-resumable sync: record the position only after the sync finishes.
        full_table.sync_table(mysql_conn, catalog_entry, state, columns, version)
        state = singer.write_bookmark(state, tap_stream_id, "log_file", current_log_file)
        state = singer.write_bookmark(state, tap_stream_id, "log_pos", current_log_pos)
def generate_messages(con, config, catalog, state):
    """Yield the Singer message stream for every selected stream in *catalog*.

    For each stream with at least one selected column: marks it as currently
    syncing, emits a STATE message, then dispatches on the stream's
    'replication-method' metadata (INCREMENTAL, LOG_BASED, or FULL_TABLE) to
    the appropriate sync engine, emitting SCHEMA/RECORD/STATE messages along
    the way. Finishes by clearing currently_syncing and emitting a final
    STATE message.

    :param con: open MySQL connection
    :param config: tap config dict, forwarded to the binlog sync engine
    :param catalog: singer Catalog of discovered/selected streams
    :param state: mutable singer state dict (bookmarks)
    :raises Exception: for an unsupported replication method, or for a
        LOG_BASED view
    """
    catalog = resolve_catalog(con, catalog, state)

    for catalog_entry in catalog.streams:
        columns = list(catalog_entry.schema.properties.keys())

        if not columns:
            LOGGER.warning('There are no columns selected for stream %s, skipping it.',
                           catalog_entry.stream)
            continue

        state = singer.set_currently_syncing(state, catalog_entry.tap_stream_id)

        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        md_map = metadata.to_map(catalog_entry.metadata)
        replication_method = md_map.get((), {}).get('replication-method')
        replication_key = md_map.get((), {}).get('replication-key')

        if catalog_entry.is_view:
            key_properties = md_map.get((), {}).get('view-key-properties')
        else:
            key_properties = md_map.get((), {}).get('table-key-properties')

        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table

            log_engine(con, catalog_entry)

            if replication_method == 'INCREMENTAL':
                LOGGER.info("Stream %s is using incremental replication", catalog_entry.stream)

                yield generate_schema_message(catalog_entry, key_properties, [replication_key])

                yield from incremental.sync_table(con, catalog_entry, state, columns)
            elif replication_method == 'LOG_BASED':
                if catalog_entry.is_view:
                    # Binlog events reference base tables only.
                    raise Exception("Unable to replicate stream({}) with binlog because it is a view.".format(catalog_entry.stream))

                LOGGER.info("Stream %s is using binlog replication", catalog_entry.stream)

                log_file = singer.get_bookmark(state,
                                               catalog_entry.tap_stream_id,
                                               'log_file')

                log_pos = singer.get_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'log_pos')

                yield generate_schema_message(catalog_entry, key_properties, [])

                if log_file and log_pos:
                    # A binlog position is bookmarked: continue streaming
                    # changes from there.
                    columns = binlog.add_automatic_properties(catalog_entry, columns)

                    yield from binlog.sync_table(con, config, catalog_entry, state, columns)
                else:
                    # No position yet: take a full snapshot first, then record
                    # the binlog position captured BEFORE the snapshot so no
                    # concurrent change is missed.
                    LOGGER.info("Performing initial full table sync")

                    log_file, log_pos = binlog.fetch_current_log_file_and_pos(con)

                    stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                                               state)

                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'version',
                                                  stream_version)

                    yield from full_table.sync_table(con, catalog_entry, state, columns, stream_version)

                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'log_file',
                                                  log_file)

                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'log_pos',
                                                  log_pos)

                    yield singer.StateMessage(value=copy.deepcopy(state))
            elif replication_method == 'FULL_TABLE':
                LOGGER.info("Stream %s is using full table replication", catalog_entry.stream)

                yield generate_schema_message(catalog_entry, key_properties, [])

                stream_version = common.get_stream_version(catalog_entry.tap_stream_id,
                                                           state)

                yield from full_table.sync_table(con, catalog_entry, state, columns, stream_version)

                # Prefer initial_full_table_complete going forward
                singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'version')

                state = singer.write_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'initial_full_table_complete',
                                              True)

                yield singer.StateMessage(value=copy.deepcopy(state))
            else:
                raise Exception(
                    "only INCREMENTAL, LOG_BASED, and FULL TABLE replication methods are supported"
                )

    # if we get here, we've finished processing all the streams, so clear
    # currently_syncing from the state and emit a state message.
    state = singer.set_currently_syncing(state, None)
    yield singer.StateMessage(value=copy.deepcopy(state))