def sync_traditional_stream(conn_config, stream, state, sync_method, end_scn):
    """Sync one stream with a non-logical ("traditional") method.

    sync_method is one of 'full', 'log_initial', 'log_initial_interrupted'
    or 'incremental' (as assigned by sync_method_for_streams); any other
    value raises. Returns the updated state dict and emits a final STATE
    message. If no columns are selected the stream is skipped entirely.
    """
    LOGGER.info("Beginning sync of stream(%s) with sync method(%s)",
                stream.tap_stream_id, sync_method)
    md_map = metadata.to_map(stream.metadata)
    desired_columns = [c for c in stream.schema.properties.keys()
                       if common.should_sync_column(md_map, c)]
    desired_columns.sort()

    if not desired_columns:
        # Nothing selected: emitting a schema or records would be meaningless.
        LOGGER.warning('There are no columns selected for stream %s, skipping it',
                       stream.tap_stream_id)
        return state

    # Mark this stream as in-flight once, instead of repeating it per branch.
    state = singer.set_currently_syncing(state, stream.tap_stream_id)

    if sync_method == 'full':
        LOGGER.info("Stream %s is using full_table replication", stream.tap_stream_id)
        common.send_schema_message(stream, [])
        if md_map.get((), {}).get('is-view'):
            state = full_table.sync_view(conn_config, stream, state, desired_columns)
        else:
            state = full_table.sync_table(conn_config, stream, state, desired_columns)
    elif sync_method == 'log_initial':
        # LogMiner streams start off with full-table replication; record the
        # end_scn bookmark now so the later logical phase knows where to mine from.
        LOGGER.info("stream %s is using log_miner. will use full table for first run",
                    stream.tap_stream_id)
        state = singer.write_bookmark(state, stream.tap_stream_id, 'scn', end_scn)
        common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state, desired_columns)
    elif sync_method == 'log_initial_interrupted':
        # Resume an interrupted initial full-table phase; the 'scn' bookmark
        # was already written on the first attempt, so don't rewrite it.
        LOGGER.info("Initial stage of full table sync was interrupted. resuming...")
        common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state, desired_columns)
    elif sync_method == 'incremental':
        state = do_sync_incremental(conn_config, stream, state, desired_columns)
    else:
        raise Exception("unknown sync method {} for stream {}".format(
            sync_method, stream.tap_stream_id))

    state = singer.set_currently_syncing(state, None)
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state
def sync_method_for_streams(streams, state, default_replication_method):
    """Decide how each selected stream should be synced.

    Returns (lookup, traditional_streams, logical_streams): lookup maps
    tap_stream_id -> one of 'full', 'log_initial', 'log_initial_interrupted',
    'pure_log' or 'incremental'; traditional_streams are synced one-by-one,
    logical_streams are mined together via LogMiner.

    Raises on an unrecognized replication method, on LOG_BASED views, and
    on an inconsistent bookmark state (ORA_ROWSCN without scn).
    """
    lookup = {}
    traditional_streams = []
    logical_streams = []

    for stream in streams:
        stream_metadata = metadata.to_map(stream.metadata)
        replication_method = stream_metadata.get((), {}).get('replication-method',
                                                             default_replication_method)
        replication_key = stream_metadata.get((), {}).get('replication-key')

        # Switching replication method invalidates bookmarks from the old one.
        state = clear_state_on_replication_change(state, stream.tap_stream_id,
                                                 replication_key, replication_method)

        if replication_method not in ('LOG_BASED', 'FULL_TABLE', 'INCREMENTAL'):
            raise Exception("Unrecognized replication_method {}".format(replication_method))

        if replication_method == 'LOG_BASED' and stream_metadata.get((), {}).get('is-view'):
            raise Exception('LogMiner is NOT supported for views. Please change the replication method for {}'.format(stream.tap_stream_id))

        desired_columns = [c for c in stream.schema.properties.keys()
                           if common.should_sync_column(stream_metadata, c)]
        desired_columns.sort()
        if not desired_columns:
            LOGGER.warning('There are no columns selected for stream %s, skipping it',
                           stream.tap_stream_id)
            continue

        if replication_method == 'FULL_TABLE':
            lookup[stream.tap_stream_id] = 'full'
            traditional_streams.append(stream)
        elif replication_method == 'LOG_BASED':
            scn_bookmark = get_bookmark(state, stream.tap_stream_id, 'scn')
            rowscn_bookmark = get_bookmark(state, stream.tap_stream_id, 'ORA_ROWSCN')

            # Inconsistent state: a full-table resume point without the scn
            # recorded alongside it. Checked FIRST — in the previous ordering
            # this branch was unreachable because "not scn" matched earlier.
            if rowscn_bookmark and not scn_bookmark:
                raise Exception(
                    "ORA_ROWSCN found({}) in state implying log initial full-table replication but no scn is present".format(
                        stream.tap_stream_id))

            if not scn_bookmark:
                # initial full-table phase of LogMiner
                lookup[stream.tap_stream_id] = 'log_initial'
                traditional_streams.append(stream)
            elif rowscn_bookmark:
                # finishing previously interrupted full-table (first stage of logical replication)
                lookup[stream.tap_stream_id] = 'log_initial_interrupted'
                traditional_streams.append(stream)
            else:
                # initial stage of LogMiner (full-table) completed; pure LogMiner from here
                lookup[stream.tap_stream_id] = 'pure_log'
                logical_streams.append(stream)
        else:
            # Incremental replication
            lookup[stream.tap_stream_id] = 'incremental'
            traditional_streams.append(stream)

    return lookup, traditional_streams, logical_streams
def sync_tables_logminer(cur, streams, state, start_scn, end_scn):
    """Mine committed changes for the given streams between start_scn and end_scn.

    Expects an open cursor on a LogMiner-capable session. Emits RECORD
    messages for INSERT/UPDATE (new row image) and DELETE (old row image,
    stamped with the commit timestamp), updating the per-stream 'scn'
    bookmark as it goes. Finishes by advancing every stream's bookmark to
    end_scn and emitting a STATE message. Returns the updated state.
    """
    time_extracted = utils.now()

    start_logmnr_sql = """BEGIN
        DBMS_LOGMNR.START_LOGMNR(
            startScn => {},
            endScn => {},
            OPTIONS => DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG +
                       DBMS_LOGMNR.COMMITTED_DATA_ONLY +
                       DBMS_LOGMNR.CONTINUOUS_MINE);
        END;""".format(start_scn, end_scn)

    LOGGER.info("Starting LogMiner for %s: %s -> %s",
                list(map(lambda s: s.tap_stream_id, streams)), start_scn, end_scn)
    LOGGER.info("%s", start_logmnr_sql)
    cur.execute(start_logmnr_sql)

    # Mine changes, one stream at a time.
    for stream in streams:
        md_map = metadata.to_map(stream.metadata)
        desired_columns = [c for c in stream.schema.properties.keys()
                           if common.should_sync_column(md_map, c)]

        # Positional binds :1..:n pull the new (redo) value of each column ...
        redo_value_sql_clause = ",\n ".join(
            "DBMS_LOGMNR.MINE_VALUE(REDO_VALUE, :{})".format(idx + 1)
            for idx in range(len(desired_columns)))
        # ... and :n+1..:2n pull the old (undo) value. The undo placeholders
        # must be offset by len(desired_columns): reusing :1..:n would leave
        # the 2n column binds below mismatched against only n distinct names.
        undo_value_sql_clause = ",\n ".join(
            "DBMS_LOGMNR.MINE_VALUE(UNDO_VALUE, :{})".format(idx + 1 + len(desired_columns))
            for idx in range(len(desired_columns)))

        schema_name = md_map.get(()).get('schema-name')
        stream_version = get_stream_version(stream.tap_stream_id, state)
        mine_sql = """
            SELECT OPERATION, SQL_REDO, SCN, CSCN, COMMIT_TIMESTAMP,
                   {},
                   {}
              FROM v$logmnr_contents
             WHERE table_name = :table_name
               AND seg_owner = :seg_owner
               AND operation in ('INSERT', 'UPDATE', 'DELETE')
        """.format(redo_value_sql_clause, undo_value_sql_clause)

        # One fully-qualified column name per redo placeholder, the same set
        # again for the undo placeholders, then the two named binds.
        column_binds = [orc_db.fully_qualified_column_name(schema_name, stream.table, c)
                        for c in desired_columns]
        binds = column_binds + column_binds + [stream.table, schema_name]

        rows_saved = 0
        columns_for_record = desired_columns + ['scn', '_sdc_deleted_at']
        with metrics.record_counter(None) as counter:
            LOGGER.info("Examining log for table %s", stream.tap_stream_id)
            common.send_schema_message(stream, ['lsn'])
            LOGGER.info("mine_sql=%s", mine_sql)
            for op, redo, scn, cscn, commit_ts, *col_vals in cur.execute(mine_sql, binds):
                redo_vals = col_vals[:len(desired_columns)]
                undo_vals = col_vals[len(desired_columns):]
                if op in ('INSERT', 'UPDATE'):
                    # New row image; not a delete, so _sdc_deleted_at is None.
                    redo_vals += [cscn, None]
                    record_message = row_to_singer_message(stream, redo_vals, stream_version,
                                                           columns_for_record, time_extracted)
                elif op == 'DELETE':
                    # Old row image; deletion time is the commit timestamp (treated as UTC).
                    undo_vals += [cscn, singer.utils.strftime(commit_ts.replace(tzinfo=pytz.UTC))]
                    record_message = row_to_singer_message(stream, undo_vals, stream_version,
                                                           columns_for_record, time_extracted)
                else:
                    raise Exception("unrecognized logminer operation: {}".format(op))

                singer.write_message(record_message)
                rows_saved += 1
                counter.increment()
                state = singer.write_bookmark(state, stream.tap_stream_id, 'scn', int(cscn))
                if rows_saved % UPDATE_BOOKMARK_PERIOD == 0:
                    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

    # Every stream is now caught up to end_scn; advance all bookmarks.
    for s in streams:
        LOGGER.info("updating bookmark for stream %s to end_scn %s", s.tap_stream_id, end_scn)
        state = singer.write_bookmark(state, s.tap_stream_id, 'scn', end_scn)

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state
def sync_tables(conn_config, streams, state, end_scn):
    """Open a connection and mine committed changes for all streams up to end_scn.

    Verifies supplemental logging (database-wide, or per-table for every
    stream), normalizes the session's date/time formats to UTC ISO-8601,
    starts LogMiner from the minimum 'scn' bookmark across streams, and
    emits RECORD/STATE messages the same way sync_tables_logminer does.
    Returns the updated state.
    """
    connection = orc_db.open_connection(conn_config)

    # Database-wide supplemental logging is sufficient; otherwise every
    # individual table must have it enabled.
    if not verify_db_supplemental_log_level(connection):
        for stream in streams:
            if not verify_table_supplemental_log_level(stream, connection):
                raise Exception("""
                    Unable to replicate with logminer for stream({}) because supplmental_log_data is not set to 'ALL' for either the table or the database.
                    Please run: ALTER DATABASE ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS;
                    """.format(stream.tap_stream_id))

    cur = connection.cursor()
    try:
        # Force UTC and ISO-8601 rendering so values land in Singer-friendly form.
        cur.execute("ALTER SESSION SET TIME_ZONE = '00:00'")
        cur.execute("""ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD"T"HH24:MI:SS."00+00:00"'""")
        cur.execute("""ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD"T"HH24:MI:SSXFF"+00:00"'""")
        cur.execute("""ALTER SESSION SET NLS_TIMESTAMP_TZ_FORMAT = 'YYYY-MM-DD"T"HH24:MI:SS.FFTZH:TZM'""")

        # Start from the least-advanced stream so no stream misses changes.
        start_scn = min([get_bookmark(state, s.tap_stream_id, 'scn') for s in streams])
        time_extracted = utils.now()

        start_logmnr_sql = """BEGIN
            DBMS_LOGMNR.START_LOGMNR(
                startScn => {},
                endScn => {},
                OPTIONS => DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG +
                           DBMS_LOGMNR.COMMITTED_DATA_ONLY +
                           DBMS_LOGMNR.CONTINUOUS_MINE);
            END;""".format(start_scn, end_scn)

        LOGGER.info("Starting LogMiner for %s: %s -> %s",
                    list(map(lambda s: s.tap_stream_id, streams)), start_scn, end_scn)
        LOGGER.info("%s", start_logmnr_sql)
        cur.execute(start_logmnr_sql)

        # Mine changes, one stream at a time.
        for stream in streams:
            md_map = metadata.to_map(stream.metadata)
            desired_columns = [c for c in stream.schema.properties.keys()
                               if common.should_sync_column(md_map, c)]

            # Positional binds :1..:n pull the new (redo) value of each column ...
            redo_value_sql_clause = ",\n ".join(
                "DBMS_LOGMNR.MINE_VALUE(REDO_VALUE, :{})".format(idx + 1)
                for idx in range(len(desired_columns)))
            # ... and :n+1..:2n pull the old (undo) value. The undo placeholders
            # must be offset by len(desired_columns): reusing :1..:n would leave
            # the 2n column binds below mismatched against only n distinct names.
            undo_value_sql_clause = ",\n ".join(
                "DBMS_LOGMNR.MINE_VALUE(UNDO_VALUE, :{})".format(idx + 1 + len(desired_columns))
                for idx in range(len(desired_columns)))

            schema_name = md_map.get(()).get('schema-name')
            stream_version = get_stream_version(stream.tap_stream_id, state)
            # seg_owner filter added for consistency with sync_tables_logminer:
            # without it, a same-named table in another schema would be mined too.
            mine_sql = """
                SELECT OPERATION, SQL_REDO, SCN, CSCN, COMMIT_TIMESTAMP,
                       {},
                       {}
                  FROM v$logmnr_contents
                 WHERE table_name = :table_name
                   AND seg_owner = :seg_owner
                   AND operation in ('INSERT', 'UPDATE', 'DELETE')
            """.format(redo_value_sql_clause, undo_value_sql_clause)

            column_binds = [orc_db.fully_qualified_column_name(schema_name, stream.table, c)
                            for c in desired_columns]
            binds = column_binds + column_binds + [stream.table, schema_name]

            rows_saved = 0
            columns_for_record = desired_columns + ['scn', '_sdc_deleted_at']
            with metrics.record_counter(None) as counter:
                LOGGER.info("Examining log for table %s", stream.tap_stream_id)
                common.send_schema_message(stream, ['lsn'])
                for op, redo, scn, cscn, commit_ts, *col_vals in cur.execute(mine_sql, binds):
                    redo_vals = col_vals[:len(desired_columns)]
                    undo_vals = col_vals[len(desired_columns):]
                    if op in ('INSERT', 'UPDATE'):
                        # New row image; not a delete, so _sdc_deleted_at is None.
                        redo_vals += [cscn, None]
                        record_message = row_to_singer_message(stream, redo_vals, stream_version,
                                                               columns_for_record, time_extracted)
                    elif op == 'DELETE':
                        # Old row image; deletion time is the commit timestamp (treated as UTC).
                        undo_vals += [cscn, singer.utils.strftime(commit_ts.replace(tzinfo=pytz.UTC))]
                        record_message = row_to_singer_message(stream, undo_vals, stream_version,
                                                               columns_for_record, time_extracted)
                    else:
                        raise Exception("unrecognized logminer operation: {}".format(op))

                    singer.write_message(record_message)
                    rows_saved += 1
                    counter.increment()
                    state = singer.write_bookmark(state, stream.tap_stream_id, 'scn', int(cscn))
                    if rows_saved % UPDATE_BOOKMARK_PERIOD == 0:
                        singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

        # Every stream is now caught up to end_scn; advance all bookmarks.
        for s in streams:
            LOGGER.info("updating bookmark for stream %s to end_scn %s", s.tap_stream_id, end_scn)
            state = singer.write_bookmark(state, s.tap_stream_id, 'scn', end_scn)

        singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    finally:
        # Close the cursor and connection even if mining raises (was leaked before).
        cur.close()
        connection.close()
    return state