Example #1
def sync_traditional_stream(conn_config, stream, state, sync_method, end_scn):
    LOGGER.info("Beginning sync of stream(%s) with sync method(%s)",
                stream.tap_stream_id, sync_method)
    md_map = metadata.to_map(stream.metadata)
    desired_columns = [
        c for c in stream.schema.properties.keys()
        if common.should_sync_column(md_map, c)
    ]
    desired_columns.sort()
    if len(desired_columns) == 0:
        LOGGER.warning(
            'There are no columns selected for stream %s, skipping it',
            stream.tap_stream_id)
        return state

    if sync_method == 'full':
        LOGGER.info("Stream %s is using full_table replication",
                    stream.tap_stream_id)
        state = singer.set_currently_syncing(state, stream.tap_stream_id)
        common.send_schema_message(stream, [])
        if md_map.get((), {}).get('is-view'):
            state = full_table.sync_view(conn_config, stream, state,
                                         desired_columns)
        else:
            state = full_table.sync_table(conn_config, stream, state,
                                          desired_columns)
    elif sync_method == 'log_initial':
        # Start off with full-table replication
        state = singer.set_currently_syncing(state, stream.tap_stream_id)
        LOGGER.info(
            "Stream %s is using LogMiner; will use full-table replication for first run",
            stream.tap_stream_id)

        # Record end_scn up front so that, once the initial full-table load
        # completes, log mining can pick up from this point.
        state = singer.write_bookmark(state, stream.tap_stream_id, 'scn',
                                      end_scn)

        common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns)
    elif sync_method == 'log_initial_interrupted':
        LOGGER.info(
            "Initial stage of full-table sync was interrupted. Resuming %s...",
            stream.tap_stream_id)
        state = singer.set_currently_syncing(state, stream.tap_stream_id)
        common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns)
    elif sync_method == 'incremental':
        state = singer.set_currently_syncing(state, stream.tap_stream_id)
        state = do_sync_incremental(conn_config, stream, state,
                                    desired_columns)

    else:
        raise Exception("unknown sync method {} for stream {}".format(
            sync_method, stream.tap_stream_id))

    state = singer.set_currently_syncing(state, None)
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state
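
For context, the end_scn argument threaded through sync_traditional_stream bounds the LogMiner window that follows the initial load. A minimal sketch of how a caller might obtain it (this helper is an assumption for illustration, not part of the examples; V$DATABASE.CURRENT_SCN is Oracle's current system change number):

def fetch_current_scn(cur):
    # Hypothetical helper: read the database's current SCN to use as end_scn.
    cur.execute("SELECT current_scn FROM V$DATABASE")
    return cur.fetchone()[0]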
Example #2
def sync_method_for_streams(streams, state, default_replication_method):
   lookup = {}
   traditional_streams = []
   logical_streams = []

   for stream in streams:
      stream_metadata = metadata.to_map(stream.metadata)
      replication_method = stream_metadata.get((), {}).get('replication-method', default_replication_method)
      replication_key = stream_metadata.get((), {}).get('replication-key')

      state = clear_state_on_replication_change(state, stream.tap_stream_id, replication_key, replication_method)

      if replication_method not in {'LOG_BASED', 'FULL_TABLE', 'INCREMENTAL'}:
         raise Exception("Unrecognized replication_method {}".format(replication_method))

      if replication_method == 'LOG_BASED' and stream_metadata.get((), {}).get('is-view'):
         raise Exception('LogMiner is NOT supported for views. Please change the replication method for {}'.format(stream.tap_stream_id))

      desired_columns = [c for c in stream.schema.properties.keys() if common.should_sync_column(stream_metadata, c)]
      desired_columns.sort()

      if len(desired_columns) == 0:
         LOGGER.warning('There are no columns selected for stream %s, skipping it', stream.tap_stream_id)
         continue

      if replication_method == 'FULL_TABLE':
         lookup[stream.tap_stream_id] = 'full'
         traditional_streams.append(stream)
      elif replication_method == 'LOG_BASED':
         if not get_bookmark(state, stream.tap_stream_id, 'scn'):
            #initial full-table phase of LogMiner
            lookup[stream.tap_stream_id] = 'log_initial'
            traditional_streams.append(stream)

         elif get_bookmark(state, stream.tap_stream_id, 'ORA_ROWSCN') and get_bookmark(state, stream.tap_stream_id, 'scn'):
            #finishing previously interrupted full-table (first stage of logical replication)
            lookup[stream.tap_stream_id] = 'log_initial_interrupted'
            traditional_streams.append(stream)

         # Inconsistent state
         elif get_bookmark(state, stream.tap_stream_id, 'ORA_ROWSCN') and not get_bookmark(state, stream.tap_stream_id, 'scn'):
            raise Exception("ORA_ROWSCN found in state for stream {}, implying an interrupted initial full-table replication, but no scn bookmark is present".format(stream.tap_stream_id))

         else:
            # Initial stage of LogMiner (full-table) has been completed; moving on to pure LogMiner
            lookup[stream.tap_stream_id] = 'pure_log'
            logical_streams.append(stream)
      else:
         # Incremental replication
         lookup[stream.tap_stream_id] = 'incremental'
         traditional_streams.append(stream)

   return lookup, traditional_streams, logical_streams
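
The three values returned above feed a sync driver. A hedged sketch of that wiring follows (do_sync itself is an assumption, named for illustration; sync_traditional_stream and sync_tables are Examples #1 and #4): traditional streams run one at a time, while pure-LogMiner streams are mined together over a single SCN window.

def do_sync(conn_config, streams, state, default_replication_method, end_scn):
   # Hypothetical driver: dispatch the lookup and stream lists produced by
   # sync_method_for_streams.
   sync_method_lookup, traditional_streams, logical_streams = \
      sync_method_for_streams(streams, state, default_replication_method)

   # Full-table, incremental, and initial log-based loads run per stream.
   for stream in traditional_streams:
      state = sync_traditional_stream(conn_config, stream, state,
                                      sync_method_lookup[stream.tap_stream_id], end_scn)

   # Streams already past their initial full-table load are mined as a batch.
   if logical_streams:
      state = sync_tables(conn_config, logical_streams, state, end_scn)

   return state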
Example #3
def sync_tables_logminer(cur, streams, state, start_scn, end_scn):

   time_extracted = utils.now()

   start_logmnr_sql = """BEGIN
                         DBMS_LOGMNR.START_LOGMNR(
                                 startScn => {},
                                 endScn => {},
                                 OPTIONS => DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG +
                                            DBMS_LOGMNR.COMMITTED_DATA_ONLY +
                                            DBMS_LOGMNR.CONTINUOUS_MINE);
                         END;""".format(start_scn, end_scn)

   LOGGER.info("Starting LogMiner for %s: %s -> %s", list(map(lambda s: s.tap_stream_id, streams)), start_scn, end_scn)
   LOGGER.info("%s",start_logmnr_sql)
   cur.execute(start_logmnr_sql)

   #mine changes
   for stream in streams:
      md_map = metadata.to_map(stream.metadata)
      desired_columns = [c for c in stream.schema.properties.keys() if common.should_sync_column(md_map, c)]
      redo_value_sql_clause = ",\n ".join(["""DBMS_LOGMNR.MINE_VALUE(REDO_VALUE, :{})""".format(idx+1)
                                           for idx,c in enumerate(desired_columns)])
      undo_value_sql_clause = ",\n ".join(["""DBMS_LOGMNR.MINE_VALUE(UNDO_VALUE, :{})""".format(idx+1)
                                           for idx,c in enumerate(desired_columns)])

      schema_name = md_map.get(()).get('schema-name')
      stream_version = get_stream_version(stream.tap_stream_id, state)
      mine_sql = """
      SELECT OPERATION, SQL_REDO, SCN, CSCN, COMMIT_TIMESTAMP,  {}, {} from v$logmnr_contents where table_name = :table_name AND seg_owner = :seg_owner AND operation in ('INSERT', 'UPDATE', 'DELETE')
      """.format(redo_value_sql_clause, undo_value_sql_clause)
      binds = [orc_db.fully_qualified_column_name(schema_name, stream.table, c) for c in desired_columns] + \
              [orc_db.fully_qualified_column_name(schema_name, stream.table, c) for c in desired_columns] + \
              [stream.table] + [schema_name]


      rows_saved = 0
      columns_for_record = desired_columns + ['scn', '_sdc_deleted_at']
      with metrics.record_counter(None) as counter:
         LOGGER.info("Examing log for table %s", stream.tap_stream_id)
         common.send_schema_message(stream, ['lsn'])
         LOGGER.info("mine_sql=%s", mine_sql)
         for op, redo, scn, cscn, commit_ts, *col_vals in cur.execute(mine_sql, binds):
            redo_vals = col_vals[0:len(desired_columns)]
            undo_vals = col_vals[len(desired_columns):]
            if op == 'INSERT' or op == 'UPDATE':
               redo_vals += [cscn, None]
               record_message = row_to_singer_message(stream, redo_vals, stream_version, columns_for_record, time_extracted)
            elif op == 'DELETE':
               undo_vals += [cscn, singer.utils.strftime(commit_ts.replace(tzinfo=pytz.UTC))]
               record_message = row_to_singer_message(stream, undo_vals, stream_version, columns_for_record, time_extracted)
            else:
               raise Exception("unrecognized logminer operation: {}".format(op))

            singer.write_message(record_message)
            rows_saved = rows_saved + 1
            counter.increment()
            state = singer.write_bookmark(state,
                                          stream.tap_stream_id,
                                          'scn',
                                          int(cscn))


            if rows_saved % UPDATE_BOOKMARK_PERIOD == 0:
               singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

   for s in streams:
      LOGGER.info("updating bookmark for stream %s to end_lsn %s", s.tap_stream_id, end_scn)
      state = singer.write_bookmark(state, s.tap_stream_id, 'scn', end_scn)
      singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

   return state
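
Both mining loops in Examples #3 and #4 emit a STATE message every UPDATE_BOOKMARK_PERIOD rows so an interrupted sync can resume near where it stopped. The constant lives at module level; its value is not shown in these examples, so the figure below is an assumption:

# Assumed module-level constant: records to process between STATE messages.
UPDATE_BOOKMARK_PERIOD = 1000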
Example #4
def sync_tables(conn_config, streams, state, end_scn):
   connection = orc_db.open_connection(conn_config)
   if not verify_db_supplemental_log_level(connection):
      for stream in streams:
         if not verify_table_supplemental_log_level(stream, connection):
            raise Exception("""
      Unable to replicate with logminer for stream({}) because supplmental_log_data is not set to 'ALL' for either the table or the database.
      Please run: ALTER DATABASE ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS;
            """.format(stream.tap_stream_id))



   cur = connection.cursor()
   cur.execute("ALTER SESSION SET TIME_ZONE = '00:00'")
   cur.execute("""ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD"T"HH24:MI:SS."00+00:00"'""")
   cur.execute("""ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD"T"HH24:MI:SSXFF"+00:00"'""")
   cur.execute("""ALTER SESSION SET NLS_TIMESTAMP_TZ_FORMAT  = 'YYYY-MM-DD"T"HH24:MI:SS.FFTZH:TZM'""")

   start_scn = min(get_bookmark(state, s.tap_stream_id, 'scn') for s in streams)
   time_extracted = utils.now()

   start_logmnr_sql = """BEGIN
                         DBMS_LOGMNR.START_LOGMNR(
                                 startScn => {},
                                 endScn => {},
                                 OPTIONS => DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG +
                                            DBMS_LOGMNR.COMMITTED_DATA_ONLY +
                                            DBMS_LOGMNR.CONTINUOUS_MINE);
                         END;""".format(start_scn, end_scn)

   LOGGER.info("Starting LogMiner for %s: %s -> %s", list(map(lambda s: s.tap_stream_id, streams)), start_scn, end_scn)
   LOGGER.info("%s",start_logmnr_sql)
   cur.execute(start_logmnr_sql)

   #mine changes
   for stream in streams:
      md_map = metadata.to_map(stream.metadata)
      desired_columns = [c for c in stream.schema.properties.keys() if common.should_sync_column(md_map, c)]
      redo_value_sql_clause = ",\n ".join(["""DBMS_LOGMNR.MINE_VALUE(REDO_VALUE, :{})""".format(idx+1)
                                           for idx,c in enumerate(desired_columns)])
      undo_value_sql_clause = ",\n ".join(["""DBMS_LOGMNR.MINE_VALUE(UNDO_VALUE, :{})""".format(idx+1)
                                           for idx,c in enumerate(desired_columns)])

      schema_name = md_map.get(()).get('schema-name')
      stream_version = get_stream_version(stream.tap_stream_id, state)
      mine_sql = """
      SELECT OPERATION, SQL_REDO, SCN, CSCN, COMMIT_TIMESTAMP,  {}, {} from v$logmnr_contents where table_name = :table_name AND operation in ('INSERT', 'UPDATE', 'DELETE')
      """.format(redo_value_sql_clause, undo_value_sql_clause)
      binds = [orc_db.fully_qualified_column_name(schema_name, stream.table, c) for c in desired_columns] + \
              [orc_db.fully_qualified_column_name(schema_name, stream.table, c) for c in desired_columns] + \
              [stream.table]


      rows_saved = 0
      columns_for_record = desired_columns + ['scn', '_sdc_deleted_at']
      with metrics.record_counter(None) as counter:
         LOGGER.info("Examing log for table %s", stream.tap_stream_id)
         common.send_schema_message(stream, ['lsn'])
         for op, redo, scn, cscn, commit_ts, *col_vals in cur.execute(mine_sql, binds):
            redo_vals = col_vals[0:len(desired_columns)]
            undo_vals = col_vals[len(desired_columns):]
            if op == 'INSERT' or op == 'UPDATE':
               redo_vals += [cscn, None]
               record_message = row_to_singer_message(stream, redo_vals, stream_version, columns_for_record, time_extracted)
            elif op == 'DELETE':
               undo_vals += [cscn, singer.utils.strftime(commit_ts.replace(tzinfo=pytz.UTC))]
               record_message = row_to_singer_message(stream, undo_vals, stream_version, columns_for_record, time_extracted)
            else:
               raise Exception("unrecognized logminer operation: {}".format(op))

            singer.write_message(record_message)
            rows_saved = rows_saved + 1
            counter.increment()
            state = singer.write_bookmark(state,
                                          stream.tap_stream_id,
                                          'scn',
                                          int(cscn))


            if rows_saved % UPDATE_BOOKMARK_PERIOD == 0:
               singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

   for s in streams:
      LOGGER.info("updating bookmark for stream %s to end_lsn %s", s.tap_stream_id, end_scn)
      state = singer.write_bookmark(state, s.tap_stream_id, 'scn', end_scn)
      singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

   cur.close()
   connection.close()
   return state
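
A minimal calling sketch for sync_tables (hypothetical wiring; fetch_current_scn is the assumed helper sketched after Example #1): fetch the window's upper bound once, then hand the whole batch of log-based streams over.

connection = orc_db.open_connection(conn_config)
cur = connection.cursor()
end_scn = fetch_current_scn(cur)  # assumed helper; reads V$DATABASE.CURRENT_SCN
cur.close()
connection.close()
state = sync_tables(conn_config, logical_streams, state, end_scn)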