Exemplo n.º 1
0
def do_sync_logical_replication(conn_config, stream, state, desired_columns,
                                md_map):
    """
    Sync one stream with logical replication and return the updated state.

    When the state holds no 'lsn' bookmark for the stream, a full-table sync
    is run first and the lsn fetched *before* that sync is stored as the
    'lsn' bookmark (the 'xmin' bookmark is reset to None). When an 'lsn'
    bookmark is present, the stream is handed to
    logical_replication.sync_table.

    :param conn_config: connection settings passed through to the sync helpers
    :param stream: stream object exposing ``tap_stream_id``
    :param state: bookmark state
    :param desired_columns: columns to sync
    :param md_map: metadata map of the stream
    :return: the state returned by the sync helpers / bookmark writes
    """
    stream_id = stream.tap_stream_id

    if not get_bookmark(state, stream_id, 'lsn'):
        # No lsn bookmark yet: start off with full-table replication.
        # The lsn is captured up front and only persisted once the
        # full-table pass is done.
        starting_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.info(
            "Stream %s is using logical replication. performing initial full table sync",
            stream_id)
        send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        state = singer.write_bookmark(state, stream_id, 'xmin', None)
        return singer.write_bookmark(state, stream_id, 'lsn', starting_lsn)

    current_lsn = logical_replication.fetch_current_lsn(conn_config)
    LOGGER.info("Stream %s is using logical replication. end lsn %s",
                stream_id, current_lsn)
    logical_replication.add_automatic_properties(stream)
    send_schema_message(stream, ['lsn'])
    return logical_replication.sync_table(conn_config, stream, state,
                                          desired_columns, md_map)
Exemplo n.º 2
0
def do_sync_logical_replication(conn_config, stream, state, desired_columns,
                                md_map):
    """
    Sync one stream with logical replication and return the updated state.

    The phase is chosen from the stream's 'xmin' and 'lsn' bookmarks:

    * xmin and lsn      -> resume an interrupted initial full-table sync
    * xmin, no lsn      -> inconsistent state, an Exception is raised
    * neither           -> start the initial full-table sync (the current
                           lsn is bookmarked before the sync begins)
    * lsn, no xmin      -> run logical_replication.sync_table

    :param conn_config: connection settings passed through to the sync helpers
    :param stream: stream object exposing ``tap_stream_id``
    :param state: bookmark state
    :param desired_columns: columns to sync
    :param md_map: metadata map of the stream
    :return: the updated state
    :raises Exception: if an xmin bookmark exists without an lsn bookmark
    """
    stream_id = stream.tap_stream_id
    LOGGER.info("Stream %s is using logical replication", stream_id)

    # Read each bookmark once; all branches below depend only on these two.
    xmin = get_bookmark(state, stream_id, 'xmin')
    lsn = get_bookmark(state, stream_id, 'lsn')

    if xmin and lsn:
        # finishing previously interrupted full-table
        # (first stage of logical replication)
        LOGGER.info(
            "Initial stage of full table sync was interrupted. resuming...")
        send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        state = singer.write_bookmark(state, stream_id, 'xmin', None)
        state = singer.write_bookmark(state, stream_id,
                                      'initial_logical_replication_complete',
                                      False)

    elif xmin:
        # inconsistent state: the placeholder is now actually filled in with
        # the offending xmin value (it was previously left as a raw "%s")
        raise Exception(
            "Xmin found(%s) in state implying full-table replication but no lsn is present"
            % (xmin,))

    elif not lsn:
        # initial full-table phase of logical replication
        end_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.info("Performing initial full table sync")
        state = singer.write_bookmark(state, stream_id, 'lsn', end_lsn)

        send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        state = singer.write_bookmark(state, stream_id, 'xmin', None)
        state = singer.write_bookmark(state, stream_id,
                                      'initial_logical_replication_complete',
                                      False)

    else:
        # initial stage of logical replication (full-table) has been
        # completed. moving onto pure logical replication
        LOGGER.info("Pure Logical Replication upto lsn %s",
                    logical_replication.fetch_current_lsn(conn_config))
        logical_replication.add_automatic_properties(stream)
        send_schema_message(stream, ['lsn'])
        state = logical_replication.sync_table(conn_config, stream, state,
                                               desired_columns, md_map)

    return state
Exemplo n.º 3
0
def do_sync(conn_config, catalog, default_replication_method, state):
    """
    Sync all selected streams of the catalog and return the resulting state.

    Traditional streams are synced one by one, with the stream recorded as
    ``currently_syncing`` in the state (if any) moved to the front. Logical
    streams are then grouped by the 'database-name' of their top-level
    metadata and synced per database; ``conn_config['dbname']`` is
    overwritten for each group.

    :param conn_config: connection settings (mutated: 'dbname' is set)
    :param catalog: dict with a 'streams' list
    :param default_replication_method: passed to the stream classification
        helpers
    :param state: bookmark state
    :return: the updated state
    """
    currently_syncing = singer.get_currently_syncing(state)
    streams = list(filter(is_selected_via_metadata, catalog['streams']))
    streams.sort(key=lambda s: s['tap_stream_id'])
    LOGGER.info("Selected streams: %s ", [s['tap_stream_id'] for s in streams])
    if any_logical_streams(streams, default_replication_method):
        LOGGER.info("Use of logical replication requires fetching an lsn...")
        end_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.info("End LSN: %s ", end_lsn)
    else:
        end_lsn = None

    sync_method_lookup, traditional_streams, logical_streams = \
        sync_method_for_streams(streams, state, default_replication_method)

    if currently_syncing:
        LOGGER.info("found currently_syncing: %s", currently_syncing)
        currently_syncing_stream = [
            s for s in traditional_streams
            if s['tap_stream_id'] == currently_syncing
        ]
        # The filtered result is always a list, never None, so test for
        # emptiness; an `is None` check would make this warning unreachable.
        if not currently_syncing_stream:
            LOGGER.warning(
                "unable to locate currently_syncing(%s) amongst selected traditional streams(%s). will ignore",
                currently_syncing,
                [s['tap_stream_id'] for s in traditional_streams])
        other_streams = [
            s for s in traditional_streams
            if s['tap_stream_id'] != currently_syncing
        ]
        # Resume with the interrupted stream first, then everything else.
        traditional_streams = currently_syncing_stream + other_streams
    else:
        LOGGER.info("No currently_syncing found")

    for stream in traditional_streams:
        state = sync_traditional_stream(
            conn_config, stream, state,
            sync_method_lookup[stream['tap_stream_id']], end_lsn)

    def database_name(s):
        # 'database-name' lives in the top-level (empty breadcrumb) metadata.
        return metadata.to_map(s['metadata']).get(()).get('database-name')

    # groupby only merges adjacent items, hence the sort by the same key.
    logical_streams.sort(key=database_name)
    for dbname, db_streams in itertools.groupby(logical_streams,
                                                database_name):
        conn_config['dbname'] = dbname
        state = sync_logical_streams(conn_config, list(db_streams), state,
                                     end_lsn)
    return state
def do_sync(conn_config,
            catalog,
            default_replication_method,
            state,
            state_file=None):
    """
    Orchestrate the sync of every selected stream and return the final state.

    Traditional streams run first (the stream marked ``currently_syncing``
    in the state, if found, goes to the front), followed by the logical
    streams grouped per 'database-name'. ``conn_config['dbname']`` is
    overwritten for each logical group; ``state_file`` is forwarded
    unchanged to sync_logical_streams.
    """

    def database_of(entry):
        # 'database-name' is read from the top-level (empty breadcrumb)
        # metadata of the stream.
        return metadata.to_map(entry['metadata']).get(()).get('database-name')

    currently_syncing = singer.get_currently_syncing(state)
    streams = sorted(filter(is_selected_via_metadata, catalog['streams']),
                     key=lambda entry: entry['tap_stream_id'])
    LOGGER.info("Selected streams: %s ",
                [entry['tap_stream_id'] for entry in streams])

    end_lsn = None
    if any_logical_streams(streams, default_replication_method):
        # Use of logical replication requires fetching an lsn
        end_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.debug("end_lsn = %s ", end_lsn)

    refresh_streams_schema(conn_config, streams)

    sync_method_lookup, traditional_streams, logical_streams = \
        sync_method_for_streams(streams, state, default_replication_method)

    if not currently_syncing:
        LOGGER.info("No streams marked as currently_syncing in state file")
    else:
        LOGGER.debug("Found currently_syncing: %s", currently_syncing)

        interrupted = [
            entry for entry in traditional_streams
            if entry['tap_stream_id'] == currently_syncing
        ]

        if not interrupted:
            LOGGER.warning(
                "unable to locate currently_syncing(%s) amongst selected traditional streams(%s). "
                "Will ignore", currently_syncing,
                {entry['tap_stream_id']
                 for entry in traditional_streams})

        remaining = [
            entry for entry in traditional_streams
            if entry['tap_stream_id'] != currently_syncing
        ]
        # The interrupted stream (if any) is resumed before the others.
        traditional_streams = interrupted + remaining

    for stream in traditional_streams:
        method = sync_method_lookup[stream['tap_stream_id']]
        state = sync_traditional_stream(conn_config, stream, state, method,
                                        end_lsn)

    # groupby only merges adjacent items, so sort by the grouping key first.
    logical_streams.sort(key=database_of)
    for dbname, group in itertools.groupby(logical_streams, database_of):
        conn_config['dbname'] = dbname
        state = sync_logical_streams(conn_config, list(group), state,
                                     end_lsn, state_file)
    return state
Exemplo n.º 5
0
def sync_logical_streams(conn_config, logical_streams, state):
    """
    Run logical replication for the given streams and return the state.

    With an empty ``logical_streams`` the state is returned untouched.
    Otherwise each stream is passed through
    logical_replication.add_automatic_properties and the results are
    handed to logical_replication.sync_tables.
    """
    if not logical_streams:
        return state

    current_lsn = logical_replication.fetch_current_lsn(conn_config)
    stream_ids = [entry['tap_stream_id'] for entry in logical_streams]
    LOGGER.info("Pure Logical Replication upto lsn %s for (%s)", current_lsn,
                stream_ids)

    prepared_streams = [
        logical_replication.add_automatic_properties(entry)
        for entry in logical_streams
    ]
    return logical_replication.sync_tables(conn_config, prepared_streams,
                                           state)
Exemplo n.º 6
0
def sync_traditional_stream(conn_config, stream, state, sync_method):
    """
    Sync a single non-logical stream and return the updated state.

    ``conn_config['dbname']`` is set from the stream's top-level metadata.
    A stream with no selected columns is skipped with a warning. Otherwise
    the stream is marked as currently syncing, synced according to
    ``sync_method`` ('full', 'incremental', 'logical_initial' or
    'logical_initial_interrupted'), un-marked again, and a STATE message
    carrying a deep copy of the state is written.

    :raises Exception: if ``sync_method`` is not one of the known methods
    """
    stream_id = stream['tap_stream_id']
    LOGGER.info("Beginning sync of stream(%s) with sync method(%s)",
                stream_id, sync_method)

    md_map = metadata.to_map(stream['metadata'])
    conn_config['dbname'] = md_map.get(()).get('database-name')
    desired_columns = sorted(
        column for column in stream['schema']['properties']
        if should_sync_column(md_map, column))

    if not desired_columns:
        LOGGER.warning(
            'There are no columns selected for stream %s, skipping it',
            stream_id)
        return state

    register_type_adapters(conn_config)

    known_methods = ('full', 'incremental', 'logical_initial',
                     'logical_initial_interrupted')
    if sync_method not in known_methods:
        raise Exception("unknown sync method {} for stream {}".format(
            sync_method, stream_id))

    # Every known method starts by flagging the stream as in progress.
    state = singer.set_currently_syncing(state, stream_id)

    if sync_method == 'full':
        state = do_sync_full_table(conn_config, stream, state, desired_columns,
                                   md_map)
    elif sync_method == 'incremental':
        state = do_sync_incremental(conn_config, stream, state,
                                    desired_columns, md_map)
    elif sync_method == 'logical_initial':
        # The lsn is bookmarked before the full-table pass starts.
        end_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.info("Performing initial full table sync")
        state = singer.write_bookmark(state, stream_id, 'lsn', end_lsn)

        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        state = singer.write_bookmark(state, stream_id, 'xmin', None)
    else:
        # 'logical_initial_interrupted'
        LOGGER.info(
            "Initial stage of full table sync was interrupted. resuming...")
        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)

    state = singer.set_currently_syncing(state, None)
    # Emit a deep copy of the state in the STATE message.
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state