Exemplo n.º 1
0
def do_sync_incremental(mysql_conn,
                        catalog_entry,
                        state,
                        columns,
                        optional_limit=None):
    LOGGER.info("Stream %s is using incremental replication",
                catalog_entry.stream)

    md_map = metadata.to_map(catalog_entry.metadata)
    replication_key = md_map.get((), {}).get('replication-key')

    if not replication_key:
        raise Exception(
            "Cannot use INCREMENTAL replication for table ({}) without a replication key."
            .format(catalog_entry.stream))

    write_schema_message(catalog_entry=catalog_entry,
                         bookmark_properties=[replication_key])

    if optional_limit:
        LOGGER.info(
            "Incremental Stream %s is using an optional limit clause of %d",
            catalog_entry.stream, int(optional_limit))
        incremental.sync_table(mysql_conn, catalog_entry, state, columns,
                               int(optional_limit))
    else:
        incremental.sync_table(mysql_conn, catalog_entry, state, columns)

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
Exemplo n.º 2
0
def do_sync_incremental(mysql_conn, catalog_entry, state, columns):
    LOGGER.info("Stream %s is using incremental replication", catalog_entry.stream)

    md_map = metadata.to_map(catalog_entry.metadata)
    replication_key = md_map.get((), {}).get('replication-key')

    if not replication_key:
        raise Exception(
            f"Cannot use INCREMENTAL replication for table ({catalog_entry.stream}) without a replication key.")

    write_schema_message(catalog_entry=catalog_entry,
                         bookmark_properties=[replication_key])

    incremental.sync_table(mysql_conn, catalog_entry, state, columns)

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
Exemplo n.º 3
0
def generate_messages(con, catalog, state):
    catalog = resolve_catalog(con, catalog, state)

    for catalog_entry in catalog.streams:
        state = singer.set_currently_syncing(state, catalog_entry.tap_stream_id)

        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        md_map = metadata.to_map(catalog_entry.metadata)

        replication_method = md_map.get((), {}).get('replication-method')
        replication_key = singer.get_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'replication_key')

        if catalog_entry.is_view:
            key_properties = md_map.get((), {}).get('view-key-properties')
        else:
            key_properties = md_map.get((), {}).get('table-key-properties')

        # Emit a SCHEMA message before we sync any records
        yield singer.SchemaMessage(
            stream=catalog_entry.stream,
            schema=catalog_entry.schema.to_dict(),
            key_properties=key_properties,
            bookmark_properties=replication_key
        )

        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table

            log_engine(con, catalog_entry)

            if replication_method == 'INCREMENTAL':
                for message in incremental.sync_table(con, catalog_entry, state):
                    yield message
            elif replication_method == 'FULL_TABLE':
                for message in full_table.sync_table(con, catalog_entry, state):
                    yield message
            else:
                raise Exception("only INCREMENTAL and FULL TABLE replication methods are supported")

    # if we get here, we've finished processing all the streams, so clear
    # currently_syncing from the state and emit a state message.
    state = singer.set_currently_syncing(state, None)
    yield singer.StateMessage(value=copy.deepcopy(state))
Exemplo n.º 4
0
def generate_messages(con, config, catalog, state):
    catalog = resolve_catalog(con, catalog, state)

    for catalog_entry in catalog.streams:
        columns = list(catalog_entry.schema.properties.keys())

        if not columns:
            LOGGER.warning(
                'There are no columns selected for stream %s, skipping it.',
                catalog_entry.stream)
            continue

        state = singer.set_currently_syncing(state,
                                             catalog_entry.tap_stream_id)

        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        md_map = metadata.to_map(catalog_entry.metadata)

        replication_method = md_map.get((), {}).get('replication-method')
        replication_key = md_map.get((), {}).get('replication-key')

        if catalog_entry.is_view:
            key_properties = md_map.get((), {}).get('view-key-properties')
        else:
            key_properties = md_map.get((), {}).get('table-key-properties')

        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table

            log_engine(con, catalog_entry)

            if replication_method == 'INCREMENTAL':
                LOGGER.info("Stream %s is using incremental replication",
                            catalog_entry.stream)

                yield generate_schema_message(catalog_entry, key_properties,
                                              [replication_key])

                for message in incremental.sync_table(con, catalog_entry,
                                                      state, columns):
                    yield message
            elif replication_method == 'LOG_BASED':
                if catalog_entry.is_view:
                    raise Exception(
                        "Unable to replicate stream({}) with binlog because it is a view."
                        .format(catalog_entry.stream))

                LOGGER.info("Stream %s is using binlog replication",
                            catalog_entry.stream)

                log_file = singer.get_bookmark(state,
                                               catalog_entry.tap_stream_id,
                                               'log_file')

                log_pos = singer.get_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'log_pos')

                yield generate_schema_message(catalog_entry, key_properties,
                                              [])

                if log_file and log_pos:
                    columns = binlog.add_automatic_properties(
                        catalog_entry, columns)

                    for message in binlog.sync_table(con, config,
                                                     catalog_entry, state,
                                                     columns):
                        yield message
                else:
                    LOGGER.info("Performing initial full table sync")

                    log_file, log_pos = binlog.fetch_current_log_file_and_pos(
                        con)

                    stream_version = common.get_stream_version(
                        catalog_entry.tap_stream_id, state)

                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'version', stream_version)

                    for message in full_table.sync_table(
                            con, catalog_entry, state, columns,
                            stream_version):
                        yield message

                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'log_file', log_file)

                    state = singer.write_bookmark(state,
                                                  catalog_entry.tap_stream_id,
                                                  'log_pos', log_pos)

                    yield singer.StateMessage(value=copy.deepcopy(state))
            elif replication_method == 'FULL_TABLE':
                LOGGER.info("Stream %s is using full table replication",
                            catalog_entry.stream)

                yield generate_schema_message(catalog_entry, key_properties,
                                              [])

                stream_version = common.get_stream_version(
                    catalog_entry.tap_stream_id, state)

                for message in full_table.sync_table(con, catalog_entry, state,
                                                     columns, stream_version):
                    yield message

                # Prefer initial_full_table_complete going forward
                singer.clear_bookmark(state, catalog_entry.tap_stream_id,
                                      'version')

                state = singer.write_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'initial_full_table_complete',
                                              True)

                yield singer.StateMessage(value=copy.deepcopy(state))
            else:
                raise Exception(
                    "only INCREMENTAL, LOG_BASED, and FULL TABLE replication methods are supported"
                )

    # if we get here, we've finished processing all the streams, so clear
    # currently_syncing from the state and emit a state message.
    state = singer.set_currently_syncing(state, None)
    yield singer.StateMessage(value=copy.deepcopy(state))