Exemplo n.º 1
0
def numeric_scale(c):
    """Return the decimal scale to use for column *c*, capped at MAX_SCALE.

    A scale of None (e.g. an unconstrained NUMERIC column) falls back to
    the maximum supported scale.
    """
    if c.numeric_scale is None:
        return MAX_SCALE
    if c.numeric_scale > MAX_SCALE:
        # Report the actual cap instead of a hard-coded literal so the
        # message stays correct if MAX_SCALE ever changes.
        LOGGER.warning("capping decimal scale to %s.  THIS MAY CAUSE TRUNCATION",
                       MAX_SCALE)
        return MAX_SCALE

    return c.numeric_scale
Exemplo n.º 2
0
def numeric_precision(c):
    """Return the decimal precision to use for column *c*, capped at MAX_PRECISION.

    A precision of None (e.g. an unconstrained NUMERIC column) falls back
    to the maximum supported precision.
    """
    if c.numeric_precision is None:
        return MAX_PRECISION

    if c.numeric_precision > MAX_PRECISION:
        # Report the actual cap instead of a hard-coded literal so the
        # message stays correct if MAX_PRECISION ever changes.
        LOGGER.warning("capping decimal precision to %s.  THIS MAY CAUSE TRUNCATION",
                       MAX_PRECISION)
        return MAX_PRECISION

    return c.numeric_precision
Exemplo n.º 3
0
def sync_traditional_stream(conn_config, stream, state, sync_method, end_lsn):
    """Sync one stream with a traditional (non-streaming) method.

    Dispatches on *sync_method*:
      - "full": full-table sync.
      - "incremental": incremental sync by replication key.
      - "logical_initial": initial full-table pass before logical
        replication; bookmarks *end_lsn* so logical replication can pick up
        from there, then clears the xmin bookmark on completion.
      - "logical_initial_interrupted": resume an interrupted initial pass.

    Returns the updated state (also emitted as a STATE message).
    Raises Exception for an unknown sync method.
    """
    LOGGER.info("Beginning sync of stream(%s) with sync method(%s)",
                stream["tap_stream_id"], sync_method)
    md_map = metadata.to_map(stream["metadata"])
    # Use a default so a missing top-level metadata entry returns None
    # instead of raising AttributeError (consistent with
    # sync_method_for_streams, which uses .get((), {}) throughout).
    conn_config["dbname"] = md_map.get((), {}).get("database-name")
    desired_columns = [
        c for c in stream["schema"]["properties"].keys()
        if sync_common.should_sync_column(md_map, c)
    ]
    desired_columns.sort()

    if len(desired_columns) == 0:
        LOGGER.warning(
            "There are no columns selected for stream %s, skipping it",
            stream["tap_stream_id"])
        return state

    register_type_adapters(conn_config)

    if sync_method == "full":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        state = do_sync_full_table(conn_config, stream, state, desired_columns,
                                   md_map)
    elif sync_method == "incremental":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        state = do_sync_incremental(conn_config, stream, state,
                                    desired_columns, md_map)
    elif sync_method == "logical_initial":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        LOGGER.info("Performing initial full table sync")
        # Record the lsn up front so logical replication knows where to
        # start once the full-table pass completes.
        state = singer.write_bookmark(state, stream["tap_stream_id"], "lsn",
                                      end_lsn)

        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        # Clearing xmin marks the full-table stage as finished.
        state = singer.write_bookmark(state, stream["tap_stream_id"], "xmin",
                                      None)
    elif sync_method == "logical_initial_interrupted":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        LOGGER.info(
            "Initial stage of full table sync was interrupted. resuming...")
        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
    else:
        raise Exception("unknown sync method {} for stream {}".format(
            sync_method, stream["tap_stream_id"]))

    state = singer.set_currently_syncing(state, None)
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state
Exemplo n.º 4
0
def do_sync(conn_config, catalog, default_replication_method, state):
    """Run a sync pass over all selected streams in *catalog*.

    Traditional streams (full / incremental / initial-logical) are synced
    one at a time, resuming with the currently-syncing stream first if one
    is recorded in *state*. Pure-logical streams are then grouped by
    database name and synced per database. Returns the final state.
    """
    currently_syncing = singer.get_currently_syncing(state)
    streams = list(filter(is_selected_via_metadata, catalog["streams"]))
    streams.sort(key=lambda s: s["tap_stream_id"])
    LOGGER.info("Selected streams: %s ",
                list(map(lambda s: s["tap_stream_id"], streams)))
    if any_logical_streams(streams, default_replication_method):
        LOGGER.info("Use of logical replication requires fetching an lsn...")
        end_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.info("End LSN: %s ", end_lsn)
    else:
        end_lsn = None

    sync_method_lookup, traditional_streams, logical_streams = sync_method_for_streams(
        streams, state, default_replication_method)

    if currently_syncing:
        LOGGER.info("found currently_syncing: %s", currently_syncing)
        currently_syncing_stream = list(
            filter(lambda s: s["tap_stream_id"] == currently_syncing,
                   traditional_streams))
        # BUG FIX: list(filter(...)) never returns None, so the original
        # `is None` check could never fire. Test for emptiness so the
        # warning is actually emitted when the stream is no longer selected.
        if not currently_syncing_stream:
            LOGGER.warning(
                "Can't find currently_syncing(%s) in selected traditional streams(%s). Will ignore",
                currently_syncing,
                list(map(lambda s: s["tap_stream_id"], traditional_streams)),
            )
        other_streams = list(
            filter(lambda s: s["tap_stream_id"] != currently_syncing,
                   traditional_streams))
        # Put the interrupted stream (if any) at the front of the queue.
        traditional_streams = currently_syncing_stream + other_streams
    else:
        LOGGER.info("No currently_syncing found")

    for stream in traditional_streams:
        state = sync_traditional_stream(
            conn_config, stream, state,
            sync_method_lookup[stream["tap_stream_id"]], end_lsn)

    # groupby requires its input pre-sorted by the same key.
    logical_streams.sort(key=lambda s: metadata.to_map(s["metadata"]).get(
        ()).get("database-name"))
    for dbname, streams in itertools.groupby(
            logical_streams, lambda s: metadata.to_map(s["metadata"]).get(
                ()).get("database-name")):
        conn_config["dbname"] = dbname
        state = sync_logical_streams(conn_config, list(streams), state,
                                     end_lsn)
    return state
Exemplo n.º 5
0
def attempt_connection_to_db(conn_config, dbname):
    """Probe whether *dbname* is reachable with the given connection config.

    Opens (and immediately closes) a connection against a copy of
    *conn_config* pointed at *dbname*. Returns True on success, False on
    any failure — an unreachable database is acceptable when it is not
    replicated from, so the error is only logged as a warning.
    """
    probe_config = copy.deepcopy(conn_config)
    probe_config["dbname"] = dbname
    LOGGER.info("(%s) Testing connectivity...", dbname)
    try:
        connection = post_db.open_connection(probe_config)
        LOGGER.info("(%s) connectivity verified", dbname)
        connection.close()
    except Exception as exc:
        LOGGER.warning(
            'Unable to connect to %s. This is OK if you do not replicate from this database: "%s"',
            dbname,
            exc,
        )
        return False
    return True
Exemplo n.º 6
0
def sync_method_for_streams(streams, state, default_replication_method):
    """Decide how each selected stream should be synced.

    Returns a 3-tuple:
      - lookup: tap_stream_id -> sync method ("full", "incremental",
        "logical_initial", "logical_initial_interrupted", "pure_logical")
      - traditional_streams: streams to sync with one-shot methods
      - logical_streams: streams continuing with pure logical replication

    Raises Exception for an unrecognized replication method, for LOG_BASED
    replication on a view, or for inconsistent bookmark state (xmin with
    no lsn).
    """
    lookup = {}
    traditional_streams = []  # fixed local typo: was "traditional_steams"
    logical_streams = []

    for stream in streams:
        stream_metadata = metadata.to_map(stream["metadata"])
        replication_method = stream_metadata.get(
            (), {}).get("replication-method", default_replication_method)
        replication_key = stream_metadata.get((), {}).get("replication-key")

        # Reset bookmarks if the replication method/key changed since the
        # previous run.
        state = clear_state_on_replication_change(state,
                                                  stream["tap_stream_id"],
                                                  replication_key,
                                                  replication_method)

        if replication_method not in ("LOG_BASED", "FULL_TABLE", "INCREMENTAL"):
            raise Exception(
                "Unrecognized replication_method {} for stream {}".format(
                    replication_method, stream["tap_stream_id"]))

        md_map = metadata.to_map(stream["metadata"])
        desired_columns = [
            c for c in stream["schema"]["properties"].keys()
            if sync_common.should_sync_column(md_map, c)
        ]
        desired_columns.sort()

        if len(desired_columns) == 0:
            LOGGER.warning(
                "There are no columns selected for stream %s, skipping it",
                stream["tap_stream_id"])
            continue

        if replication_method == "LOG_BASED" and stream_metadata.get(
            (), {}).get("is-view"):
            # BUG FIX: added the missing space between the concatenated
            # literals (was "views.Please").
            raise Exception(
                "Logical Replication is NOT supported for views. "
                "Please change the replication method for {}".format(
                    stream["tap_stream_id"]))

        if replication_method == "FULL_TABLE":
            lookup[stream["tap_stream_id"]] = "full"
            traditional_streams.append(stream)
        elif replication_method == "INCREMENTAL":
            lookup[stream["tap_stream_id"]] = "incremental"
            traditional_streams.append(stream)

        elif get_bookmark(state,
                          stream["tap_stream_id"], "xmin") and get_bookmark(
                              state, stream["tap_stream_id"], "lsn"):
            # Resuming a previously interrupted full-table pass (first stage
            # of logical replication).
            lookup[stream["tap_stream_id"]] = "logical_initial_interrupted"
            traditional_streams.append(stream)

        elif get_bookmark(
                state, stream["tap_stream_id"], "xmin") and not get_bookmark(
                    state, stream["tap_stream_id"], "lsn"):
            # Inconsistent state: an xmin bookmark without an lsn should
            # never happen.
            # BUG FIX: the original message contained a bare "%s" that was
            # never formatted; actually interpolate the stream id.
            raise Exception(
                "Xmin found({}) in state implying full-table replication "
                "but no lsn is present".format(stream["tap_stream_id"]))

        elif not get_bookmark(state, stream["tap_stream_id"],
                              "xmin") and not get_bookmark(
                                  state, stream["tap_stream_id"], "lsn"):
            # No bookmarks at all: initial full-table phase of logical
            # replication.
            lookup[stream["tap_stream_id"]] = "logical_initial"
            traditional_streams.append(stream)

        else:  # no xmin but we have an lsn
            # Initial full-table stage has completed; move on to pure
            # logical replication.
            lookup[stream["tap_stream_id"]] = "pure_logical"
            logical_streams.append(stream)

    return lookup, traditional_streams, logical_streams