def numeric_scale(c):
    """Return the decimal scale to use for column *c*, capped at MAX_SCALE.

    A scale of None (e.g. an unconstrained NUMERIC column) defaults to the
    maximum supported scale. Scales above MAX_SCALE are capped, which may
    truncate data.
    """
    if c.numeric_scale is None:
        return MAX_SCALE
    if c.numeric_scale > MAX_SCALE:
        # Log the actual cap so the message stays correct if MAX_SCALE changes
        # (the old message hard-coded "38").
        LOGGER.warning(
            "capping decimal scale to %s. THIS MAY CAUSE TRUNCATION", MAX_SCALE)
        return MAX_SCALE
    return c.numeric_scale
def numeric_precision(c):
    """Return the decimal precision to use for column *c*, capped at MAX_PRECISION.

    A precision of None (e.g. an unconstrained NUMERIC column) defaults to the
    maximum supported precision. Precisions above MAX_PRECISION are capped,
    which may truncate data.
    """
    if c.numeric_precision is None:
        return MAX_PRECISION
    if c.numeric_precision > MAX_PRECISION:
        # Log the actual cap so the message stays correct if MAX_PRECISION
        # changes (the old message hard-coded "100").
        LOGGER.warning(
            "capping decimal precision to %s. THIS MAY CAUSE TRUNCATION",
            MAX_PRECISION)
        return MAX_PRECISION
    return c.numeric_precision
def sync_traditional_stream(conn_config, stream, state, sync_method, end_lsn):
    """Sync one stream using a non-streaming ("traditional") sync method.

    sync_method is one of "full", "incremental", "logical_initial", or
    "logical_initial_interrupted" (as produced by sync_method_for_streams);
    any other value raises. end_lsn bounds the initial full-table stage of
    logical replication. Mutates conn_config["dbname"] to the stream's
    database, emits a STATE message, and returns the updated state.
    """
    LOGGER.info("Beginning sync of stream(%s) with sync method(%s)",
                stream["tap_stream_id"], sync_method)
    md_map = metadata.to_map(stream["metadata"])
    # Use a default dict so a missing top-level () metadata entry yields None
    # instead of raising AttributeError (consistent with
    # sync_method_for_streams, which uses .get((), {})).
    conn_config["dbname"] = md_map.get((), {}).get("database-name")
    desired_columns = [
        c for c in stream["schema"]["properties"].keys()
        if sync_common.should_sync_column(md_map, c)
    ]
    desired_columns.sort()

    if len(desired_columns) == 0:
        LOGGER.warning(
            "There are no columns selected for stream %s, skipping it",
            stream["tap_stream_id"])
        return state

    register_type_adapters(conn_config)

    if sync_method == "full":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        state = do_sync_full_table(conn_config, stream, state, desired_columns,
                                   md_map)
    elif sync_method == "incremental":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        state = do_sync_incremental(conn_config, stream, state,
                                    desired_columns, md_map)
    elif sync_method == "logical_initial":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        LOGGER.info("Performing initial full table sync")
        # Bookmark the end LSN up front so pure logical replication can
        # resume from here once the initial full-table load completes.
        state = singer.write_bookmark(state, stream["tap_stream_id"], "lsn",
                                      end_lsn)
        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        # Clearing xmin marks the full-table stage as finished.
        state = singer.write_bookmark(state, stream["tap_stream_id"], "xmin",
                                      None)
    elif sync_method == "logical_initial_interrupted":
        state = singer.set_currently_syncing(state, stream["tap_stream_id"])
        LOGGER.info(
            "Initial stage of full table sync was interrupted. resuming...")
        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
    else:
        raise Exception("unknown sync method {} for stream {}".format(
            sync_method, stream["tap_stream_id"]))

    state = singer.set_currently_syncing(state, None)
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state
def do_sync(conn_config, catalog, default_replication_method, state):
    """Run one sync pass over all selected streams in the catalog.

    Traditional streams (full table / incremental / initial logical load)
    are synced first — resuming with any interrupted stream — followed by
    pure-logical streams, grouped per database. Returns the updated state.
    """
    currently_syncing = singer.get_currently_syncing(state)
    streams = list(filter(is_selected_via_metadata, catalog["streams"]))
    streams.sort(key=lambda s: s["tap_stream_id"])
    LOGGER.info("Selected streams: %s ",
                list(map(lambda s: s["tap_stream_id"], streams)))
    if any_logical_streams(streams, default_replication_method):
        LOGGER.info("Use of logical replication requires fetching an lsn...")
        end_lsn = logical_replication.fetch_current_lsn(conn_config)
        LOGGER.info("End LSN: %s ", end_lsn)
    else:
        end_lsn = None

    sync_method_lookup, traditional_streams, logical_streams = sync_method_for_streams(
        streams, state, default_replication_method)

    if currently_syncing:
        LOGGER.info("found currently_syncing: %s", currently_syncing)
        currently_syncing_stream = list(
            filter(lambda s: s["tap_stream_id"] == currently_syncing,
                   traditional_streams))
        # BUG FIX: list(filter(...)) is never None — an empty list means the
        # stream was not found, so the old `is None` check could never fire.
        if not currently_syncing_stream:
            LOGGER.warning(
                "Can't find currently_syncing(%s) in selected traditional streams(%s). Will ignore",
                currently_syncing,
                list(map(lambda s: s["tap_stream_id"], traditional_streams)),
            )
        # Put the interrupted stream (if any) first so it resumes before the rest.
        other_streams = list(
            filter(lambda s: s["tap_stream_id"] != currently_syncing,
                   traditional_streams))
        traditional_streams = currently_syncing_stream + other_streams
    else:
        LOGGER.info("No currently_syncing found")

    for stream in traditional_streams:
        state = sync_traditional_stream(
            conn_config, stream, state,
            sync_method_lookup[stream["tap_stream_id"]], end_lsn)

    def _dbname(s):
        # Use .get((), {}) so a missing top-level metadata entry yields None
        # instead of raising AttributeError.
        return metadata.to_map(s["metadata"]).get((), {}).get("database-name")

    # groupby only groups consecutive items, so sort by the same key first.
    logical_streams.sort(key=_dbname)
    for dbname, streams in itertools.groupby(logical_streams, _dbname):
        conn_config["dbname"] = dbname
        state = sync_logical_streams(conn_config, list(streams), state, end_lsn)
    return state
def attempt_connection_to_db(conn_config, dbname):
    """Return True if *dbname* is reachable with the given connection config.

    Works on a deep copy of conn_config, so the caller's config is untouched.
    Any connection failure is logged as a warning and reported as False
    (replicating from every database is optional).
    """
    test_config = copy.deepcopy(conn_config)
    test_config["dbname"] = dbname
    LOGGER.info("(%s) Testing connectivity...", dbname)
    try:
        connection = post_db.open_connection(test_config)
        LOGGER.info("(%s) connectivity verified", dbname)
        connection.close()
    except Exception as exc:
        LOGGER.warning(
            'Unable to connect to %s. This is OK if you do not replicate from this database: "%s"',
            dbname,
            exc,
        )
        return False
    return True
def sync_method_for_streams(streams, state, default_replication_method):
    """Classify each selected stream by the sync method it needs.

    Returns a tuple (lookup, traditional_streams, logical_streams) where
    lookup maps tap_stream_id -> one of "full", "incremental",
    "logical_initial", "logical_initial_interrupted", "pure_logical".
    Streams with no selected columns are skipped with a warning. Raises on
    an unrecognized replication method, LOG_BASED on a view, or an
    inconsistent bookmark state (xmin without lsn). May clear per-stream
    state when the replication method/key changed.
    """
    lookup = {}
    traditional_streams = []  # fixed local misspelling "traditional_steams"
    logical_streams = []
    for stream in streams:
        stream_metadata = metadata.to_map(stream["metadata"])
        replication_method = stream_metadata.get(
            (), {}).get("replication-method", default_replication_method)
        replication_key = stream_metadata.get((), {}).get("replication-key")

        # Drop stale bookmarks if the replication method/key changed.
        state = clear_state_on_replication_change(state,
                                                  stream["tap_stream_id"],
                                                  replication_key,
                                                  replication_method)

        if replication_method not in {"LOG_BASED", "FULL_TABLE", "INCREMENTAL"}:
            raise Exception(
                "Unrecognized replication_method {} for stream {}".format(
                    replication_method, stream["tap_stream_id"]))

        md_map = metadata.to_map(stream["metadata"])
        desired_columns = [
            c for c in stream["schema"]["properties"].keys()
            if sync_common.should_sync_column(md_map, c)
        ]
        desired_columns.sort()

        if len(desired_columns) == 0:
            LOGGER.warning(
                "There are no columns selected for stream %s, skipping it",
                stream["tap_stream_id"])
            continue

        if replication_method == "LOG_BASED" and stream_metadata.get(
                (), {}).get("is-view"):
            # BUG FIX: the concatenated message previously read
            # "...views.Please change..." (missing space).
            raise Exception(
                "Logical Replication is NOT supported for views. "
                "Please change the replication method for {}".format(
                    stream["tap_stream_id"]))

        if replication_method == "FULL_TABLE":
            lookup[stream["tap_stream_id"]] = "full"
            traditional_streams.append(stream)
        elif replication_method == "INCREMENTAL":
            lookup[stream["tap_stream_id"]] = "incremental"
            traditional_streams.append(stream)
        elif get_bookmark(state, stream["tap_stream_id"],
                          "xmin") and get_bookmark(
                              state, stream["tap_stream_id"], "lsn"):
            # finishing previously interrupted full-table
            # (first stage of logical replication)
            lookup[stream["tap_stream_id"]] = "logical_initial_interrupted"
            traditional_streams.append(stream)
        elif get_bookmark(state, stream["tap_stream_id"],
                          "xmin") and not get_bookmark(
                              state, stream["tap_stream_id"], "lsn"):
            # inconsistent state: xmin without an lsn.
            # BUG FIX: the old message contained a literal, unfilled "%s".
            raise Exception(
                "Xmin found({}) in state implying full-table replication but no lsn is present"
                .format(stream["tap_stream_id"]))
        elif not get_bookmark(state, stream["tap_stream_id"],
                              "xmin") and not get_bookmark(
                                  state, stream["tap_stream_id"], "lsn"):
            # initial full-table phase of logical replication
            lookup[stream["tap_stream_id"]] = "logical_initial"
            traditional_streams.append(stream)
        else:
            # no xmin but we have an lsn: the initial full-table stage of
            # logical replication has completed — pure logical replication.
            lookup[stream["tap_stream_id"]] = "pure_logical"
            logical_streams.append(stream)

    return lookup, traditional_streams, logical_streams