def validate_state(config, catalog, state):
    """Reconcile *state* with the catalog's stream selection and return it.

    Two fixes are applied per stream:
    - a deselected stream cannot remain the "currently syncing" stream, or
      the next run would try to resume on it;
    - an incremental stream with no bookmark yet (new integration, newly
      selected, reset, ...) is seeded with the config's ``start_date``.

    Emits the resulting STATE message before returning.
    """
    for stream in catalog["streams"]:
        stream_id = stream["tap_stream_id"]

        for entry in stream["metadata"]:
            is_root = entry["breadcrumb"] == []
            if is_root and entry["metadata"].get("selected") != True:
                # Deselected while in flight: clear the resume marker.
                if get_currently_syncing(state) == stream_id:
                    set_currently_syncing(state, None)
                break

        replication_key = determine_replication_key(stream_id)
        if not replication_key:
            # FULL_TABLE-style stream; nothing to bookmark.
            continue

        # Seed a missing bookmark with the default start date from config.
        if get_bookmark(state, stream_id, replication_key) is None:
            state = write_bookmark(
                state, stream_id, replication_key, config["start_date"]
            )

    singer.write_state(state)
    return state
def sync(client, catalog, config, state):
    """Sync every selected stream in catalog order, resuming if interrupted.

    The "currently syncing" bookmark in *state* marks the in-flight stream;
    when present at startup, streams before it in the catalog are skipped.
    A record-count metric is emitted per stream, and a Corona warning is
    logged at the end when the account does not support it.
    """
    resume_from = bookmarks.get_currently_syncing(state)
    if resume_from:
        singer.log_info("Resuming sync from %s", resume_from)
    else:
        singer.log_info("Starting sync")

    for stream in catalog["streams"]:
        tap_stream_id = stream["tap_stream_id"]

        # Skip unselected streams.
        if not metadata.get(metadata.to_map(stream['metadata']), (), 'selected'):
            singer.log_info("%s: not selected", tap_stream_id)
            continue

        # When resuming, skip streams already synced on the interrupted run
        # (everything before the resume point in catalog order).
        if resume_from and tap_stream_id != resume_from:
            singer.log_info("%s: already synced", tap_stream_id)
            continue

        singer.log_info("%s: starting sync", tap_stream_id)

        # Resume point reached: record this stream as in flight so an
        # interrupted run picks up here next time.
        resume_from = None
        state = bookmarks.set_currently_syncing(state, tap_stream_id)
        singer.write_state(state)

        # Dispatch on stream type.
        if tap_stream_id == "activity_types":
            state, synced_count = sync_activity_types(client, state, stream)
        elif tap_stream_id == "leads":
            state, synced_count = sync_leads(client, state, stream, config)
        elif tap_stream_id.startswith("activities_"):
            state, synced_count = sync_activities(client, state, stream, config)
        elif tap_stream_id in ["campaigns", "lists"]:
            state, synced_count = sync_paginated(client, state, stream)
        elif tap_stream_id == "programs":
            state, synced_count = sync_programs(client, state, stream)
        else:
            raise Exception("Stream %s not implemented" % tap_stream_id)

        # Emit the record-count metric for this stream.
        counter = singer.metrics.record_counter(tap_stream_id)
        counter.value = synced_count
        counter._pop()  # pylint: disable=protected-access

        # Stream finished cleanly; clear the in-flight marker.
        state = bookmarks.set_currently_syncing(state, None)
        singer.write_state(state)
        singer.log_info("%s: finished sync", tap_stream_id)

    singer.log_info("Finished sync.")
    # If Corona is not supported, log a warning near the end of the tap
    # log with instructions on how to get Corona supported.
    if not client.use_corona:
        singer.log_warning(NO_CORONA_WARNING)
def sync(config: Dict[str, Any], state: Dict[str, Any], catalog: Catalog) -> None:
    """Sync all selected top-level streams (and their substreams) from the catalog.

    Parent streams drive their substreams: substreams encountered directly in
    the selected-streams iteration are skipped and synced via their parent's
    ``sync()``. Bookmarks/versions are advanced per record via
    ``handle_record``, and ACTIVATE_VERSION messages are emitted once a
    stream (and each selected substream) completes. Clears the
    currently-syncing marker and writes state when done.
    """
    # For looking up Catalog-configured streams more efficiently
    # later Singer stores catalog entries as a list and iterates
    # over it with .get_stream()
    stream_defs: Dict[str, Union["Stream", "Substream"]] = {}
    stream_versions: Dict[str, Optional[int]] = {}
    check_dependency_conflicts(catalog)
    for stream in catalog.get_selected_streams(state):
        if is_substream(AVAILABLE_STREAMS[stream.tap_stream_id]):
            # Substreams are emitted while their parent syncs, not on their own.
            LOGGER.info(
                'Skipping substream "%s" until parent stream is reached',
                stream.tap_stream_id,
            )
            continue
        LOGGER.info("Syncing stream: %s", stream.tap_stream_id)
        # prepare_stream populates stream_defs/stream_versions for this stream
        # (and, presumably, its substreams — handle_record looks them up by
        # whatever tap_stream_id the sync generator yields; TODO confirm).
        filter_datetime = prepare_stream(stream.tap_stream_id, stream_defs,
                                         stream_versions, catalog, config, state)
        stream_def = stream_defs[stream.tap_stream_id]
        LOGGER.info("Querying since: %s", filter_datetime)
        # stream_def.sync yields (tap_stream_id, record) pairs; the id may be
        # the parent's or a substream's.
        for tap_stream_id, record in stream_def.sync(
                filter_datetime):  # type: ignore
            state = handle_record(
                tap_stream_id,
                record,
                stream_defs[tap_stream_id],
                stream_versions[tap_stream_id],
                state,
            )
        # Persist bookmarks accumulated for this stream before versioning.
        write_state(state)
        for substream_def in stream_def.substreams:  # type: ignore
            if not substream_def.is_selected:
                continue
            # All substreams are necessarily FULL_TABLE and thus have a version,
            # so write their ACTIVATE_VERSION messages without check.
            write_activate_version(
                substream_def.tap_stream_id,
                stream_versions[substream_def.tap_stream_id],
            )
        # Parent streams only have a version when FULL_TABLE, hence the check.
        if stream_versions[stream_def.tap_stream_id] is not None:
            write_activate_version(
                stream_def.tap_stream_id,
                stream_versions[stream_def.tap_stream_id],
            )
    # All streams done: clear the resume marker and emit final state.
    state = set_currently_syncing(state, None)
    write_state(state)
def handle_record(
    tap_stream_id: str,
    record: Dict[str, Any],
    stream_def: Union["Stream", "Substream"],
    stream_version: Optional[int],
    state: Dict[str, Any],
) -> Dict[str, Any]:
    """Emit one record and advance bookmark state, returning the new state.

    The record is always printed. For incremental streams the bookmark is
    moved to the record's replication-key value (when present) and state is
    written; otherwise state is returned unchanged apart from the
    currently-syncing marker.
    """
    print_record(tap_stream_id, record, version=stream_version)

    # Only top-level streams are tracked as "currently syncing".
    if not is_substream(stream_def):
        state = set_currently_syncing(state, tap_stream_id)

    # Non-incremental streams carry no bookmark — nothing more to do.
    if not stream_def.is_valid_incremental:
        return state

    replication_key = stream_def.replication_key
    # mypy: is_valid_incremental above guarantees replication_key is set
    new_bookmark = record.get(replication_key)  # type: ignore
    if new_bookmark is None:
        LOGGER.warning(
            'State not updated. Replication key "%s" not found in record for stream "%s": %s',
            replication_key,
            tap_stream_id,
            record,
        )
        return state

    LOGGER.debug("Adding bookmark for %s at %s", tap_stream_id, new_bookmark)
    state = write_bookmark(state, tap_stream_id, replication_key, new_bookmark)
    write_state(state)
    return state
def sync(config: Dict[str, Any], state: Dict[str, Any], catalog: Catalog) -> None:
    """Sync all selected top-level streams from the catalog.

    Skips substreams (they are synced by their parent), advances bookmarks
    per record via ``handle_record``, then clears the currently-syncing
    marker and writes final state.

    NOTE(review): this is a near-duplicate of the other ``sync(config, state,
    catalog)`` in this source minus the ACTIVATE_VERSION/substream emission —
    if both live in the same module the later definition shadows the earlier;
    confirm which one is intended.
    """
    # For looking up Catalog-configured streams more efficiently
    # later Singer stores catalog entries as a list and iterates
    # over it with .get_stream()
    stream_defs: Dict[str, Union["Stream", "Substream"]] = {}
    stream_versions: Dict[str, Optional[int]] = {}
    check_dependency_conflicts(catalog)
    for stream in catalog.get_selected_streams(state):
        if is_substream(AVAILABLE_STREAMS[stream.tap_stream_id]):
            # Substreams are emitted while their parent syncs, not on their own.
            LOGGER.info(
                'Skipping substream "%s" until parent stream is reached',
                stream.tap_stream_id,
            )
            continue
        LOGGER.info("Syncing stream: %s", stream.tap_stream_id)
        # prepare_stream populates stream_defs/stream_versions for this stream.
        filter_datetime = prepare_stream(stream.tap_stream_id, stream_defs,
                                         stream_versions, catalog, config, state)
        stream_def = stream_defs[stream.tap_stream_id]
        LOGGER.info("Querying since: %s", filter_datetime)
        # stream_def.sync yields (tap_stream_id, record) pairs.
        for tap_stream_id, record in stream_def.sync(
                filter_datetime):  # type: ignore
            state = handle_record(
                tap_stream_id,
                record,
                stream_defs[tap_stream_id],
                stream_versions[tap_stream_id],
                state,
            )
        # Persist bookmarks accumulated for this stream.
        write_state(state)
    # All streams done: clear the resume marker and emit final state.
    state = set_currently_syncing(state, None)
    write_state(state)
def update_current_stream(state, stream_name=None):
    """Mark *stream_name* as the stream currently being synced and emit state.

    Calling with no stream_name clears the currently-syncing marker.
    """
    set_currently_syncing(state, stream_name)
    singer.write_state(state)