def get_metafields(parent_object, since_id, parent_replication_object, timeout):
    # set timeout
    parent_replication_object.set_timeout(timeout)
    # This call results in an HTTP request - the parent object never has a
    # cache of this data so we have to issue that request.
    return parent_object.metafields(
        limit=Context.get_results_per_page(RESULTS_PER_PAGE),
        since_id=since_id)
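
# A hedged usage sketch (not part of the tap): one way get_metafields could be
# driven in a since_id page loop until a short page signals the end. Here
# `parent_obj` stands in for a Shopify resource instance (e.g. an Order) and
# `repl_obj` for its replication class (it only needs to expose set_timeout);
# both names and the 300-second timeout are illustrative assumptions.
def _example_metafields_page_loop(parent_obj, repl_obj, timeout=300):
    since_id = 1
    page_size = Context.get_results_per_page(RESULTS_PER_PAGE)
    while True:
        page = get_metafields(parent_obj, since_id, repl_obj, timeout)
        for metafield in page:
            yield metafield
        # A short page means the parent's metafields are exhausted.
        if len(page) < page_size:
            break
        # Shopify's since_id is exclusive, so resume from the last id seen,
        # mirroring the pattern used by get_objects below.
        since_id = page[-1].id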
def get_objects(self):
    updated_at_min = self.get_bookmark()
    stop_time = singer.utils.now().replace(microsecond=0)
    date_window_size = float(
        Context.config.get("date_window_size", DATE_WINDOW_SIZE))
    results_per_page = Context.get_results_per_page(RESULTS_PER_PAGE)

    # Page through till the end of the resultset
    while updated_at_min < stop_time:
        # Bookmarking can also occur on the since_id
        since_id = self.get_since_id() or 1
        if since_id != 1:
            LOGGER.info("Resuming sync from since_id %d", since_id)

        # It's important that `updated_at_min` has microseconds
        # truncated. Why has been lost to the mists of time but we
        # think it has something to do with how the API treats
        # microseconds on its date windows. Maybe it's possible to
        # drop data due to rounding errors or something like that?
        updated_at_max = updated_at_min + datetime.timedelta(
            days=date_window_size)
        if updated_at_max > stop_time:
            updated_at_max = stop_time
        while True:
            status_key = self.status_key or "status"
            query_params = {
                "since_id": since_id,
                "updated_at_min": updated_at_min,
                "updated_at_max": updated_at_max,
                "limit": results_per_page,
                status_key: "any"
            }

            with metrics.http_request_timer(self.name):
                objects = self.call_api(query_params)

            for obj in objects:
                if obj.id < since_id:
                    # This verifies the api behavior expectation we
                    # have that all results actually honor the
                    # since_id parameter.
                    raise OutOfOrderIdsError(
                        "obj.id < since_id: {} < {}".format(
                            obj.id, since_id))
                yield obj

            # You know you're at the end when the current page has
            # fewer results than the page size limit you set.
            if len(objects) < results_per_page:
                # Save the updated_at_max as our bookmark as we've synced all rows up in our
                # window and can move forward. Also remove the since_id because we want to
                # restart at 1.
                Context.state.get('bookmarks', {}).get(self.name, {}).pop('since_id', None)
                self.update_bookmark(utils.strftime(updated_at_max))
                break

            if objects[-1].id != max([o.id for o in objects]):
                # This verifies the api behavior expectation we have
                # that all pages are internally ordered by the
                # `since_id`.
                raise OutOfOrderIdsError(
                    "{} is not the max id in objects ({})".format(
                        objects[-1].id, max([o.id for o in objects])))
            since_id = objects[-1].id

            # Put since_id into the state.
            self.update_bookmark(since_id, bookmark_key='since_id')

        updated_at_min = updated_at_max
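
# A minimal sketch of the date-window arithmetic used above, pulled out of the
# sync loop for clarity: the cursor advances one window at a time and the last
# window is clamped to `stop_time`, so no window extends past the sync horizon.
# This helper is illustrative only and is not called by the tap.
def _example_date_windows(updated_at_min, stop_time, window_days=1.0):
    windows = []
    while updated_at_min < stop_time:
        updated_at_max = updated_at_min + datetime.timedelta(days=window_days)
        if updated_at_max > stop_time:
            updated_at_max = stop_time
        windows.append((updated_at_min, updated_at_max))
        # The next window starts exactly where this one ended.
        updated_at_min = updated_at_max
    return windows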
def __init__(self):
    self.results_per_page = Context.get_results_per_page(RESULTS_PER_PAGE)
def __init__(self):
    self.results_per_page = Context.get_results_per_page(RESULTS_PER_PAGE)
    # set request timeout
    self.request_timeout = get_request_timeout()
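
# `get_request_timeout` is referenced above but not defined in this excerpt.
# A plausible sketch of such a helper, assuming a `request_timeout` config key
# and a default of 300 seconds; the real implementation may validate or coerce
# the value differently.
def _example_get_request_timeout(default=300.0):
    config_timeout = Context.config.get("request_timeout")
    # Treat a missing, empty-string, or zero value as "use the default".
    if config_timeout:
        return float(config_timeout)
    return default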
def get_objects(self):
    updated_at_min = self.get_bookmark()
    stop_time = singer.utils.now().replace(microsecond=0)

    # Retrieve at most one year of data; if the bookmark is further back
    # than that, cap the window and log that further incremental syncs
    # are needed to catch up.
    diff_days = (stop_time - updated_at_min).days
    yearly = False
    if diff_days > 365:
        yearly = True
        stop_time = updated_at_min + datetime.timedelta(days=365)
        LOGGER.info("This import will only import the first year of historical data. "
                    "You need to trigger further incremental imports to get the missing rows.")

    date_window_size = float(Context.config.get("date_window_size", DATE_WINDOW_SIZE))
    results_per_page = Context.get_results_per_page(RESULTS_PER_PAGE)

    # Page through till the end of the resultset
    while updated_at_min < stop_time:
        # Bookmarking can also occur on the since_id
        since_id = self.get_since_id() or 1
        if since_id != 1:
            LOGGER.info("Resuming sync from since_id %d", since_id)

        # It's important that `updated_at_min` has microseconds
        # truncated. Why has been lost to the mists of time but we
        # think it has something to do with how the API treats
        # microseconds on its date windows. Maybe it's possible to
        # drop data due to rounding errors or something like that?
        updated_at_max = updated_at_min + datetime.timedelta(days=date_window_size)
        if updated_at_max > stop_time:
            updated_at_max = stop_time
        singer.log_info("getting from %s - %s", updated_at_min, updated_at_max)
        min_filter_key = self.get_min_replication_key()
        max_filter_key = self.get_max_replication_key()
        while True:
            status_key = self.status_key or "status"
            query_params = {
                "since_id": since_id,
                min_filter_key: updated_at_min,
                max_filter_key: updated_at_max,
                "limit": results_per_page,
            }
            if self.add_status:
                query_params[status_key] = "any"

            with metrics.http_request_timer(self.name):
                objects = self.call_api(query_params)

            for obj in objects:
                if obj.id < since_id:
                    # This verifies the api behavior expectation we
                    # have that all results actually honor the
                    # since_id parameter.
                    raise OutOfOrderIdsError("obj.id < since_id: {} < {}".format(
                        obj.id, since_id))
                yield obj

            # You know you're at the end when the current page has
            # fewer results than the page size limit you set.
            singer.log_info(f"Got {len(objects)} records")
            if len(objects) < results_per_page:
                # Save the updated_at_max as our bookmark as we've synced all rows up in our
                # window and can move forward. Also remove the since_id because we want to
                # restart at 1.
                Context.state.get('bookmarks', {}).get(self.name, {}).pop('since_id', None)
                state_val = updated_at_max
                if self.skip_day:
                    state_val = state_val + datetime.timedelta(days=1)
                self.update_bookmark(utils.strftime(state_val))
                break

            if objects[-1].id != max([o.id for o in objects]):
                # This verifies the api behavior expectation we have
                # that all pages are internally ordered by the
                # `since_id`.
                raise OutOfOrderIdsError("{} is not the max id in objects ({})".format(
                    objects[-1].id, max([o.id for o in objects])))
            since_id = objects[-1].id

            # Put since_id into the state.
            self.update_bookmark(since_id, bookmark_key='since_id')

        updated_at_min = updated_at_max + datetime.timedelta(seconds=1)
        if self.skip_day:
            updated_at_min = updated_at_min + datetime.timedelta(days=1)

    if yearly:
        LOGGER.info("This import only imported one year of historical data. "
                    "Please trigger further incremental syncs to get the missing rows.")
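
# A hedged sketch of how the one-year cap above plays out across runs: each
# invocation syncs at most 365 days past the saved bookmark, and the bookmark
# written at the end of each window lets the next run resume where this one
# stopped. `stream` stands in for an instance of this class and `write_record`
# for a hypothetical downstream sink; neither is defined in this module.
def _example_yearly_catchup(stream, write_record):
    for obj in stream.get_objects():
        # to_dict() is the pyactiveresource accessor for a resource's
        # attributes; swap in whatever serialization the sink expects.
        write_record(obj.to_dict())
    # Once the generator is exhausted, the bookmark sits at the end of the
    # last fully synced window, so re-running covers the next year of data.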