def sync_data(self, parent_ids):
    table = self.TABLE
    self.write_schema()

    start_date = get_last_record_value_for_table(
        self.state, table, 'start_date')

    if start_date is None:
        start_date = get_config_start_date(self.config)
    else:
        start_date = parse(start_date)

    technician_id = get_last_record_value_for_table(
        self.state, table, 'technician_id')

    if technician_id is None:
        technician_id = 0

    for start_date, end_date in generate_date_windows(start_date):
        for index, parent_id in enumerate(parent_ids):
            if parent_id < technician_id:
                continue

            LOGGER.info(
                'Fetching %s for technician %s (%s/%s) from %s to %s',
                table, parent_id, index + 1, len(parent_ids),
                start_date, end_date)

            parsed_response = self.execute_request(parent_id, start_date, end_date)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, parsed_response['rows'])
                ctr.increment(amount=len(parsed_response['rows']))

            # technician_id acts as a substream bookmark so that we
            # can pick up in a single stream where we left off.
            self.state = incorporate(
                self.state, table, 'technician_id', parent_id)

            # There's no need to save `start_date` here. Even in the
            # case of the first run the config start_date won't change,
            # so we're safe. It's acceptable to update start_date only
            # after we're done syncing this whole window.
            save_state(self.state)

        technician_id = 0

        self.state = incorporate(
            self.state, table, 'start_date', end_date)

        # Because we go through all the technicians every time we sync,
        # we need to start over by resetting the technician_id sub
        # bookmark to 0.
        self.state = incorporate(
            self.state, table, 'technician_id', 0, force=True)

        save_state(self.state)
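# The loop above relies on a generate_date_windows() helper that is not shown
# in this listing. A minimal sketch of the assumed behavior follows: it yields
# consecutive (start, end) pairs from the given start date up to "now". The
# seven-day window size is an illustrative assumption, not necessarily what
# the real helper uses.
def generate_date_windows(start_date, window=datetime.timedelta(days=7)):
    now = datetime.datetime.now(tz=pytz.UTC)
    while start_date < now:
        yield start_date, start_date + window
        start_date = start_date + window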
def sync_data(self):
    table = self.TABLE
    api_method = self.API_METHOD
    done = False

    # Attempt to get the bookmark date from the state file (if one exists and is supplied).
    LOGGER.info('Attempting to get the most recent bookmark_date for entity {}.'.format(self.ENTITY))
    bookmark_date = get_last_record_value_for_table(self.state, table, 'bookmark_date')

    # If there is no bookmark date, fall back to using the start date from the config file.
    if bookmark_date is None:
        LOGGER.info('Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.')
        bookmark_date = get_config_start_date(self.config)
    else:
        bookmark_date = parse(bookmark_date)

    # Convert bookmarked start date to POSIX.
    bookmark_date_posix = int(bookmark_date.timestamp())

    # Create params for filtering
    if self.ENTITY == 'event':
        params = {"occurred_at[after]": bookmark_date_posix}
        bookmark_key = 'occurred_at'
    elif self.ENTITY in ['promotional_credit', 'comment']:
        params = {"created_at[after]": bookmark_date_posix}
        bookmark_key = 'created_at'
    else:
        params = {"updated_at[after]": bookmark_date_posix}
        bookmark_key = 'updated_at'

    # Add sort_by[asc] to prevent data overwrite by oldest deleted records
    if self.SORT_BY is not None:
        params['sort_by[asc]'] = self.SORT_BY

    LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

    while not done:
        max_date = bookmark_date

        response = self.client.make_request(
            url=self.get_url(),
            method=api_method,
            params=params)

        if 'api_error_code' in response.keys():
            if response['api_error_code'] == 'configuration_incompatible':
                LOGGER.error('{} is not configured'.format(response['error_code']))
                break

        records = response.get('list')

        to_write = self.get_stream_data(records)

        if self.config.get('include_deleted') not in ['false', 'False', False]:
            if self.ENTITY == 'event':
                for event in to_write:
                    if event["event_type"] == 'plan_deleted':
                        Util.plans.append(event['content']['plan'])
                    elif event['event_type'] == 'addon_deleted':
                        Util.addons.append(event['content']['addon'])
                    elif event['event_type'] == 'coupon_deleted':
                        Util.coupons.append(event['content']['coupon'])
            if self.ENTITY == 'plan':
                for plan in Util.plans:
                    to_write.append(plan)
            if self.ENTITY == 'addon':
                for addon in Util.addons:
                    to_write.append(addon)
            if self.ENTITY == 'coupon':
                for coupon in Util.coupons:
                    to_write.append(coupon)

        with singer.metrics.record_counter(endpoint=table) as ctr:
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        for item in to_write:
            # Skip records that don't carry the bookmark key (e.g. the deleted
            # plans/addons/coupons appended above), so parse() never sees None.
            if item.get(bookmark_key) is not None:
                max_date = max(max_date, parse(item.get(bookmark_key)))

        self.state = incorporate(
            self.state, table, 'bookmark_date', max_date)

        if not response.get('next_offset'):
            LOGGER.info("Final offset reached. Ending sync.")
            done = True
        else:
            LOGGER.info("Advancing by one offset.")
            params['offset'] = response.get('next_offset')
            bookmark_date = max_date

    save_state(self.state)
def sync_data(self):
    table = self.TABLE
    done = False

    filters = self.get_filters()

    start_date = get_last_record_value_for_table(self.state, table)

    if start_date is None:
        start_date = get_config_start_date(self.config)
    else:
        start_date = start_date.replace(tzinfo=pytz.UTC)

    td = self.get_interval()
    end_date = start_date + td

    while not done:
        max_date = start_date

        LOGGER.info("Querying {} starting at {}".format(table, start_date))

        body = {
            "startMSeconds": int(start_date.timestamp() * 1000),
            "endMSeconds": int(end_date.timestamp() * 1000),
        }

        if filters is not None:
            body["filters"] = filters

        LOGGER.info(body)

        try:
            response = self.client.make_request(self.get_url(), "POST", body=body)
        except RuntimeError as e:
            if "502" in str(e) or "504" in str(e):
                # try one more time
                response = self.client.make_request(self.get_url(), "POST", body=body)
            else:
                raise e

        to_write = self.get_stream_data(response)

        with singer.metrics.record_counter(endpoint=table) as ctr:
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        for item in to_write:
            max_date = max(max_date, self.get_time_for_state(item))

        self.state = incorporate(self.state, table, "start_date", start_date)

        if max_date > datetime.datetime.now(pytz.UTC):
            done = True

        if len(to_write) == 0:
            LOGGER.info("Advancing one full interval.")
            if end_date > datetime.datetime.now(pytz.UTC):
                done = True
            else:
                start_date = end_date
        elif start_date == max_date:
            LOGGER.info("Advancing one millisecond.")
            start_date = start_date + datetime.timedelta(milliseconds=1)
        else:
            LOGGER.info("Advancing by one page.")
            start_date = max_date

        end_date = start_date + td

    save_state(self.state)
def sync_data(self):
    table = self.TABLE
    done = False

    start_date = get_last_record_value_for_table(self.state, table)

    if start_date is None:
        start_date = get_config_start_date(self.config)
    else:
        start_date = start_date.replace(tzinfo=pytz.UTC)

    td = datetime.timedelta(hours=1)
    end_date = start_date + td

    while not done:
        max_date = start_date

        LOGGER.info("Querying {} starting at {}".format(table, start_date))

        body = {
            "filters": {
                "environment": self.config.get("environment"),
                "lastUpdated": {
                    "gte": int(start_date.timestamp() * 1000),
                    "lte": int(end_date.timestamp() * 1000),
                },
            }
        }

        LOGGER.info(body)

        response = self.client.make_request(self.get_url(), "POST", body=body)

        to_write = self.get_stream_data(response)

        with singer.metrics.record_counter(endpoint=table) as ctr:
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        for item in to_write:
            max_date = max(max_date, self.get_time_for_state(item))

        self.state = incorporate(self.state, table, "start_date", start_date)

        if max_date > datetime.datetime.now(pytz.UTC):
            done = True

        if len(to_write) == 0:
            LOGGER.info("Advancing one full interval.")
            if end_date > datetime.datetime.now(pytz.UTC):
                done = True
            else:
                start_date = end_date
        elif start_date == max_date:
            LOGGER.info("Advancing one second.")
            start_date = start_date + datetime.timedelta(seconds=1)
        else:
            LOGGER.info("Advancing by one page.")
            start_date = max_date

        end_date = start_date + td

    save_state(self.state)
def sync_data(self):
    table = self.TABLE
    api_method = self.API_METHOD

    # Attempt to get the bookmark date from the state file (if one exists and is supplied).
    LOGGER.info(
        'Attempting to get the most recent bookmark_date for entity {}.'.format(self.TABLE))
    bookmark_date = get_last_record_value_for_table(
        self.state, table, 'bookmark_date')

    # If there is no bookmark date, fall back to using the start date from the config file.
    if bookmark_date is None:
        LOGGER.info(
            'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.')
        bookmark_date = get_config_start_date(self.config)
    else:
        bookmark_date = parse(bookmark_date)

    current_date = utils.strftime(utils.now())

    LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

    urls, spaces, mult_endpoints = self.get_url(bookmark_date, current_date)
    key_hash = hashlib.sha256()

    for i, url in enumerate(urls):
        done = False
        updated = False
        temp_max = []
        page = 1
        params = self.get_params(bookmark_date, current_date)

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(url=url, method=api_method, params=params)

            to_write = self.get_stream_data(response.get('results'))

            for item in to_write:
                if self.TABLE in ('space_counts', 'space_events'):
                    item['space_id'] = spaces[i]
                if self.TABLE == 'space_counts':
                    # Creates a surrogate primary key for this endpoint, since one doesn't exist at time of writing
                    key_hash.update((item['space_id'] + item['timestamp']).encode('utf-8'))
                    item['id'] = key_hash.hexdigest()
                if item.get('updated_at') is not None:
                    max_date = max(max_date, parse(item.get('updated_at')))
                    updated = True
                elif item.get('created_at') is not None:
                    max_date = max(max_date, parse(item.get('created_at')))
                    updated = True
                elif item.get('timestamp') is not None and mult_endpoints:
                    max_date = max(max_date, parse(item.get('timestamp')))
                    updated = True

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)
                ctr.increment(amount=len(to_write))

            self.state = incorporate(self.state, table, 'bookmark_date', max_date)

            if not response.get('next'):
                LOGGER.info("Final page reached. Ending sync.")
                done = True
            else:
                page += 1
                LOGGER.info("Advancing by one page.")
                params['page'] = page

        if mult_endpoints and updated:
            temp_max.append(max_date)
            max_date = min(temp_max)
            self.state = incorporate(self.state, table, 'bookmark_date', max_date)

    save_state(self.state)
def sync_data(self):
    table = self.TABLE
    api_method = self.API_METHOD
    done = False

    # Attempt to get the bookmark date from the state file (if one exists and is supplied).
    LOGGER.info(
        'Attempting to get the most recent bookmark_date for entity {}.'.format(self.ENTITY))
    bookmark_date = get_last_record_value_for_table(
        self.state, table, 'bookmark_date')

    # If there is no bookmark date, fall back to using the start date from the config file.
    if bookmark_date is None:
        LOGGER.info(
            'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.')
        bookmark_date = get_config_start_date(self.config)
    else:
        bookmark_date = parse(bookmark_date)

    # Convert bookmarked start date to POSIX.
    bookmark_date_posix = int(bookmark_date.timestamp())

    # Create params for filtering
    if self.ENTITY == 'event':
        params = {"occurred_at[after]": bookmark_date_posix}
        bookmark_key = 'occurred_at'
    else:
        params = {"updated_at[after]": bookmark_date_posix}
        bookmark_key = 'updated_at'

    LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

    while not done:
        max_date = bookmark_date

        response = self.client.make_request(
            url=self.get_url(), method=api_method, params=params)

        to_write = self.get_stream_data(response.get('list'))

        with singer.metrics.record_counter(endpoint=table) as ctr:
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        for item in to_write:
            max_date = max(max_date, parse(item.get(bookmark_key)))

        self.state = incorporate(self.state, table, 'bookmark_date', max_date)

        if not response.get('next_offset'):
            LOGGER.info("Final offset reached. Ending sync.")
            done = True
        else:
            LOGGER.info("Advancing by one offset.")
            params['offset'] = response.get('next_offset')
            bookmark_date = max_date

    save_state(self.state)
def sync_data(self):
    table = self.TABLE
    api_method = self.API_METHOD
    done = False
    sync_interval_in_mins = 2

    # Attempt to get the bookmark date from the state file (if one exists and is supplied).
    LOGGER.info(
        'Attempting to get the most recent bookmark_date for entity {}.'.format(self.ENTITY))
    bookmark_date = get_last_record_value_for_table(
        self.state, table, 'bookmark_date')

    # If there is no bookmark date, fall back to using the start date from the config file.
    if bookmark_date is None:
        LOGGER.info(
            'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.')
        bookmark_date = get_config_start_date(self.config)
    else:
        bookmark_date = parse(bookmark_date)

    # Convert bookmarked start date to POSIX.
    bookmark_date_posix = int(bookmark_date.timestamp())

    to_date = datetime.now(pytz.utc) - timedelta(minutes=sync_interval_in_mins)
    to_date_posix = int(to_date.timestamp())

    sync_window = str([bookmark_date_posix, to_date_posix])
    LOGGER.info("Sync Window {} for schema {}".format(sync_window, table))

    # Create params for filtering
    if self.ENTITY == 'event':
        params = {"occurred_at[between]": sync_window}
        bookmark_key = 'occurred_at'
    elif self.ENTITY in ['promotional_credit', 'comment']:
        params = {"created_at[between]": sync_window}
        bookmark_key = 'created_at'
    else:
        params = {"updated_at[between]": sync_window}
        bookmark_key = 'updated_at'

    # Add sort_by[asc] to prevent data overwrite by oldest deleted records
    if self.SORT_BY is not None:
        params['sort_by[asc]'] = self.SORT_BY

    LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

    while not done:
        max_date = to_date

        response = self.client.make_request(
            url=self.get_url(), method=api_method, params=params)

        if 'api_error_code' in response.keys():
            if response['api_error_code'] == 'configuration_incompatible':
                LOGGER.error('{} is not configured'.format(response['error_code']))
                break

        records = response.get('list')

        # List of deleted "plans, addons and coupons" from the /events endpoint
        deleted_records = []

        if self.config.get('include_deleted') not in ['false', 'False', False]:
            if self.ENTITY == 'event':
                # Parse "event_type" from event records and collect deleted plans/addons/coupons from events
                for record in records:
                    event = record.get(self.ENTITY)
                    if event["event_type"] == 'plan_deleted':
                        Util.plans.append(event['content']['plan'])
                    elif event['event_type'] == 'addon_deleted':
                        Util.addons.append(event['content']['addon'])
                    elif event['event_type'] == 'coupon_deleted':
                        Util.coupons.append(event['content']['coupon'])

            # We need an additional transform for deleted records, as "to_write" already contains transformed data
            if self.ENTITY == 'plan':
                for plan in Util.plans:
                    deleted_records.append(self.transform_record(plan))
            if self.ENTITY == 'addon':
                for addon in Util.addons:
                    deleted_records.append(self.transform_record(addon))
            if self.ENTITY == 'coupon':
                for coupon in Util.coupons:
                    deleted_records.append(self.transform_record(coupon))

        # Get records from the API response and transform
        to_write = self.get_stream_data(records)

        with singer.metrics.record_counter(endpoint=table) as ctr:
            # Combine transformed records with the deleted plans/addons/coupons collected from the events endpoint
            to_write = to_write + deleted_records
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        # Update max_date with the minimum of (max replication key) and (now - 2 minutes).
        # This makes sure the bookmark does not go beyond (now - 2 minutes),
        # so no data will be missed due to API latency.
        max_date = min(max_date, to_date)

        self.state = incorporate(self.state, table, 'bookmark_date', max_date)

        if not response.get('next_offset'):
            LOGGER.info("Final offset reached. Ending sync.")
            done = True
        else:
            LOGGER.info("Advancing by one offset.")
            params['offset'] = response.get('next_offset')
            bookmark_date = max_date

    save_state(self.state)
def get_headers(self, technician_id):
    start_date = get_config_start_date(self.config)
    end_date = start_date + timedelta(days=7)

    parsed_response = self.execute_request(technician_id, start_date, end_date)

    return parsed_response['headers']
def sync_data(self): table = self.TABLE api_method = self.API_METHOD done = False # Attempt to get the bookmark date from the state file (if one exists and is supplied). LOGGER.info( "Attempting to get the most recent bookmark_date for entity {}.". format(self.ENTITY)) bookmark_date = get_last_record_value_for_table( self.state, table, "bookmark_date") # If there is no bookmark date, fall back to using the start date from the config file. if bookmark_date is None: LOGGER.info( "Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead." ) bookmark_date = get_config_start_date(self.config) else: bookmark_date = parse(bookmark_date) # Convert bookmarked start date to POSIX. bookmark_date_posix = int(bookmark_date.timestamp()) # Create params for filtering if self.ENTITY == "event": params = {"occurred_at[after]": bookmark_date_posix} bookmark_key = "occurred_at" elif self.ENTITY == "promotional_credit": params = {"created_at[after]": bookmark_date_posix} bookmark_key = "created_at" else: params = {"updated_at[after]": bookmark_date_posix} bookmark_key = "updated_at" LOGGER.info("Querying {} starting at {}".format(table, bookmark_date)) while not done: max_date = bookmark_date response = self.client.make_request(url=self.get_url(), method=api_method, params=params) if "api_error_code" in response.keys(): if response["api_error_code"] == "configuration_incompatible": LOGGER.error("{} is not configured".format( response["error_code"])) break records = response.get("list") to_write = self.get_stream_data(records) if self.ENTITY == "event": for event in to_write: if event["event_type"] == "plan_deleted": Util.plans.append(event["content"]["plan"]) elif event["event_type"] == "addon_deleted": Util.addons.append(event["content"]["addon"]) elif event["event_type"] == "coupon_deleted": Util.coupons.append(event["content"]["coupon"]) if self.ENTITY == "plan": for plan in Util.plans: to_write.append(plan) if self.ENTITY == "addon": for addon in Util.addons: to_write.append(addon) if self.ENTITY == "coupon": for coupon in Util.coupons: to_write.append(coupon) with singer.metrics.record_counter(endpoint=table) as ctr: singer.write_records(table, to_write) ctr.increment(amount=len(to_write)) for item in to_write: bookmark_key = item.get(bookmark_key, None) if bookmark_key is not None: bookmark_date = parse(bookmark_key) LOGGER.info(f"BOOKMARK_KEY value: {bookmark_date}") max_date = max( ensure_tz_aware_dt(max_date), ensure_tz_aware_dt(bookmark_date), ) else: pass # LOGGER.info("BOOKMARK_KEY not found. Using max date.") self.state = incorporate(self.state, table, "bookmark_date", max_date) if not response.get("next_offset"): LOGGER.info("Final offset reached. Ending sync.") done = True else: LOGGER.info("Advancing by one offset.") params["offset"] = response.get("next_offset") bookmark_date = max_date save_state(self.state)
def sync_data(self, parent_ids, return_first_response=False):
    table = self.TABLE

    if not return_first_response:
        self.write_schema()

    start_date = get_last_record_value_for_table(self.state, table, 'start_date')
    technician_id = get_last_record_value_for_table(
        self.state, table, 'technician_id')

    if start_date is None:
        start_date = get_config_start_date(self.config)
    else:
        start_date = parse(start_date)

    if technician_id is None:
        technician_id = 0

    end_date = start_date + datetime.timedelta(days=7)

    self.client.make_request(
        'https://secure.logmeinrescue.com/API/setReportArea_v8.aspx',
        'POST',
        params={'area': self.REPORT_AREA})

    while start_date < datetime.datetime.now(tz=pytz.UTC):
        for index, parent_id in enumerate(parent_ids):
            if parent_id < technician_id:
                continue

            LOGGER.info(
                ('Fetching session report for technician {} ({}/{}) '
                 'from {} to {}').format(parent_id, index + 1,
                                         len(parent_ids), start_date,
                                         end_date))

            self.client.make_request(
                'https://secure.logmeinrescue.com/API/setReportDate_v2.aspx',  # noqa
                'POST',
                params={
                    'bdate': start_date.strftime('%-m/%-d/%Y %-H:%M:%S'),
                    'edate': end_date.strftime('%-m/%-d/%Y %-H:%M:%S')
                })

            response = self.client.make_request(
                self.get_url(), 'GET', params={
                    'node': parent_id,
                    'nodetype': 'NODE',
                })

            if return_first_response:
                return response

            to_write = self.get_stream_data(response)

            if not return_first_response:
                with singer.metrics.record_counter(endpoint=table) as ctr:
                    singer.write_records(table, to_write)
                    ctr.increment(amount=len(to_write))

                self.state = incorporate(self.state, table, 'technician_id', parent_id)
                self.state = incorporate(self.state, table, 'start_date', start_date)
                self.state = save_state(self.state)
            elif len(to_write) > 0:
                return to_write[0]

        start_date = end_date
        end_date = start_date + datetime.timedelta(days=7)
        technician_id = 0

    if not return_first_response:
        self.state = incorporate(self.state, table, 'start_date', start_date)
        self.state = incorporate(self.state, table, 'technician_id', technician_id, force=True)
        self.state = save_state(self.state)