def sync_data(self):
    table = self.TABLE

    LOGGER.info('Syncing data for {}'.format(table))

    url = self.get_url()
    params = self.get_params(_next=None)
    resources = self.sync_paginated(url, params)

    if self.CACHE_RESULTS:
        stream_cache.add(table, resources)
        LOGGER.info('Added {} {}s to cache'.format(len(resources), table))

    LOGGER.info('Reached end of stream, moving on.')
    save_state(self.state)
    return self.state
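# A minimal sketch of the `stream_cache` helper used above. Its real implementation is
# not shown in this section, so this is an assumption: a simple in-memory store that
# lets other streams re-use records already fetched by this one.
class _StreamCache:
    def __init__(self):
        self._cache = {}

    def add(self, table, resources):
        # Append the synced records under the stream's table name.
        self._cache.setdefault(table, []).extend(resources)

    def get(self, table):
        return self._cache.get(table, [])

stream_cache = _StreamCache()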
def do_sync(self):
    LOGGER.info("Starting sync.")

    streams, opportunity_child_catalogs = self.get_streams_to_replicate()

    if any(streams):
        LOGGER.info('Will sync: %s', ', '.join([stream.TABLE for stream in streams]))

    for stream in streams:
        stream.state = self.state
        if stream.TABLE == 'opportunities':
            stream.sync(opportunity_child_catalogs)
        else:
            stream.sync()
        self.state = stream.state

    save_state(self.state)
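# A minimal sketch of the `save_state` helper called throughout this module. Its body
# is not shown here, so this is an assumption: it emits a Singer STATE message so the
# runner can persist the current bookmarks and an interrupted sync can resume.
import singer

def save_state(state):
    # Emit the current state so bookmarks survive an interrupted run.
    singer.write_state(state)
    return state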
def sync_data_for_period(self, date, interval):
    table = self.TABLE

    updated_after = date
    updated_before = updated_after + interval

    LOGGER.info('Syncing data from {} to {}'.format(
        updated_after.isoformat(), updated_before.isoformat()))

    params = self.get_params(updated_after, updated_before)
    url = self.get_url()
    res = self.sync_paginated(url, params)

    self.state = incorporate(self.state, table, self.RANGE_FIELD, date.isoformat())
    save_state(self.state)
    return res
def sync_data_for_period(self, date, interval, child_streams=None, stop_time=None):
    table = self.TABLE

    updated_after = date
    updated_before = updated_after + interval

    if stop_time is not None and updated_before > stop_time:
        updated_before = stop_time

    LOGGER.info('Syncing data from {} to {}'.format(
        updated_after.isoformat(), updated_before.isoformat()))

    params = self.get_params(updated_after, updated_before)
    url = self.get_url()

    asyncio.run(self.sync_paginated(url, params, updated_after, child_streams))

    self.state = incorporate(self.state, table, self.RANGE_FIELD,
                             updated_before.isoformat())
    save_state(self.state)
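# A hypothetical driver for `sync_data_for_period` above, illustrating how the
# date-windowed sync would typically be advanced: start at the saved bookmark, sync one
# interval at a time, and stop once the window reaches `stop_time`. The method name
# `sync_incremental` and the one-day window size are assumptions for illustration only.
import datetime

def sync_incremental(self, start_date, stop_time, child_streams=None):
    interval = datetime.timedelta(days=1)  # assumed window size
    window_start = start_date

    while window_start < stop_time:
        # Each call syncs [window_start, window_start + interval) and advances the
        # RANGE_FIELD bookmark, so an interrupted run resumes at the last saved window.
        self.sync_data_for_period(window_start, interval,
                                  child_streams=child_streams,
                                  stop_time=stop_time)
        window_start = window_start + interval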
def sync_paginated(self, url, params=None, updated_after=None, child_streams=None):
    table = self.TABLE
    transformer = singer.Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)

    applications_stream = OpportunityApplicationsStream(
        self.config, self.state, child_streams.get('opportunity_applications'), self.client)
    offers_stream = OpportunityOffersStream(
        self.config, self.state, child_streams.get('opportunity_offers'), self.client)
    referrals_stream = OpportunityReferralsStream(
        self.config, self.state, child_streams.get('opportunity_referrals'), self.client)
    resumes_stream = OpportunityResumesStream(
        self.config, self.state, child_streams.get('opportunity_resumes'), self.client)

    # Set up looping parameters (page is for logging consistency)
    finished_paginating = False
    page = singer.bookmarks.get_bookmark(self.state, table, "next_page") or 1
    _next = singer.bookmarks.get_bookmark(self.state, table, "offset")
    if _next:
        params['offset'] = _next

    while not finished_paginating:
        try:
            result = self.client.make_request(url, self.API_METHOD, params=params)
        except OffsetInvalidException:
            LOGGER.warning('Found invalid offset "%s", retrying without offset.',
                           params['offset'])
            params.pop("offset")
            _next = None
            page = 1
            result = self.client.make_request(url, self.API_METHOD, params=params)

        _next = result.get('next')
        data = self.get_stream_data(result['data'], transformer)

        LOGGER.info('Starting Opportunity child stream syncs')
        for opportunity in data:
            opportunity_id = opportunity['id']
            if child_streams.get('opportunity_applications'):
                applications_stream.write_schema()
                applications_stream.sync_data(opportunity_id)
            if child_streams.get('opportunity_offers'):
                offers_stream.write_schema()
                offers_stream.sync_data(opportunity_id)
            if child_streams.get('opportunity_referrals'):
                referrals_stream.write_schema()
                referrals_stream.sync_data(opportunity_id)
            if child_streams.get('opportunity_resumes'):
                resumes_stream.write_schema()
                resumes_stream.sync_data(opportunity_id)
        LOGGER.info('Finished Opportunity child stream syncs')

        with singer.metrics.record_counter(endpoint=table) as counter:
            self.write_schema()
            singer.write_records(table, data)
            counter.increment(len(data))

        LOGGER.info('Synced page {} for {}'.format(page, self.TABLE))
        page += 1

        if _next:
            params['offset'] = _next
            self.state = singer.bookmarks.write_bookmark(self.state, table, "offset", _next)
            self.state = singer.bookmarks.write_bookmark(self.state, table, "next_page", page)
            # Save the last_record bookmark when we're paginating to make sure
            # we pick up there if interrupted
            self.state = singer.bookmarks.write_bookmark(
                self.state, table, "last_record", updated_after.isoformat())
            save_state(self.state)
        else:
            finished_paginating = True

    transformer.log_warning()

    self.state = singer.bookmarks.clear_bookmark(self.state, table, "offset")
    self.state = singer.bookmarks.clear_bookmark(self.state, table, "next_page")
    save_state(self.state)
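# A sketch of how the client might surface `OffsetInvalidException`, which the
# pagination loop above catches in order to retry from the first page. Only the
# exception name comes from the code above; the status code and error-body check
# are assumptions for illustration.
import requests

class OffsetInvalidException(Exception):
    """Raised when a previously saved pagination offset is rejected by the API."""

def make_request(url, method, params=None, auth=None):
    response = requests.request(method, url, params=params, auth=auth)
    if response.status_code == 400 and 'offset' in response.text.lower():
        # The stored offset token is stale or malformed; the caller drops it and
        # restarts pagination from page 1 (see sync_paginated above).
        raise OffsetInvalidException(response.text)
    response.raise_for_status()
    return response.json()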
async def sync_paginated(self, url, params=None, updated_after=None, child_streams=None):
    table = self.TABLE
    transformer = singer.Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)

    # Set up looping parameters (page is for logging consistency)
    finished_paginating = False
    page = singer.bookmarks.get_bookmark(self.state, table, "next_page") or 1
    _next = singer.bookmarks.get_bookmark(self.state, table, "offset")

    params["expand"] = self.EXPAND
    if _next:
        params['offset'] = _next

    while not finished_paginating:
        try:
            result = self.client.make_request(url, self.API_METHOD, params=params)
        except OffsetInvalidException:
            LOGGER.warning('Found invalid offset "%s", retrying without offset.',
                           params['offset'])
            params.pop("offset")
            _next = None
            page = 1
            result = self.client.make_request(url, self.API_METHOD, params=params)

        _next = result.get('next')
        data = self.get_stream_data(result['data'], transformer)

        LOGGER.info('Starting Opportunity child stream syncs')
        tasks = []
        async with aiohttp.ClientSession() as session:
            for opportunity in data:
                opportunity["links"] = []
                opportunity_id = opportunity['id']
                if opportunity_id is None:
                    LOGGER.info("opportunity id is null")
                    continue
                for stream_name in child_streams:
                    child_streams[stream_name].write_schema()
                    task = asyncio.ensure_future(
                        child_streams[stream_name].sync_data(opportunity_id,
                                                             async_session=session))
                    tasks.append(task)
            # Wait for every child stream request to finish while the session is still open.
            await asyncio.gather(*tasks)
        LOGGER.info('Finished Opportunity child stream syncs')

        with singer.metrics.record_counter(endpoint=table) as counter:
            singer.write_records(table, data)
            counter.increment(len(data))

        LOGGER.info('Synced page {} for {}'.format(page, self.TABLE))
        page += 1

        if _next:
            params['offset'] = _next
            self.state = singer.bookmarks.write_bookmark(self.state, table, "offset", _next)
            self.state = singer.bookmarks.write_bookmark(self.state, table, "next_page", page)
            # Save the last_record bookmark when we're paginating to make sure
            # we pick up there if interrupted
            self.state = singer.bookmarks.write_bookmark(
                self.state, table, "last_record", updated_after.isoformat())
            save_state(self.state)
        else:
            finished_paginating = True

    transformer.log_warning()

    self.state = singer.bookmarks.clear_bookmark(self.state, table, "offset")
    self.state = singer.bookmarks.clear_bookmark(self.state, table, "next_page")
    save_state(self.state)
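# A self-contained illustration of the fan-out pattern used in the async
# `sync_paginated` above: one shared aiohttp session, one task per request, and a
# single `asyncio.gather` awaited before the session closes. The URLs and function
# names here are placeholders, not part of the tap.
import asyncio
import aiohttp

async def fetch_json(session, url):
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.json()

async def fetch_all(urls):
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch_json(session, url)) for url in urls]
        # Gather inside the `async with` block so the session is still open
        # while the requests run.
        return await asyncio.gather(*tasks)

# Example driver, mirroring how asyncio.run() invokes the coroutine in
# sync_data_for_period above:
# results = asyncio.run(fetch_all(["https://example.com/a", "https://example.com/b"]))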