def sleep_on_ratelimit(_details):
    """Backoff handler: when the in-flight exception is FreshdeskRateLimited,
    sleep for the period the server requested via the Retry-After header."""
    _, exc, _ = sys.exc_info()
    if not isinstance(exc, FreshdeskRateLimited):
        return
    retry_after = int(exc.response.headers["Retry-After"])
    logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
    time.sleep(retry_after + 1)  # extra second to cover any fractions of second
def sleep_on_ratelimit(_details):
    """Backoff handler: when the in-flight exception is HubspotRateLimited,
    sleep for the period the server requested via the Retry-After header."""
    _, exc, _ = sys.exc_info()
    if not isinstance(exc, HubspotRateLimited):
        return
    # Hubspot API does not always return Retry-After value for 429 HTTP error
    retry_after = int(exc.response.headers.get("Retry-After", 3))
    logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
    time.sleep(retry_after + 1)  # extra second to cover any fractions of second
def state(self, value):
    """State setter: skip restoring the bookmark when `include_deleted` was
    just enabled, since previously-deleted records were never synced and may
    now appear with timestamps older than the saved cursor."""
    include_deleted_was_off = not value.get("include_deleted", False)
    if self._include_deleted and include_deleted_was_off:
        logger.info(f"Ignoring bookmark for {self.name} because of enabled `include_deleted` option")
    else:
        self._state = pendulum.parse(value[self.state_pk])
def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
    """Apply state filter to set of records, update cursor(state) if necessary in the end"""
    latest_cursor = None
    for record in self._paginator(getter):
        yield record
        record_cursor = pendulum.parse(record[self.updated_at_field])
        latest_cursor = record_cursor if not latest_cursor else max(record_cursor, latest_cursor)
    if not latest_cursor:
        return
    new_state = latest_cursor if not self._state else max(latest_cursor, self._state)
    if new_state != self._state:
        logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
        self._state = new_state
        self._start_date = self._state
def get(self, url: str, params: Mapping = None):
    """Wrapper around request.get() to use the API prefix. Returns a JSON response.

    Retries up to 10 times on rate-limit responses, honoring the server's
    Retry-After header between attempts.
    """
    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1
        response = self._session.get(self._api_prefix + url, params=params or {})
        try:
            return self._parse_and_handle_errors(response)
        except FreshdeskRateLimited:
            retry_after = int(response.headers["Retry-After"])
            logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
            time.sleep(retry_after + 1)  # extra second to cover any fractions of second
    raise Exception("Max retry limit reached")
def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
    """Apply state filter to set of records, update cursor(state) if necessary in the end"""
    latest_cursor = None
    for record in super().read(getter, params or {}):
        record_cursor = pendulum.parse(record[self.state_pk])
        # Skip records at or behind the bookmark, keeping a safety buffer of
        # `buffer_days + 1` days to re-read recently modified records.
        if self._state and self._state.subtract(days=self.buffer_days + 1) >= record_cursor:
            continue
        latest_cursor = record_cursor if not latest_cursor else max(record_cursor, latest_cursor)
        yield record
    if latest_cursor:
        logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
        self._state = latest_cursor if not self._state else max(latest_cursor, self._state)
def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
    """Update cursor(state).

    Yields all records unchanged; the cursor is taken from the first record
    only, then the bookmark is advanced after the read completes.
    """
    params = params or {}
    cursor = None
    for record in super().read(getter, params):
        # Report API returns records from newest to oldest, so the first
        # record carries the most recent cursor value.
        # (Was a no-op bare string statement; converted to a real comment.)
        if not cursor:
            cursor = pendulum.parse(record[self.state_pk])
        yield record
    if cursor:
        new_state = max(cursor, self._state) if self._state else cursor
        if new_state != self._state:
            logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {new_state}")
            self._state = new_state
def state_filter(self, record: dict) -> Optional[dict]:
    """Apply state filter to record, update cursor(state)"""
    cursor_key = record[self.cursor_field]
    cursor = pendulum.parse(record[self.state_pk])
    if self._state[cursor_key] >= cursor:
        return None
    stream_name = self.__class__.__name__
    stream_name = stream_name[:-3] if stream_name.endswith("API") else stream_name
    logger.info(
        f"Advancing bookmark for {stream_name} stream for {self.cursor_field} {cursor_key} from {self._state[cursor_key]} to {cursor}"
    )
    self._state[cursor_key] = max(cursor, self._state[cursor_key])
    return record
def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
    """Read using getter, patched to respect current state"""
    request_params = {**(params or {}), **self._state_params()}
    latest_cursor = None
    for record in self.get_tickets(self.result_return_limit, getter, request_params):
        record_cursor = pendulum.parse(record[self.state_pk])
        # skip records that are not newer than the current bookmark
        if self._state and self._state >= record_cursor:
            continue
        latest_cursor = record_cursor if not latest_cursor else max(record_cursor, latest_cursor)
        yield record
    if latest_cursor:
        logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
        self._state = latest_cursor if not self._state else max(latest_cursor, self._state)
def read_chunked(
    self, getter: Callable, params: Mapping[str, Any] = None, chunk_size: pendulum.Interval = pendulum.interval(days=1)
) -> Iterator:
    """Read the stream in fixed time windows (default 1 day) from the start
    date up to now, passing millisecond start/end timestamps to the API."""
    params = dict(params) if params else {}
    now_ts = int(pendulum.now().timestamp() * 1000)
    start_ts = int(self._start_date.timestamp() * 1000)
    window_ms = int(chunk_size.total_seconds() * 1000)
    for window_start in range(start_ts, now_ts, window_ms):
        window_end = window_start + window_ms
        params["startTimestamp"] = window_start
        params["endTimestamp"] = window_end
        logger.info(
            f"Reading chunk from stream {self.name} between {pendulum.from_timestamp(window_start / 1000)} and {pendulum.from_timestamp(window_end / 1000)}"
        )
        yield from super().read(getter, params)
def state_filter(self, records: Iterator[dict]) -> Iterator[Any]:
    """Apply state filter to set of records, update cursor(state) if necessary in the end"""
    latest_cursor = None
    for record in records:
        record_cursor = pendulum.parse(record[self.state_pk])
        if self._state and self._state >= record_cursor:
            continue
        latest_cursor = record_cursor if not latest_cursor else max(record_cursor, latest_cursor)
        yield record
    if not latest_cursor:
        return
    stream_name = self.__class__.__name__
    stream_name = stream_name[:-3] if stream_name.endswith("API") else stream_name
    logger.info(f"Advancing bookmark for {stream_name} stream from {self._state} to {latest_cursor}")
    self._state = latest_cursor if not self._state else max(latest_cursor, self._state)
def _acquire_access_token_from_refresh_token(self):
    """Exchange the stored refresh token for fresh OAuth tokens and persist
    them (access token, rotated refresh token, expiry) in the credentials."""
    creds = self._credentials
    payload = {
        "grant_type": "refresh_token",
        **{key: creds[key] for key in ("redirect_uri", "refresh_token", "client_id", "client_secret")},
    }
    resp = requests.post(self.BASE_URL + "/oauth/v1/token", data=payload)
    if resp.status_code == HTTPStatus.FORBIDDEN:
        raise HubspotInvalidAuth(resp.content, response=resp)
    resp.raise_for_status()
    auth = resp.json()
    creds["access_token"] = auth["access_token"]
    creds["refresh_token"] = auth["refresh_token"]
    # record expiry 10 minutes (600 s) early so the token is renewed ahead of time
    creds["token_expires"] = datetime.utcnow() + timedelta(seconds=auth["expires_in"] - 600)
    logger.info("Token refreshed. Expires at %s", creds["token_expires"])
def _run_job_until_completion(self, params) -> AdReportRun:
    """Start an async insights job and poll it until it reports completion.

    Raises JobTimeoutException when the job has not started within
    MAX_WAIT_TO_START (progress still 0%) or has not finished within
    MAX_WAIT_TO_FINISH. Polling interval doubles each iteration, capped at
    MAX_ASYNC_SLEEP.
    """
    # TODO parallelize running these jobs
    job = self._get_insights(params)
    logger.info(f"Created AdReportRun: {job} to sync insights with breakdown {self.breakdowns}")
    start_time = pendulum.now()
    sleep_seconds = 2  # initial poll interval; doubles up to MAX_ASYNC_SLEEP
    while True:
        # refresh job status from the API
        job = job.api_get()
        job_progress_pct = job["async_percent_completion"]
        logger.info(f"ReportRunId {job['report_run_id']} is {job_progress_pct}% complete")
        if job["async_status"] == "Job Completed":
            return job
        runtime = pendulum.now() - start_time
        # 0% progress after MAX_WAIT_TO_START means the job never started
        if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0:
            raise JobTimeoutException(
                f"AdReportRun {job} did not start after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
            )
        elif runtime > self.MAX_WAIT_TO_FINISH:
            raise JobTimeoutException(
                f"AdReportRun {job} did not finish after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
            )
        logger.info(f"Sleeping {sleep_seconds} seconds while waiting for AdReportRun: {job} to complete")
        time.sleep(sleep_seconds)
        # exponential backoff, capped at MAX_ASYNC_SLEEP
        if sleep_seconds < self.MAX_ASYNC_SLEEP.in_seconds():
            sleep_seconds *= 2
def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
    """Apply state filter to set of records, update cursor(state) if necessary in the end"""
    latest_cursor = None
    # Returned records are not guaranteed to be sorted in ascending order, so
    # the bookmark is only advanced once the whole read finishes; tracking the
    # exact boundary ensures no record between two states is missed. In the
    # future the state could be saved more often, e.g. once per batch.
    for record in self.read_chunked(getter, params):
        yield record
        record_cursor = self._field_to_datetime(record[self.updated_at_field])
        latest_cursor = record_cursor if not latest_cursor else max(record_cursor, latest_cursor)
    if not latest_cursor:
        return
    new_state = latest_cursor if not self._state else max(latest_cursor, self._state)
    if new_state != self._state:
        logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
        self._state = new_state
        self._start_date = self._state
def log_retry_attempt(details):
    """Backoff handler: log the active exception plus retry count and wait time."""
    _, exc, _ = sys.exc_info()
    logger.info(str(exc))
    logger.info(
        f"Caught retryable error after {details['tries']} tries. Waiting {details['wait']} more seconds then retrying..."
    )