Exemplo n.º 1
0
 def sleep_on_ratelimit(_details):
     """Sleep for the server-requested delay when the in-flight error is a rate limit.

     Looks at the exception currently being handled; anything other than
     ``FreshdeskRateLimited`` is ignored and the function returns immediately.

     :param _details: call details passed in by the caller (unused)
     """
     current_error = sys.exc_info()[1]
     if not isinstance(current_error, FreshdeskRateLimited):
         return

     retry_after = int(current_error.response.headers["Retry-After"])
     logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
     # one extra second to cover any fractions of a second
     time.sleep(retry_after + 1)
Exemplo n.º 2
0
 def sleep_on_ratelimit(_details):
     """Sleep before the next attempt when the in-flight error is a Hubspot rate limit.

     Looks at the exception currently being handled; anything other than
     ``HubspotRateLimited`` is ignored and the function returns immediately.

     :param _details: call details passed in by the caller (unused)
     """
     current_error = sys.exc_info()[1]
     if not isinstance(current_error, HubspotRateLimited):
         return

     # The Hubspot API does not always send Retry-After with a 429 response,
     # so fall back to 3 seconds when the header is absent.
     advertised_delay = current_error.response.headers.get("Retry-After", 3)
     retry_after = int(advertised_delay)
     logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
     # one extra second to cover any fractions of a second
     time.sleep(retry_after + 1)
Exemplo n.º 3
0
 def state(self, value):
     """Restore the bookmark from a saved state mapping.

     When deleted records are requested (``self._include_deleted``) but the
     saved state does not carry a truthy ``include_deleted`` entry, the saved
     bookmark is ignored and ``self._state`` is left untouched. Otherwise
     ``self._state`` is parsed from ``value[self.state_pk]``.

     :param value: mapping holding the previously saved state
     """
     # Older records may newly qualify once deleted items are included,
     # so the stored bookmark cannot be trusted in that case.
     if self._include_deleted and not value.get("include_deleted", False):
         logger.info(
             f"Ignoring bookmark for {self.name} because of enabled `include_deleted` option"
         )
         return

     self._state = pendulum.parse(value[self.state_pk])
Exemplo n.º 4
0
Arquivo: api.py Projeto: NMWDI/airbyte
    def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
        """Yield every record from the paginator, then move the bookmark forward if needed.

        The highest ``updated_at_field`` value seen is tracked while iterating;
        once the paginator is exhausted, ``_state`` and ``_start_date`` are set
        to the later of that value and the current state.

        :param getter: callable handed to ``self._paginator``
        :param params: accepted for interface compatibility; not used here
        :return: iterator over the records
        """
        latest_cursor = None
        for record in self._paginator(getter):
            yield record
            record_ts = pendulum.parse(record[self.updated_at_field])
            if latest_cursor:
                latest_cursor = max(record_ts, latest_cursor)
            else:
                latest_cursor = record_ts

        if not latest_cursor:
            # nothing was read, so there is nothing to bookmark
            return

        if self._state:
            candidate = max(latest_cursor, self._state)
        else:
            candidate = latest_cursor

        if candidate != self._state:
            logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
            self._state = candidate
            self._start_date = self._state
Exemplo n.º 5
0
 def get(self, url: str, params: Mapping = None):
     """Issue a GET against the API prefix and return the parsed response.

     The request is attempted up to 10 times. When parsing raises
     ``FreshdeskRateLimited`` the method sleeps for the ``Retry-After``
     delay (plus one second) and tries again.

     :param url: path appended to ``self._api_prefix``
     :param params: optional query parameters
     :return: whatever ``self._parse_and_handle_errors`` returns
     :raises Exception: when all 10 attempts were rate limited
     """
     query = params or {}
     for _attempt in range(10):
         response = self._session.get(self._api_prefix + url, params=query)
         try:
             parsed = self._parse_and_handle_errors(response)
         except FreshdeskRateLimited:
             retry_after = int(response.headers["Retry-After"])
             logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
             # one extra second to cover any fractions of a second
             time.sleep(retry_after + 1)
         else:
             return parsed
     raise Exception("Max retry limit reached")
Exemplo n.º 6
0
    def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
        """Yield records newer than the buffered bookmark, then update the bookmark.

        A record is skipped when its ``state_pk`` value is at or before the
        current state minus ``buffer_days + 1`` days. After the parent reader
        is exhausted, the state becomes the later of the newest yielded cursor
        and the current state.

        :param getter: callable forwarded to the parent ``read``
        :param params: optional request parameters forwarded to the parent ``read``
        :return: iterator over the records that passed the filter
        """
        params = params or {}
        latest_cursor = None
        for record in super().read(getter, params):
            cursor = pendulum.parse(record[self.state_pk])
            if self._state and self._state.subtract(days=self.buffer_days + 1) >= cursor:
                continue
            latest_cursor = max(cursor, latest_cursor) if latest_cursor else cursor
            yield record

        if latest_cursor:
            new_state = max(latest_cursor, self._state) if self._state else latest_cursor
            # Records inside the buffer window can be older than the current
            # bookmark, so only report an advance when the bookmark really
            # changes, and report the value that is actually stored.
            if new_state != self._state:
                logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {new_state}")
            self._state = new_state
Exemplo n.º 7
0
    def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
        """Yield all records from the parent reader, then update the bookmark.

        The cursor is taken from the first record only. Once the parent reader
        is exhausted, the state becomes the later of that cursor and the
        current state.

        :param getter: callable forwarded to the parent ``read``
        :param params: optional request parameters forwarded to the parent ``read``
        :return: iterator over the records
        """
        params = params or {}
        cursor = None
        for record in super().read(getter, params):
            # The Report API returns records from newest to oldest, so the
            # first record carries the most recent cursor value.
            if not cursor:
                cursor = pendulum.parse(record[self.state_pk])
            yield record

        if cursor:
            new_state = max(cursor, self._state) if self._state else cursor
            if new_state != self._state:
                logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {new_state}")
                self._state = new_state
Exemplo n.º 8
0
    def state_filter(self, record: dict) -> Optional[dict]:
        """Drop a record that is not newer than its stored bookmark; otherwise advance the bookmark.

        Bookmarks are kept per value of ``record[self.cursor_field]`` in
        ``self._state``.

        :param record: record to check
        :return: the record when it is newer than its bookmark, else ``None``
        """
        cursor = pendulum.parse(record[self.state_pk])
        if self._state[record[self.cursor_field]] >= cursor:
            return None

        class_name = type(self).__name__
        # strip the trailing "API" from the class name for logging
        stream_name = class_name[:-3] if class_name.endswith("API") else class_name
        logger.info(
            f"Advancing bookmark for {stream_name} stream for {self.cursor_field} {record[self.cursor_field]} from {self._state[record[self.cursor_field]]} to {cursor}"
        )

        bookmark_key = record[self.cursor_field]
        advanced = max(cursor, self._state[bookmark_key])
        self._state.update({bookmark_key: advanced})
        return record
Exemplo n.º 9
0
    def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
        """Read tickets with state-derived parameters and skip records already covered by the bookmark.

        After ``self.get_tickets`` is exhausted, the state becomes the later of
        the newest yielded cursor and the current state.

        :param getter: callable forwarded to ``self.get_tickets``
        :param params: optional request parameters; entries from
            ``self._state_params()`` take precedence on key clashes
        :return: iterator over records newer than the current state
        """
        request_params = {**(params or {}), **self._state_params()}
        latest_cursor = None
        for record in self.get_tickets(self.result_return_limit, getter, request_params):
            record_cursor = pendulum.parse(record[self.state_pk])
            # filter out records not newer than the state
            if self._state and self._state >= record_cursor:
                continue
            if latest_cursor:
                latest_cursor = max(record_cursor, latest_cursor)
            else:
                latest_cursor = record_cursor
            yield record

        if not latest_cursor:
            return

        logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
        if self._state:
            self._state = max(latest_cursor, self._state)
        else:
            self._state = latest_cursor
Exemplo n.º 10
0
    def read_chunked(
        self,
        getter: Callable,
        params: Mapping[str, Any] = None,
        chunk_size: pendulum.Interval = pendulum.interval(days=1)
    ) -> Iterator:
        """Read the stream in consecutive time windows from ``self._start_date`` until now.

        Each window is passed to the parent ``read`` through the
        ``startTimestamp`` / ``endTimestamp`` parameters, expressed in
        milliseconds.

        :param getter: callable forwarded to the parent ``read``
        :param params: optional request parameters; copied, never mutated
        :param chunk_size: length of one window
        :return: iterator over the records of all windows
        """
        window_params = dict(params) if params else {}
        stop_ms = int(pendulum.now().timestamp() * 1000)
        first_ms = int(self._start_date.timestamp() * 1000)
        step_ms = int(chunk_size.total_seconds() * 1000)

        for ts in range(first_ms, stop_ms, step_ms):
            end_ts = ts + step_ms
            window_params.update({"startTimestamp": ts, "endTimestamp": end_ts})
            logger.info(
                f"Reading chunk from stream {self.name} between {pendulum.from_timestamp(ts / 1000)} and {pendulum.from_timestamp(end_ts / 1000)}"
            )
            yield from super().read(getter, window_params)
Exemplo n.º 11
0
    def state_filter(self, records: Iterator[dict]) -> Iterator[Any]:
        """Yield only records newer than the bookmark and advance the bookmark at the end.

        :param records: records to filter; each must carry ``self.state_pk``
        :return: iterator over records whose cursor is after the current state
        """
        latest_cursor = None
        for record in records:
            record_cursor = pendulum.parse(record[self.state_pk])
            if self._state and self._state >= record_cursor:
                continue
            if latest_cursor:
                latest_cursor = max(record_cursor, latest_cursor)
            else:
                latest_cursor = record_cursor
            yield record

        if not latest_cursor:
            return

        class_name = type(self).__name__
        # strip the trailing "API" from the class name for logging
        stream_name = class_name[:-3] if class_name.endswith("API") else class_name
        logger.info(
            f"Advancing bookmark for {stream_name} stream from {self._state} to {latest_cursor}"
        )
        if self._state:
            self._state = max(latest_cursor, self._state)
        else:
            self._state = latest_cursor
Exemplo n.º 12
0
    def _acquire_access_token_from_refresh_token(self):
        """Exchange the stored refresh token for a new access token.

        Posts the stored credentials to ``/oauth/v1/token`` and writes the
        returned ``access_token``, ``refresh_token`` and a computed
        ``token_expires`` back into ``self._credentials``.

        :raises HubspotInvalidAuth: when the endpoint answers 403 Forbidden
        :raises requests.HTTPError: for any other error status, via
            ``raise_for_status``
        """
        payload = {"grant_type": "refresh_token"}
        for field in ("redirect_uri", "refresh_token", "client_id", "client_secret"):
            payload[field] = self._credentials[field]

        token_url = self.BASE_URL + "/oauth/v1/token"
        resp = requests.post(token_url, data=payload)
        if resp.status_code == HTTPStatus.FORBIDDEN:
            raise HubspotInvalidAuth(resp.content, response=resp)
        resp.raise_for_status()

        auth = resp.json()
        self._credentials["access_token"] = auth["access_token"]
        self._credentials["refresh_token"] = auth["refresh_token"]
        # record an expiry 600 seconds earlier than the lifetime the server reports
        usable_lifetime = timedelta(seconds=auth["expires_in"] - 600)
        self._credentials["token_expires"] = datetime.utcnow() + usable_lifetime
        logger.info("Token refreshed. Expires at %s", self._credentials["token_expires"])
Exemplo n.º 13
0
    def _run_job_until_completion(self, params) -> AdReportRun:
        """Create an insights job and poll it until it reports completion.

        The delay between polls starts at 2 seconds and doubles after each
        poll for as long as it is below ``self.MAX_ASYNC_SLEEP``.

        :param params: parameters forwarded to ``self._get_insights``
        :return: the job once its ``async_status`` is ``"Job Completed"``
        :raises JobTimeoutException: when the job shows 0% progress after
            ``self.MAX_WAIT_TO_START``, or is still unfinished after
            ``self.MAX_WAIT_TO_FINISH``
        """
        # TODO parallelize running these jobs
        job = self._get_insights(params)
        logger.info(f"Created AdReportRun: {job} to sync insights with breakdown {self.breakdowns}")
        started_at = pendulum.now()
        sleep_seconds = 2
        max_sleep_reached = False
        while True:
            job = job.api_get()
            job_progress_pct = job["async_percent_completion"]
            logger.info(f"ReportRunId {job['report_run_id']} is {job_progress_pct}% complete")

            if job["async_status"] == "Job Completed":
                return job

            runtime = pendulum.now() - started_at
            if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not start after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            if runtime > self.MAX_WAIT_TO_FINISH:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not finish after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )

            logger.info(f"Sleeping {sleep_seconds} seconds while waiting for AdReportRun: {job} to complete")
            time.sleep(sleep_seconds)
            # back off: double the delay until it reaches the configured maximum
            max_sleep_reached = not (sleep_seconds < self.MAX_ASYNC_SLEEP.in_seconds())
            if not max_sleep_reached:
                sleep_seconds *= 2
Exemplo n.º 14
0
    def read(self,
             getter: Callable,
             params: Mapping[str, Any] = None) -> Iterator:
        """Yield every record read in chunks, then move the bookmark forward if needed.

        Once ``self.read_chunked`` is exhausted, ``_state`` and ``_start_date``
        are set to the later of the newest ``updated_at_field`` value seen and
        the current state.

        :param getter: callable forwarded to ``self.read_chunked``
        :param params: optional request parameters forwarded to ``self.read_chunked``
        :return: iterator over the records
        """
        latest_cursor = None
        # To track state: there is no guarantee that returned records are sorted in ascending
        # order. With an exact boundary we can always ensure no records are missed between
        # states. In the future, if the state should be saved more often, this could be done
        # for every batch.
        for record in self.read_chunked(getter, params):
            yield record
            record_ts = self._field_to_datetime(record[self.updated_at_field])
            if latest_cursor:
                latest_cursor = max(record_ts, latest_cursor)
            else:
                latest_cursor = record_ts

        if not latest_cursor:
            # nothing was read, so there is nothing to bookmark
            return

        if self._state:
            candidate = max(latest_cursor, self._state)
        else:
            candidate = latest_cursor

        if candidate != self._state:
            logger.info(
                f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}"
            )
            self._state = candidate
            self._start_date = self._state
Exemplo n.º 15
0
 def log_retry_attempt(details):
     """Log the exception currently being handled together with retry progress.

     :param details: mapping providing ``tries`` (attempts made so far) and
         ``wait`` (seconds until the next attempt)
     """
     current_error = sys.exc_info()[1]
     logger.info(str(current_error))
     logger.info(
         f"Caught retryable error after {details['tries']} tries. Waiting {details['wait']} more seconds then retrying..."
     )