def _send_request(self, request: requests.PreparedRequest, request_kwargs: Mapping[str, Any]) -> requests.Response:
    """
    Send ``request`` through the retry/backoff wrappers.

    ``self._send`` is first wrapped with the user-defined backoff handler and
    the result is then wrapped with the default backoff handler; the doubly
    wrapped callable is invoked with ``request`` and ``request_kwargs``.

    :param request: the prepared request to send
    :param request_kwargs: extra keyword arguments forwarded to ``self._send``
    :return: whatever the wrapped ``self._send`` call returns
    """
    # The backoff package's max_tries is the TOTAL number of attempts before
    # giving up, so 0 would mean "make no call at all". This class exposes
    # max_REtries instead: one initial attempt plus that many retries. Hence
    # the "+ 1" below.
    #
    # The backoff docstring says max_tries=None means "no limit", implying
    # that any explicit number is a real limit. That does not hold for
    # backoff 1.8.0: passing 0 or a negative number results in endless retry
    # attempts. Clamp to zero before adding one so an explicitly configured
    # value (i.e. max_retries is not None) can never produce an endless loop.
    configured_retries = self.max_retries
    total_attempts = None if configured_retries is None else max(0, configured_retries) + 1

    AirbyteSentry.set_context("request", {"url": request.url, "headers": request.headers, "args": request_kwargs})
    with AirbyteSentry.start_transaction_span(op="_send_request"):
        # Inner wrapper: user-defined backoff around the raw send.
        send_with_user_backoff = user_defined_backoff_handler(max_tries=total_attempts)(self._send)
        # Outer wrapper: default backoff driven by the configured retry factor.
        apply_default_backoff = default_backoff_handler(max_tries=total_attempts, factor=self.retry_factor)
        return apply_default_backoff(send_with_user_backoff)(request, request_kwargs)
def _send(self, request: requests.PreparedRequest, request_kwargs: Mapping[str, Any]) -> requests.Response:
    """
    Send the request once and translate retryable responses into backoff exceptions.

    HTTP status error handling is skipped entirely when ``raise_on_http_errors``
    is False.

    Two kinds of failures are dealt with here:
        1. Expected transient failures, e.g. a 429 status code.
        2. Unexpected transient failures, e.g. a timeout.

    A backoff is triggered by raising an exception that the backoff decorator
    handles; an exception the decorator does not handle fails the sync.

    For expected transient failures the exception type selects the wait time:
        1. The user-defined backoff exception carries the value returned by
           ``self.backoff_time``.
        2. The default backoff exception defers to the decorator's own policy,
           e.g. exponential backoff.

    Unexpected transient failures use the default backoff parameters.
    Unexpected persistent failures are not handled and make the sync fail.

    :param request: the prepared request to send
    :param request_kwargs: keyword arguments passed on to ``self._session.send``
    :return: the response, when it neither needs a retry nor raises
    """
    AirbyteSentry.add_breadcrumb(message=f"Issue {request.url}", data=request_kwargs)
    with AirbyteSentry.start_transaction_span(op="_send", description=request.url):
        response: requests.Response = self._session.send(request, **request_kwargs)

    if not self.should_retry(response):
        if self.raise_on_http_errors:
            # Raise any HTTP exceptions that happened in case there were unexpected ones
            response.raise_for_status()
        return response

    # Retry path: a truthy user-supplied wait time wins over the default policy.
    wait_seconds = self.backoff_time(response)
    if wait_seconds:
        raise UserDefinedBackoffException(backoff=wait_seconds, request=request, response=response)
    raise DefaultBackoffException(request=request, response=response)
def configure_sentry(self, spec_schema: Dict[str, Any], parsed_args: argparse.Namespace):
    """
    Initialise AirbyteSentry with a source tag and the secret values to register.

    :param spec_schema: spec schema handed to ``get_secret_values`` together with the config
    :param parsed_args: parsed command-line arguments; the config is only read
        when a ``config`` attribute is present
    """
    if "config" in parsed_args:
        loaded_config = self.source.read_config(parsed_args.config)
        secrets = get_secret_values(spec_schema, loaded_config)
    else:
        secrets = []

    # The tag is the top-level package of the source's module with everything
    # up to and including the first underscore removed; a package name without
    # an underscore is used unchanged.
    top_level_package = self.source.__module__.split(".")[0]
    _, underscore, remainder = top_level_package.partition("_")
    tag = remainder if underscore else top_level_package

    AirbyteSentry.init(source_tag=tag, secret_values=secrets)
def read_records(
    self,
    sync_mode: SyncMode,
    cursor_field: List[str] = None,
    stream_slice: Mapping[str, Any] = None,
    stream_state: Mapping[str, Any] = None,
) -> Iterable[Mapping[str, Any]]:
    """
    Yield the parsed records of every page for the given slice.

    Each iteration builds a prepared request from the stream's request hooks,
    sends it via ``self._send_request``, yields everything
    ``self.parse_response`` produces, and asks ``self.next_page_token`` for the
    next token; iteration stops at the first falsy token.

    :param sync_mode: not used in this method body
    :param cursor_field: not used in this method body
    :param stream_slice: slice forwarded to every request hook
    :param stream_state: state forwarded to every request hook; a falsy value
        is replaced by an empty dict
    :return: a generator of parsed records
    """
    stream_state = stream_state or {}
    next_page_token = None

    with AirbyteSentry.start_transaction("read_records", self.name), AirbyteSentry.start_transaction_span("read_records"):
        while True:
            # Every request hook receives the same three keyword arguments.
            hook_args = {"stream_state": stream_state, "stream_slice": stream_slice, "next_page_token": next_page_token}

            base_headers = self.request_headers(**hook_args)
            request = self._create_prepared_request(
                path=self.path(**hook_args),
                headers=dict(base_headers, **self.authenticator.get_auth_header()),
                params=self.request_params(**hook_args),
                json=self.request_body_json(**hook_args),
                data=self.request_body_data(**hook_args),
            )
            request_kwargs = self.request_kwargs(**hook_args)

            if self.use_cache:
                # use context manager to handle and store cassette metadata
                with self.cache_file as cass:
                    self.cassete = cass
                    # vcr tries to find records based on the request, if such records exist, return from cache file
                    # else make a request and save record in cache file
                    response = self._send_request(request, request_kwargs)
            else:
                response = self._send_request(request, request_kwargs)

            yield from self.parse_response(response, stream_state=stream_state, stream_slice=stream_slice)

            next_page_token = self.next_page_token(response)
            if not next_page_token:
                break

    # Always return an empty generator just in case no records were ever yielded
    yield from []
def test_sentry_init(sentry_mock):
    """
    Check that every AirbyteSentry helper reaches the sentry mock once init has enabled Sentry.

    NOTE(review): ``sentry_mock`` is a fixture defined elsewhere; presumably it
    patches the sentry SDK and provides the DSN environment variable - confirm.
    """
    AirbyteSentry.init("test_source")
    # init must call through to the SDK and tag the run.
    assert sentry_mock.init.called
    sentry_mock.set_tag.assert_any_call("source", "test_source")
    sentry_mock.set_tag.assert_any_call("run_id", mock.ANY)
    assert AirbyteSentry.sentry_enabled
    # Each helper is called and then checked against the corresponding mock attribute.
    AirbyteSentry.set_tag("tagname", "value")
    assert sentry_mock.set_tag.called
    AirbyteSentry.add_breadcrumb("msg", data={})
    assert sentry_mock.add_breadcrumb.called
    with AirbyteSentry.start_transaction("name", "op"):
        assert sentry_mock.start_transaction.called
    with AirbyteSentry.start_transaction_span("name", "op"):
        assert sentry_mock.start_span.called
def test_sentry_init_no_env(sentry_mock):
    """
    Check that every AirbyteSentry helper leaves the sentry mock untouched when the DSN env variable is absent.
    """
    # Precondition: the DSN environment variable must not be set for this test.
    assert AirbyteSentry.DSN_ENV_NAME not in os.environ
    AirbyteSentry.init("test_source")
    assert not sentry_mock.init.called
    assert not AirbyteSentry.sentry_enabled
    # Each helper is called and the corresponding mock attribute must stay uncalled.
    AirbyteSentry.set_tag("tagname", "value")
    assert not sentry_mock.set_tag.called
    AirbyteSentry.add_breadcrumb("msg", data={})
    assert not sentry_mock.add_breadcrumb.called
    with AirbyteSentry.start_transaction("name", "op"):
        assert not sentry_mock.start_transaction.called
    with AirbyteSentry.start_transaction_span("name", "op"):
        assert not sentry_mock.start_span.called
def test_sentry_sensitive_info_transactions(httpserver):
    """
    Exercise a transaction and span carrying secrets and assert the transport did not flag a failure.

    NOTE(review): ``TestTransport`` is defined elsewhere; presumably it sets
    ``failed`` when one of the given secrets shows up in an outgoing event - confirm.
    """
    SECRET = "SOME_secret"
    SECRETS = [SECRET]
    # Not registered via secret_values, but the transport is still told about it.
    UNEXPECTED_SECRET = "UnexEpectedSecret"
    transport = TestTransport(secrets=[*SECRETS, UNEXPECTED_SECRET])

    AirbyteSentry.init("test_source", transport=transport, secret_values=SECRETS)

    AirbyteSentry.set_context("my secret", {"api_key": SECRET})
    AirbyteSentry.set_context("headers", {"Authorization": f"Bearer {UNEXPECTED_SECRET}"})
    with AirbyteSentry.start_transaction("name", "op"):
        with AirbyteSentry.start_transaction_span(
            "name", description=f"http://localhost:{httpserver.port}/test?api_key={UNEXPECTED_SECRET}"
        ):
            # Real HTTP call with the secret in both the query string and a header.
            requests.get(
                f"http://localhost:{httpserver.port}/test?api_key={SECRET}",
                headers={"Authorization": f"Bearer {SECRET}"},
            ).text
    assert not transport.failed
def test_sentry_sensitive_info(httpserver):
    """
    Push secrets through breadcrumbs, contexts, messages, logging, HTTP calls and an exception, then assert the transport did not flag a failure.

    NOTE(review): ``TestTransport`` is defined elsewhere; presumably it sets
    ``failed`` when one of the given secrets shows up in an outgoing event - confirm.
    """
    SECRET = "SOME_secret"
    # Not registered via secret_values, but the transport is still told about it.
    UNEXPECTED_SECRET = "UnexEpectedSecret"
    SECRETS = [SECRET]
    transport = TestTransport(secrets=[*SECRETS, UNEXPECTED_SECRET])

    AirbyteSentry.init("test_source", transport=transport, secret_values=SECRETS)

    # Registered secret in a breadcrumb, a context and a captured message.
    AirbyteSentry.add_breadcrumb("msg", {"crumb": SECRET})
    AirbyteSentry.set_context("my secret", {"api_key": SECRET})
    AirbyteSentry.capture_message(f"this is {SECRET}")
    # Unregistered secret inside URL query parameters and an Authorization header.
    AirbyteSentry.capture_message(f"Issue url http://localhost:{httpserver.port}/test?api_key={UNEXPECTED_SECRET}")
    AirbyteSentry.capture_message(f"Issue url http://localhost:{httpserver.port}/test?access_token={UNEXPECTED_SECRET}")
    AirbyteSentry.capture_message(f"Issue url http://localhost:{httpserver.port}/test?refresh_token={UNEXPECTED_SECRET}")
    AirbyteSentry.set_context("headers", {"Authorization": f"Bearer {UNEXPECTED_SECRET}"})
    getLogger("airbyte").info(f"this is {SECRET}")
    # Real HTTP calls, first with the registered secret and then with the unregistered one.
    requests.get(
        f"http://localhost:{httpserver.port}/test?api_key={SECRET}",
        headers={"Authorization": f"Bearer {SECRET}"},
    ).text
    requests.get(
        f"http://localhost:{httpserver.port}/test?api_key={UNEXPECTED_SECRET}",
        headers={"Authorization": f"Bearer {UNEXPECTED_SECRET}"},
    ).text
    AirbyteSentry.capture_exception(Exception(f"Secret info: {SECRET}"))
    assert not transport.failed