def get(self, request): try: raise ValueError("An example error") except Exception: capture_exception() return Error500View.as_view()(request)
def get_processor(data_export, environment_id):
    try:
        if data_export.query_type == ExportQueryType.ISSUES_BY_TAG:
            payload = data_export.query_info
            processor = IssuesByTagProcessor(
                project_id=payload["project"][0],
                group_id=payload["group"],
                key=payload["key"],
                environment_id=environment_id,
            )
        elif data_export.query_type == ExportQueryType.DISCOVER:
            processor = DiscoverProcessor(
                discover_query=data_export.query_info,
                organization_id=data_export.organization_id,
            )
        else:
            raise ExportError(
                f"No processor found for this query type: {data_export.query_type}"
            )
        return processor
    except ExportError as error:
        error_str = str(error)
        metrics.incr("dataexport.error", tags={"error": error_str}, sample_rate=1.0)
        logger.info(f"dataexport.error: {error_str}")
        capture_exception(error)
        raise

def get(self, request):
    try:
        raise ValueError('An example error')
    except Exception:
        capture_exception()
    return Error500View.as_view()(request)

def process_discover(data_export, file, export_limit, batch_size, environment_id):
    """
    Convert the discover query to a CSV, writing it to the provided file.
    """
    try:
        processor = DiscoverProcessor(
            discover_query=data_export.query_info,
            organization_id=data_export.organization_id,
        )
    except ExportError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise error

    writer = create_writer(file, processor.header_fields)
    iteration = 0
    is_completed = False
    while not is_completed:
        offset = batch_size * iteration
        next_offset = batch_size * (iteration + 1)
        is_exceeding_limit = export_limit and export_limit < next_offset
        raw_data_unicode = processor.data_fn(offset=offset, limit=batch_size)["data"]
        # TODO(python3): Remove next line once the 'csv' module has been updated to Python 3
        # See associated comment in './utils.py'
        raw_data = convert_to_utf8(raw_data_unicode)
        raw_data = processor.handle_fields(raw_data)
        if is_exceeding_limit:
            # Since the next offset will pass the export_limit, just write the remainder
            writer.writerows(raw_data[: export_limit % batch_size])
        else:
            writer.writerows(raw_data)
        iteration += 1
        # If there are no returned results, or we've passed the export_limit, stop iterating
        is_completed = len(raw_data) == 0 or is_exceeding_limit

def get(self, request, organization): """ List an Organization's Repositories ``````````````````````````````````` Return a list of version control repositories for a given organization. :pparam string organization_slug: the organization short name :auth: required """ if not self.has_feature(request, organization): return self.respond( { 'error_type': 'unavailable_feature', 'detail': ['You do not have that feature enabled'] }, status=403) queryset = Repository.objects.filter(organization_id=organization.id, ) status = request.GET.get('status', 'active') if status == 'active': queryset = queryset.filter(status=ObjectStatus.VISIBLE, ) elif status == 'deleted': queryset = queryset.exclude(status=ObjectStatus.VISIBLE, ) # TODO(mn): Remove once old Plugins are removed or everyone migrates to # the new Integrations. Hopefully someday? elif status == 'unmigratable': integrations = Integration.objects.filter( organizationintegration__organization=organization, organizationintegration__status=ObjectStatus.ACTIVE, provider__in=('bitbucket', 'github', 'vsts'), status=ObjectStatus.ACTIVE, ) repos = [] for i in integrations: try: repos.extend( i.get_installation( organization.id).get_unmigratable_repositories()) except Exception: capture_exception() # Don't rely on the Integration's API being available. If # it's not, the page should still render. continue return Response(serialize(repos, request.user)) elif status: queryset = queryset.none() return self.paginate( request=request, queryset=queryset, order_by='name', on_results=lambda x: serialize(x, request.user), paginator_cls=OffsetPaginator, )
def wrapped(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except exclude:
        raise
    except on as exc:
        capture_exception()
        current.retry(exc=exc)

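The closure above references func, on, exclude, and current from an enclosing scope, so it reads like the body of a retry decorator factory. The following is only a minimal sketch of how such a factory could be wired up; the name retry, the default arguments, and the celery/sentry_sdk imports are assumptions rather than the original surrounding code.

import functools

from celery import current_task as current  # assumed stand-in for the `current` task proxy
from sentry_sdk import capture_exception


def retry(on=(Exception,), exclude=()):
    """Hypothetical factory: report the exception to Sentry, then ask Celery to retry."""

    def decorator(func):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except exclude:
                # excluded exception types propagate without a retry
                raise
            except on as exc:
                capture_exception()
                current.retry(exc=exc)

        return wrapped

    return decorator

# Usage (inside a Celery task module): decorate the task body with @retry(on=(SomeError,)).
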
def merge_export_blobs(data_export_id, **kwargs):
    try:
        data_export = ExportedData.objects.get(id=data_export_id)
    except ExportedData.DoesNotExist as error:
        logger.exception(error)
        return

    # adapted from `putfile` in `src/sentry/models/file.py`
    try:
        with transaction.atomic():
            file = File.objects.create(
                name=data_export.file_name,
                type="export.csv",
                headers={"Content-Type": "text/csv"},
            )
            size = 0
            file_checksum = sha1(b"")
            for export_blob in ExportedDataBlob.objects.filter(
                data_export=data_export
            ).order_by("offset"):
                blob = export_blob.blob
                FileBlobIndex.objects.create(file=file, blob=blob, offset=size)
                size += blob.size
                blob_checksum = sha1(b"")
                for chunk in blob.getfile().chunks():
                    blob_checksum.update(chunk)
                    file_checksum.update(chunk)
                if blob.checksum != blob_checksum.hexdigest():
                    raise AssembleChecksumMismatch("Checksum mismatch")
            file.size = size
            file.checksum = file_checksum.hexdigest()
            file.save()
            data_export.finalize_upload(file=file)
            logger.info("dataexport.end", extra={"data_export_id": data_export_id})
            metrics.incr("dataexport.end", sample_rate=1.0)
    except Exception as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.error(
            "dataexport.error: %s",
            six.text_type(error),
            extra={"query": data_export.payload, "org": data_export.organization_id},
        )
        capture_exception(error)
        if isinstance(error, IntegrityError):
            message = "Failed to save the assembled file."
        else:
            message = "Internal processing failure."
        return data_export.email_failure(message=message)

def get(self, request, organization): """ List an Organization's Repositories ``````````````````````````````````` Return a list of version control repositories for a given organization. :pparam string organization_slug: the organization short name :auth: required """ queryset = Repository.objects.filter(organization_id=organization.id) if not features.has( "organizations:integrations-ignore-vsts-deprecation", organization): queryset = queryset.exclude(provider="visualstudio") status = request.GET.get("status", "active") if status == "active": queryset = queryset.filter(status=ObjectStatus.VISIBLE) elif status == "deleted": queryset = queryset.exclude(status=ObjectStatus.VISIBLE) # TODO(mn): Remove once old Plugins are removed or everyone migrates to # the new Integrations. Hopefully someday? elif status == "unmigratable": integrations = Integration.objects.filter( organizationintegration__organization=organization, organizationintegration__status=ObjectStatus.ACTIVE, provider__in=("bitbucket", "github", "vsts"), status=ObjectStatus.ACTIVE, ) repos = [] for i in integrations: try: repos.extend( i.get_installation( organization.id).get_unmigratable_repositories()) except Exception: capture_exception() # Don't rely on the Integration's API being available. If # it's not, the page should still render. continue return Response(serialize(repos, request.user)) elif status: queryset = queryset.none() return self.paginate( request=request, queryset=queryset, order_by="name", on_results=lambda x: serialize(x, request.user), paginator_cls=OffsetPaginator, )
def run_callbacks(self, request):
    context = {}
    for cb in self._callbacks:
        try:
            result = cb(request)
            context.update(result)
        except Exception:
            capture_exception()
    return context

def get(self, request, organization): """ List an Organization's Repositories ``````````````````````````````````` Return a list of version control repositories for a given organization. :pparam string organization_slug: the organization short name :auth: required """ queryset = Repository.objects.filter( organization_id=organization.id, ) status = request.GET.get('status', 'active') if status == 'active': queryset = queryset.filter( status=ObjectStatus.VISIBLE, ) elif status == 'deleted': queryset = queryset.exclude( status=ObjectStatus.VISIBLE, ) # TODO(mn): Remove once old Plugins are removed or everyone migrates to # the new Integrations. Hopefully someday? elif status == 'unmigratable': integrations = Integration.objects.filter( organizationintegration__organization=organization, organizationintegration__status=ObjectStatus.ACTIVE, provider__in=('bitbucket', 'github', 'vsts'), status=ObjectStatus.ACTIVE, ) repos = [] for i in integrations: try: repos.extend(i.get_installation(organization.id) .get_unmigratable_repositories()) except Exception: capture_exception() # Don't rely on the Integration's API being available. If # it's not, the page should still render. continue return Response(serialize(repos, request.user)) elif status: queryset = queryset.none() return self.paginate( request=request, queryset=queryset, order_by='name', on_results=lambda x: serialize(x, request.user), paginator_cls=OffsetPaginator, )
def assemble_download(data_export_id):
    # Extract the ExportedData object
    try:
        logger.info("dataexport.start", extra={"data_export_id": data_export_id})
        data_export = ExportedData.objects.get(id=data_export_id)
    except ExportedData.DoesNotExist as error:
        capture_exception(error)
        return

    # Create a temporary file
    try:
        with tempfile.TemporaryFile() as tf:
            # Process the query based on its type
            if data_export.query_type == ExportQueryType.ISSUES_BY_TAG:
                file_name = process_issue_by_tag(data_export, tf)
            elif data_export.query_type == ExportQueryType.DISCOVER:
                file_name = process_discover(data_export, tf)
            # Create a new File object and attach it to the ExportedData
            tf.seek(0)
            try:
                with transaction.atomic():
                    file = File.objects.create(
                        name=file_name,
                        type="export.csv",
                        headers={"Content-Type": "text/csv"})
                    file.putfile(tf, logger=logger)
                    data_export.finalize_upload(file=file)
                    logger.info("dataexport.end", extra={"data_export_id": data_export_id})
            except IntegrityError as error:
                metrics.incr("dataexport.error", instance=six.text_type(error))
                logger.error(
                    "dataexport.error: {}".format(six.text_type(error)),
                    extra={"query": data_export.payload, "org": data_export.organization_id},
                )
                raise DataExportError("Failed to save the assembled file")
    except DataExportError as error:
        return data_export.email_failure(message=error)
    except NotImplementedError as error:
        return data_export.email_failure(message=error)
    except BaseException as error:
        metrics.incr("dataexport.error", instance=six.text_type(error))
        logger.error(
            "dataexport.error: {}".format(six.text_type(error)),
            extra={"query": data_export.payload, "org": data_export.organization_id},
        )
        return data_export.email_failure(message="Internal processing failure")

def process_rows(processor, data_export, batch_size, offset):
    try:
        if data_export.query_type == ExportQueryType.ISSUES_BY_TAG:
            rows = process_issues_by_tag(processor, batch_size, offset)
        elif data_export.query_type == ExportQueryType.DISCOVER:
            rows = process_discover(processor, batch_size, offset)
        return rows
    except ExportError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise

def wrapped(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except discover.InvalidSearchQuery as error:
        metrics.incr("dataexport.error", tags={"error": str(error)}, sample_rate=1.0)
        logger.warn("dataexport.error: %s", str(error))
        capture_exception(error)
        raise ExportError("Invalid query. Please fix the query and try again.")
    except snuba.QueryOutsideRetentionError as error:
        metrics.incr("dataexport.error", tags={"error": str(error)}, sample_rate=1.0)
        logger.warn("dataexport.error: %s", str(error))
        capture_exception(error)
        raise ExportError("Invalid date range. Please try a more recent date range.")
    except snuba.QueryIllegalTypeOfArgument as error:
        metrics.incr("dataexport.error", tags={"error": str(error)}, sample_rate=1.0)
        logger.warn("dataexport.error: %s", str(error))
        capture_exception(error)
        raise ExportError("Invalid query. Argument to function is wrong type.")
    except snuba.SnubaError as error:
        metrics.incr("dataexport.error", tags={"error": str(error)}, sample_rate=1.0)
        logger.warn("dataexport.error: %s", str(error))
        capture_exception(error)
        message = "Internal error. Please try again."
        recoverable = False
        if isinstance(
            error,
            (
                snuba.RateLimitExceeded,
                snuba.QueryMemoryLimitExceeded,
                snuba.QueryExecutionTimeMaximum,
                snuba.QueryTooManySimultaneous,
            ),
        ):
            message = "Query timeout. Please try again. If the problem persists try a smaller date range or fewer projects."
            recoverable = True
        elif isinstance(
            error,
            (
                snuba.DatasetSelectionError,
                snuba.QueryConnectionFailed,
                snuba.QuerySizeExceeded,
                snuba.QueryExecutionError,
                snuba.SchemaValidationError,
                snuba.UnqualifiedQueryError,
            ),
        ):
            message = "Internal error. Your query failed to run."
        raise ExportError(message, recoverable=recoverable)

def handle_exception(self, request, exc): if hasattr(exc, "code") and exc.code == 503: sys.stderr.write(traceback.format_exc()) event_id = capture_exception() context = {"detail": str(exc), "errorId": event_id} response = Response(context, status=503) response.exception = True return response return super().handle_exception(request, exc)
def process_rows(processor, data_export, batch_size, offset):
    try:
        if data_export.query_type == ExportQueryType.ISSUES_BY_TAG:
            rows = process_issues_by_tag(processor, batch_size, offset)
        elif data_export.query_type == ExportQueryType.DISCOVER:
            rows = process_discover(processor, batch_size, offset)
        else:
            raise ExportError(
                f"No processor found for this query type: {data_export.query_type}"
            )
        return rows
    except ExportError as error:
        error_str = str(error)
        metrics.incr("dataexport.error", tags={"error": error_str}, sample_rate=1.0)
        logger.info(f"dataexport.error: {error_str}")
        capture_exception(error)
        raise

def process_issues_by_tag(data_export, file, export_limit, batch_size, environment_id):
    """
    Convert the tag query to a CSV, writing it to the provided file.
    """
    payload = data_export.query_info
    try:
        processor = IssuesByTagProcessor(
            project_id=payload["project"][0],
            group_id=payload["group"],
            key=payload["key"],
            environment_id=environment_id,
        )
    except ExportError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise error

    writer = create_writer(file, processor.header_fields)
    iteration = 0
    with snuba_error_handler(logger=logger):
        is_completed = False
        while not is_completed:
            offset = batch_size * iteration
            next_offset = batch_size * (iteration + 1)
            is_exceeding_limit = export_limit and export_limit < next_offset
            gtv_list_unicode = processor.get_serialized_data(limit=batch_size, offset=offset)
            # TODO(python3): Remove next line once the 'csv' module has been updated to Python 3
            # See associated comment in './utils.py'
            gtv_list = convert_to_utf8(gtv_list_unicode)
            if is_exceeding_limit:
                # Since the next offset will pass the export_limit, just write the remainder
                writer.writerows(gtv_list[:export_limit % batch_size])
            else:
                writer.writerows(gtv_list)
            iteration += 1
            # If there are no returned results, or we've passed the export_limit, stop iterating
            is_completed = len(gtv_list) == 0 or is_exceeding_limit

def get_processor(data_export, environment_id):
    try:
        if data_export.query_type == ExportQueryType.ISSUES_BY_TAG:
            payload = data_export.query_info
            processor = IssuesByTagProcessor(
                project_id=payload["project"][0],
                group_id=payload["group"],
                key=payload["key"],
                environment_id=environment_id,
            )
        elif data_export.query_type == ExportQueryType.DISCOVER:
            processor = DiscoverProcessor(
                discover_query=data_export.query_info,
                organization_id=data_export.organization_id,
            )
        return processor
    except ExportError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise

def handle_exception(self, request, exc):
    if exc.code == 503:
        sys.stderr.write(traceback.format_exc())
        event_id = capture_exception()
        context = {
            'detail': six.text_type(exc),
            'errorId': event_id,
        }
        response = Response(context, status=503)
        response.exception = True
        return response
    return super(IntegrationEndpoint, self).handle_exception(request, exc)

def handle_exception(self, request, exc):
    if hasattr(exc, 'code') and exc.code == 503:
        sys.stderr.write(traceback.format_exc())
        event_id = capture_exception()
        context = {
            'detail': six.text_type(exc),
            'errorId': event_id,
        }
        response = Response(context, status=503)
        response.exception = True
        return response
    return super(IntegrationEndpoint, self).handle_exception(request, exc)

def handle_exception(self, request, exc):
    try:
        response = super(Endpoint, self).handle_exception(exc)
    except Exception:
        import sys
        import traceback

        sys.stderr.write(traceback.format_exc())
        event_id = capture_exception()
        context = {"detail": "Internal Error", "errorId": event_id}
        response = Response(context, status=500)
        response.exception = True
    return response

def process_issues_by_tag(data_export, file, limit, environment_id):
    """
    Convert the tag query to a CSV, writing it to the provided file.
    """
    payload = data_export.query_info
    try:
        processor = IssuesByTagProcessor(
            project_id=payload["project_id"],
            group_id=payload["group_id"],
            key=payload["key"],
            environment_id=environment_id,
        )
    except ExportError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise error

    # Iterate through all the GroupTagValues
    writer = create_writer(file, processor.header_fields)
    iteration = 0
    with snuba_error_handler(logger=logger):
        while True:
            offset = SNUBA_MAX_RESULTS * iteration
            next_offset = SNUBA_MAX_RESULTS * (iteration + 1)
            gtv_list_unicode = processor.get_serialized_data(offset=offset)
            if len(gtv_list_unicode) == 0:
                break
            # TODO(python3): Remove next line once the 'csv' module has been updated to Python 3
            # See associated comment in './utils.py'
            gtv_list = convert_to_utf8(gtv_list_unicode)
            if limit and limit < next_offset:
                # Since the next offset will pass the limit, write the remainder and quit
                writer.writerows(gtv_list[: limit % SNUBA_MAX_RESULTS])
                break
            else:
                writer.writerows(gtv_list)
            iteration += 1

def snuba_error_handler(logger):
    try:
        yield
    except discover.InvalidSearchQuery as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise ExportError("Invalid query. Please fix the query and try again.")
    except snuba.QueryOutsideRetentionError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise ExportError("Invalid date range. Please try a more recent date range.")
    except snuba.QueryIllegalTypeOfArgument as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        raise ExportError("Invalid query. Argument to function is wrong type.")
    except snuba.SnubaError as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info("dataexport.error: {}".format(six.text_type(error)))
        capture_exception(error)
        message = "Internal error. Please try again."
        if isinstance(
            error,
            (
                snuba.RateLimitExceeded,
                snuba.QueryMemoryLimitExceeded,
                snuba.QueryTooManySimultaneous,
            ),
        ):
            message = "Query timeout. Please try again. If the problem persists try a smaller date range or fewer projects."
        elif isinstance(
            error,
            (snuba.UnqualifiedQueryError, snuba.QueryExecutionError, snuba.SchemaValidationError),
        ):
            message = "Internal error. Your query failed to run."
        raise ExportError(message)

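Because the handler above is a generator (it yields once and converts exceptions raised in the caller's block into ExportError), it presumably needs to be wrapped with contextlib.contextmanager before it can be used in the `with snuba_error_handler(logger=logger):` blocks seen in the tag processors above. Below is only a minimal sketch of that wiring under that assumption; the one-clause handler body and the placeholder ExportError are simplifications, not the original code.

from contextlib import contextmanager


class ExportError(Exception):
    """Placeholder for the real ExportError used to surface user-facing messages."""


@contextmanager
def snuba_error_handler(logger):
    # The real handler matches specific discover/snuba exception types; this
    # simplified version only demonstrates the yield-based context-manager shape.
    try:
        yield
    except Exception as error:
        logger.info("dataexport.error: {}".format(error))
        raise ExportError("Internal error. Please try again.")


# Usage: any failure inside the block surfaces as a single ExportError.
# with snuba_error_handler(logger=logger):
#     rows = run_snuba_query()  # hypothetical query call
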
def handle_exception(self, request, exc):
    try:
        response = super(Endpoint, self).handle_exception(exc)
    except Exception as exc:
        import sys
        import traceback

        sys.stderr.write(traceback.format_exc())
        event_id = capture_exception()
        context = {
            'detail': 'Internal Error',
            'errorId': event_id,
        }
        response = Response(context, status=500)
        response.exception = True
    return response

def inner_dsym_download(project_id: int, config_id: str) -> None:
    """Downloads the dSYMs from App Store Connect and stores them in the Project's debug files."""
    with sdk.configure_scope() as scope:
        scope.set_tag("project", project_id)
        scope.set_tag("config_id", config_id)

    project = Project.objects.get(pk=project_id)
    config = appconnect.AppStoreConnectConfig.from_project_config(project, config_id)
    client = appconnect.AppConnectClient.from_config(config)

    listed_builds = client.list_builds()
    builds = process_builds(project=project, config=config, to_process=listed_builds)

    if not builds:
        return

    for i, (build, build_state) in enumerate(builds):
        with sdk.configure_scope() as scope:
            scope.set_context("dsym_downloads", {"total": len(builds), "completed": i})
        with tempfile.NamedTemporaryFile() as dsyms_zip:
            try:
                client.download_dsyms(build, pathlib.Path(dsyms_zip.name))
            # For no dSYMs, let the build be marked as fetched so they're not
            # repeatedly re-checked every time this task is run.
            except appconnect.NoDsymsError:
                logger.debug("No dSYMs for build %s", build)
            # Moves on to the next build so we don't check off fetched. This url will
            # eventuallyTM be populated, so revisit it at a later time.
            except appconnect.PendingDsymsError:
                logger.debug("dSYM url currently unavailable for build %s", build)
                continue
            # early-return in unauthorized and forbidden to avoid trying all the other builds
            # as well, since an expired token will error for all of them.
            # the error is also swallowed unreported because this is an expected and actionable
            # error.
            except appstoreconnect_api.UnauthorizedError:
                sentry_sdk.capture_message(
                    "Not authorized to download dSYM using current App Store Connect credentials",
                    level="info",
                )
                return
            except appstoreconnect_api.ForbiddenError:
                sentry_sdk.capture_message(
                    "Forbidden from downloading dSYM using current App Store Connect credentials",
                    level="info",
                )
                return
            # Don't let malformed URLs abort all pending downloads in case it's an isolated instance
            except ValueError as e:
                sdk.capture_exception(e)
                continue
            # Assume request errors are a server side issue and do not abort all the
            # pending downloads.
            except appstoreconnect_api.RequestError as e:
                sdk.capture_exception(e)
                continue
            except requests.RequestException as e:
                sdk.capture_exception(e)
                continue
            else:
                create_difs_from_dsyms_zip(dsyms_zip.name, project)
                logger.debug("Uploaded dSYMs for build %s", build)
                metrics.incr("tasks.app_store_connect.builds_ingested", sample_rate=1)

        build_state.fetched = True
        build_state.save()

def assemble_download(
    data_export_id,
    export_limit=EXPORTED_ROWS_LIMIT,
    batch_size=SNUBA_MAX_RESULTS,
    offset=0,
    bytes_written=0,
    environment_id=None,
    **kwargs
):
    first_page = offset == 0

    try:
        if first_page:
            logger.info("dataexport.start", extra={"data_export_id": data_export_id})
        data_export = ExportedData.objects.get(id=data_export_id)
        if first_page:
            metrics.incr("dataexport.start", tags={"success": True}, sample_rate=1.0)
        logger.info("dataexport.run", extra={"data_export_id": data_export_id, "offset": offset})
    except ExportedData.DoesNotExist as error:
        if first_page:
            metrics.incr("dataexport.start", tags={"success": False}, sample_rate=1.0)
        logger.exception(error)
        return

    try:
        if export_limit is None:
            export_limit = EXPORTED_ROWS_LIMIT
        else:
            export_limit = min(export_limit, EXPORTED_ROWS_LIMIT)

        # if there is an export limit, the last batch should only return up to the export limit
        if export_limit is not None:
            batch_size = min(batch_size, max(export_limit - offset, 0))

        processor = get_processor(data_export, environment_id)

        with tempfile.TemporaryFile() as tf:
            writer = csv.DictWriter(tf, processor.header_fields, extrasaction="ignore")
            if first_page:
                writer.writeheader()

            rows = process_rows(processor, data_export, batch_size, offset)
            writer.writerows(rows)

            next_offset = offset + len(rows)

            tf.seek(0)
            new_bytes_written = store_export_chunk_as_blob(data_export, bytes_written, tf)
            bytes_written += new_bytes_written
    except ExportError as error:
        return data_export.email_failure(message=six.text_type(error))
    except Exception as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.error(
            "dataexport.error: %s",
            six.text_type(error),
            extra={"query": data_export.payload, "org": data_export.organization_id},
        )
        capture_exception(error)

        try:
            current.retry()
        except MaxRetriesExceededError:
            return data_export.email_failure(message="Internal processing failure")
    else:
        if (
            rows
            and len(rows) >= batch_size
            and new_bytes_written
            and (export_limit is None or next_offset < export_limit)
        ):
            assemble_download.delay(
                data_export_id,
                export_limit=export_limit,
                batch_size=batch_size,
                offset=next_offset,
                bytes_written=bytes_written,
                environment_id=environment_id,
            )
        else:
            merge_export_blobs.delay(data_export_id)

def inner(*a, **k):
    try:
        return func(*a, **k)
    except Exception:
        capture_exception()

def assemble_download(data_export_id, limit=1000000, environment_id=None):
    # Get the ExportedData object
    try:
        logger.info("dataexport.start", extra={"data_export_id": data_export_id})
        metrics.incr("dataexport.start", tags={"success": True}, sample_rate=1.0)
        data_export = ExportedData.objects.get(id=data_export_id)
    except ExportedData.DoesNotExist as error:
        metrics.incr("dataexport.start", tags={"success": False}, sample_rate=1.0)
        capture_exception(error)
        return

    # Create a temporary file
    try:
        with tempfile.TemporaryFile() as tf:
            # Process the query based on its type
            if data_export.query_type == ExportQueryType.ISSUES_BY_TAG:
                process_issues_by_tag(
                    data_export=data_export, file=tf, limit=limit, environment_id=environment_id
                )
            elif data_export.query_type == ExportQueryType.DISCOVER:
                process_discover(
                    data_export=data_export, file=tf, limit=limit, environment_id=environment_id
                )
            # Create a new File object and attach it to the ExportedData
            tf.seek(0)
            try:
                with transaction.atomic():
                    file = File.objects.create(
                        name=data_export.file_name,
                        type="export.csv",
                        headers={"Content-Type": "text/csv"},
                    )
                    file.putfile(tf, logger=logger)
                    data_export.finalize_upload(file=file)
                    logger.info("dataexport.end", extra={"data_export_id": data_export_id})
                    metrics.incr("dataexport.end", sample_rate=1.0)
            except IntegrityError as error:
                metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
                logger.info(
                    "dataexport.error: {}".format(six.text_type(error)),
                    extra={"query": data_export.payload, "org": data_export.organization_id},
                )
                capture_exception(error)
                raise ExportError("Failed to save the assembled file")
    except ExportError as error:
        return data_export.email_failure(message=six.text_type(error))
    except BaseException as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.info(
            "dataexport.error: {}".format(six.text_type(error)),
            extra={"query": data_export.payload, "org": data_export.organization_id},
        )
        capture_exception(error)
        return data_export.email_failure(message="Internal processing failure")

def merge_export_blobs(data_export_id, **kwargs):
    with sentry_sdk.start_span(op="merge"):
        try:
            data_export = ExportedData.objects.get(id=data_export_id)
        except ExportedData.DoesNotExist as error:
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type", ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        # adapted from `putfile` in `src/sentry/models/file.py`
        try:
            with atomic_transaction(
                using=(
                    router.db_for_write(File),
                    router.db_for_write(FileBlobIndex),
                )
            ):
                file = File.objects.create(
                    name=data_export.file_name,
                    type="export.csv",
                    headers={"Content-Type": "text/csv"},
                )
                size = 0
                file_checksum = sha1(b"")
                for export_blob in ExportedDataBlob.objects.filter(
                    data_export=data_export
                ).order_by("offset"):
                    blob = FileBlob.objects.get(pk=export_blob.blob_id)
                    FileBlobIndex.objects.create(file=file, blob=blob, offset=size)
                    size += blob.size
                    blob_checksum = sha1(b"")
                    for chunk in blob.getfile().chunks():
                        blob_checksum.update(chunk)
                        file_checksum.update(chunk)
                    if blob.checksum != blob_checksum.hexdigest():
                        raise AssembleChecksumMismatch("Checksum mismatch")
                file.size = size
                file.checksum = file_checksum.hexdigest()
                file.save()

            # This is in a separate atomic transaction because in prod, files exist
            # outside of the primary database, which means that the transaction to
            # the primary database is idle the entire time the writes to the files
            # database are happening. In the event the writes to the files database
            # take longer than the idle timeout, the connection to the primary
            # database can time out, causing a failure.
            with atomic_transaction(using=router.db_for_write(ExportedData)):
                data_export.finalize_upload(file=file)

            time_elapsed = (timezone.now() - data_export.date_added).total_seconds()
            metrics.timing("dataexport.duration", time_elapsed, sample_rate=1.0)
            logger.info("dataexport.end", extra={"data_export_id": data_export_id})
            metrics.incr("dataexport.end", tags={"success": True}, sample_rate=1.0)
        except Exception as error:
            metrics.incr("dataexport.error", tags={"error": str(error)}, sample_rate=1.0)
            metrics.incr(
                "dataexport.end",
                tags={"success": False, "error": str(error)},
                sample_rate=1.0,
            )
            logger.error(
                "dataexport.error: %s",
                str(error),
                extra={"query": data_export.payload, "org": data_export.organization_id},
            )
            capture_exception(error)
            if isinstance(error, IntegrityError):
                message = "Failed to save the assembled file."
            else:
                message = "Internal processing failure."
            return data_export.email_failure(message=message)

def assemble_download(
    data_export_id,
    export_limit=EXPORTED_ROWS_LIMIT,
    batch_size=SNUBA_MAX_RESULTS,
    offset=0,
    bytes_written=0,
    environment_id=None,
    **kwargs
):
    with sentry_sdk.start_transaction(
        op="task.data_export.assemble",
        name="DataExportAssemble",
        sampled=True,
    ):
        first_page = offset == 0

        try:
            if first_page:
                logger.info("dataexport.start", extra={"data_export_id": data_export_id})
            data_export = ExportedData.objects.get(id=data_export_id)
            if first_page:
                metrics.incr("dataexport.start", tags={"success": True}, sample_rate=1.0)
            logger.info(
                "dataexport.run", extra={"data_export_id": data_export_id, "offset": offset}
            )
        except ExportedData.DoesNotExist as error:
            if first_page:
                metrics.incr("dataexport.start", tags={"success": False}, sample_rate=1.0)
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type", ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        try:
            # ensure that the export limit is set and capped at EXPORTED_ROWS_LIMIT
            if export_limit is None:
                export_limit = EXPORTED_ROWS_LIMIT
            else:
                export_limit = min(export_limit, EXPORTED_ROWS_LIMIT)

            processor = get_processor(data_export, environment_id)

            with tempfile.TemporaryFile(mode="w+b") as tf:
                # XXX(python3):
                #
                # In python2 land we write utf-8 encoded strings as bytes via
                # the csv writer (see convert_to_utf8). The CSV writer will
                # ONLY write bytes, even if you give it unicode it will convert
                # it to bytes.
                #
                # In python3 we write unicode strings (which is all the csv
                # module is able to do, it will NOT write bytes like in py2).
                # Because of this we use the codec getwriter to transform our
                # file handle to a stream writer that will encode to utf8.
                if six.PY2:
                    tfw = tf
                else:
                    tfw = codecs.getwriter("utf-8")(tf)

                writer = csv.DictWriter(tfw, processor.header_fields, extrasaction="ignore")
                if first_page:
                    writer.writeheader()

                # the position in the file at the end of the headers
                starting_pos = tf.tell()

                # the row offset relative to the start of the current task
                # this offset tells you the number of rows written during this batch fragment
                fragment_offset = 0

                # the absolute row offset from the beginning of the export
                next_offset = offset + fragment_offset

                while True:
                    # the number of rows to export in the next batch fragment
                    fragment_row_count = min(batch_size, max(export_limit - next_offset, 1))

                    rows = process_rows(processor, data_export, fragment_row_count, next_offset)
                    writer.writerows(rows)

                    fragment_offset += len(rows)
                    next_offset = offset + fragment_offset

                    if (
                        not rows
                        or len(rows) < batch_size
                        # the batch may exceed MAX_BATCH_SIZE but immediately stops
                        or tf.tell() - starting_pos >= MAX_BATCH_SIZE
                    ):
                        break

                tf.seek(0)
                new_bytes_written = store_export_chunk_as_blob(data_export, bytes_written, tf)
                bytes_written += new_bytes_written
        except ExportError as error:
            return data_export.email_failure(message=six.text_type(error))
        except Exception as error:
            metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
            logger.error(
                "dataexport.error: %s",
                six.text_type(error),
                extra={"query": data_export.payload, "org": data_export.organization_id},
            )
            capture_exception(error)

            try:
                current.retry()
            except MaxRetriesExceededError:
                metrics.incr(
                    "dataexport.end",
                    tags={"success": False, "error": six.text_type(error)},
                    sample_rate=1.0,
                )
                return data_export.email_failure(message="Internal processing failure")
        else:
            if (
                rows
                and len(rows) >= batch_size
                and new_bytes_written
                and next_offset < export_limit
            ):
                assemble_download.delay(
                    data_export_id,
                    export_limit=export_limit,
                    batch_size=batch_size,
                    offset=next_offset,
                    bytes_written=bytes_written,
                    environment_id=environment_id,
                )
            else:
                metrics.timing("dataexport.row_count", next_offset, sample_rate=1.0)
                metrics.timing("dataexport.file_size", bytes_written, sample_rate=1.0)
                merge_export_blobs.delay(data_export_id)

def merge_export_blobs(data_export_id, **kwargs):
    with sentry_sdk.start_transaction(
        op="task.data_export.merge",
        name="DataExportMerge",
        sampled=True,
    ):
        try:
            data_export = ExportedData.objects.get(id=data_export_id)
        except ExportedData.DoesNotExist as error:
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type", ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        # adapted from `putfile` in `src/sentry/models/file.py`
        try:
            with transaction.atomic():
                file = File.objects.create(
                    name=data_export.file_name,
                    type="export.csv",
                    headers={"Content-Type": "text/csv"},
                )
                size = 0
                file_checksum = sha1(b"")
                for export_blob in ExportedDataBlob.objects.filter(
                    data_export=data_export
                ).order_by("offset"):
                    blob = export_blob.blob
                    FileBlobIndex.objects.create(file=file, blob=blob, offset=size)
                    size += blob.size
                    blob_checksum = sha1(b"")
                    for chunk in blob.getfile().chunks():
                        blob_checksum.update(chunk)
                        file_checksum.update(chunk)
                    if blob.checksum != blob_checksum.hexdigest():
                        raise AssembleChecksumMismatch("Checksum mismatch")
                file.size = size
                file.checksum = file_checksum.hexdigest()
                file.save()
                data_export.finalize_upload(file=file)

                time_elapsed = (timezone.now() - data_export.date_added).total_seconds()
                metrics.timing("dataexport.duration", time_elapsed, sample_rate=1.0)
                logger.info("dataexport.end", extra={"data_export_id": data_export_id})
                metrics.incr("dataexport.end", tags={"success": True}, sample_rate=1.0)
        except Exception as error:
            metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
            metrics.incr(
                "dataexport.end",
                tags={"success": False, "error": six.text_type(error)},
                sample_rate=1.0,
            )
            logger.error(
                "dataexport.error: %s",
                six.text_type(error),
                extra={"query": data_export.payload, "org": data_export.organization_id},
            )
            capture_exception(error)
            if isinstance(error, IntegrityError):
                message = "Failed to save the assembled file."
            else:
                message = "Internal processing failure."
            return data_export.email_failure(message=message)

def assemble_download(
    data_export_id,
    export_limit=EXPORTED_ROWS_LIMIT,
    batch_size=SNUBA_MAX_RESULTS,
    offset=0,
    bytes_written=0,
    environment_id=None,
    **kwargs
):
    first_page = offset == 0

    try:
        if first_page:
            logger.info("dataexport.start", extra={"data_export_id": data_export_id})
        data_export = ExportedData.objects.get(id=data_export_id)
        if first_page:
            metrics.incr("dataexport.start", tags={"success": True}, sample_rate=1.0)
        logger.info("dataexport.run", extra={"data_export_id": data_export_id, "offset": offset})
    except ExportedData.DoesNotExist as error:
        if first_page:
            metrics.incr("dataexport.start", tags={"success": False}, sample_rate=1.0)
        logger.exception(error)
        return

    try:
        # ensure that the export limit is set and capped at EXPORTED_ROWS_LIMIT
        if export_limit is None:
            export_limit = EXPORTED_ROWS_LIMIT
        else:
            export_limit = min(export_limit, EXPORTED_ROWS_LIMIT)

        processor = get_processor(data_export, environment_id)

        with tempfile.TemporaryFile() as tf:
            writer = csv.DictWriter(tf, processor.header_fields, extrasaction="ignore")
            if first_page:
                writer.writeheader()

            # the position in the file at the end of the headers
            starting_pos = tf.tell()

            # the row offset relative to the start of the current task
            # this offset tells you the number of rows written during this batch fragment
            fragment_offset = 0

            # the absolute row offset from the beginning of the export
            next_offset = offset + fragment_offset

            while True:
                # the number of rows to export in the next batch fragment
                fragment_row_count = min(batch_size, max(export_limit - next_offset, 1))

                rows = process_rows(processor, data_export, fragment_row_count, next_offset)
                writer.writerows(rows)

                fragment_offset += len(rows)
                next_offset = offset + fragment_offset

                if (
                    not rows
                    or len(rows) < batch_size
                    # the batch may exceed MAX_BATCH_SIZE but immediately stops
                    or tf.tell() - starting_pos >= MAX_BATCH_SIZE
                ):
                    break

            tf.seek(0)
            new_bytes_written = store_export_chunk_as_blob(data_export, bytes_written, tf)
            bytes_written += new_bytes_written
    except ExportError as error:
        return data_export.email_failure(message=six.text_type(error))
    except Exception as error:
        metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
        logger.error(
            "dataexport.error: %s",
            six.text_type(error),
            extra={"query": data_export.payload, "org": data_export.organization_id},
        )
        capture_exception(error)

        try:
            current.retry()
        except MaxRetriesExceededError:
            metrics.incr(
                "dataexport.end",
                tags={"success": False, "error": six.text_type(error)},
                sample_rate=1.0,
            )
            return data_export.email_failure(message="Internal processing failure")
    else:
        if rows and len(rows) >= batch_size and new_bytes_written and next_offset < export_limit:
            assemble_download.delay(
                data_export_id,
                export_limit=export_limit,
                batch_size=batch_size,
                offset=next_offset,
                bytes_written=bytes_written,
                environment_id=environment_id,
            )
        else:
            metrics.timing("dataexport.row_count", next_offset)
            metrics.timing("dataexport.file_size", bytes_written)
            merge_export_blobs.delay(data_export_id)

def inner_dsym_download(project_id: int, config_id: str) -> None:
    """Downloads the dSYMs from App Store Connect and stores them in the Project's debug files."""
    with sdk.configure_scope() as scope:
        scope.set_tag("project", project_id)
        scope.set_tag("config_id", config_id)

    project = Project.objects.get(pk=project_id)
    config = appconnect.AppStoreConnectConfig.from_project_config(project, config_id)
    client = appconnect.AppConnectClient.from_config(config)

    listed_builds = client.list_builds()
    builds = process_builds(project=project, config=config, to_process=listed_builds)

    if not builds:
        # No point in trying to see if we have valid iTunes credentials.
        return
    try:
        itunes_client = client.itunes_client()
    except itunes_connect.SessionExpiredError:
        logger.debug("No valid iTunes session, can not download dSYMs")
        return

    for i, (build, build_state) in enumerate(builds):
        with sdk.configure_scope() as scope:
            scope.set_context("dsym_downloads", {"total": len(builds), "completed": i})
        with tempfile.NamedTemporaryFile() as dsyms_zip:
            try:
                itunes_client.download_dsyms(build, pathlib.Path(dsyms_zip.name))
            except appconnect.NoDsymsError:
                logger.debug("No dSYMs for build %s", build)
            except itunes_connect.SessionExpiredError:
                logger.debug("Error fetching dSYMs: expired iTunes session")
                # we early-return here to avoid trying all the other builds
                # as well, since an expired token will error for all of them.
                # we also swallow the error and not report it because this is
                # a totally expected error and not actionable.
                return
            except itunes_connect.ForbiddenError:
                sentry_sdk.capture_message(
                    "Forbidden iTunes dSYM download, probably switched to wrong org", level="info"
                )
                return
            except requests.RequestException as e:
                # Assume these are errors with the server side and do not abort all the
                # pending downloads.
                sdk.capture_exception(e)
                continue
            else:
                create_difs_from_dsyms_zip(dsyms_zip.name, project)
                logger.debug("Uploaded dSYMs for build %s", build)

        # If we either downloaded, or didn't need to download the dSYMs
        # (there was no dSYM url), we check off this build.
        build_state.fetched = True
        build_state.save()

def dispatch(self, request, pipeline): if "finish_pipeline" in request.GET: return pipeline.finish_pipeline() organization = pipeline.organization account_number = pipeline.fetch_state("account_number") region = pipeline.fetch_state("region") project_id = pipeline.fetch_state("project_id") aws_external_id = pipeline.fetch_state("aws_external_id") enabled_lambdas = pipeline.fetch_state("enabled_lambdas") sentry_project_dsn = get_dsn_for_project(organization.id, project_id) lambda_client = gen_aws_client(account_number, region, aws_external_id) lambda_functions = get_supported_functions(lambda_client) lambda_functions.sort(key=lambda x: x["FunctionName"].lower()) def is_lambda_enabled(function): name = function["FunctionName"] # check to see if the user wants to enable this function return enabled_lambdas.get(name) lambda_functions = filter(is_lambda_enabled, lambda_functions) def _enable_lambda(function): try: enable_single_lambda(lambda_client, function, sentry_project_dsn) return (True, function, None) except Exception as e: return (False, function, e) failures = [] success_count = 0 with ThreadPoolExecutor(max_workers=10) as _lambda_setup_thread_pool: # use threading here to parallelize requests # no timeout on the thread since the underlying request will time out # if it takes too long for success, function, e in _lambda_setup_thread_pool.map( _enable_lambda, lambda_functions ): name = function["FunctionName"] if success: success_count += 1 else: # need to make sure we catch any error to continue to the next function err_message = str(e) is_custom_err, err_message = get_sentry_err_message(err_message) if not is_custom_err: capture_exception(e) err_message = _("Unknown Error") failures.append({"name": function["FunctionName"], "error": err_message}) logger.info( "update_function_configuration.error", extra={ "organization_id": organization.id, "lambda_name": name, "account_number": account_number, "region": region, "error": str(e), }, ) analytics.record( "integrations.serverless_setup", user_id=request.user.id, organization_id=organization.id, integration="aws_lambda", success_count=success_count, failure_count=len(failures), ) # if we have failures, show them to the user # otherwise, finish if failures: return self.render_react_view( request, "awsLambdaFailureDetails", {"lambdaFunctionFailures": failures, "successCount": success_count}, ) else: return pipeline.finish_pipeline()