def create_failed_event(cache_key, project_id, issues, event_id, start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
       reprocessing.get_reprocessing_revision(project_id, cached=False) != \
       reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'},
                     skip_internal=False)
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
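# --- Illustration (not part of the original source) ----------------------
# `create_failed_event` above signals "retry me" by raising `RetryProcessing`.
# A minimal sketch of that contract, assuming the exception is a plain marker
# class (the real definition lives elsewhere in the codebase and may differ):

class RetryProcessing(Exception):
    """Raised when the reprocessing revision changed mid-flight and the
    event should be re-queued instead of being turned into a raw event."""


# Hypothetical caller: convert the marker exception into a re-enqueue of the
# processing task (mirrors the `except RetryProcessing` blocks further down).
def fail_or_retry(cache_key, project_id, issues, event_id, process_task, rev):
    try:
        return create_failed_event(cache_key, project_id, issues,
                                   event_id=event_id, reprocessing_rev=rev)
    except RetryProcessing:
        process_task.delay(cache_key, event_id=event_id)
        return False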
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data,
                                  _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'},
                     skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Event enhancers.  These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    try:
        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data,
                                  _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'

    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.  `submit_process`
            # re-enqueues the processing task, so a second
            # `process_task.delay` here would double-schedule the event.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id,
                           start_time, data)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
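# --- Illustration (not part of the original source) ----------------------
# The `except RetrySymbolication` branch above relies on the exception
# carrying a `retry_after` hint that is fed straight into Celery's
# `countdown`.  A minimal sketch of that shape (assumption: the real class
# in Sentry's native-symbolication code may carry more state than this):

class RetrySymbolication(Exception):
    def __init__(self, retry_after=None):
        # Seconds to wait before re-queueing the processing task; used
        # verbatim as `countdown=e.retry_after` above.
        self.retry_after = retry_after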
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
        "store.load-shed-process-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing.  First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data.  This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                new_data = safe_execute(scrub_data, project=project, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception("tasks.store.preprocessors.error")
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        cache_key = event_processing_store.store(data)

    from_reprocessing = process_task is process_event_from_reprocessing
    submit_save_event(project, from_reprocessing, cache_key, event_id, start_time, data)
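# --- Illustration (not part of the original source) ----------------------
# The version above wraps each pipeline stage in a tracing span plus a timer
# and tags both with the plugin slug.  A condensed, self-contained sketch of
# that instrumentation idiom using only the public sentry_sdk API (the `op`
# string and helper name are illustrative):

from contextlib import contextmanager

import sentry_sdk


@contextmanager
def traced_stage(op, **tags):
    with sentry_sdk.start_span(op=op) as span:
        for key, value in tags.items():
            span.set_data(key, value)
        yield span

# Usage sketch:
#   with traced_stage("task.store.process_event.preprocessors", plugin=plugin.slug):
#       ...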
def create_failed_event(
    cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None
):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    set_current_event_project(project_id)

    # We can only create failed events for events that can potentially
    # create failed events.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    # If this event has just been reprocessed with reprocessing-v2, we don't
    # put it through reprocessing-v1 again.  The value of reprocessing-v2 is
    # partially that one sees the entire event even in its failed state, all
    # the time.
    if reprocessing2.is_reprocessed_event(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (
        reprocessing_active
        and reprocessing.get_reprocessing_revision(project_id, cached=False) != reprocessing_rev
    ):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={"reprocessing_active": reprocessing_active, "issues": issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False
        )
        error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import ProcessingIssue, RawEvent

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    event_processing_store.delete_by_key(cache_key)

    return True
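# --- Illustration (not part of the original source) ----------------------
# Return contract of `create_failed_event` as exercised by its callers:
# False means "continue the pipeline", True means "a raw event was recorded,
# stop here", and `RetryProcessing` means "re-run processing".  A hedged
# usage sketch (the helper name and `requeue` callback are hypothetical):

def maybe_abort_pipeline(cache_key, data, project_id, issues, event_id,
                         start_time, reprocessing_rev, requeue):
    try:
        return create_failed_event(
            cache_key, data, project_id, list(issues.values()),
            event_id=event_id, start_time=start_time,
            reprocessing_rev=reprocessing_rev,
        )
    except RetryProcessing:
        requeue()  # hand the event back to the processing task
        return True  # treated as "stop the current pipeline run"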
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    event_id = data["event_id"]

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(
                    enhancer, data, _passthrough_errors=(RetrySymbolication,)
                )
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 120:
            error_logger.warning(
                "process.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > 3600:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by preprocessor"

    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.  `submit_process`
            # already re-enqueues the processing task, so we must not also
            # call `process_task.delay` here.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
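# --- Illustration (not part of the original source) ----------------------
# The retry branch above implements a two-threshold policy: log a warning
# past a soft timeout, stop re-queueing (and fall through to persist the
# unsymbolicated event) past a hard one.  Standalone sketch with the same
# hardcoded thresholds; later versions read them from Django settings:

import logging
from time import time

logger = logging.getLogger(__name__)


def should_retry_symbolication(start_time, warn_after=120, give_up_after=3600):
    age = time() - start_time
    if age > warn_after:
        logger.warning("process.slow", extra={"age": age})
    # Past the hard timeout we continue the pipeline instead of re-queueing.
    return age <= give_up_after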
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'},
                     skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data,
                                        _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'

    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.  `submit_process`
            # already re-enqueues the processing task, so we do not also
            # call `process_task.delay` here.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id,
                           start_time, data)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(
                    enhancer, data, _passthrough_errors=(RetrySymbolication,)
                )
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "process.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing.  First round happened as part of ingest.
    #
    # We assume that all potential PII is produced as part of stacktrace
    # processors and event enhancers.
    #
    # We assume that plugins for eg sessionstack (running via
    # `plugin.get_event_preprocessors`) are not producing data that should be
    # PII-stripped, ever.
    #
    # XXX(markus): Javascript event error translation is happening after this block
    # because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`, possibly move?
    if has_changed and features.has(
        "organizations:datascrubbers-v2", project.organization, actor=None
    ):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.  `submit_process`
            # already re-enqueues the processing task, so we do not also
            # call `process_task.delay` here.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
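# --- Illustration (not part of the original source) ----------------------
# Every version above downgrades canonical wrapper types before caching,
# because the cache can only hold plain builtins.  The idiom extracted into
# a helper (assumes CANONICAL_TYPES is a tuple of mapping types such as
# CanonicalKeyDict, as in the snippets above):

def downgrade_for_cache(data):
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())
    return data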
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'},
                     skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Event enhancers.  These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    # Stacktrace based event processors.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
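# --- Illustration (not part of the original source) ----------------------
# The version above runs two v2 plugin hooks in a fixed order:
# `get_event_enhancers` before stacktrace processing and
# `get_event_preprocessors` after.  Both loops share one shape, condensed
# here into a hypothetical helper (errors are swallowed per callback, which
# is what `safe_execute` does in the original):

import logging


def run_plugin_hook(plugins, hook_name, data):
    changed = False
    for plugin in plugins:
        try:
            callbacks = getattr(plugin, hook_name)(data=data) or ()
        except Exception:
            logging.getLogger(__name__).exception("plugin.%s.failed", hook_name)
            continue
        for callback in callbacks:
            try:
                result = callback(data)
            except Exception:
                logging.getLogger(__name__).exception("plugin.callback.failed")
                continue
            if result:
                data, changed = result, True
    return data, changed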
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    new_process_behavior=None,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    has_changed = bool(data_has_changed)
    new_process_behavior = bool(new_process_behavior)

    metrics.incr(
        "tasks.store.process_event.new_process_behavior", tags={"value": new_process_behavior}
    )

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        if not new_process_behavior:
            # Event enhancers.  These run before anything else.
            for plugin in plugins.all(version=2):
                with metrics.timer(
                    "tasks.store.process_event.enhancers", tags={"plugin": plugin.slug}
                ):
                    enhancers = safe_execute(plugin.get_event_enhancers, data=data)
                    for enhancer in enhancers or ():
                        enhanced = safe_execute(
                            enhancer, data, _passthrough_errors=(RetrySymbolication,)
                        )
                        if enhanced:
                            data = enhanced
                            has_changed = True

        # Stacktrace based event processors.
        with metrics.timer("tasks.store.process_event.stacktraces"):
            new_data = process_stacktraces(data)

        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "process.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing.  First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors and `get_event_enhancers`.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data.  This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (
        has_changed
        and options.get("processing.can-use-scrubbers")
        and features.has("organizations:datascrubbers-v2", project.organization, actor=None)
    ):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with metrics.timer(
            "tasks.store.process_event.preprocessors", tags={"plugin": plugin.slug}
        ):
            processors = safe_execute(
                plugin.get_event_preprocessors, data=data, _with_transaction=False
            )
            for processor in processors or ():
                result = safe_execute(processor, data)
                if result:
                    data = result
                    has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
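# --- Illustration (not part of the original source) ----------------------
# The scrub step in this final version is double-gated: a runtime option
# (`processing.can-use-scrubbers`) *and* a per-organization feature flag.
# Equivalent standalone predicate (helper name is hypothetical; `options`
# and `features` are the same module-level imports the snippets already use):

def scrubbing_enabled(project):
    return bool(
        options.get("processing.can-use-scrubbers")
        and features.has(
            "organizations:datascrubbers-v2", project.organization, actor=None
        )
    )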