def delete_raw_event(project_id, event_id, allow_hint_clear=False):
    set_current_event_project(project_id)

    if event_id is None:
        error_logger.error("process.failed_delete_raw_event", extra={"project_id": project_id})
        return

    from sentry.models import RawEvent, ReprocessingReport

    RawEvent.objects.filter(project_id=project_id, event_id=event_id).delete()
    ReprocessingReport.objects.filter(project_id=project_id, event_id=event_id).delete()

    # Clear the sent notification if we reprocessed everything
    # successfully and reprocessing is enabled. The caller must opt into
    # clearing the hint via `allow_hint_clear`.
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )
    if reprocessing_active and allow_hint_clear:
        sent_notification = ProjectOption.objects.get_value(
            project_id, "sentry:sent_failed_event_hint", False
        )
        if sent_notification:
            if ReprocessingReport.objects.filter(
                project_id=project_id, event_id=event_id
            ).exists():
                project = Project.objects.get_from_cache(id=project_id)
                ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", False)
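# Illustrative usage, not part of the original module: a minimal sketch of how
# the save path below (`_do_save_event`) invokes this helper once an event no
# longer needs its raw copy. The ids are hypothetical placeholders.
#
#   delete_raw_event(
#       project_id=42,
#       event_id="0123456789abcdef0123456789abcdef",
#       allow_hint_clear=True,
#   )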
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project):
    from sentry.lang.native.processing import should_process_with_symbolicator

    if cache_key and data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "pre"}, skip_internal=False
        )
        error_logger.error("preprocess.failed.empty", extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]
    set_current_event_project(project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    from_reprocessing = process_task is process_event_from_reprocessing

    with metrics.timer("tasks.store.preprocess_event.organization.get_from_cache"):
        project.set_cached_field_value(
            "organization", Organization.objects.get_from_cache(id=project.organization_id)
        )

    if should_process_with_symbolicator(data):
        reprocessing2.backup_unprocessed_event(project=project, data=original_data)
        submit_symbolicate(
            project, from_reprocessing, cache_key, event_id, start_time, original_data
        )
        return

    if should_process(data):
        submit_process(
            project,
            from_reprocessing,
            cache_key,
            event_id,
            start_time,
            data_has_changed=False,
        )
        return

    submit_save_event(project, from_reprocessing, cache_key, event_id, start_time, original_data)
def insert(
    self,
    group,
    event,
    is_new,
    is_regression,
    is_new_group_environment,
    primary_hash,
    received_timestamp,  # type: float
    skip_consume=False,
):
    project = event.project
    set_current_event_project(project.id)

    retention_days = quotas.get_event_retention(organization=project.organization)

    event_data = event.get_raw_data(for_stream=True)

    unexpected_tags = {
        k
        for (k, v) in (get_path(event_data, "tags", filter=True) or [])
        if k in self.UNEXPECTED_TAG_KEYS
    }
    if unexpected_tags:
        logger.error("%r received unexpected tags: %r", self, unexpected_tags)

    self._send(
        project.id,
        "insert",
        extra_data=(
            {
                "group_id": event.group_id,
                "event_id": event.event_id,
                "organization_id": project.organization_id,
                "project_id": event.project_id,
                # TODO(mitsuhiko): We do not want to send this incorrect
                # message but this is what snuba needs at the moment.
                "message": event.search_message,
                "platform": event.platform,
                "datetime": event.datetime,
                "data": event_data,
                "primary_hash": primary_hash,
                "retention_days": retention_days,
            },
            {
                "is_new": is_new,
                "is_regression": is_regression,
                "is_new_group_environment": is_new_group_environment,
                "skip_consume": skip_consume,
            },
        ),
        headers={"Received-Timestamp": str(received_timestamp)},
    )
def plugin_post_process_group(plugin_slug, event, **kwargs):
    """
    Fires post processing hooks for a group.
    """
    set_current_event_project(event.project_id)

    from sentry.plugins.base import plugins

    plugin = plugins.get(plugin_slug)
    safe_execute(
        plugin.post_process,
        event=event,
        group=event.group,
        expected_errors=(PluginError,),
        _with_transaction=False,
        **kwargs,
    )
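# Illustrative sketch, not part of the original module: the rough shape of a
# plugin `post_process` hook targeted by the call above. `ExampleNotifyPlugin`
# and `notify_my_service` are hypothetical names. Because the hook runs under
# `safe_execute` with `expected_errors=(PluginError,)`, a raised `PluginError`
# is logged as an expected failure and does not break post-processing for
# other plugins.
#
#   class ExampleNotifyPlugin(Plugin2):
#       def post_process(self, event, group, **kwargs):
#           if group is not None and event.get_event_type() == "error":
#               notify_my_service(event)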
def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, **kwargs
):
    """
    Fires post processing hooks for a group.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        data = event_processing_store.get(cache_key)
        if not data:
            logger.info(
                "post_process.skipped",
                extra={"cache_key": cache_key, "reason": "missing_cache"},
            )
            return
        event = Event(
            project_id=data["project"], event_id=data["event_id"], group_id=group_id, data=data
        )

        set_current_event_project(event.project_id)

        is_transaction_event = not bool(event.group_id)

        from sentry.models import EventDict, Organization, Project

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project.set_cached_field_value(
            "organization", Organization.objects.get_from_cache(id=event.project.organization_id)
        )

        # Simplified post processing for transaction events.
        # This should eventually be completely removed and transactions
        # will not go through any post processing.
        if is_transaction_event:
            transaction_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
            )

            event_processing_store.delete_by_key(cache_key)

            return

        is_reprocessed = is_reprocessed_event(event.data)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind Group since we're reading the Event object
        # from cache, which may contain a stale group and project
        event.group, _ = get_group_with_redirect(event.group_id)
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value("organization", event.project.organization)

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if is_reprocessed and is_new:
            add_group_to_inbox(event.group, GroupInboxReason.REPROCESSED)

        if not is_reprocessed:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)
            if not has_reappeared:  # If true, we added the .UNIGNORED reason already
                if is_new:
                    add_group_to_inbox(event.group, GroupInboxReason.NEW)
                elif is_regression:
                    add_group_to_inbox(event.group, GroupInboxReason.REGRESSION)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                safe_execute(callback, event, futures, _with_transaction=False)

            try:
                lock = locks.get(
                    f"w-o:{event.group_id}-d-l",
                    duration=10,
                )
                with lock.acquire():
                    has_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(has_commit_key)
                    if org_has_commit is None:
                        org_has_commit = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        ).exists()
                        cache.set(has_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        group_cache_key = f"w-o-i:g-{event.group_id}"
                        if cache.get(group_cache_key):
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
                        else:
                            from sentry.utils.committers import get_frame_paths

                            cache.set(group_cache_key, True, 604800)  # 1 week in seconds
                            event_frames = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=event_frames,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
            except UnableToAcquireLock:
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = {"event.created"}
                if has_alert:
                    allowed_events.add("event.alert")

                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(project_id=event.project_id):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error" and _should_send_error_created_hooks(
                event.project
            ):
                process_resource_change_bound.delay(
                    action="created", sender="Error", instance_id=event.event_id, instance=event
                )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                plugin_post_process_group(
                    plugin_slug=plugin.slug,
                    event=event,
                    is_new=is_new,
                    is_regression=is_regression,
                )

            from sentry import similarity

            safe_execute(similarity.record, event.project, [event], _with_transaction=False)

        # Patch attachments that were ingested on the standalone path.
        update_existing_attachments(event)

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """
    set_current_event_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")
            set_current_event_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing. If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache. The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it. This causes the node store to delete the data and we end up
        # fetching an empty dict. We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI. So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr(
                "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
            )
            return

        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                manager.save(
                    project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key
                )
                # Put the updated event back into the cache so that post_process
                # has the most recent data.
                data = manager.get_data()
                if isinstance(data, CANONICAL_TYPES):
                    data = dict(data.items())
                with metrics.timer("tasks.store.do_save_event.write_processing_cache"):
                    event_processing_store.store(data)
        except HashDiscarded:
            # Delete the event payload from cache since it won't show up in post-processing.
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)
        finally:
            reprocessing2.mark_event_reprocessed(data)
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )

            time_synthetic_monitoring_event(data, project_id, start_time)
def create_failed_event(
    cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None
):
    """If processing failed we put the original data from the cache into a
    raw event. Returns `True` if a failed event was inserted
    """
    set_current_event_project(project_id)

    # We can only create failed events for events that can potentially
    # create failed events.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    # If this event has just been reprocessed with reprocessing-v2, we don't
    # put it through reprocessing-v1 again. The value of reprocessing-v2 is
    # partially that one sees the entire event even in its failed state, all
    # the time.
    if reprocessing2.is_reprocessed_event(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event. This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (
        reprocessing_active
        and reprocessing.get_reprocessing_revision(project_id, cached=False) != reprocessing_rev
    ):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={"reprocessing_active": reprocessing_active, "issues": issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False
        )
        error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import ProcessingIssue, RawEvent

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    event_processing_store.delete_by_key(cache_key)

    return True
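# Illustrative data shape, not part of the original module: each entry of
# `issues` passed to `create_failed_event` above must carry the keys it
# forwards to `record_processing_issue`. The concrete values here are
# hypothetical:
#
#   issues = [
#       {
#           "scope": "native",
#           "object": "dsym:macho:abc123",
#           "type": "native_missing_dsym",
#           "data": {"image_arch": "arm64"},
#       }
#   ]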
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
        "store.load-shed-process-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                new_data = safe_execute(scrub_data, project=project, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception("tasks.store.preprocessors.error")
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        cache_key = event_processing_store.store(data)

    from_reprocessing = process_task is process_event_from_reprocessing
    submit_save_event(project, from_reprocessing, cache_key, event_id, start_time, data)
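# Illustrative sketch, not part of the original module: the contract of the
# per-plugin preprocessors consumed in the loop above. Each callable receives
# the event payload dict and returns a mutated payload (which marks the data
# as changed) or a falsy value (which leaves it untouched). `ExamplePlugin`
# and the `debug_meta`-stripping behavior are hypothetical:
#
#   class ExamplePlugin(Plugin2):
#       def get_event_preprocessors(self, data):
#           def strip_debug_meta(data):
#               if data.pop("debug_meta", None) is not None:
#                   return data  # truthy result sets `has_changed`
#               return None  # falsy result leaves `data` as-is
#
#           return [strip_debug_meta]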
def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
        "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    symbolication_start_time = time()

    with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolication_function", symbolication_function.__name__)
        with metrics.timer(
            "tasks.store.symbolicate_event.symbolication",
            tags={"symbolication_function": symbolication_function.__name__},
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                        op="tasks.store.symbolicate_event.%s" % symbolication_function.__name__
                    ) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data", bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop event but actually continue with rest of pipeline
                        # (persisting unsymbolicated event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function": symbolication_function.__name__,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={"symbolication_function": symbolication_function.__name__},
                        )
                        sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function": symbolication_function.__name__,
                        },
                    )
                    error_logger.exception("tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics", {})["flag.processing.error"] = True
                    data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                    has_changed = True
                    break

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
def update_config_cache(
    generate, organization_id=None, project_id=None, public_key=None, update_reason=None
):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    One of organization_id, project_id, or public_key has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param public_key: The project key for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """
    from sentry.models import Project, ProjectKey, ProjectKeyStatus
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    if project_id:
        set_current_event_project(project_id)

    if organization_id:
        # Cannot use bind_organization_context here because we do not have a
        # model and don't want to fetch one
        sentry_sdk.set_tag("organization_id", organization_id)

    if public_key:
        sentry_sdk.set_tag("public_key", public_key)

    sentry_sdk.set_tag("update_reason", update_reason)
    sentry_sdk.set_tag("generate", generate)

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    projectconfig_debounce_cache.mark_task_done(public_key, project_id, organization_id)

    if organization_id:
        projects = list(Project.objects.filter(organization_id=organization_id))
        keys = list(ProjectKey.objects.filter(project__in=projects))
    elif project_id:
        projects = [Project.objects.get(id=project_id)]
        keys = list(ProjectKey.objects.filter(project__in=projects))
    elif public_key:
        try:
            keys = [ProjectKey.objects.get(public_key=public_key)]
        except ProjectKey.DoesNotExist:
            # In this particular case, where a project key got deleted and
            # triggered an update, we at least know the public key that needs
            # to be deleted from cache.
            #
            # In other similar cases, like an org being deleted, we
            # potentially cannot find any keys anymore, so we don't know which
            # cache keys to delete.
            projectconfig_cache.delete_many([public_key])
            return
    else:
        assert False

    if generate:
        config_cache = {}
        for key in keys:
            if key.status != ProjectKeyStatus.ACTIVE:
                project_config = {"disabled": True}
            else:
                project_config = get_project_config(
                    key.project, project_keys=[key], full_config=True
                ).to_dict()
            config_cache[key.public_key] = project_config

        projectconfig_cache.set_many(config_cache)
    else:
        cache_keys_to_delete = []
        for key in keys:
            cache_keys_to_delete.append(key.public_key)

        projectconfig_cache.delete_many(cache_keys_to_delete)
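# Illustrative usage, not part of the original module: the task above expects
# one of `organization_id`, `project_id`, or `public_key` (it asserts if none
# is given). The values below are hypothetical.
#
#   update_config_cache(generate=True, project_id=42, update_reason="project.options")
#   update_config_cache(generate=False, public_key="abcdef0123456789", update_reason="key.deleted")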
def _process_suspect_commits(event_id, event_platform, event_frames, group_id, project_id, **kwargs):
    metrics.incr("sentry.tasks.process_suspect_commits.start")
    set_current_event_project(project_id)

    project = Project.objects.get_from_cache(id=project_id)
    owners = GroupOwner.objects.filter(
        group_id=group_id,
        project=project,
        organization_id=project.organization_id,
        type=GroupOwnerType.SUSPECT_COMMIT.value,
    )
    owner_count = owners.count()
    if owner_count >= PREFERRED_GROUP_OWNERS:
        owners = owners.filter(
            date_added__lte=timezone.now() - PREFERRED_GROUP_OWNER_AGE
        ).order_by("-date_added")
        if not owners.exists():
            metrics.incr(
                "sentry.tasks.process_suspect_commits.aborted",
                tags={"detail": "maxed_owners_none_old"},
            )
            return

    with metrics.timer("sentry.tasks.process_suspect_commits.process_loop"):
        try:
            with metrics.timer(
                "sentry.tasks.process_suspect_commits.get_serialized_event_file_committers"
            ):
                committers = get_event_file_committers(
                    project, group_id, event_frames, event_platform
                )
            owner_scores = {}
            for committer in committers:
                if "id" in committer["author"]:
                    author_id = committer["author"]["id"]
                    for commit, score in committer["commits"]:
                        if score >= MIN_COMMIT_SCORE:
                            owner_scores[author_id] = max(score, owner_scores.get(author_id, 0))

            if owner_scores:
                for owner_id in sorted(owner_scores, reverse=True, key=owner_scores.get)[
                    :PREFERRED_GROUP_OWNERS
                ]:
                    try:
                        go, created = GroupOwner.objects.update_or_create(
                            group_id=group_id,
                            type=GroupOwnerType.SUSPECT_COMMIT.value,
                            user_id=owner_id,
                            project=project,
                            organization_id=project.organization_id,
                            # Updates date of an existing owner, since we just
                            # matched them with this new event
                            defaults={"date_added": timezone.now()},
                        )
                        if created:
                            owner_count += 1
                            if owner_count > PREFERRED_GROUP_OWNERS:
                                try:
                                    owner = owners[0]
                                except IndexError:
                                    pass
                                else:
                                    owner.delete()
                    except GroupOwner.MultipleObjectsReturned:
                        GroupOwner.objects.filter(
                            group_id=group_id,
                            type=GroupOwnerType.SUSPECT_COMMIT.value,
                            user_id=owner_id,
                            project=project,
                            organization_id=project.organization_id,
                        )[0].delete()
        except Commit.DoesNotExist:
            logger.info(
                "process_suspect_commits.skipped",
                extra={"event": event_id, "reason": "no_commit"},
            )
        except Release.DoesNotExist:
            logger.info(
                "process_suspect_commits.skipped",
                extra={"event": event_id, "reason": "no_release"},
            )
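# Illustrative walk-through, not part of the original module: how
# `owner_scores` above ranks suspect committers. Per author we keep the best
# commit score, then take the top `PREFERRED_GROUP_OWNERS` author ids. With
# hypothetical scores and PREFERRED_GROUP_OWNERS == 2:
#
#   owner_scores = {"101": 4, "102": 2, "103": 5}
#   sorted(owner_scores, reverse=True, key=owner_scores.get)[:2]  # -> ["103", "101"]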
def update_config_cache(generate, organization_id=None, project_id=None, update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id or project_id has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """
    from sentry.models import Project, ProjectKey, ProjectKeyStatus
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    if project_id:
        set_current_event_project(project_id)

    if organization_id:
        # Cannot use bind_organization_context here because we do not have a
        # model and don't want to fetch one
        sentry_sdk.set_tag("organization_id", organization_id)

    sentry_sdk.set_tag("update_reason", update_reason)
    sentry_sdk.set_tag("generate", generate)

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    projectconfig_debounce_cache.mark_task_done(project_id, organization_id)

    if project_id:
        projects = [Project.objects.get_from_cache(id=project_id)]
    elif organization_id:
        # XXX(markus): I feel like we should be able to cache this but I don't
        # want to add another method to src/sentry/db/models/manager.py
        projects = Project.objects.filter(organization_id=organization_id)

    project_keys = {}
    for key in ProjectKey.objects.filter(project_id__in=[project.id for project in projects]):
        project_keys.setdefault(key.project_id, []).append(key)

    if generate:
        config_cache = {}
        for project in projects:
            project_config = get_project_config(
                project, project_keys=project_keys.get(project.id, []), full_config=True
            )
            config_cache[project.id] = project_config.to_dict()

            for key in project_keys.get(project.id) or ():
                # XXX(markus): This is currently the cleanest way to get only
                # state for a single projectkey (considering quotas and
                # everything)
                if key.status != ProjectKeyStatus.ACTIVE:
                    continue

                project_config = get_project_config(project, project_keys=[key], full_config=True)
                config_cache[key.public_key] = project_config.to_dict()

        projectconfig_cache.set_many(config_cache)
    else:
        cache_keys_to_delete = []
        for project in projects:
            cache_keys_to_delete.append(project.id)
            for key in project_keys.get(project.id) or ():
                cache_keys_to_delete.append(key.public_key)

        projectconfig_cache.delete_many(cache_keys_to_delete)

    metrics.incr(
        "relay.projectconfig_cache.done",
        tags={"generate": generate, "update_reason": update_reason},
    )
def _do_symbolicate_event(
    cache_key, start_time, event_id, symbolicate_task, data=None, queue_switches=0
):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    from_reprocessing = (
        symbolicate_task is symbolicate_event_from_reprocessing
        or symbolicate_task is symbolicate_event_from_reprocessing_low_priority
    )

    # check whether the event is in the wrong queue and if so, move it to the other one.
    # we do this at most SYMBOLICATOR_MAX_QUEUE_SWITCHES times.
    if queue_switches >= SYMBOLICATOR_MAX_QUEUE_SWITCHES:
        metrics.gauge("tasks.store.symbolicate_event.low_priority.max_queue_switches", 1)
    else:
        is_low_priority = symbolicate_task in [
            symbolicate_event_low_priority,
            symbolicate_event_from_reprocessing_low_priority,
        ]
        should_be_low_priority = should_demote_symbolication(project_id)

        if is_low_priority != should_be_low_priority:
            metrics.gauge("tasks.store.symbolicate_event.low_priority.wrong_queue", 1)
            submit_symbolicate(
                should_be_low_priority,
                from_reprocessing,
                cache_key,
                event_id,
                start_time,
                data,
                queue_switches + 1,
            )
            return

    # `has_changed` must be initialized before `_continue_to_process_event`
    # can run, since the killswitch path below calls it early.
    has_changed = False

    def _continue_to_process_event():
        process_task = process_event_from_reprocessing if from_reprocessing else process_event
        _do_process_event(
            cache_key=cache_key,
            start_time=start_time,
            event_id=event_id,
            process_task=process_task,
            data=data,
            data_has_changed=has_changed,
            from_symbolicate=True,
        )

    symbolication_function = get_symbolication_function(data)
    symbolication_function_name = getattr(symbolication_function, "__name__", "none")

    if killswitch_matches_context(
        "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
            "symbolication_function": symbolication_function_name,
        },
    ):
        return _continue_to_process_event()

    symbolication_start_time = time()

    submission_ratio = options.get("symbolicate-event.low-priority.metrics.submission-rate")
    submit_realtime_metrics = not from_reprocessing and random.random() < submission_ratio

    if submit_realtime_metrics:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.low_priority.metrics.counter"):
            timestamp = int(symbolication_start_time)
            try:
                realtime_metrics.increment_project_event_counter(project_id, timestamp)
            except Exception as e:
                sentry_sdk.capture_exception(e)

    with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolication_function", symbolication_function_name)
        with metrics.timer(
            "tasks.store.symbolicate_event.symbolication",
            tags={"symbolication_function": symbolication_function_name},
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                        op="tasks.store.symbolicate_event.%s" % symbolication_function_name
                    ) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data", bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop event but actually continue with rest of pipeline
                        # (persisting unsymbolicated event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function": symbolication_function_name,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={"symbolication_function": symbolication_function_name},
                        )
                        sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function": symbolication_function_name,
                        },
                    )
                    error_logger.exception("tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics", {})["flag.processing.error"] = True
                    data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                    has_changed = True
                    break

    if submit_realtime_metrics:
        with sentry_sdk.start_span(
            op="tasks.store.symbolicate_event.low_priority.metrics.histogram"
        ):
            symbolication_duration = int(time() - symbolication_start_time)
            try:
                realtime_metrics.increment_project_duration_counter(
                    project_id, timestamp, symbolication_duration
                )
            except Exception as e:
                sentry_sdk.capture_exception(e)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    return _continue_to_process_event()