def _normalize_impl(self, project_id=None):
    if self._project and project_id and project_id != self._project.id:
        raise RuntimeError(
            "Initialized EventManager with one project ID and called save() with another one"
        )

    if self._normalized:
        raise RuntimeError("Already normalized")

    self._normalized = True

    from sentry_relay.processing import StoreNormalizer

    rust_normalizer = StoreNormalizer(
        project_id=self._project.id if self._project else project_id,
        client_ip=self._client_ip,
        client=self._auth.client if self._auth else None,
        key_id=six.text_type(self._key.id) if self._key else None,
        grouping_config=self._grouping_config,
        protocol_version=six.text_type(self.version) if self.version is not None else None,
        is_renormalize=self._is_renormalize,
        remove_other=self._remove_other,
        normalize_user_agent=True,
        sent_at=self.sent_at.isoformat() if self.sent_at is not None else None,
        **DEFAULT_STORE_NORMALIZER_ARGS
    )

    self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))
def _normalize_impl(self):
    if self._normalized:
        raise RuntimeError('Already normalized')
    self._normalized = True

    from semaphore.processing import StoreNormalizer
    rust_normalizer = StoreNormalizer(
        geoip_lookup=rust_geoip,
        project_id=self._project.id if self._project else None,
        client_ip=self._client_ip,
        client=self._auth.client if self._auth else None,
        key_id=six.text_type(self._key.id) if self._key else None,
        grouping_config=self._grouping_config,
        protocol_version=six.text_type(self.version)
        if self.version is not None else None,
        stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
        max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
        valid_platforms=list(VALID_PLATFORMS),
        max_secs_in_future=MAX_SECS_IN_FUTURE,
        max_secs_in_past=MAX_SECS_IN_PAST,
        enable_trimming=True,
        is_renormalize=self._is_renormalize)

    self._data = CanonicalKeyDict(
        rust_normalizer.normalize_event(dict(self._data)))

    normalize_user_agent(self._data)
def test_delitem(self):
    d = CanonicalKeyDict({'user': {'id': 'DemoUser'}})
    del d['user']
    assert d == {}

    d = CanonicalKeyDict({'user': {'id': 'DemoUser'}})
    del d['sentry.interfaces.User']
    assert d == {}
def test_delitem(self): d = CanonicalKeyDict({"user": {"id": "DemoUser"}}) del d["user"] assert d == {} d = CanonicalKeyDict({"user": {"id": "DemoUser"}}) del d["sentry.interfaces.User"] assert d == {}
def test_getitem_setitem(self): d = CanonicalKeyDict({"user": {"id": "DemoUser"}}, legacy=True) d["user"] = {"id": "other"} assert d["user"] == {"id": "other"} assert d["sentry.interfaces.User"] == {"id": "other"} d = CanonicalKeyDict({"user": {"id": "DemoUser"}}, legacy=True) d["sentry.interfaces.User"] = {"id": "other"} assert d["user"] == {"id": "other"} assert d["sentry.interfaces.User"] == {"id": "other"}
def __init__(self, data, skip_renormalization=False, **kwargs):
    is_renormalized = isinstance(data, EventDict) or (
        isinstance(data, NodeData) and isinstance(data.data, EventDict)
    )

    if not skip_renormalization and not is_renormalized:
        normalizer = StoreNormalizer(is_renormalize=True, enable_trimming=False)
        data = normalizer.normalize_event(dict(data))

    CanonicalKeyDict.__init__(self, data, **kwargs)
def test_getitem_setitem(self):
    d = CanonicalKeyDict({'user': {'id': 'DemoUser'}}, legacy=True)
    d['user'] = {'id': 'other'}
    assert d['user'] == {'id': 'other'}
    assert d['sentry.interfaces.User'] == {'id': 'other'}

    d = CanonicalKeyDict({'user': {'id': 'DemoUser'}}, legacy=True)
    d['sentry.interfaces.User'] = {'id': 'other'}
    assert d['user'] == {'id': 'other'}
    assert d['sentry.interfaces.User'] == {'id': 'other'}
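The tests above rely on CanonicalKeyDict transparently aliasing legacy interface names to their canonical keys. A minimal usage sketch, assuming the class is importable from sentry.utils.canonical as in the Sentry codebase these snippets come from:

from sentry.utils.canonical import CanonicalKeyDict

# Legacy and canonical spellings refer to the same underlying entry, so the
# dict reports a single key no matter which spelling was used to write or read.
d = CanonicalKeyDict({"sentry.interfaces.User": {"id": "DemoUser"}})
assert d["user"] == {"id": "DemoUser"}
assert d["sentry.interfaces.User"] == {"id": "DemoUser"}
assert len(d) == 1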
def __init__(self, data, **kwargs):
    rust_renormalized = _should_skip_to_python(data.get('event_id'))
    if rust_renormalized:
        normalizer = StoreNormalizer(is_renormalize=True)
        data = normalizer.normalize_event(dict(data))

    metrics.incr('rust.renormalized',
                 tags={'value': rust_renormalized})

    with configure_scope() as scope:
        scope.set_tag("rust.renormalized", rust_renormalized)

    CanonicalKeyDict.__init__(self, data, **kwargs)
def create_event(group=None, project=None, event_id=None, normalize=True, **kwargs):
    # XXX: Do not use this method for new tests! Prefer `store_event`.
    if event_id is None:
        event_id = uuid4().hex
    kwargs.setdefault('project', project if project else group.project)
    kwargs.setdefault('data', copy.deepcopy(DEFAULT_EVENT_DATA))
    kwargs.setdefault('platform', kwargs['data'].get('platform', 'python'))
    kwargs.setdefault('message', kwargs['data'].get('message', 'message'))
    if kwargs.get('tags'):
        tags = kwargs.pop('tags')
        if isinstance(tags, dict):
            tags = list(tags.items())
        kwargs['data']['tags'] = tags
    if kwargs.get('stacktrace'):
        stacktrace = kwargs.pop('stacktrace')
        kwargs['data']['stacktrace'] = stacktrace

    user = kwargs.pop('user', None)
    if user is not None:
        kwargs['data']['user'] = user

    kwargs['data'].setdefault('errors', [{
        'type': EventError.INVALID_DATA,
        'name': 'foobar',
    }])

    # maintain simple event Factories by supporting the legacy message
    # parameter just like our API would
    if 'logentry' not in kwargs['data']:
        kwargs['data']['logentry'] = {
            'message': kwargs['message'] or '<unlabeled event>',
        }

    if normalize:
        manager = EventManager(CanonicalKeyDict(kwargs['data']))
        manager.normalize()
        kwargs['data'] = manager.get_data()
        kwargs['data'].update(manager.materialize_metadata())
        kwargs['message'] = manager.get_search_message()

    # This is needed so that create_event saves the event in nodestore
    # under the correct key. This is usually done in EventManager.save()
    kwargs['data'].setdefault(
        'node_id', Event.generate_node_id(kwargs['project'].id, event_id))

    event = Event(event_id=event_id, group=group, **kwargs)

    if group:
        EventMapping.objects.create(
            project_id=event.project.id,
            event_id=event_id,
            group=group,
        )

    # emulate EventManager refs
    event.data.bind_ref(event)
    event.save()
    return event
def _do_preprocess_event(cache_key, data, start_time, event_id, process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time,
                            event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time,
                     event_id=event_id, project_id=project)
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task):
    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={
            'reason': 'cache',
            'stage': 'pre'
        }, skip_internal=False)
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.get_from_cache(id=project_id)

    if should_process(data):
        from_reprocessing = process_task is process_event_from_reprocessing
        submit_process(project, from_reprocessing, cache_key, event_id, start_time,
                       original_data)
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
def test_get_path_dict(self):
    assert get_path({}, 'a') is None
    assert get_path({'a': 2}, 'a') == 2
    assert get_path({'a': 2}, 'b') is None
    assert get_path({'a': {'b': []}}, 'a', 'b') == []
    assert get_path({'a': []}, 'a', 'b') is None
    assert get_path(CanonicalKeyDict({'a': 2}), 'a') == 2
def _normalize_impl(self):
    if self._normalized:
        raise RuntimeError('Already normalized')
    self._normalized = True

    from semaphore.processing import StoreNormalizer
    rust_normalizer = StoreNormalizer(
        geoip_lookup=rust_geoip,
        project_id=self._project.id if self._project else None,
        client_ip=self._client_ip,
        client=self._auth.client if self._auth else None,
        key_id=six.text_type(self._key.id) if self._key else None,
        grouping_config=self._grouping_config,
        protocol_version=six.text_type(self.version)
        if self.version is not None else None,
        stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
        max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
        valid_platforms=list(VALID_PLATFORMS),
        max_secs_in_future=MAX_SECS_IN_FUTURE,
        max_secs_in_past=MAX_SECS_IN_PAST,
        enable_trimming=True,
        is_renormalize=self._is_renormalize,
        remove_other=self._remove_other,
    )

    self._data = CanonicalKeyDict(
        rust_normalizer.normalize_event(dict(self._data))
    )

    normalize_user_agent(self._data)
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project):
    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed", tags={
            "reason": "cache",
            "stage": "pre"
        }, skip_internal=False)
        error_logger.error("preprocess.failed.empty", extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    if should_process(data):
        from_reprocessing = process_task is process_event_from_reprocessing
        submit_process(project, from_reprocessing, cache_key, event_id, start_time,
                       original_data)
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
def test_get_path_dict(self): assert get_path({}, "a") is None assert get_path({"a": 2}, "a") == 2 assert get_path({"a": 2}, "b") is None assert get_path({"a": {"b": []}}, "a", "b") == [] assert get_path({"a": []}, "a", "b") is None assert get_path(CanonicalKeyDict({"a": 2}), "a") == 2
def __setstate__(self, state):
    # If there is a legacy pickled version that used to have data as a
    # duplicate, reject it.
    state.pop('data', None)
    if state.pop('_node_data_CANONICAL', False):
        state['_node_data'] = CanonicalKeyDict(state['_node_data'])
    self.__dict__ = state
def test_mixed(self):
    assert CanonicalKeyDict({
        'release': 'asdf',
        'exception': {'type': 'DemoException'},
        'user': {'id': 'DemoUser'},
        'sentry.interfaces.Exception': {'type': 'INVALID'},
        'sentry.interfaces.User': {'id': 'INVALID'},
    }) == self.canonical_data
def test_len(self):
    assert len(CanonicalKeyDict({
        'release': 'asdf',
        'exception': {'type': 'DemoException'},
        'user': {'id': 'DemoUser'},
        'sentry.interfaces.Exception': {'type': 'INVALID'},
        'sentry.interfaces.User': {'id': 'INVALID'},
    })) == 3
def test_get_path(self):
    assert get_path({}, ['a']) is None
    assert get_path({}, ['a'], 1) == 1
    assert get_path({'a': 2}, ['a']) == 2
    assert get_path({'a': 2}, ['b']) is None
    assert get_path({'a': 2}, ['b'], 1) == 1
    assert get_path({'a': {'b': []}}, ['a', 'b']) == []
    assert get_path({'a': []}, ['a', 'b']) is None
    assert get_path(CanonicalKeyDict({'a': 2}), ['a']) == 2
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project):
    from sentry.lang.native.processing import should_process_with_symbolicator

    if cache_key and data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed", tags={
            "reason": "cache",
            "stage": "pre"
        }, skip_internal=False)
        error_logger.error("preprocess.failed.empty", extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]
    set_current_project(project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    from_reprocessing = process_task is process_event_from_reprocessing

    with metrics.timer("tasks.store.preprocess_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

    if should_process_with_symbolicator(data):
        reprocessing2.backup_unprocessed_event(project=project, data=original_data)
        submit_symbolicate(project, from_reprocessing, cache_key, event_id, start_time,
                           original_data)
        return

    if should_process(data):
        reprocessing2.backup_unprocessed_event(project=project, data=original_data)
        submit_process(
            project,
            from_reprocessing,
            cache_key,
            event_id,
            start_time,
            data_has_changed=False,
        )
        return

    submit_save_event(project, from_reprocessing, cache_key, event_id, start_time,
                      original_data)
def create_event(self, event_id=None, normalize=True, **kwargs):
    if event_id is None:
        event_id = uuid4().hex
    if 'group' not in kwargs:
        kwargs['group'] = self.group
    kwargs.setdefault('project', kwargs['group'].project)
    kwargs.setdefault('data', copy.deepcopy(DEFAULT_EVENT_DATA))
    kwargs.setdefault('platform', kwargs['data'].get('platform', 'python'))
    kwargs.setdefault('message', kwargs['data'].get('message', 'message'))
    if kwargs.get('tags'):
        tags = kwargs.pop('tags')
        if isinstance(tags, dict):
            tags = list(tags.items())
        kwargs['data']['tags'] = tags
    if kwargs.get('stacktrace'):
        stacktrace = kwargs.pop('stacktrace')
        kwargs['data']['stacktrace'] = stacktrace

    user = kwargs.pop('user', None)
    if user is not None:
        kwargs['data']['user'] = user

    kwargs['data'].setdefault(
        'errors', [{
            'type': EventError.INVALID_DATA,
            'name': 'foobar',
        }]
    )

    # maintain simple event fixtures by supporting the legacy message
    # parameter just like our API would
    if 'logentry' not in kwargs['data']:
        kwargs['data']['logentry'] = {
            'message': kwargs.get('message') or '<unlabeled event>',
        }

    if normalize:
        manager = EventManager(CanonicalKeyDict(kwargs['data']), for_store=False)
        manager.normalize()
        kwargs['data'] = manager.get_data()
        kwargs['message'] = manager.get_search_message()
    else:
        assert 'message' not in kwargs, 'do not pass message this way'

    event = Event(event_id=event_id, **kwargs)
    EventMapping.objects.create(
        project_id=event.project.id,
        event_id=event_id,
        group=event.group,
    )
    # emulate EventManager refs
    event.data.bind_ref(event)
    event.save()
    return event
def test_canonical(self):
    assert CanonicalKeyDict({
        'release': 'asdf',
        'exception': {
            'type': 'DemoException'
        },
        'user': {
            'id': 'DemoUser'
        },
    }) == self.canonical_data
def test_canonical(self):
    assert (CanonicalKeyDict({
        "release": "asdf",
        "exception": {
            "type": "DemoException"
        },
        "user": {
            "id": "DemoUser"
        },
    }) == self.canonical_data)
def test_legacy(self):
    assert (CanonicalKeyDict({
        "release": "asdf",
        "sentry.interfaces.Exception": {
            "type": "DemoException"
        },
        "sentry.interfaces.User": {
            "id": "DemoUser"
        },
    }) == self.canonical_data)
def create_event(group=None, project=None, event_id=None, normalize=True, **kwargs):
    # XXX: Do not use this method for new tests! Prefer `store_event`.
    if event_id is None:
        event_id = uuid4().hex
    kwargs.setdefault("project", project if project else group.project)
    kwargs.setdefault("data", copy.deepcopy(DEFAULT_EVENT_DATA))
    kwargs.setdefault("platform", kwargs["data"].get("platform", "python"))
    kwargs.setdefault("message", kwargs["data"].get("message", "message"))
    if kwargs.get("tags"):
        tags = kwargs.pop("tags")
        if isinstance(tags, dict):
            tags = list(tags.items())
        kwargs["data"]["tags"] = tags
    if kwargs.get("stacktrace"):
        stacktrace = kwargs.pop("stacktrace")
        kwargs["data"]["stacktrace"] = stacktrace

    user = kwargs.pop("user", None)
    if user is not None:
        kwargs["data"]["user"] = user

    kwargs["data"].setdefault("errors", [{
        "type": EventError.INVALID_DATA,
        "name": "foobar"
    }])

    # maintain simple event Factories by supporting the legacy message
    # parameter just like our API would
    if "logentry" not in kwargs["data"]:
        kwargs["data"]["logentry"] = {
            "message": kwargs["message"] or "<unlabeled event>"
        }

    if normalize:
        manager = EventManager(CanonicalKeyDict(kwargs["data"]))
        manager.normalize()
        kwargs["data"] = manager.get_data()
        kwargs["data"].update(manager.materialize_metadata())
        kwargs["message"] = manager.get_search_message()

    # This is needed so that create_event saves the event in nodestore
    # under the correct key. This is usually done in EventManager.save()
    kwargs["data"].setdefault(
        "node_id", Event.generate_node_id(kwargs["project"].id, event_id))

    event = Event(event_id=event_id, group=group, **kwargs)

    # emulate EventManager refs
    event.data.bind_ref(event)
    event.save()
    event.data.save()
    return event
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project):
    from sentry.lang.native.processing import should_process_with_symbolicator

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed", tags={
            "reason": "cache",
            "stage": "pre"
        }, skip_internal=False)
        error_logger.error("preprocess.failed.empty", extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]
    set_current_project(project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    from_reprocessing = process_task is process_event_from_reprocessing

    new_process_behavior = bool(
        options.get("sentry:preprocess-use-new-behavior", False))
    metrics.incr("tasks.store.preprocess_event.new_process_behavior",
                 tags={"value": new_process_behavior})

    if new_process_behavior and should_process_with_symbolicator(data):
        submit_symbolicate(project, from_reprocessing, cache_key, event_id, start_time,
                           original_data)
        return

    if should_process(data):
        submit_process(
            project,
            from_reprocessing,
            cache_key,
            event_id,
            start_time,
            original_data,
            data_has_changed=False,
            new_process_behavior=new_process_behavior,
        )
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
def test_legacy(self):
    assert CanonicalKeyDict(
        {
            'release': 'asdf',
            'sentry.interfaces.Exception': {
                'type': 'DemoException'
            },
            'sentry.interfaces.User': {
                'id': 'DemoUser'
            },
        },
        legacy=True) == self.canonical_data
def create_event(self, event_id=None, **kwargs):
    if event_id is None:
        event_id = uuid4().hex
    if 'group' not in kwargs:
        kwargs['group'] = self.group
    kwargs.setdefault('project', kwargs['group'].project)
    kwargs.setdefault('data', copy.deepcopy(DEFAULT_EVENT_DATA))
    kwargs.setdefault('platform', kwargs['data'].get('platform', 'python'))
    kwargs.setdefault('message', kwargs['data'].get('message', 'message'))
    if kwargs.get('tags'):
        tags = kwargs.pop('tags')
        if isinstance(tags, dict):
            tags = list(tags.items())
        kwargs['data']['tags'] = tags
    if kwargs.get('stacktrace'):
        stacktrace = kwargs.pop('stacktrace')
        kwargs['data']['sentry.interfaces.Stacktrace'] = stacktrace

    kwargs['data'].setdefault(
        'errors', [{
            'type': EventError.INVALID_DATA,
            'name': 'foobar',
        }]
    )

    # maintain simple event fixtures by supporting the legacy message
    # parameter just like our API would
    if 'sentry.interfaces.Message' not in kwargs['data']:
        kwargs['data']['sentry.interfaces.Message'] = {
            'message': kwargs.get('message') or '<unlabeled event>',
        }

    if 'type' not in kwargs['data']:
        kwargs['data'].update(
            {
                'type': 'default',
                'metadata': {
                    'title': kwargs['data']['sentry.interfaces.Message']['message'],
                },
            }
        )

    kwargs['data'] = CanonicalKeyDict(kwargs.pop('data'))

    event = Event(event_id=event_id, **kwargs)
    EventMapping.objects.create(
        project_id=event.project.id,
        event_id=event_id,
        group=event.group,
    )
    # emulate EventManager refs
    event.data.bind_ref(event)
    event.save()
    return event
def _normalize_impl(self): if self._normalized: raise RuntimeError("Already normalized") self._normalized = True from semaphore.processing import StoreNormalizer rust_normalizer = StoreNormalizer( project_id=self._project.id if self._project else None, client_ip=self._client_ip, client=self._auth.client if self._auth else None, key_id=six.text_type(self._key.id) if self._key else None, grouping_config=self._grouping_config, protocol_version=six.text_type(self.version) if self.version is not None else None, is_renormalize=self._is_renormalize, remove_other=self._remove_other, normalize_user_agent=True, **DEFAULT_STORE_NORMALIZER_ARGS ) self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))
def _decode_event(data, content_encoding):
    if isinstance(data, six.binary_type):
        if content_encoding == "gzip":
            data = decompress_gzip(data)
        elif content_encoding == "deflate":
            data = decompress_deflate(data)
        elif data[0] != b"{":
            data = decode_and_decompress_data(data)
        else:
            data = decode_data(data)
    if isinstance(data, six.text_type):
        data = safely_load_json_string(data)

    return CanonicalKeyDict(data)
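A hedged sketch of how the helper above behaves: given JSON text (or gzip/deflate-compressed bytes) it decodes the payload and wraps it in a CanonicalKeyDict. The call below is illustrative only and assumes it runs in the same module as _decode_event:

# Plain JSON text with no content encoding goes straight through the JSON path.
payload = u'{"event_id": "abc123", "sentry.interfaces.User": {"id": "DemoUser"}}'
data = _decode_event(payload, content_encoding=None)
assert data["event_id"] == "abc123"
assert data["user"] == {"id": "DemoUser"}  # legacy key resolved to its canonical form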
def __init__(self, data, skip_renormalization=False, **kwargs):
    is_renormalized = (
        isinstance(data, EventDict) or
        (isinstance(data, NodeData) and isinstance(data.data, EventDict))
    )

    with configure_scope() as scope:
        scope.set_tag("rust.is_renormalized", is_renormalized)
        scope.set_tag("rust.skip_renormalization", skip_renormalization)
        scope.set_tag("rust.renormalized", "null")

    if not skip_renormalization and not is_renormalized:
        rust_renormalized = _should_skip_to_python(data.get('event_id'))
        if rust_renormalized:
            normalizer = StoreNormalizer(is_renormalize=True)
            data = normalizer.normalize_event(dict(data))

        metrics.incr('rust.renormalized',
                     tags={'value': rust_renormalized})

        with configure_scope() as scope:
            scope.set_tag("rust.renormalized", rust_renormalized)

    CanonicalKeyDict.__init__(self, data, **kwargs)
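A hedged usage sketch for the EventDict constructor above (the import path is an assumption; in the Sentry version these snippets track, EventDict is defined alongside EventManager). Passing skip_renormalization=True wraps already-normalized data without running the Rust normalizer again, which is how EventManager builds its own event instances:

# Assumed import path -- EventDict lives next to EventManager in this era of the codebase.
from sentry.event_manager import EventDict

raw = {"event_id": "a" * 32, "message": "hello"}

# Trusted, already-normalized payload: skip the renormalization pass entirely.
trusted = EventDict(raw, skip_renormalization=True)

# Untrusted payload: the constructor may run StoreNormalizer(is_renormalize=True)
# over it before storing, depending on _should_skip_to_python().
untrusted = EventDict(raw)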
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']
    Raven.tags_context({
        'project': project,
    })
    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors. These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)
    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append((tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )
    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
def load_data(platform, default=None, sample_name=None):
    # NOTE: Before editing this data, make sure you understand the context
    # in which it's being used. It is NOT only used for local development and
    # has production consequences.
    #  * bin/load-mocks to generate fake data for local testing
    #  * When a new project is created, a fake event is generated as a "starter"
    #    event so it's not an empty project.
    #  * When a user clicks Test Configuration from notification plugin settings page,
    #    a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data['type'] != 'language':
        language = platform_data['language']

    for platform in (platform, language, default):
        if not platform:
            continue

        json_path = os.path.join(DATA_ROOT, 'samples', '%s.json' % (platform.encode('utf-8'), ))
        if not os.path.exists(json_path):
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]['name']
            except KeyError:
                pass

        with open(json_path) as fp:
            data = json.loads(fp.read())
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ('csp', 'hkpk', 'expectct', 'expectstaple'):
        return data

    data['platform'] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if 'message' not in data:
        data['message'] = 'This is an example %s exception' % (sample_name or platform, )
    data.setdefault('user', generate_user(
        ip_address='127.0.0.1',
        username='******',
        id=1,
        email='*****@*****.**',
    ))
    data.setdefault('extra', {
        'session': {
            'foo': 'bar',
        },
        'results': [1, 2, 3, 4, 5],
        'emptyList': [],
        'emptyMap': {},
        'length': 10837790,
        'unauthorized': False,
        'url': 'http://example.org/foo/bar/',
    })
    data.setdefault('modules', {
        'my.package': '1.0.0',
    })
    data.setdefault('request', {
        "cookies": 'foo=bar;biz=baz',
        "url": "http://example.com/foo",
        "headers": {
            "Referer": "http://example.com",
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36"
        },
        "env": {
            'ENV': 'prod',
        },
        "query_string": "foo=bar",
        "data": '{"hello": "world"}',
        "method": "GET"
    })

    return data
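An illustrative call for the sample-data loader above; it assumes the bundled sample JSON files are present under DATA_ROOT/samples:

# Builds a fake "python" event; the result is a CanonicalKeyDict, so both
# canonical and legacy key spellings resolve against it.
data = load_data("python")
assert isinstance(data, CanonicalKeyDict)
assert data["platform"] == "python"
assert "request" in data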
class EventManager(object): """ Handles normalization in both the store endpoint and the save task. The intention is to swap this class out with a reimplementation in Rust. """ def __init__( self, data, version='5', project=None, grouping_config=None, client_ip=None, user_agent=None, auth=None, key=None, content_encoding=None, is_renormalize=False, remove_other=None ): self._data = _decode_event(data, content_encoding=content_encoding) self.version = version self._project = project if grouping_config is None and project is not None: grouping_config = get_grouping_config_dict_for_project(self._project) self._grouping_config = grouping_config self._client_ip = client_ip self._user_agent = user_agent self._auth = auth self._key = key self._is_renormalize = is_renormalize self._remove_other = remove_other self._normalized = False def process_csp_report(self): """Only called from the CSP report endpoint.""" data = self._data try: interface = get_interface(data.pop('interface')) report = data.pop('report') except KeyError: raise APIForbidden('No report or interface data') # To support testing, we can either accept a built interface instance, or the raw data in # which case we build the instance ourselves try: instance = ( report if isinstance(report, interface) else interface.from_raw(report) ) except jsonschema.ValidationError as e: raise APIError('Invalid security report: %s' % str(e).splitlines()[0]) def clean(d): return dict(filter(lambda x: x[1], d.items())) data.update( { 'logger': 'csp', 'message': instance.get_message(), 'culprit': instance.get_culprit(), instance.path: instance.to_json(), 'tags': instance.get_tags(), 'errors': [], 'user': {'ip_address': self._client_ip}, # Construct a faux Http interface based on the little information we have # This is a bit weird, since we don't have nearly enough # information to create an Http interface, but # this automatically will pick up tags for the User-Agent # which is actually important here for CSP 'request': { 'url': instance.get_origin(), 'headers': clean( { 'User-Agent': self._user_agent, 'Referer': instance.get_referrer(), } ), }, } ) self._data = data def normalize(self): with metrics.timer('events.store.normalize.duration'): self._normalize_impl() metrics.timing( 'events.store.normalize.errors', len(self._data.get("errors") or ()), ) def _normalize_impl(self): if self._normalized: raise RuntimeError('Already normalized') self._normalized = True from semaphore.processing import StoreNormalizer rust_normalizer = StoreNormalizer( geoip_lookup=rust_geoip, project_id=self._project.id if self._project else None, client_ip=self._client_ip, client=self._auth.client if self._auth else None, key_id=six.text_type(self._key.id) if self._key else None, grouping_config=self._grouping_config, protocol_version=six.text_type(self.version) if self.version is not None else None, stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT, max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES, valid_platforms=list(VALID_PLATFORMS), max_secs_in_future=MAX_SECS_IN_FUTURE, max_secs_in_past=MAX_SECS_IN_PAST, enable_trimming=True, is_renormalize=self._is_renormalize, remove_other=self._remove_other, ) self._data = CanonicalKeyDict( rust_normalizer.normalize_event(dict(self._data)) ) normalize_user_agent(self._data) def should_filter(self): ''' returns (result: bool, reason: string or None) Result is True if an event should be filtered The reason for filtering is passed along as a string so that we can store it in metrics ''' for name in 
SECURITY_REPORT_INTERFACES: if name in self._data: interface = get_interface(name) if interface.to_python(self._data[name]).should_filter(self._project): return (True, FilterStatKeys.INVALID_CSP) if self._client_ip and not is_valid_ip(self._project, self._client_ip): return (True, FilterStatKeys.IP_ADDRESS) release = self._data.get('release') if release and not is_valid_release(self._project, release): return (True, FilterStatKeys.RELEASE_VERSION) error_message = get_path(self._data, 'logentry', 'formatted') \ or get_path(self._data, 'logentry', 'message') \ or '' if error_message and not is_valid_error_message(self._project, error_message): return (True, FilterStatKeys.ERROR_MESSAGE) for exc in get_path(self._data, 'exception', 'values', filter=True, default=[]): message = u': '.join( filter(None, map(exc.get, ['type', 'value'])) ) if message and not is_valid_error_message(self._project, message): return (True, FilterStatKeys.ERROR_MESSAGE) for filter_cls in filters.all(): filter_obj = filter_cls(self._project) if filter_obj.is_enabled() and filter_obj.test(self._data): return (True, six.text_type(filter_obj.id)) return (False, None) def get_data(self): return self._data def _get_event_instance(self, project_id=None): data = self._data event_id = data.get('event_id') platform = data.get('platform') recorded_timestamp = data.get('timestamp') date = datetime.fromtimestamp(recorded_timestamp) date = date.replace(tzinfo=timezone.utc) time_spent = data.get('time_spent') data['node_id'] = Event.generate_node_id(project_id, event_id) return Event( project_id=project_id or self._project.id, event_id=event_id, data=EventDict(data, skip_renormalization=True), time_spent=time_spent, datetime=date, platform=platform ) def get_culprit(self): """Helper to calculate the default culprit""" return force_text( self._data.get('culprit') or self._data.get('transaction') or generate_culprit(self._data) or '' ) def get_event_type(self): """Returns the event type.""" return eventtypes.get(self._data.get('type', 'default'))() def materialize_metadata(self): """Returns the materialized metadata to be merged with group or event data. This currently produces the keys `type`, `metadata`, `title` and `location`. This should most likely also produce `culprit` here. """ event_type = self.get_event_type() event_metadata = event_type.get_metadata(self._data) return { 'type': event_type.key, 'metadata': event_metadata, 'title': event_type.get_title(event_metadata), 'location': event_type.get_location(event_metadata), } def get_search_message(self, event_metadata=None, culprit=None): """This generates the internal event.message attribute which is used for search purposes. It adds a bunch of data from the metadata and the culprit. 
""" if event_metadata is None: event_metadata = self.get_event_type().get_metadata(self._data) if culprit is None: culprit = self.get_culprit() data = self._data message = '' if data.get('logentry'): message += (data['logentry'].get('formatted') or data['logentry'].get('message') or '') if event_metadata: for value in six.itervalues(event_metadata): value_u = force_text(value, errors='replace') if value_u not in message: message = u'{} {}'.format(message, value_u) if culprit and culprit not in message: culprit_u = force_text(culprit, errors='replace') message = u'{} {}'.format(message, culprit_u) return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH) def save(self, project_id, raw=False, assume_normalized=False): # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True data = self._data project = Project.objects.get_from_cache(id=project_id) project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) # Check to make sure we're not about to do a bunch of work that's # already been done if we've processed an event with this ID. (This # isn't a perfect solution -- this doesn't handle ``EventMapping`` and # there's a race condition between here and when the event is actually # saved, but it's an improvement. See GH-7677.) try: event = Event.objects.get( project_id=project.id, event_id=data['event_id'], ) except Event.DoesNotExist: pass else: # Make sure we cache on the project before returning event._project_cache = project logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': data['event_id'], 'project_id': project.id, 'model': Event.__name__, } ) return event # Pull out the culprit culprit = self.get_culprit() # Pull the toplevel data we're interested in level = data.get('level') # TODO(mitsuhiko): this code path should be gone by July 2018. # This is going to be fine because no code actually still depends # on integers here. When we need an integer it will be converted # into one later. Old workers used to send integers here. if level is not None and isinstance(level, six.integer_types): level = LOG_LEVELS[level] transaction_name = data.get('transaction') logger_name = data.get('logger') release = data.get('release') dist = data.get('dist') environment = data.get('environment') recorded_timestamp = data.get('timestamp') # We need to swap out the data with the one internal to the newly # created event object event = self._get_event_instance(project_id=project_id) self._data = data = event.data.data event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id if transaction_name: transaction_name = force_text(transaction_name) # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). 
setdefault_path(data, 'tags', value=[]) set_tag(data, 'level', level) if logger_name: set_tag(data, 'logger', logger_name) if environment: set_tag(data, 'environment', environment) if transaction_name: set_tag(data, 'transaction', transaction_name) if release: # dont allow a conflicting 'release' tag pop_tag(data, 'release') release = Release.get_or_create( project=project, version=release, date_added=date, ) set_tag(data, 'sentry:release', release.version) if dist and release: dist = release.add_dist(dist, date) # dont allow a conflicting 'dist' tag pop_tag(data, 'dist') set_tag(data, 'sentry:dist', dist.name) else: dist = None event_user = self._get_event_user(project, data) if event_user: # dont allow a conflicting 'user' tag pop_tag(data, 'user') set_tag(data, 'sentry:user', event_user.tag_value) # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(data, project)) normalize_stacktraces_for_grouping(data, grouping_config) for plugin in plugins.for_project(project, version=None): added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False) if added_tags: # plugins should not override user provided tags for key, value in added_tags: if get_tag(data, key) is None: set_tag(data, key, value) for path, iface in six.iteritems(event.interfaces): for k, v in iface.iter_tags(): set_tag(data, k, v) # Get rid of ephemeral interface data if iface.ephemeral: data.pop(iface.path, None) # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}' just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right paramters. data['fingerprint'] = data.get('fingerprint') or ['{{ default }}'] apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project)) hashes = event.get_hashes() data['hashes'] = hashes # we want to freeze not just the metadata and type in but also the # derived attributes. The reason for this is that we push this # data into kafka for snuba processing and our postprocessing # picks up the data right from the snuba topic. For most usage # however the data is dynamically overriden by Event.title and # Event.location (See Event.as_dict) materialized_metadata = self.materialize_metadata() event_metadata = materialized_metadata['metadata'] data.update(materialized_metadata) data['culprit'] = culprit # index components into ``Event.message`` # See GH-3248 event.message = self.get_search_message(event_metadata, culprit) received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s')) # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate. 
group_metadata = dict(materialized_metadata) group_metadata['last_received'] = received_timestamp kwargs = { 'platform': platform, 'message': event.message, 'culprit': culprit, 'logger': logger_name, 'level': LOG_LEVELS_MAP.get(level), 'last_seen': date, 'first_seen': date, 'active_at': date, 'data': group_metadata, } if release: kwargs['first_release'] = release try: group, is_new, is_regression, is_sample = self._save_aggregate( event=event, hashes=hashes, release=release, **kwargs ) except HashDiscarded: event_discarded.send_robust( project=project, sender=EventManager, ) metrics.incr( 'events.discarded', skip_internal=True, tags={ 'organization_id': project.organization_id, 'platform': platform, }, ) raise else: event_saved.send_robust( project=project, event_size=event.size, sender=EventManager, ) event.group = group # store a reference to the group id to guarantee validation of isolation event.data.bind_ref(event) # When an event was sampled, the canonical source of truth # is the EventMapping table since we aren't going to be writing out an actual # Event row. Otherwise, if the Event isn't being sampled, we can safely # rely on the Event table itself as the source of truth and ignore # EventMapping since it's redundant information. if is_sample: try: with transaction.atomic(using=router.db_for_write(EventMapping)): EventMapping.objects.create(project=project, group=group, event_id=event_id) except IntegrityError: logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': event_id, 'project_id': project.id, 'group_id': group.id, 'model': EventMapping.__name__, } ) return event environment = Environment.get_or_create( project=project, name=environment, ) group_environment, is_new_group_environment = GroupEnvironment.get_or_create( group_id=group.id, environment_id=environment.id, defaults={ 'first_release': release if release else None, }, ) if release: ReleaseEnvironment.get_or_create( project=project, release=release, environment=environment, datetime=date, ) ReleaseProjectEnvironment.get_or_create( project=project, release=release, environment=environment, datetime=date, ) grouprelease = GroupRelease.get_or_create( group=group, release=release, environment=environment, datetime=date, ) counters = [ (tsdb.models.group, group.id), (tsdb.models.project, project.id), ] if release: counters.append((tsdb.models.release, release.id)) tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id) frequencies = [ # (tsdb.models.frequent_projects_by_organization, { # project.organization_id: { # project.id: 1, # }, # }), # (tsdb.models.frequent_issues_by_project, { # project.id: { # group.id: 1, # }, # }) (tsdb.models.frequent_environments_by_group, { group.id: { environment.id: 1, }, }) ] if release: frequencies.append( (tsdb.models.frequent_releases_by_group, { group.id: { grouprelease.id: 1, }, }) ) tsdb.record_frequency_multi(frequencies, timestamp=event.datetime) UserReport.objects.filter( project=project, event_id=event_id, ).update( group=group, environment=environment, ) # save the event unless its been sampled if not is_sample: try: with transaction.atomic(using=router.db_for_write(Event)): event.save() except IntegrityError: logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': event_id, 'project_id': project.id, 'group_id': group.id, 'model': Event.__name__, } ) return event tagstore.delay_index_event_tags( organization_id=project.organization_id, project_id=project.id, group_id=group.id, environment_id=environment.id, 
event_id=event.id, tags=event.tags, date_added=event.datetime, ) if event_user: tsdb.record_multi( ( (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )), (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )), ), timestamp=event.datetime, environment_id=environment.id, ) if release: if is_new: buffer.incr( ReleaseProject, {'new_groups': 1}, { 'release_id': release.id, 'project_id': project.id, } ) if is_new_group_environment: buffer.incr( ReleaseProjectEnvironment, {'new_issues_count': 1}, { 'project_id': project.id, 'release_id': release.id, 'environment_id': environment.id, } ) safe_execute( Group.objects.add_tags, group, environment, event.get_tags(), _with_transaction=False) if not raw: if not project.first_event: project.update(first_event=date) first_event_received.send_robust(project=project, group=group, sender=Project) eventstream.insert( group=group, event=event, is_new=is_new, is_sample=is_sample, is_regression=is_regression, is_new_group_environment=is_new_group_environment, primary_hash=hashes[0], # We are choosing to skip consuming the event back # in the eventstream if it's flagged as raw. # This means that we want to publish the event # through the event stream, but we don't care # about post processing and handling the commit. skip_consume=raw, ) metrics.timing( 'events.latency', received_timestamp - recorded_timestamp, tags={ 'project_id': project.id, }, ) metrics.timing( 'events.size.data.post_save', event.size, tags={'project_id': project.id} ) return event def _get_event_user(self, project, data): user_data = data.get('user') if not user_data: return euser = EventUser( project_id=project.id, ident=user_data.get('id'), email=user_data.get('email'), username=user_data.get('username'), ip_address=user_data.get('ip_address'), name=user_data.get('name'), ) euser.set_hash() if not euser.hash: return cache_key = u'euserid:1:{}:{}'.format( project.id, euser.hash, ) euser_id = default_cache.get(cache_key) if euser_id is None: try: with transaction.atomic(using=router.db_for_write(EventUser)): euser.save() except IntegrityError: try: euser = EventUser.objects.get( project_id=project.id, hash=euser.hash, ) except EventUser.DoesNotExist: # why??? 
e_userid = -1 else: if euser.name != (user_data.get('name') or euser.name): euser.update( name=user_data['name'], ) e_userid = euser.id default_cache.set(cache_key, e_userid, 3600) return euser def _find_hashes(self, project, hash_list): return map( lambda hash: GroupHash.objects.get_or_create( project=project, hash=hash, )[0], hash_list, ) def _save_aggregate(self, event, hashes, release, **kwargs): project = event.project # attempt to find a matching hash all_hashes = self._find_hashes(project, hashes) existing_group_id = None for h in all_hashes: if h.group_id is not None: existing_group_id = h.group_id break if h.group_tombstone_id is not None: raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id) # XXX(dcramer): this has the opportunity to create duplicate groups # it should be resolved by the hash merging function later but this # should be better tested/reviewed if existing_group_id is None: # it's possible the release was deleted between # when we queried for the release and now, so # make sure it still exists first_release = kwargs.pop('first_release', None) with transaction.atomic(): short_id = project.next_short_id() group, group_is_new = Group.objects.create( project=project, short_id=short_id, first_release_id=Release.objects.filter( id=first_release.id, ).values_list('id', flat=True).first() if first_release else None, **kwargs ), True metrics.incr( 'group.created', skip_internal=True, tags={'platform': event.platform or 'unknown'} ) else: group = Group.objects.get(id=existing_group_id) group_is_new = False # If all hashes are brand new we treat this event as new is_new = False new_hashes = [h for h in all_hashes if h.group_id is None] if new_hashes: # XXX: There is a race condition here wherein another process could # create a new group that is associated with one of the new hashes, # add some event(s) to it, and then subsequently have the hash # "stolen" by this process. This then "orphans" those events from # their "siblings" in the group we've created here. We don't have a # way to fix this, since we can't update the group on those hashes # without filtering on `group_id` (which we can't do due to query # planner weirdness.) For more context, see 84c6f75a and d0e22787, # as well as GH-5085. 
GroupHash.objects.filter( id__in=[h.id for h in new_hashes], ).exclude( state=GroupHash.State.LOCKED_IN_MIGRATION, ).update(group=group) if group_is_new and len(new_hashes) == len(all_hashes): is_new = True # XXX(dcramer): it's important this gets called **before** the aggregate # is processed as otherwise values like last_seen will get mutated can_sample = ( features.has('projects:sample-events', project=project) and should_sample( event.data.get('received') or float(event.datetime.strftime('%s')), group.data.get('last_received') or float(group.last_seen.strftime('%s')), group.times_seen, ) ) if not is_new: is_regression = self._process_existing_aggregate( group=group, event=event, data=kwargs, release=release, ) else: is_regression = False # Determine if we've sampled enough data to store this event if is_new or is_regression: is_sample = False else: is_sample = can_sample if not is_sample: GroupHash.record_last_processed_event_id( all_hashes[0].id, event.event_id, ) return group, is_new, is_regression, is_sample def _handle_regression(self, group, event, release): if not group.is_resolved(): return # we only mark it as a regression if the event's release is newer than # the release which we originally marked this as resolved elif GroupResolution.has_resolution(group, release): return elif has_pending_commit_resolution(group): return if not plugin_is_regression(group, event): return # we now think its a regression, rely on the database to validate that # no one beat us to this date = max(event.datetime, group.last_seen) is_regression = bool( Group.objects.filter( id=group.id, # ensure we cant update things if the status has been set to # ignored status__in=[GroupStatus.RESOLVED, GroupStatus.UNRESOLVED], ).exclude( # add to the regression window to account for races here active_at__gte=date - timedelta(seconds=5), ).update( active_at=date, # explicitly set last_seen here as ``is_resolved()`` looks # at the value last_seen=date, status=GroupStatus.UNRESOLVED ) ) group.active_at = date group.status = GroupStatus.UNRESOLVED if is_regression and release: # resolutions are only valid if the state of the group is still # resolved -- if it were to change the resolution should get removed try: resolution = GroupResolution.objects.get( group=group, ) except GroupResolution.DoesNotExist: affected = False else: cursor = connection.cursor() # delete() API does not return affected rows cursor.execute("DELETE FROM sentry_groupresolution WHERE id = %s", [resolution.id]) affected = cursor.rowcount > 0 if affected: # if we had to remove the GroupResolution (i.e. 
we beat the # the queue to handling this) then we need to also record # the corresponding event try: activity = Activity.objects.filter( group=group, type=Activity.SET_RESOLVED_IN_RELEASE, ident=resolution.id, ).order_by('-datetime')[0] except IndexError: # XXX: handle missing data, as its not overly important pass else: activity.update(data={ 'version': release.version, }) if is_regression: activity = Activity.objects.create( project=group.project, group=group, type=Activity.SET_REGRESSION, data={ 'version': release.version if release else '', } ) activity.send_notification() kick_off_status_syncs.apply_async(kwargs={ 'project_id': group.project_id, 'group_id': group.id, }) return is_regression def _process_existing_aggregate(self, group, event, data, release): date = max(event.datetime, group.last_seen) extra = { 'last_seen': date, 'score': ScoreClause(group), 'data': data['data'], } if event.message and event.message != group.message: extra['message'] = event.message if group.level != data['level']: extra['level'] = data['level'] if group.culprit != data['culprit']: extra['culprit'] = data['culprit'] is_regression = self._handle_regression(group, event, release) group.last_seen = extra['last_seen'] update_kwargs = { 'times_seen': 1, } buffer.incr(Group, update_kwargs, { 'id': group.id, }, extra) return is_regression
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers. These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data,
                                        _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)