def test_simple(self, mock_save_event):
    project = self.create_project()
    data = {
        'project': project.id,
        'message': 'test',
        'extra': {'foo': 'bar'},
    }

    preprocess_event(data=data)
    mock_save_event.delay.assert_called_once()
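# A minimal sketch of the mock wiring the `self`-based tests in this section
# assume: the store tasks are patched so `.delay(...)` calls are recorded
# rather than enqueued. The patch targets below are assumptions, not verbatim
# from this codebase.
from unittest import mock

@mock.patch('sentry.tasks.store.save_event')     # hypothetical patch target
@mock.patch('sentry.tasks.store.process_event')  # hypothetical patch target
def test_example(self, mock_process_event, mock_save_event):
    # with stacked @mock.patch decorators, mocks are injected bottom-up:
    # the innermost decorator's mock arrives first
    ...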
def test_move_to_save_event(default_project, mock_process_event, mock_save_event, register_plugin):
    register_plugin(BasicPreprocessorPlugin)
    data = {
        "project": default_project.id,
        "platform": "NOTMATTLANG",
        "logentry": {"formatted": "test"},
        "event_id": EVENT_ID,
        "extra": {"foo": "bar"},
    }

    preprocess_event(data=data)
    assert mock_process_event.delay.call_count == 0
    assert mock_save_event.delay.call_count == 1
def test_simple(self, mock_save_event):
    project = self.create_project()
    data = {
        'project': project.id,
        'message': 'test',
        'extra': {'foo': 'bar'},
    }

    preprocess_event(data=data)
    assert mock_save_event.delay.call_count == 1
def inner(data):
    data.setdefault("timestamp", iso_format(before_now(seconds=1)))
    mgr = EventManager(data=data, project=default_project)
    mgr.normalize()
    data = mgr.get_data()
    event_id = data["event_id"]

    cache_key = event_processing_store.store(data)

    with task_runner():
        # factories.store_event would almost be suitable for this, but let's
        # actually run through stacktrace processing once
        preprocess_event(start_time=time(), cache_key=cache_key, data=data)

    return event_id
def test_move_to_save_event(self, mock_process_event, mock_save_event):
    project = self.create_project()
    data = {
        'project': project.id,
        'platform': 'NOTMATTLANG',
        'message': 'test',
        'extra': {'foo': 'bar'},
    }

    preprocess_event(data=data)
    assert mock_process_event.delay.call_count == 0
    assert mock_save_event.delay.call_count == 1
def test_move_to_symbolicate_event(
    default_project, mock_process_event, mock_save_event, mock_symbolicate_event, register_plugin
):
    register_plugin(globals(), BasicPreprocessorPlugin)
    data = {
        "project": default_project.id,
        "platform": "native",
        "logentry": {"formatted": "test"},
        "event_id": EVENT_ID,
        "extra": {"foo": "bar"},
    }

    preprocess_event(cache_key="", data=data)
    assert mock_symbolicate_event.delay.call_count == 1
    assert mock_process_event.delay.call_count == 0
    assert mock_save_event.delay.call_count == 0
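# The pytest-style variants take their mocks as fixtures rather than
# decorators. A minimal sketch of how such fixtures could live in a
# conftest.py; the patch targets are assumptions, not verbatim from this repo.
import pytest
from unittest import mock

@pytest.fixture
def mock_process_event():
    with mock.patch("sentry.tasks.store.process_event") as m:  # hypothetical target
        yield m

@pytest.fixture
def mock_symbolicate_event():
    with mock.patch("sentry.tasks.symbolication.symbolicate_event") as m:  # hypothetical target
        yield m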
def test_move_to_process_event(self, mock_process_event, mock_save_event):
    project = self.create_project()
    data = {
        'project': project.id,
        'event_id': uuid.uuid4().hex,
        'platform': 'mattlang',
        'message': 'test',
        'extra': {'foo': 'bar'},
    }

    preprocess_event(data=data)
    assert mock_process_event.delay.call_count == 1
    assert mock_save_event.delay.call_count == 0
def dispatch_task(cache_key: str) -> None:
    if attachments:
        with sentry_sdk.start_span(op="ingest_consumer.set_attachment_cache"):
            attachment_objects = [
                CachedAttachment(type=attachment.pop("attachment_type"), **attachment)
                for attachment in attachments
            ]
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    if data.get("type") == "transaction":
        # Transactions do not need preprocess/process, so submit directly to
        # the transaction-specific save_event task.
        save_event_transaction.delay(
            cache_key=cache_key,
            data=None,
            start_time=start_time,
            event_id=event_id,
            project_id=project_id,
        )
    else:
        # Preprocess this event, which spawns either process_event or
        # save_event. Pass data explicitly to avoid fetching it again from the
        # cache.
        with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"):
            preprocess_event(
                cache_key=cache_key,
                data=data,
                start_time=start_time,
                event_id=event_id,
                project=project,
            )

    # remember for one hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", CACHE_TIMEOUT)

    # emit event_accepted once everything is done
    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)
def test_move_to_process_event(self, mock_process_event, mock_save_event):
    project = self.create_project()
    data = {
        'project': project.id,
        'platform': 'mattlang',
        'logentry': {'formatted': 'test'},
        'extra': {'foo': 'bar'},
    }

    preprocess_event(data=data)
    assert mock_process_event.delay.call_count == 1
    assert mock_save_event.delay.call_count == 0
def test_move_to_save_event(self, mock_process_event, mock_save_event):
    project = self.create_project()
    data = {
        "project": project.id,
        "platform": "NOTMATTLANG",
        "logentry": {"formatted": "test"},
        "extra": {"foo": "bar"},
    }

    preprocess_event(data=data)
    assert mock_process_event.delay.call_count == 0
    assert mock_save_event.delay.call_count == 1
def test_move_to_symbolicate_event(
    default_project, mock_process_event, mock_save_event, mock_symbolicate_event, register_plugin
):
    register_plugin(BasicPreprocessorPlugin)
    data = {
        "project": default_project.id,
        "platform": "native",
        "logentry": {"formatted": "test"},
        "event_id": EVENT_ID,
        "extra": {"foo": "bar"},
    }

    options.set("sentry:preprocess-use-new-behavior", True)
    preprocess_event(data=data)
    assert mock_symbolicate_event.delay.call_count == 1
    assert mock_process_event.delay.call_count == 0
    assert mock_save_event.delay.call_count == 0
def inner(data, seconds_ago=1):
    # Set platform to native so all parts of reprocessing fire; symbolication
    # will not happen unless this is set to certain values
    data.setdefault("platform", "native")

    # Every request to snuba has a timestamp that's clamped in a curious way to
    # ensure data consistency
    data.setdefault("timestamp", iso_format(before_now(seconds=seconds_ago)))

    mgr = EventManager(data=data, project=default_project)
    mgr.normalize()
    data = mgr.get_data()
    event_id = data["event_id"]

    cache_key = event_processing_store.store(data)

    with task_runner():
        # factories.store_event would almost be suitable for this, but let's
        # actually run through stacktrace processing once
        preprocess_event(start_time=time(), cache_key=cache_key, data=data)

    return event_id
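# Hypothetical usage of the `inner` helper above, assuming it is returned from
# a fixture (here called `process_and_save`, an assumed name): feed a minimal
# payload through normalization, caching, and preprocess_event, then look the
# event up by the returned id.
def test_process_and_save(default_project, process_and_save):
    event_id = process_and_save({"message": "hello world"})
    event = eventstore.get_event_by_id(default_project.id, event_id)
    assert event is not None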
def test_simple(self, mock_save_event):
    project = self.create_project()
    data = {
        'project': project.id,
        'message': 'test',
        'extra': {'foo': 'bar'},
    }

    preprocess_event(data=data)
    mock_save_event.delay.assert_called_once_with(
        cache_key=None,
        data={
            'project': project.id,
            'message': 'test',
        },
    )
def test_move_to_symbolicate_event_low_priority(
    default_project,
    mock_process_event,
    mock_save_event,
    mock_symbolicate_event,
    mock_symbolicate_event_low_priority,
    register_plugin,
):
    with override_options({"store.symbolicate-event-lpq-always": [default_project.id]}):
        register_plugin(globals(), BasicPreprocessorPlugin)
        data = {
            "project": default_project.id,
            "platform": "native",
            "logentry": {"formatted": "test"},
            "event_id": EVENT_ID,
            "extra": {"foo": "bar"},
        }

        preprocess_event(cache_key="", data=data)
        assert mock_symbolicate_event_low_priority.delay.call_count == 1
        assert mock_symbolicate_event.delay.call_count == 0
        assert mock_process_event.delay.call_count == 0
        assert mock_save_event.delay.call_count == 0
def test_basic(task_runner, default_project, register_plugin, change_groups, reset_snuba, change_stacktrace):
    # Replace this with an int and nonlocal when we have Python 3
    abs_count = []

    def event_preprocessor(data):
        tags = data.setdefault("tags", [])
        assert all(not x or x[0] != "processing_counter" for x in tags)
        tags.append(("processing_counter", "x{}".format(len(abs_count))))
        abs_count.append(None)

        if change_stacktrace and len(abs_count) > 0:
            data["exception"] = {
                "values": [
                    {
                        "type": "ZeroDivisionError",
                        "stacktrace": {"frames": [{"function": "foo"}]},
                    }
                ]
            }

        if change_groups:
            data["fingerprint"] = [uuid.uuid4().hex]
        else:
            data["fingerprint"] = ["foo"]

        return data

    class ReprocessingTestPlugin(Plugin2):
        def get_event_preprocessors(self, data):
            return [event_preprocessor]

        def is_enabled(self, project=None):
            return True

    register_plugin(globals(), ReprocessingTestPlugin)

    mgr = EventManager(
        data={
            "timestamp": iso_format(before_now(seconds=1)),
            "tags": [["key1", "value"], None, ["key2", "value"]],
        },
        project=default_project,
    )
    mgr.normalize()
    data = mgr.get_data()
    event_id = data["event_id"]
    cache_key = event_processing_store.store(data)

    def get_event_by_processing_counter(n):
        return list(
            eventstore.get_events(
                eventstore.Filter(
                    project_ids=[default_project.id],
                    conditions=[["tags[processing_counter]", "=", n]],
                )
            )
        )

    with task_runner(), Feature({"projects:reprocessing-v2": True}):
        # factories.store_event would almost be suitable for this, but let's
        # actually run through stacktrace processing once
        preprocess_event(start_time=time(), cache_key=cache_key, data=data)

    event = eventstore.get_event_by_id(default_project.id, event_id)
    assert event.get_tag("processing_counter") == "x0"
    assert not event.data.get("errors")

    assert get_event_by_processing_counter("x0")[0].event_id == event.event_id

    old_event = event

    with task_runner(), Feature({"projects:reprocessing-v2": True}):
        reprocess_group(default_project.id, event.group_id)

    new_events = get_event_by_processing_counter("x1")
    if not change_stacktrace:
        assert not new_events
    else:
        (event,) = new_events

        # Assert original data is used
        assert event.get_tag("processing_counter") == "x1"
        assert not event.data.get("errors")

        if change_groups:
            assert event.group_id != old_event.group_id
        else:
            assert event.group_id == old_event.group_id

        assert event.get_tag("original_event_id") == old_event.event_id
        assert int(event.get_tag("original_group_id")) == old_event.group_id
def process_event(message, projects):
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed, do not reprocess

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, CACHE_TIMEOUT)

    if attachments:
        attachment_objects = [
            CachedAttachment(type=attachment.pop("attachment_type"), **attachment)
            for attachment in attachments
        ]
        attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    # Preprocess this event, which spawns either process_event or
    # save_event. Pass data explicitly to avoid fetching it again from the
    # cache.
    preprocess_event(
        cache_key=cache_key, data=data, start_time=start_time, event_id=event_id, project=project
    )

    # remember for one hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", CACHE_TIMEOUT)

    # emit event_accepted once everything is done
    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)
def _do_process_event(message, projects):
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    #
    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    #
    # This code has been ripped from the old python store endpoint. We're
    # keeping it around because it does provide some protection against
    # reprocessing good events if a single consumer is in a restart loop.
    deduplication_key = f"ev:{project_id}:{event_id}"
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed, do not reprocess

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    cache_key = event_processing_store.store(data)

    if attachments:
        attachment_objects = [
            CachedAttachment(type=attachment.pop("attachment_type"), **attachment)
            for attachment in attachments
        ]
        attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    # Preprocess this event, which spawns either process_event or
    # save_event. Pass data explicitly to avoid fetching it again from the
    # cache.
    with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"):
        preprocess_event(
            cache_key=cache_key,
            data=data,
            start_time=start_time,
            event_id=event_id,
            project=project,
        )

    # remember for one hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", CACHE_TIMEOUT)

    # emit event_accepted once everything is done
    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)
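# Both consumer versions share the same best-effort deduplication pattern:
# check a short-lived cache key up front, and set it only after the event has
# been handed off downstream. A self-contained sketch of that pattern, with an
# in-memory dict standing in for the real cache backend:
DEDUP_CACHE_TIMEOUT = 3600  # one hour, as in the functions above

_dedup_cache = {}  # stand-in; the real code uses a shared cache (e.g. memcached)

def already_processed(project_id, event_id):
    # mirrors the cache.get(deduplication_key) guard at the top of the consumers
    return _dedup_cache.get(f"ev:{project_id}:{event_id}") is not None

def mark_processed(project_id, event_id):
    # called only after the event was submitted, mirroring the placement of
    # cache.set(deduplication_key, ...) above; the TTL is ignored in this sketch
    _dedup_cache[f"ev:{project_id}:{event_id}"] = ""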