Example #1
    def test_attachment_outcomes(self):
        manager = EventManager(make_event(message="foo"), project=self.project)
        manager.normalize()

        a1 = CachedAttachment(name="a1", data=b"hello")
        a2 = CachedAttachment(name="a2", data=b"limited", rate_limited=True)
        a3 = CachedAttachment(name="a3", data=b"world")

        cache_key = cache_key_for_event(manager.get_data())
        attachment_cache.set(cache_key, attachments=[a1, a2, a3])

        mock_track_outcome = mock.Mock()
        with mock.patch("sentry.event_manager.track_outcome", mock_track_outcome):
            with self.feature("organizations:event-attachments"):
                manager.save(1, cache_key=cache_key)

        assert mock_track_outcome.call_count == 3

        for o in mock_track_outcome.mock_calls:
            assert o.kwargs["outcome"] == Outcome.ACCEPTED

        for o in mock_track_outcome.mock_calls[:2]:
            assert o.kwargs["category"] == DataCategory.ATTACHMENT
            assert o.kwargs["quantity"] == 5

        final = mock_track_outcome.mock_calls[2]
        assert final.kwargs["category"] == DataCategory.DEFAULT
Example #2
def save_unprocessed_event(project, event_id):
    """
    Move event from event_processing_store into nodestore. Only call if event
    has outcome=accepted.
    """
    if not features.has("projects:reprocessing-v2", project, actor=None):
        return

    with sentry_sdk.start_span(
        op="sentry.reprocessing2.save_unprocessed_event.get_unprocessed_event"
    ):
        data = event_processing_store.get(
            cache_key_for_event({"project": project.id, "event_id": event_id}),
            unprocessed=True,
        )
        if data is None:
            return

    with sentry_sdk.start_span(
        op="sentry.reprocessing2.save_unprocessed_event.set_nodestore"
    ):
        node_id = _generate_unprocessed_event_node_id(
            project_id=project.id, event_id=event_id
        )
        nodestore.set(node_id, data)
Example #3
    def store(self, event: Event, unprocessed: bool = False) -> str:
        with sentry_sdk.start_span(op="eventstore.processing.store"):
            key = cache_key_for_event(event)
            if unprocessed:
                key = self.__get_unprocessed_key(key)
            self.inner.set(key, event, self.timeout)
            return key
Example #4
def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({
        "event_id": event_id,
        "project": project_id
    })

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    if not features.has("organizations:event-attachments",
                        project.organization,
                        actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    # Attachments may be uploaded for events that already exist. Fetch the
    # existing group_id, so that the attachment can be fetched by group-level
    # APIs. This is inherently racy.
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(event_ids=[event_id], project_ids=[project.id]),
        limit=1,
    )

    group_id = None
    if events:
        group_id = events[0].group_id

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment)
    if attachment.type != "event.attachment":
        logger.exception("invalid individual attachment type: %s",
                         attachment.type)
        return

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )

    try:
        data = attachment.data
    except MissingAttachmentChunks:
        logger.exception("Missing chunks for cache_key=%s", cache_key)
        return

    file.putfile(BytesIO(data))
    EventAttachment.objects.create(project_id=project.id,
                                   group_id=group_id,
                                   event_id=event_id,
                                   name=attachment.name,
                                   file=file)

    attachment.delete()
Example #5
def insert_data_to_database_legacy(data,
                                   start_time=None,
                                   from_reprocessing=False,
                                   attachments=None):
    """
    Yet another "fast path" to ingest an event without making it go
    through Relay. Please consider using functions from the ingest consumer
    instead, or, if you're within tests, use `TestCase.store_event`.
    """

    # XXX(markus): Delete this function and merge with ingest consumer logic.

    if start_time is None:
        start_time = time()

    # we might be passed some subclasses of dict that fail dumping
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    cache_timeout = 3600
    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, cache_timeout)

    # Attachments will be empty or None if the "event-attachments" feature
    # is turned off. For native crash reports it will still contain the
    # crash dump (e.g. minidump) so we can load it during processing.
    if attachments is not None:
        attachment_cache.set(cache_key, attachments, cache_timeout)

    task = from_reprocessing and preprocess_event_from_reprocessing or preprocess_event
    task.delay(cache_key=cache_key,
               start_time=start_time,
               event_id=data["event_id"])
Example #6
    def _dispatch_post_process_group_task(
        self,
        event,
        is_new,
        is_regression,
        is_new_group_environment,
        primary_hash,
        skip_consume=False,
    ):
        if skip_consume:
            logger.info("post_process.skip.raw_event",
                        extra={"event_id": event.event_id})
        else:
            cache_key = cache_key_for_event({
                "project": event.project_id,
                "event_id": event.event_id
            })
            post_process_group.delay(
                is_new=is_new,
                is_regression=is_regression,
                is_new_group_environment=is_new_group_environment,
                primary_hash=primary_hash,
                cache_key=cache_key,
                group_id=event.group_id,
            )
Example #7
    def _dispatch_post_process_group_task(
        self,
        event_id: str,
        project_id: int,
        group_id: Optional[int],
        is_new: bool,
        is_regression: bool,
        is_new_group_environment: bool,
        primary_hash: Optional[str],
        skip_consume: bool = False,
    ) -> None:
        if skip_consume:
            logger.info("post_process.skip.raw_event",
                        extra={"event_id": event_id})
        else:
            cache_key = cache_key_for_event({
                "project": project_id,
                "event_id": event_id
            })

            post_process_group.delay(
                is_new=is_new,
                is_regression=is_regression,
                is_new_group_environment=is_new_group_environment,
                primary_hash=primary_hash,
                cache_key=cache_key,
                group_id=group_id,
            )
Example #8
    def insert_data_to_database(self,
                                data,
                                start_time=None,
                                from_reprocessing=False,
                                attachments=None):
        if start_time is None:
            start_time = time()

        # we might be passed some subclasses of dict that fail dumping
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())

        cache_timeout = 3600
        cache_key = cache_key_for_event(data)
        default_cache.set(cache_key, data, cache_timeout)

        # Attachments will be empty or None if the "event-attachments" feature
        # is turned off. For native crash reports it will still contain the
        # crash dump (e.g. minidump) so we can load it during processing.
        if attachments is not None:
            attachment_cache.set(cache_key, attachments, cache_timeout)

        task = from_reprocessing and preprocess_event_from_reprocessing or preprocess_event
        task.delay(cache_key=cache_key,
                   start_time=start_time,
                   event_id=data["event_id"])
Example #9
def process_individual_attachment(message):
    event_id = message["event_id"]
    project_id = message["project_id"]
    cache_key = cache_key_for_event({
        "event_id": event_id,
        "project": project_id
    })

    try:
        project = Project.objects.get_from_cache(id=project_id)
    except Project.DoesNotExist:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment)
    assert attachment.type == "event.attachment", attachment.type

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )

    file.putfile(BytesIO(attachment.data))
    EventAttachment.objects.create(project_id=project.id,
                                   event_id=event_id,
                                   name=attachment.name,
                                   file=file)

    attachment.delete()
Example #10
    def process_message(self, message):
        message = msgpack.unpackb(message.value(), use_list=False)
        payload = message["payload"]
        start_time = float(message["start_time"])
        event_id = message["event_id"]
        project_id = message["project_id"]
        remote_addr = message.get("remote_addr")

        # check that we haven't already processed this event (a previous instance of the forwarder
        # died before it could commit the event queue offset)
        deduplication_key = "ev:{}:{}".format(project_id, event_id)
        if cache.get(deduplication_key) is not None:
            logger.warning(
                "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.",
                event_id,
                project_id,
            )
            return True  # message already processed, do not reprocess

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            logger.error("Project for ingested event does not exist: %s", project_id)
            return True

        # Parse the JSON payload. This is required to compute the cache key and
        # call process_event. The payload will be put into Kafka raw, to avoid
        # serializing it again.
        # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
        # which assumes that data passed in is a raw dictionary.
        data = json.loads(payload)

        cache_timeout = 3600
        cache_key = cache_key_for_event(data)
        default_cache.set(cache_key, data, cache_timeout)

        # Preprocess this event, which spawns either process_event or
        # save_event. Pass data explicitly to avoid fetching it again from the
        # cache.
        preprocess_event(
            cache_key=cache_key,
            data=data,
            start_time=start_time,
            event_id=event_id,
            project=project,
        )

        # remember for one hour that we saved this event (deduplication protection)
        cache.set(deduplication_key, "", 3600)

        # emit event_accepted once everything is done
        event_accepted.send_robust(
            ip=remote_addr, data=data, project=project, sender=self.process_message
        )

        # Return *something* so that it counts against batch size
        return True
Example #11
def process_attachment_chunk(message, projects):
    payload = message["payload"]
    event_id = message["event_id"]
    project_id = message["project_id"]
    id = message["id"]
    chunk_index = message["chunk_index"]
    cache_key = cache_key_for_event({"event_id": event_id, "project": project_id})
    attachment_cache.set_chunk(
        key=cache_key, id=id, chunk_index=chunk_index, chunk_data=payload, timeout=CACHE_TIMEOUT
    )
Example #12
    def event_preprocessor(data):
        extra = data.setdefault("extra", {})
        extra.setdefault("processing_counter", 0)
        extra["processing_counter"] += 1

        cache_key = cache_key_for_event(data)
        attachments = attachment_cache.get(cache_key)
        extra.setdefault("attachments", []).append([attachment.type for attachment in attachments])

        return data
Example #13
def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({
        "event_id": event_id,
        "project": project_id
    })

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    if not features.has("organizations:event-attachments",
                        project.organization,
                        actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    # Attachments may be uploaded for events that already exist. Fetch the
    # existing group_id, so that the attachment can be fetched by group-level
    # APIs. This is inherently racy.
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(event_ids=[event_id], project_ids=[project.id]),
        limit=1,
    )

    group_id = None
    if events:
        group_id = events[0].group_id

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment)
    if attachment.type != "event.attachment":
        logger.exception("invalid individual attachment type: %s",
                         attachment.type)
        return

    save_attachment(
        cache_key,
        attachment,
        project,
        event_id,
        key_id=None,  # TODO: Inject this from Relay
        group_id=group_id,
        start_time=None,  # TODO: Inject this from Relay
    )

    attachment.delete()
Example #14
def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({
        "event_id": event_id,
        "project": project_id
    })

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    if not features.has("organizations:event-attachments",
                        project.organization,
                        actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment)
    assert attachment.type == "event.attachment", attachment.type

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )

    file.putfile(BytesIO(attachment.data))
    EventAttachment.objects.create(project_id=project.id,
                                   event_id=event_id,
                                   name=attachment.name,
                                   file=file)

    attachment.delete()
Example #15
    def _dispatch_post_process_group_task(
        self,
        event,
        is_new,
        is_regression,
        is_new_group_environment,
        primary_hash,
        skip_consume=False,
    ):
        if skip_consume:
            logger.info("post_process.skip.raw_event",
                        extra={"event_id": event.event_id})
        else:
            random_val = random.random()
            cache_key = cache_key_for_event({
                "project": event.project_id,
                "event_id": event.event_id
            })
            if options.get("postprocess.use-cache-key") > random_val:
                post_process_group.delay(
                    event=None,
                    is_new=is_new,
                    is_regression=is_regression,
                    is_new_group_environment=is_new_group_environment,
                    primary_hash=primary_hash,
                    cache_key=cache_key,
                    group_id=event.group_id,
                )
            else:
                # Pass the cache key here to ensure that the processing cache is removed.
                post_process_group.delay(
                    event=event,
                    is_new=is_new,
                    is_regression=is_regression,
                    is_new_group_environment=is_new_group_environment,
                    primary_hash=primary_hash,
                    cache_key=cache_key,
                )
Example #16
def _do_process_event(message, projects):
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    #
    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    #
    # This code has been ripped from the old python store endpoint. We're
    # keeping it around because it does provide some protection against
    # reprocessing good events if a single consumer is in a restart loop.
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event"
            " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed, do not reprocess

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, CACHE_TIMEOUT)

    if attachments:
        attachment_objects = [
            CachedAttachment(type=attachment.pop("attachment_type"),
                             **attachment) for attachment in attachments
        ]

        attachment_cache.set(cache_key,
                             attachments=attachment_objects,
                             timeout=CACHE_TIMEOUT)

    # Preprocess this event, which spawns either process_event or
    # save_event. Pass data explicitly to avoid fetching it again from the
    # cache.
    with sentry_sdk.start_span(
            op="ingest_consumer.process_event.preprocess_event"):
        preprocess_event(
            cache_key=cache_key,
            data=data,
            start_time=start_time,
            event_id=event_id,
            project=project,
        )

    # remember for one hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", CACHE_TIMEOUT)

    # emit event_accepted once everything is done
    event_accepted.send_robust(ip=remote_addr,
                               data=data,
                               project=project,
                               sender=process_event)
Example #17
def process_event(message, projects):
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event"
            " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed, do not reprocess

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, CACHE_TIMEOUT)

    if attachments:
        attachment_objects = [
            CachedAttachment(type=attachment.pop("attachment_type"),
                             **attachment) for attachment in attachments
        ]

        attachment_cache.set(cache_key,
                             attachments=attachment_objects,
                             timeout=CACHE_TIMEOUT)

    # Preprocess this event, which spawns either process_event or
    # save_event. Pass data explicitly to avoid fetching it again from the
    # cache.
    preprocess_event(cache_key=cache_key,
                     data=data,
                     start_time=start_time,
                     event_id=event_id,
                     project=project)

    # remember for one hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", CACHE_TIMEOUT)

    # emit event_accepted once everything is done
    event_accepted.send_robust(ip=remote_addr,
                               data=data,
                               project=project,
                               sender=process_event)
Example #18
    def store(self, event: Event, unprocessed: bool = False) -> str:
        key = cache_key_for_event(event)
        if unprocessed:
            key = self.__get_unprocessed_key(key)
        self.inner.set(key, event, self.timeout)
        return key
Example #19
    def _key_for_event(self, event):
        return cache_key_for_event(event)
Example #20
    def delete(self, event: Event) -> None:
        key = cache_key_for_event(event)
        self.delete_by_key(key)
Example #21
    def store(self, event, unprocessed=False):
        key = cache_key_for_event(event)
        if unprocessed:
            key = _get_unprocessed_key(key)
        self.inner.set(key, event, self.timeout)
        return key
Example #22
    def delete(self, event):
        key = cache_key_for_event(event)
        self.delete_by_key(key)
Example #23
def get_event_attachment(data, attachment_type):
    cache_key = cache_key_for_event(data)
    attachments = attachment_cache.get(cache_key) or []
    return next((a for a in attachments if a.type == attachment_type), None)
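A hypothetical call site for the helper above, shown only to illustrate its shape: "event.minidump" is one of the attachment type strings Sentry uses for native crash dumps, and handle_minidump is a placeholder that does not appear in the examples.

# Hypothetical usage sketch: fetch the cached minidump for an event payload.
# get_event_attachment returns None when no attachment of that type exists.
minidump = get_event_attachment(data, "event.minidump")
if minidump is not None:
    handle_minidump(minidump)  # placeholder consumer, not defined above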
Example #24
    def delete(self, event, unprocessed=False):
        key = cache_key_for_event(event)
        if unprocessed:
            key = _get_unprocessed_key(key)
        self.delete_by_key(key)
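Across the snippets above, cache_key_for_event is called either with a fully normalized event payload or with a minimal dict carrying only "project" and "event_id". The sketch below illustrates that minimal calling pattern; the import path is an assumption and may differ between Sentry versions.

# Assumed import path, for illustration only; older releases may expose the
# helper from a different module.
from sentry.eventstore.processing.base import cache_key_for_event

# Only "project" and "event_id" are needed to derive the key, which is why the
# ingest consumers above can build it before the event is fully processed.
cache_key = cache_key_for_event({"project": 42, "event_id": "a" * 32})

# The same key is then used for default_cache / event_processing_store and for
# attachment_cache, so attachments can be looked up per event later on.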