示例#1
0
def create_failed_event(cache_key,
                        project_id,
                        issues,
                        event_id,
                        start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT)

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
       reprocessing.get_reprocessing_revision(project_id, cached=False) != \
       reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False)
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={
                'reprocessing_active': reprocessing_active,
                'issues': issues
            },
        ).send_notification()
        ProjectOption.objects.set_value(project,
                                        'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'raw'
                     },
                     skip_internal=False)
        error_logger.error('process.failed_raw.empty',
                           extra={'cache_key': cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data['timestamp']).replace(tzinfo=timezone.utc),
        data=data)

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'process'
                     })
        error_logger.error('process.failed.empty',
                           extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']
    Raven.tags_context({
        'project': project,
    })
    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors,
                                  data=data,
                                  _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data[
        'project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                    cache_key,
                    project,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key,
                               start_time=start_time,
                               event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key,
                     data=None,
                     start_time=start_time,
                     event_id=event_id,
                     project_id=project)
示例#3
0
def _do_process_event(cache_key,
                      start_time,
                      event_id,
                      process_task,
                      data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'process'
                     },
                     skip_internal=False)
        error_logger.error('process.failed.empty',
                           extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Event enhancers.  These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    try:
        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(args=(),
                                        kwargs={
                                            'process_task_name':
                                            process_task.__name__,
                                            'task_kwargs': {
                                                'cache_key': cache_key,
                                                'event_id': event_id,
                                                'start_time': start_time,
                                            }
                                        },
                                        countdown=e.retry_after)
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors,
                                  data=data,
                                  _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data[
        'project'] == project_id, 'Project cannot be mutated by preprocessor'
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                    cache_key,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id,
                           start_time, data)
            process_task.delay(cache_key,
                               start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
示例#4
0
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
            "store.load-shed-process-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer(
            "tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer("tasks.store.process_event.stacktraces",
                           tags={"from_symbolicate": from_symbolicate}):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer("tasks.store.datascrubbers.scrub",
                               tags={"from_symbolicate": from_symbolicate}):
                new_data = safe_execute(scrub_data,
                                        project=project,
                                        event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(
                op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                    "tasks.store.process_event.preprocessors",
                    tags={
                        "plugin": plugin.slug,
                        "from_symbolicate": from_symbolicate
                    },
            ):
                processors = safe_execute(plugin.get_event_preprocessors,
                                          data=data,
                                          _with_transaction=False)
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception(
                            "tasks.store.preprocessors.error")
                        data.setdefault("_metrics",
                                        {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data[
        "project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id,
                                 process_task, project)
            return

        cache_key = event_processing_store.store(data)

    from_reprocessing = process_task is process_event_from_reprocessing
    submit_save_event(project, from_reprocessing, cache_key, event_id,
                      start_time, data)
示例#5
0
def create_failed_event(cache_key,
                        data,
                        project_id,
                        issues,
                        event_id,
                        start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    set_current_event_project(project_id)

    # We can only create failed events for events that can potentially
    # create failed events.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    # If this event has just been reprocessed with reprocessing-v2, we don't
    # put it through reprocessing-v1 again. The value of reprocessing-v2 is
    # partially that one sees the entire event even in its failed state, all
    # the time.
    if reprocessing2.is_reprocessed_event(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT)

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (reprocessing_active and reprocessing.get_reprocessing_revision(
            project_id, cached=False) != reprocessing_rev):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False)
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={
                "reprocessing_active": reprocessing_active,
                "issues": issues
            },
        ).send_notification()
        ProjectOption.objects.set_value(project,
                                        "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "raw"
                     },
                     skip_internal=False)
        error_logger.error("process.failed_raw.empty",
                           extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import ProcessingIssue, RawEvent

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    event_processing_store.delete_by_key(cache_key)

    return True
示例#6
0
文件: store.py 项目: yaoqi/sentry
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
       reprocessing.get_reprocessing_revision(project_id, cached=False) != \
       reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'}, skip_internal=False)
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
示例#7
0
def _do_process_event(cache_key,
                      start_time,
                      event_id,
                      process_task,
                      data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data["project"]
    event_id = data["event_id"]

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(
                    enhancer, data, _passthrough_errors=(RetrySymbolication, ))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 120:
            error_logger.warning("process.slow",
                                 extra={
                                     "project_id": project_id,
                                     "event_id": event_id
                                 })

        if start_time and (time() - start_time) > 3600:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={
                    "project_id": project_id,
                    "event_id": event_id
                },
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors,
                                  data=data,
                                  _with_transaction=False)
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data[
        "project"] == project_id, "Project cannot be mutated by preprocessor"
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id,
                           start_time, data)
            process_task.delay(cache_key,
                               start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
示例#8
0
文件: store.py 项目: yaoqi/sentry
def _do_process_event(cache_key, start_time, event_id, process_task,
                      data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
示例#9
0
文件: store.py 项目: y1024/sentry
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "process.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # We assume that all potential PII is produced as part of stacktrace
    # processors and event enhancers.
    #
    # We assume that plugins for eg sessionstack (running via
    # `plugin.get_event_preprocessors`) are not producing data that should be
    # PII-stripped, ever.
    #
    # XXX(markus): Javascript event error translation is happening after this block
    # because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`, possibly move?
    if has_changed and features.has(
        "organizations:datascrubbers-v2", project.organization, actor=None
    ):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
示例#10
0
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })
    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
示例#11
0
文件: store.py 项目: Kayle009/sentry
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Event enhancers.  These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    # Stacktrace based event processors.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
示例#12
0
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    new_process_behavior=None,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    has_changed = bool(data_has_changed)
    new_process_behavior = bool(new_process_behavior)

    metrics.incr("tasks.store.process_event.new_process_behavior",
                 tags={"value": new_process_behavior})

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        if not new_process_behavior:
            # Event enhancers.  These run before anything else.
            for plugin in plugins.all(version=2):
                with metrics.timer("tasks.store.process_event.enhancers",
                                   tags={"plugin": plugin.slug}):
                    enhancers = safe_execute(plugin.get_event_enhancers,
                                             data=data)
                    for enhancer in enhancers or ():
                        enhanced = safe_execute(
                            enhancer,
                            data,
                            _passthrough_errors=(RetrySymbolication, ))
                        if enhanced:
                            data = enhanced
                            has_changed = True

        # Stacktrace based event processors.
        with metrics.timer("tasks.store.process_event.stacktraces"):
            new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning("process.slow",
                                 extra={
                                     "project_id": project_id,
                                     "event_id": event_id
                                 })

        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={
                    "project_id": project_id,
                    "event_id": event_id
                },
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors and `get_event_enhancers`.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (has_changed and options.get("processing.can-use-scrubbers")
            and features.has("organizations:datascrubbers-v2",
                             project.organization,
                             actor=None)):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data,
                                    project_config=project_config,
                                    event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with metrics.timer("tasks.store.process_event.preprocessors",
                           tags={"plugin": plugin.slug}):
            processors = safe_execute(plugin.get_event_preprocessors,
                                      data=data,
                                      _with_transaction=False)
            for processor in processors or ():
                result = safe_execute(processor, data)
                if result:
                    data = result
                    has_changed = True

    assert data[
        "project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id,
                                 process_task, project)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)