Exemplo n.º 1
0
    def _update_cachefiles(self, project, debug_files, cls):
        rv = []
        conversion_errors = {}

        for debug_file in debug_files:
            debug_id = debug_file.debug_id

            # Find all the known bad files we could not convert last time. We
            # use the debug identifier and file checksum to identify the source
            # DIF for historic reasons (debug_file.id would do, too).
            cache_key = 'scbe:%s:%s' % (debug_id, debug_file.file.checksum)
            err = default_cache.get(cache_key)
            if err is not None:
                conversion_errors[debug_id] = err
                continue

            # Download the original debug symbol and convert the object file to
            # a cache. This can either yield a cache object, an error or none of
            # the above. THE FILE DOWNLOAD CAN TAKE SIGNIFICANT TIME.
            with debug_file.file.getfile(as_tempfile=True) as tf:
                file, cache, err = self._update_cachefile(debug_file, tf.name, cls)

            # Store this conversion error so that we can skip subsequent
            # conversions. There might be concurrent conversions running for the
            # same debug file, however.
            if err is not None:
                default_cache.set(cache_key, err, CONVERSION_ERROR_TTL)
                conversion_errors[debug_id] = err
                continue

            if file is not None or cache is not None:
                rv.append((debug_id, file, cache))

        return rv, conversion_errors
Exemplo n.º 2
0
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # Iterate over all plugins looking for processors based on the input data
    # plugins should yield a processor function only if it actually can operate
    # on the input data, otherwise it should yield nothing
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            # On the first processor found, we just defer to the process_event
            # queue to handle the actual work.
            process_event.delay(cache_key=cache_key, start_time=start_time)
            return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
Exemplo n.º 3
0
    def get(self, request, wizard_hash=None):
        """
        This tries to retrieve and return the cache content if possible
        otherwise creates new cache
        """
        if wizard_hash is not None:
            key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
            wizard_data = default_cache.get(key)

            if wizard_data is None:
                return Response(status=404)
            elif wizard_data == 'empty':
                # when we just created a clean cache
                return Response(status=400)

            return Response(serialize(wizard_data))
        else:
            # This creates a new available hash url for the project wizard
            rate_limited = ratelimits.is_limited(
                key='rl:setup-wizard:ip:%s' % request.META['REMOTE_ADDR'],
                limit=10,
            )
            if rate_limited:
                logger.info('setup-wizard.rate-limit')
                return Response(
                    {
                        'Too wizard requests',
                    }, status=403
                )
            wizard_hash = get_random_string(
                64, allowed_chars='abcdefghijklmnopqrstuvwxyz012345679')

            key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
            default_cache.set(key, 'empty', SETUP_WIZARD_CACHE_TIMEOUT)
            return Response(serialize({'hash': wizard_hash}))
Exemplo n.º 4
0
def create_failed_event(cache_key, project, issues, event_id):
    """If processing failed we put the original data from the cache into a
    raw event.
    """
    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)
Exemplo n.º 5
0
def process_event(cache_key, start_time=None, **kwargs):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key, data=None, start_time=start_time)
Exemplo n.º 6
0
def _do_preprocess_event(cache_key, data, start_time, event_id,
                         process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty',
            extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time,
            event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time,
        event_id=event_id)
Exemplo n.º 7
0
def save_event(cache_key=None, data=None, start_time=None, event_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import EventManager

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    project = data.pop('project')

    delete_raw_event(project, event_id)

    Raven.tags_context({
        'project': project,
    })

    try:
        manager = EventManager(data)
        manager.save(project)
    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process', time() - start_time,
                           instance=data['platform'])
Exemplo n.º 8
0
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        logger.error('Data not available in preprocess_event (cache_key=%s)', cache_key)
        return

    project = data['project']

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        for processor in (safe_execute(plugin.get_event_preprocessors) or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed and cache_key:
        default_cache.set(cache_key, data, 3600)

    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
Exemplo n.º 9
0
    def _update_cachefiles(self, project, debug_files):
        rv = []

        # Find all the known bad files we could not convert last time
        # around
        conversion_errors = {}
        for debug_file in debug_files:
            cache_key = 'scbe:%s:%s' % (debug_file.debug_id, debug_file.file.checksum)
            err = default_cache.get(cache_key)
            if err is not None:
                conversion_errors[debug_file.debug_id] = err

        for debug_file in debug_files:
            dsym_id = debug_file.debug_id
            if dsym_id in conversion_errors:
                continue

            with debug_file.file.getfile(as_tempfile=True) as tf:
                symcache_file, conversion_error = self._update_cachefile(
                    debug_file, tf)
            if symcache_file is not None:
                rv.append((dsym_id, symcache_file))
            elif conversion_error is not None:
                conversion_errors[dsym_id] = conversion_error

        return rv, conversion_errors
Exemplo n.º 10
0
def _do_preprocess_event(cache_key, data, start_time, event_id, process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'}, skip_internal=False)
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(
        cache_key=cache_key, data=data, start_time=start_time, event_id=event_id,
        project_id=project
    )
Exemplo n.º 11
0
def reprocess_minidump(data):
    project = Project.objects.get_from_cache(id=data['project'])

    minidump_is_reprocessed_cache_key = minidump_reprocessed_cache_key_for_event(data)
    if default_cache.get(minidump_is_reprocessed_cache_key):
        return

    if not _is_symbolicator_enabled(project, data):
        rv = reprocess_minidump_with_cfi(data)
        default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)
        return rv

    minidump = get_attached_minidump(data)

    if not minidump:
        logger.error("Missing minidump for minidump event")
        return

    request_id_cache_key = request_id_cache_key_for_event(data)

    response = run_symbolicator(
        project=project,
        request_id_cache_key=request_id_cache_key,
        create_task=create_minidump_task,
        minidump=make_buffered_slice_reader(minidump.data, None)
    )

    if handle_symbolicator_response_status(data, response):
        merge_symbolicator_minidump_response(data, response)

    event_cache_key = cache_key_for_event(data)
    default_cache.set(event_cache_key, dict(data), 3600)
    default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)

    return data
Exemplo n.º 12
0
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
Exemplo n.º 13
0
def get_assemble_status(project, checksum):
    """For a given file it checks what the current status of the assembling is.
    Returns a tuple in the form ``(status, details)`` where details is either
    `None` or a string identifying an error condition or notice.
    """
    cache_key = 'assemble-status:%s' % _get_idempotency_id(
        project, checksum)
    rv = default_cache.get(cache_key)
    if rv is None:
        return None, None
    return tuple(rv)
Exemplo n.º 14
0
def public_dsn():
    project_id = settings.SENTRY_FRONTEND_PROJECT or settings.SENTRY_PROJECT
    cache_key = 'dsn:%s' % (project_id, )

    result = default_cache.get(cache_key)
    if result is None:
        key = _get_project_key(project_id)
        if key:
            result = key.dsn_public
        else:
            result = ''
        default_cache.set(cache_key, result, 60)
    return result
Exemplo n.º 15
0
def _do_process_event(cache_key, start_time, event_id):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty',
            extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })
    has_changed = False

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors,
                                  data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        if issues:
            create_failed_event(cache_key, project, list(issues.values()),
                event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key, data=None, start_time=start_time,
        event_id=event_id)
Exemplo n.º 16
0
def save_event(cache_key=None, data=None, start_time=None, event_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import tsdb

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    project = data.pop('project')

    delete_raw_event(project, event_id, allow_hint_clear=True)

    Raven.tags_context({
        'project': project,
    })

    try:
        manager = EventManager(data)
        manager.save(project)
    except HashDiscarded as exc:
        # TODO(jess): remove this before it goes out to a wider audience
        info_logger.info(
            'discarded.hash', extra={
                'project_id': project,
                'description': exc.message,
            }
        )
        tsdb.incr(tsdb.models.project_total_received_discarded, project, timestamp=start_time)
    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Exemplo n.º 17
0
def save_event(cache_key=None, data=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import EventManager

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        return

    project = data.pop('project')

    try:
        manager = EventManager(data)
        manager.save(project)
    finally:
        if cache_key:
            default_cache.delete(cache_key)
Exemplo n.º 18
0
def save_event(cache_key=None, data=None, start_time=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import EventManager

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        return

    project = data.pop('project')

    try:
        manager = EventManager(data)
        manager.save(project)
    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process', time() - start_time)
Exemplo n.º 19
0
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task):
    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'}, skip_internal=False)
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.get_from_cache(id=project_id)

    if should_process(data):
        from_reprocessing = process_task is process_event_from_reprocessing
        submit_process(project, from_reprocessing, cache_key, event_id, start_time, original_data)
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
Exemplo n.º 20
0
def create_failed_event(
    cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None
):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    # We can only create failed events for events that can potentially
    # create failed events.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (
        reprocessing_active
        and reprocessing.get_reprocessing_revision(project_id, cached=False) != reprocessing_rev
    ):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={"reprocessing_active": reprocessing_active, "issues": issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr("events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False)
        error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    default_cache.delete(cache_key)

    return True
Exemplo n.º 21
0
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'process'
                     })
        error_logger.error('process.failed.empty',
                           extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Event enhancers.  These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    # Stacktrace based event processors.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors,
                                  data=data,
                                  _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data[
        'project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                    cache_key,
                    project,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key,
                               start_time=start_time,
                               event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key,
                     data=None,
                     start_time=start_time,
                     event_id=event_id,
                     project_id=project)
Exemplo n.º 22
0
    def get(self, request, wizard_hash):
        """
        This opens a page where with an active session fill stuff into the cache
        Redirects to organization whenever cache has been deleted
        """
        context = {
            'hash': wizard_hash
        }
        key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)

        wizard_data = default_cache.get(key)
        if wizard_data is None:
            return self.redirect_to_org(request)

        orgs = Organization.objects.filter(
            member_set__role__in=[x.id for x in roles.with_scope('org:read')],
            member_set__user=request.user,
            status=OrganizationStatus.VISIBLE,
        ).order_by('-date_added')[:50]

        filled_projects = []

        for org in orgs:
            projects = list(Project.objects.filter(
                organization=org,
                status=ProjectStatus.VISIBLE,
            ).order_by('-date_added')[:50])
            for project in projects:
                enriched_project = serialize(project)
                enriched_project['organization'] = serialize(org)
                keys = list(ProjectKey.objects.filter(
                    project=project,
                    roles=ProjectKey.roles.store,
                    status=ProjectKeyStatus.ACTIVE,
                ))
                enriched_project['keys'] = serialize(keys)
                filled_projects.append(enriched_project)

        # Fetching or creating a token
        token = None
        tokens = [
            x for x in ApiToken.objects.filter(user=request.user).all()
            if 'project:releases' in x.get_scopes()
        ]
        if not tokens:
            token = ApiToken.objects.create(
                user=request.user,
                scope_list=['project:releases'],
                refresh_token=None,
                expires_at=None,
            )
        else:
            token = tokens[0]

        result = {
            'apiKeys': serialize(token),
            'projects': filled_projects
        }

        key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
        default_cache.set(key, result, SETUP_WIZARD_CACHE_TIMEOUT)

        return render_to_response('sentry/setup-wizard.html', context, request)
Exemplo n.º 23
0
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data["project"]

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError("Event spent one hour in processing")

        retry_process_event.apply_async(
            args=(),
            kwargs={
                "process_task_name": process_task.__name__,
                "task_kwargs": {
                    "cache_key": cache_key,
                    "event_id": event_id,
                    "start_time": start_time,
                },
            },
            countdown=e.retry_after,
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by preprocessor"
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
Exemplo n.º 24
0
def save_event(cache_key=None,
               data=None,
               start_time=None,
               event_id=None,
               project_id=None,
               **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'post'
                     })
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization,
                        actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted,
                 project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(
                        id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append(
                        (tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time)
            if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process',
                           time() - start_time,
                           instance=data['platform'])
Exemplo n.º 25
0
def run_symbolicator(project,
                     request_id_cache_key,
                     create_task=create_payload_task,
                     **kwargs):
    symbolicator_options = options.get('symbolicator.options')
    base_url = symbolicator_options['url'].rstrip('/')
    assert base_url

    project_id = six.text_type(project.id)
    request_id = default_cache.get(request_id_cache_key)
    sess = Session()

    # Will be set lazily when a symbolicator request is fired
    sources = None

    attempts = 0
    wait = 0.5

    with sess:
        while True:
            try:
                if request_id:
                    rv = _poll_symbolication_task(
                        sess=sess,
                        base_url=base_url,
                        request_id=request_id,
                        project_id=project_id,
                    )
                else:
                    if sources is None:
                        sources = get_sources_for_project(project)

                    rv = create_task(sess=sess,
                                     base_url=base_url,
                                     project_id=project_id,
                                     sources=sources,
                                     **kwargs)

                metrics.incr('events.symbolicator.status_code',
                             tags={
                                 'status_code': rv.status_code,
                                 'project_id': project_id,
                             })

                if rv.status_code == 404 and request_id:
                    default_cache.delete(request_id_cache_key)
                    request_id = None
                    continue
                elif rv.status_code == 503:
                    raise RetrySymbolication(retry_after=10)

                rv.raise_for_status()
                json = rv.json()
                metrics.incr('events.symbolicator.response',
                             tags={
                                 'response': json['status'],
                                 'project_id': project_id,
                             })

                if json['status'] == 'pending':
                    default_cache.set(request_id_cache_key, json['request_id'],
                                      REQUEST_CACHE_TIMEOUT)
                    raise RetrySymbolication(retry_after=json['retry_after'])
                else:
                    default_cache.delete(request_id_cache_key)
                    return json

            except (IOError, RequestException):
                attempts += 1
                if attempts > MAX_ATTEMPTS:
                    logger.error('Failed to contact symbolicator',
                                 exc_info=True)

                    default_cache.delete(request_id_cache_key)
                    return

                time.sleep(wait)
                wait *= 2.0
Exemplo n.º 26
0
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Exemplo n.º 27
0
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Event enhancers.  These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    # Stacktrace based event processors.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
Exemplo n.º 28
0
def _do_process_event(cache_key, start_time, event_id, process_task,
                      data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
Exemplo n.º 29
0
def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        manager.save(project_id)
    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append((tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Exemplo n.º 30
0
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    new_process_behavior=None,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    has_changed = bool(data_has_changed)
    new_process_behavior = bool(new_process_behavior)

    metrics.incr("tasks.store.process_event.new_process_behavior",
                 tags={"value": new_process_behavior})

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        if not new_process_behavior:
            # Event enhancers.  These run before anything else.
            for plugin in plugins.all(version=2):
                with metrics.timer("tasks.store.process_event.enhancers",
                                   tags={"plugin": plugin.slug}):
                    enhancers = safe_execute(plugin.get_event_enhancers,
                                             data=data)
                    for enhancer in enhancers or ():
                        enhanced = safe_execute(
                            enhancer,
                            data,
                            _passthrough_errors=(RetrySymbolication, ))
                        if enhanced:
                            data = enhanced
                            has_changed = True

        # Stacktrace based event processors.
        with metrics.timer("tasks.store.process_event.stacktraces"):
            new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning("process.slow",
                                 extra={
                                     "project_id": project_id,
                                     "event_id": event_id
                                 })

        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={
                    "project_id": project_id,
                    "event_id": event_id
                },
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors and `get_event_enhancers`.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (has_changed and options.get("processing.can-use-scrubbers")
            and features.has("organizations:datascrubbers-v2",
                             project.organization,
                             actor=None)):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data,
                                    project_config=project_config,
                                    event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with metrics.timer("tasks.store.process_event.preprocessors",
                           tags={"plugin": plugin.slug}):
            processors = safe_execute(plugin.get_event_preprocessors,
                                      data=data,
                                      _with_transaction=False)
            for processor in processors or ():
                result = safe_execute(processor, data)
                if result:
                    data = result
                    has_changed = True

    assert data[
        "project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id,
                                 process_task, project)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
Exemplo n.º 31
0
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })
    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
Exemplo n.º 32
0
def _do_process_event(cache_key,
                      start_time,
                      event_id,
                      process_task,
                      data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(
                    enhancer, data, _passthrough_errors=(RetrySymbolication, ))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning("process.slow",
                                 extra={
                                     "project_id": project_id,
                                     "event_id": event_id
                                 })

        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={
                    "project_id": project_id,
                    "event_id": event_id
                },
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # We assume that all potential PII is produced as part of stacktrace
    # processors and event enhancers.
    #
    # We assume that plugins for eg sessionstack (running via
    # `plugin.get_event_preprocessors`) are not producing data that should be
    # PII-stripped, ever.
    #
    # XXX(markus): Javascript event error translation is happening after this block
    # because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`, possibly move?
    if has_changed and features.has("organizations:datascrubbers-v2",
                                    project.organization,
                                    actor=None):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data,
                                    project_config=project_config,
                                    event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors,
                                  data=data,
                                  _with_transaction=False)
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data[
        "project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke outselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id,
                           start_time, data)
            process_task.delay(cache_key,
                               start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
Exemplo n.º 33
0
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id,
                                     assume_normalized=True,
                                     start_time=start_time,
                                     cache_key=cache_key)

        except HashDiscarded:
            pass

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                            "organizations:event-attachments",
                            event.project.organization,
                            actor=None):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])
Exemplo n.º 34
0
    def post(self, request):
        """
        Registers a Relay
        `````````````````

        Registers the relay with the sentry installation.  If a relay boots
        it will always attempt to invoke this endpoint.
        """

        try:
            json_data = json.loads(request.body)
        except ValueError:
            return Response({
                'detail': 'No valid json body',
            }, status=status.HTTP_400_BAD_REQUEST)

        serializer = RelayRegisterResponseSerializer(data=json_data)

        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        sig = get_header_relay_signature(request)
        if not sig:
            return Response({
                'detail': 'Missing relay signature',
            }, status=status.HTTP_400_BAD_REQUEST)

        relay_id = six.text_type(get_register_response_relay_id(request.body))
        if relay_id != get_header_relay_id(request):
            return Response({
                'detail': 'relay_id in payload did not match header',
            }, status=status.HTTP_400_BAD_REQUEST)

        params = default_cache.get('relay-auth:%s' % relay_id)
        if params is None:
            return Response({
                'detail': 'Challenge expired'
            }, status=status.HTTP_401_UNAUTHORIZED)

        key = PublicKey.parse(params['public_key'])

        try:
            validate_register_response(key, request.body, sig)
        except Exception as exc:
            return Response({
                'detail': str(exc).splitlines()[0],
            }, status=status.HTTP_400_BAD_REQUEST)

        is_internal = is_internal_relay(request, params['public_key'])
        try:
            relay = Relay.objects.get(relay_id=relay_id)
        except Relay.DoesNotExist:
            relay = Relay.objects.create(
                relay_id=relay_id,
                public_key=params['public_key'],
                is_internal=is_internal
            )
        else:
            relay.last_seen = timezone.now()
            relay.is_internal = is_internal
            relay.save()
        default_cache.delete('relay-auth:%s' % relay_id)

        return Response(serialize({
            'relay_id': relay.relay_id,
        }))
Exemplo n.º 35
0
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
       reprocessing.get_reprocessing_revision(project_id, cached=False) != \
       reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'}, skip_internal=False)
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
Exemplo n.º 36
0
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Exemplo n.º 37
0
def _do_symbolicate_event(cache_key,
                          start_time,
                          event_id,
                          symbolicate_task,
                          data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "symbolicate"
                     },
                     skip_internal=False)
        error_logger.error("symbolicate.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    try:
        with metrics.timer("tasks.store.symbolicate_event.symbolication"):
            symbolicated_data = safe_execute(
                symbolication_function,
                data,
                _passthrough_errors=(RetrySymbolication, ))
        if symbolicated_data:
            data = symbolicated_data
            has_changed = True

    except RetrySymbolication as e:
        error_logger.warn("retry symbolication")

        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning("symbolicate.slow",
                                 extra={
                                     "project_id": project_id,
                                     "event_id": event_id
                                 })

        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "symbolicate.failed.infinite_retry",
                extra={
                    "project_id": project_id,
                    "event_id": event_id
                },
            )
        else:
            # Requeue the task in the "sleep" queue
            retry_symbolicate_event.apply_async(
                args=(),
                kwargs={
                    "symbolicate_task_name": symbolicate_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        default_cache.set(cache_key, data, 3600)

    submit_process(
        project,
        from_reprocessing,
        cache_key,
        event_id,
        start_time,
        data,
        has_changed,
        new_process_behavior=True,
    )
Exemplo n.º 38
0
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")

        key_id = None if data is None else data.get("key_id")
        if key_id is not None:
            key_id = int(key_id)
        timestamp = to_datetime(start_time) if start_time is not None else None

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id,
                                     assume_normalized=True,
                                     cache_key=cache_key)

            with metrics.timer("tasks.store.do_save_event.track_outcome"):
                # This is where we can finally say that we have accepted the event.
                track_outcome(
                    event.project.organization_id,
                    event.project.id,
                    key_id,
                    Outcome.ACCEPTED,
                    None,
                    timestamp,
                    event_id,
                )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)
            # There is no signal supposed to be sent for this particular
            # outcome-reason combination. Prevent the outcome consumer from
            # emitting it for now.
            #
            # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
            )

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                            "organizations:event-attachments",
                            event.project.organization,
                            actor=None):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])
Exemplo n.º 39
0
def run_symbolicator(project, request_id_cache_key, create_task=create_payload_task, **kwargs):
    symbolicator_options = options.get('symbolicator.options')
    base_url = symbolicator_options['url'].rstrip('/')
    assert base_url

    project_id = six.text_type(project.id)
    request_id = default_cache.get(request_id_cache_key)
    sess = Session()

    # Will be set lazily when a symbolicator request is fired
    sources = None

    attempts = 0
    wait = 0.5

    with sess:
        while True:
            try:
                if request_id:
                    rv = _poll_symbolication_task(
                        sess=sess, base_url=base_url,
                        request_id=request_id, project_id=project_id,
                    )
                else:
                    if sources is None:
                        sources = get_sources_for_project(project)

                    rv = create_task(
                        sess=sess, base_url=base_url,
                        project_id=project_id,
                        sources=sources,
                        **kwargs
                    )

                metrics.incr('events.symbolicator.status_code', tags={
                    'status_code': rv.status_code,
                    'project_id': project_id,
                })

                if rv.status_code == 404 and request_id:
                    default_cache.delete(request_id_cache_key)
                    request_id = None
                    continue
                elif rv.status_code == 503:
                    raise RetrySymbolication(retry_after=10)

                rv.raise_for_status()
                json = rv.json()
                metrics.incr('events.symbolicator.response', tags={
                    'response': json['status'],
                    'project_id': project_id,
                })

                if json['status'] == 'pending':
                    default_cache.set(
                        request_id_cache_key,
                        json['request_id'],
                        REQUEST_CACHE_TIMEOUT)
                    raise RetrySymbolication(retry_after=json['retry_after'])
                elif json['status'] == 'completed':
                    default_cache.delete(request_id_cache_key)
                    return rv.json()
                else:
                    logger.error("Unexpected status: %s", json['status'])

                    default_cache.delete(request_id_cache_key)
                    return

            except (IOError, RequestException):
                attempts += 1
                if attempts > MAX_ATTEMPTS:
                    logger.error('Failed to contact symbolicator', exc_info=True)

                    default_cache.delete(request_id_cache_key)
                    return

                time.sleep(wait)
                wait *= 2.0