Example #1
    def after(self, event, state):
        service = self.get_option('service')

        extra = {'event_id': event.id}
        if not service:
            self.logger.info('rules.fail.is_configured', extra=extra)
            return

        app = None
        try:
            app = SentryApp.objects.get(slug=service)
        except SentryApp.DoesNotExist:
            pass

        if app:
            kwargs = {'sentry_app': app}
            metrics.incr('notifications.sent', instance=app.slug, skip_internal=False)
            yield self.future(notify_sentry_app, **kwargs)
        else:
            plugin = plugins.get(service)
            if not plugin.is_enabled(self.project):
                extra['project_id'] = self.project.id
                self.logger.info('rules.fail.is_enabled', extra=extra)
                return

            group = event.group

            if not plugin.should_notify(group=group, event=event):
                extra['group_id'] = group.id
                self.logger.info('rule.fail.should_notify', extra=extra)
                return

            metrics.incr('notifications.sent', instance=plugin.slug, skip_internal=False)
            yield self.future(plugin.rule_notify)
Example #2
    def get_interfaces(self):
        was_renormalized = _should_skip_to_python(self.event_id)

        metrics.incr('event.get_interfaces',
                     tags={'rust_renormalized': was_renormalized})

        return CanonicalKeyView(get_interfaces(self.data, rust_renormalized=was_renormalized))
Example #3
    def _request(self, method, path, headers=None, data=None, params=None,
                 auth=None, json=True, allow_text=None, allow_redirects=None,
                 timeout=None):

        if allow_text is None:
            allow_text = self.allow_text

        if allow_redirects is None:
            allow_redirects = self.allow_redirects

        if allow_redirects is None:  # is still None
            allow_redirects = method.upper() == 'GET'

        if timeout is None:
            timeout = 30

        full_url = self.build_url(path)
        host = urlparse(full_url).netloc
        session = build_session()
        try:
            resp = getattr(session, method.lower())(
                url=full_url,
                headers=headers,
                json=data if json else None,
                data=data if not json else None,
                params=params,
                auth=auth,
                verify=self.verify_ssl,
                allow_redirects=allow_redirects,
                timeout=timeout,
            )
            resp.raise_for_status()
        except ConnectionError as e:
            metrics.incr('integrations.http_response', tags={
                'host': host,
                'status': 'connection_error'
            })
            raise ApiHostError.from_exception(e)
        except Timeout as e:
            metrics.incr('integrations.http_response', tags={
                'host': host,
                'status': 'timeout'
            })
            raise ApiTimeoutError.from_exception(e)
        except HTTPError as e:
            resp = e.response
            if resp is None:
                track_response_code(host, 'unknown')
                self.logger.exception('request.error', extra={
                    'url': full_url,
                })
                raise ApiError('Internal Error')
            track_response_code(host, resp.status_code)
            raise ApiError.from_response(resp)

        track_response_code(host, resp.status_code)
        if resp.status_code == 204:
            return {}

        return BaseApiResponse.from_response(resp, allow_text=allow_text)
Example #4
    def after(self, event, state):
        service = self.get_option('service')

        extra = {
            'event_id': event.id
        }
        if not service:
            self.logger.info('rules.fail.is_configured', extra=extra)
            return

        plugin = plugins.get(service)
        if not plugin.is_enabled(self.project):
            extra['project_id'] = self.project.id
            self.logger.info('rules.fail.is_enabled', extra=extra)
            return

        group = event.group

        if not plugin.should_notify(group=group, event=event):
            extra['group_id'] = group.id
            self.logger.info('rule.fail.should_notify', extra=extra)
            return

        metrics.incr('notifications.sent', instance=plugin.slug)
        yield self.future(plugin.rule_notify)
Example #5
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # Iterate over all plugins looking for processors based on the input data
    # plugins should yield a processor function only if it actually can operate
    # on the input data, otherwise it should yield nothing
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            # On the first processor found, we just defer to the process_event
            # queue to handle the actual work.
            process_event.delay(cache_key=cache_key, start_time=start_time)
            return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
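
The comment inside preprocess_event describes the version-2 plugin contract: get_event_preprocessors should yield processors only when the plugin can actually operate on the payload. A minimal sketch of a conforming plugin follows; the class name, import path, and the 'my_platform' check are illustrative assumptions, not real Sentry code.

# Hypothetical plugin illustrating the contract described in the comment above:
# yield a processor only if this plugin can operate on the given event data.
from sentry.plugins.base.v2 import Plugin2  # assumed v2 plugin base class


class MyPreprocessorPlugin(Plugin2):
    def get_event_preprocessors(self, data, **kwargs):
        if data.get('platform') != 'my_platform':
            return []  # cannot operate on this event, so yield nothing
        return [self.process_my_platform_event]

    def process_my_platform_event(self, data):
        # Return the (possibly mutated) data so callers such as Example #15
        # can detect that the payload changed and re-cache it.
        data.setdefault('tags', []).append(['my_platform', 'handled'])
        return data
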
Example #6
def _do_preprocess_event(cache_key, data, start_time, event_id, process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'}, skip_internal=False)
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(
        cache_key=cache_key, data=data, start_time=start_time, event_id=event_id,
        project_id=project
    )
Example #7
def task_revoked_handler(task, expired=False, **kwargs):
    if not isinstance(task, basestring):
        task = _get_task_name(task)
    if expired:
        metrics.incr('jobs.expired', instance=task)
    else:
        metrics.incr('jobs.revoked', instance=task)
Example #8
def create_failed_event(cache_key, project, issues, event_id):
    """If processing failed we put the original data from the cache into a
    raw event.
    """
    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)
Example #9
    def process(self, key):
        client = self.cluster.get_routing_client()
        lock_key = self._make_lock_key(key)
        # prevent a stampede due to the way we use celery etas + duplicate
        # tasks
        if not client.set(lock_key, '1', nx=True, ex=10):
            metrics.incr('buffer.revoked', tags={'reason': 'locked'})
            self.logger.info('Skipped process on %s; unable to get lock', key)
            return

        conn = self.cluster.get_local_client_for_key(key)
        pipe = conn.pipeline()
        pipe.hgetall(key)
        pipe.zrem(self.pending_key, key)
        pipe.delete(key)
        values = pipe.execute()[0]

        if not values:
            metrics.incr('buffer.revoked', tags={'reason': 'empty'})
            self.logger.info('Skipped process on %s; no values found', key)
            return

        model = import_string(values['m'])
        filters = pickle.loads(values['f'])
        incr_values = {}
        extra_values = {}
        for k, v in values.iteritems():
            if k.startswith('i+'):
                incr_values[k[2:]] = int(v)
            elif k.startswith('e+'):
                extra_values[k[2:]] = pickle.loads(v)

        super(RedisBuffer, self).process(model, incr_values, filters, extra_values)
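
The lock acquisition at the top of process() is the standard Redis "SET key value NX EX ttl" guard referred to in the stampede comment. A minimal standalone sketch, using redis-py directly and an illustrative key name, shows why duplicate tasks skip the work:

# Minimal sketch of the stampede guard used above. With nx=True, redis-py's
# set() succeeds only for the first caller; concurrent duplicates get a falsy
# result and bail out. The key name below is illustrative.
import redis

client = redis.StrictRedis()
lock_key = 'buffer:lock:some-key'

if client.set(lock_key, '1', nx=True, ex=10):
    do_process = True   # first caller within the 10s window: drain the buffer
else:
    do_process = False  # another task holds the lock: skip, as process() does
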
Example #10
def _do_preprocess_event(cache_key, data, start_time, event_id,
                         process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty',
            extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time,
            event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time,
        event_id=event_id)
Example #11
def save_event(cache_key=None, data=None, start_time=None, event_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import EventManager

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    project = data.pop('project')

    delete_raw_event(project, event_id)

    Raven.tags_context({
        'project': project,
    })

    try:
        manager = EventManager(data)
        manager.save(project)
    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process', time() - start_time,
                           instance=data['platform'])
Example #12
    def __init__(self, *args, **kwargs):
        StacktraceProcessor.__init__(self, *args, **kwargs)

        # If true, the project has been opted into using the symbolicator
        # service for native symbolication, which also means symbolic is not
        # used at all anymore.
        # The (iOS) symbolserver is still used regardless of this value.
        self.use_symbolicator = _is_symbolicator_enabled(self.project, self.data)

        metrics.incr('native.use_symbolicator', tags={'value': self.use_symbolicator})

        self.arch = cpu_name_from_data(self.data)
        self.signal = signal_from_data(self.data)

        self.sym = None
        self.difs_referenced = set()

        images = get_path(self.data, 'debug_meta', 'images', default=(),
                          filter=self._is_valid_image)

        if images:
            self.available = True
            self.sdk_info = get_sdk_from_event(self.data)
            self.object_lookup = ObjectLookup(images)
            self.images = images
        else:
            self.available = False
Example #13
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        logger.error('Data not available in preprocess_event (cache_key=%s)', cache_key)
        return

    project = data['project']

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        for processor in (safe_execute(plugin.get_event_preprocessors) or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed and cache_key:
        default_cache.set(cache_key, data, 3600)

    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
Example #14
    def send_async(self, to=None, cc=None, bcc=None):
        from sentry.tasks.email import send_email
        fmt = options.get('system.logging-format')
        messages = self.get_built_messages(to, cc=cc, bcc=bcc)
        extra = {'message_type': self.type}
        loggable = [v for k, v in six.iteritems(self.context) if hasattr(v, 'id')]
        for context in loggable:
            extra['%s_id' % type(context).__name__.lower()] = context.id

        log_mail_queued = partial(logger.info, 'mail.queued', extra=extra)
        for message in messages:
            safe_execute(
                send_email.delay,
                message=message,
                _with_transaction=False,
            )
            extra['message_id'] = message.extra_headers['Message-Id']
            metrics.incr('email.queued', instance=self.type)
            if fmt == LoggingFormat.HUMAN:
                extra['message_to'] = self.format_to(message.to)
                log_mail_queued()
            elif fmt == LoggingFormat.MACHINE:
                for recipient in message.to:
                    extra['message_to'] = recipient
                    log_mail_queued()
Example #15
def process_event(cache_key, start_time=None, **kwargs):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key, data=None, start_time=start_time)
Example #16
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr("email.sent", len(messages))
    for message in messages:
        logger.info(name="sentry.mail", event="mail.sent", message_id=message.extra_headers["Message-Id"])
    return sent
Example #17
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
Example #18
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr('email.sent', len(messages))
    for message in messages:
        extra = {'message_id': message.extra_headers['Message-Id']}
        logger.info('mail.sent', extra=extra)
    return sent
Example #19
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr("email.sent", len(messages))
    for message in messages:
        extra = {"message_id": message.extra_headers["Message-Id"]}
        logger.info("mail.sent", extra=extra)
    return sent
Example #20
    def close(self):
        StacktraceProcessor.close(self)
        if self.dsyms_referenced:
            metrics.incr('dsyms.processed',
                         amount=len(self.dsyms_referenced),
                         instance=self.project.id)
        if self.sym is not None:
            self.sym.close()
            self.sym = None
Example #21
    def after(self, event, state):
        group = event.group

        for plugin in self.get_plugins():
            if not safe_execute(plugin.should_notify, group=group, event=event):
                continue

            metrics.incr('notifications.sent', instance=plugin.slug)
            yield self.future(plugin.rule_notify)
Example #22
def get_event_file_committers(project, event, frame_limit=25):
    # populate event data
    Event.objects.bind_nodes([event], 'data')

    group = Group.objects.get(id=event.group_id)

    first_release_version = group.get_first_release()

    if not first_release_version:
        raise Release.DoesNotExist

    releases = get_previous_releases(project, first_release_version)
    if not releases:
        raise Release.DoesNotExist

    commits = _get_commits(releases)

    if not commits:
        raise Commit.DoesNotExist

    frames = _get_frame_paths(event)
    app_frames = [frame for frame in frames if frame['in_app']][-frame_limit:]
    if not app_frames:
        app_frames = [frame for frame in frames][-frame_limit:]

    # TODO(maxbittker) return this set instead of annotated frames
    # XXX(dcramer): frames may not define a filepath. For example, in Java its common
    # to only have a module name/path
    path_set = {f for f in (frame.get('filename') or frame.get('abs_path')
                            for frame in app_frames) if f}

    file_changes = []
    if path_set:
        file_changes = _get_commit_file_changes(commits, path_set)

    commit_path_matches = {
        path: _match_commits_path(file_changes, path) for path in path_set
    }

    annotated_frames = [
        {
            'frame': frame,
            'commits': commit_path_matches.get(frame.get('filename') or frame.get('abs_path')) or []
        } for frame in app_frames
    ]

    relevant_commits = list(
        {match for match in commit_path_matches for match in commit_path_matches[match]}
    )

    committers = _get_committers(annotated_frames, relevant_commits)
    metrics.incr(
        'feature.owners.has-committers',
        instance='hit' if committers else 'miss',
        skip_internal=False)
    return committers
Example #23
    def close(self):
        StacktraceProcessor.close(self)
        if self.dsyms_referenced:
            metrics.incr(
                'dsyms.processed',
                amount=len(self.dsyms_referenced),
                skip_internal=True,
                tags={
                    'project_id': self.project.id,
                },
            )
Example #24
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr('email.sent', len(messages), skip_internal=False)
    for message in messages:
        extra = {
            'message_id': message.extra_headers['Message-Id'],
            'size': len(message.message().as_bytes()),
        }
        logger.info('mail.sent', extra=extra)
    return sent
Example #25
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr('email.sent', len(messages))
    for message in messages:
        logger.info(
            name='sentry.mail',
            event='mail.sent',
            message_id=message.extra_headers['Message-Id'],
        )
    return sent
Example #26
    def close(self):
        StacktraceProcessor.close(self)
        if self.sourcemaps_touched:
            metrics.incr(
                'sourcemaps.processed',
                amount=len(self.sourcemaps_touched),
                skip_internal=True,
                tags={
                    'project_id': self.project.id,
                },
            )
Example #27
    def after(self, event, state):
        group = event.group

        for plugin in self.get_plugins():
            # plugin is now wrapped in the LegacyPluginService object
            plugin = plugin.service
            if not safe_execute(
                plugin.should_notify, group=group, event=event, _with_transaction=False
            ):
                continue

            metrics.incr('notifications.sent', instance=plugin.slug, skip_internal=False)
            yield self.future(plugin.rule_notify)
Example #28
    def set(self, key, attachments, timeout=None):
        key = self.make_key(key)
        for index, attachment in enumerate(attachments):
            compressed = zlib.compress(attachment.data)
            self.inner.set(u'{}:{}'.format(key, index), compressed, timeout, raw=True)

            metrics_tags = {'type': attachment.type}
            metrics.incr('attachments.received', tags=metrics_tags, skip_internal=False)
            metrics.timing('attachments.blob-size.raw', len(attachment.data), tags=metrics_tags)
            metrics.timing('attachments.blob-size.compressed', len(compressed), tags=metrics_tags)

        meta = [attachment.meta() for attachment in attachments]
        self.inner.set(key, meta, timeout, raw=False)
Example #29
    def _record_time(self, request, status_code):
        if not hasattr(request, "_view_path"):
            return

        metrics.incr(
            "view.response", instance=request._view_path, tags={"method": request.method, "status_code": status_code}
        )

        if not hasattr(request, "_start_time"):
            return

        ms = int((time.time() - request._start_time) * 1000)
        metrics.timing("view.duration", ms, instance=request._view_path, tags={"method": request.method})
Example #30
    def emit(self, record, logger=get_logger()):
        """
        Turn something like:
            > django.request.Forbidden (CSRF cookie not set.): /account
        into:
            > django.request.forbidden_csrf_cookie_not_set
        and track it as an incremented counter.
        """
        key = record.name + '.' + record.getMessage()
        key = key.lower()
        key = whitespace_re.sub("_", key)
        key = metrics_badchars_re.sub("", key)
        key = ".".join(key.split(".")[:3])
        metrics.incr(key)
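
The docstring above explains the normalization, but whitespace_re and metrics_badchars_re are defined elsewhere in the module. A small self-contained sketch, assuming definitions along the lines of r'\s+' and r'[^a-z0-9._]', reproduces the docstring's example:

# A worked instance of the docstring's example, with plausible definitions
# for the two regexes referenced above (assumed, not taken from this snippet).
import re

whitespace_re = re.compile(r'\s+')
metrics_badchars_re = re.compile(r'[^a-z0-9._]')

key = 'django.request' + '.' + 'Forbidden (CSRF cookie not set.): /account'
key = key.lower()
key = whitespace_re.sub('_', key)       # whitespace -> underscores
key = metrics_badchars_re.sub('', key)  # strip characters like ( ) : /
key = '.'.join(key.split('.')[:3])      # cap the key at three dot segments
assert key == 'django.request.forbidden_csrf_cookie_not_set'
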
Example #31
File: api.py Project: ob3/sentry
    def process(self, request, project, auth, helper, data, **kwargs):
        metrics.incr('events.total')

        remote_addr = request.META['REMOTE_ADDR']
        event_received.send_robust(
            ip=remote_addr,
            sender=type(self),
        )

        if not is_valid_ip(remote_addr, project):
            app.tsdb.incr_multi([
                (app.tsdb.models.project_total_received, project.id),
                (app.tsdb.models.project_total_blacklisted, project.id),
                (app.tsdb.models.organization_total_received,
                 project.organization_id),
                (app.tsdb.models.organization_total_blacklisted,
                 project.organization_id),
            ])
            metrics.incr('events.blacklisted')
            raise APIForbidden('Blacklisted IP address: %s' % (remote_addr, ))

        # TODO: improve this API (e.g. make RateLimit act on __ne__)
        rate_limit = safe_execute(app.quotas.is_rate_limited,
                                  project=project,
                                  _with_transaction=False)
        if isinstance(rate_limit, bool):
            rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

        # XXX(dcramer): when the rate limiter fails we drop events to ensure
        # it cannot cascade
        if rate_limit is None or rate_limit.is_limited:
            if rate_limit is None:
                helper.log.debug(
                    'Dropped event due to error with rate limiter')
            app.tsdb.incr_multi([
                (app.tsdb.models.project_total_received, project.id),
                (app.tsdb.models.project_total_rejected, project.id),
                (app.tsdb.models.organization_total_received,
                 project.organization_id),
                (app.tsdb.models.organization_total_rejected,
                 project.organization_id),
            ])
            metrics.incr('events.dropped')
            if rate_limit is not None:
                raise APIRateLimited(rate_limit.retry_after)
        else:
            app.tsdb.incr_multi([
                (app.tsdb.models.project_total_received, project.id),
                (app.tsdb.models.organization_total_received,
                 project.organization_id),
            ])

        content_encoding = request.META.get('HTTP_CONTENT_ENCODING', '')

        if isinstance(data, basestring):
            if content_encoding == 'gzip':
                data = helper.decompress_gzip(data)
            elif content_encoding == 'deflate':
                data = helper.decompress_deflate(data)
            elif not data.startswith('{'):
                data = helper.decode_and_decompress_data(data)
            data = helper.safely_load_json_string(data)

        # mutates data
        data = helper.validate_data(project, data)

        if 'sdk' not in data:
            sdk = helper.parse_client_as_sdk(auth.client)
            if sdk:
                data['sdk'] = sdk

        # mutates data
        manager = EventManager(data, version=auth.version)
        data = manager.normalize()

        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)

        if org_options.get('sentry:require_scrub_ip_address', False):
            scrub_ip_address = True
        else:
            scrub_ip_address = project.get_option('sentry:scrub_ip_address',
                                                  False)

        # insert IP address if not available and wanted
        if not scrub_ip_address:
            helper.ensure_has_ip(
                data,
                remote_addr,
                set_if_missing=auth.is_public
                or data.get('platform') in ('javascript', 'cocoa', 'objc'))

        event_id = data['event_id']

        # TODO(dcramer): ideally we'd only validate this if the event_id was
        # supplied by the user
        cache_key = 'ev:%s:%s' % (
            project.id,
            event_id,
        )

        if cache.get(cache_key) is not None:
            raise APIForbidden(
                'An event with the same ID already exists (%s)' % (event_id, ))

        if org_options.get('sentry:require_scrub_data', False):
            scrub_data = True
        else:
            scrub_data = project.get_option('sentry:scrub_data', True)

        if scrub_data:
            # We filter data immediately before it ever gets into the queue
            sensitive_fields_key = 'sentry:sensitive_fields'
            sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                                project.get_option(sensitive_fields_key, []))

            if org_options.get('sentry:require_scrub_defaults', False):
                scrub_defaults = True
            else:
                scrub_defaults = project.get_option('sentry:scrub_defaults',
                                                    True)

            inst = SensitiveDataFilter(
                fields=sensitive_fields,
                include_defaults=scrub_defaults,
            )
            inst.apply(data)

        if scrub_ip_address:
            # We filter data immediately before it ever gets into the queue
            helper.ensure_does_not_have_ip(data)

        # mutates data (strips a lot of context if not queued)
        helper.insert_data_to_database(data)

        cache.set(cache_key, '', 60 * 5)

        helper.log.debug('New event received (%s)', event_id)

        event_accepted.send_robust(
            ip=remote_addr,
            data=data,
            project=project,
            sender=type(self),
        )

        return event_id
Example #32
    def process_update(self, subscription_update):
        dataset = self.subscription.snuba_query.dataset
        if dataset == "events" and not features.has(
                "organizations:incidents",
                self.subscription.project.organization):
            # They have downgraded since these subscriptions have been created. So we just ignore updates for now.
            metrics.incr(
                "incidents.alert_rules.ignore_update_missing_incidents")
            return
        elif dataset == "transactions" and not features.has(
                "organizations:performance-view",
                self.subscription.project.organization):
            # They have downgraded since these subscriptions have been created. So we just ignore updates for now.
            metrics.incr(
                "incidents.alert_rules.ignore_update_missing_incidents_performance"
            )
            return

        if not hasattr(self, "alert_rule"):
            # If the alert rule has been removed then just skip
            metrics.incr(
                "incidents.alert_rules.no_alert_rule_for_subscription")
            logger.error(
                "Received an update for a subscription, but no associated alert rule exists"
            )
            # TODO: Delete subscription here.
            return

        if subscription_update["timestamp"] <= self.last_update:
            metrics.incr(
                "incidents.alert_rules.skipping_already_processed_update")
            return

        self.last_update = subscription_update["timestamp"]

        if len(subscription_update["values"]["data"]) > 1:
            logger.warning(
                "Subscription returned more than 1 row of data",
                extra={
                    "subscription_id": self.subscription.id,
                    "dataset": self.subscription.snuba_query.dataset,
                    "snuba_subscription_id": self.subscription.subscription_id,
                    "result": subscription_update,
                },
            )
        aggregation_value = subscription_update["values"]["data"][0].values()[0]
        alert_operator, resolve_operator = self.THRESHOLD_TYPE_OPERATORS[
            AlertRuleThresholdType(self.alert_rule.threshold_type)]
        for trigger in self.triggers:
            if alert_operator(
                    aggregation_value,
                    trigger.alert_threshold) and not self.check_trigger_status(
                        trigger, TriggerStatus.ACTIVE):
                metrics.incr("incidents.alert_rules.threshold",
                             tags={"type": "alert"})
                with transaction.atomic():
                    self.trigger_alert_threshold(trigger, aggregation_value)
            else:
                self.trigger_alert_counts[trigger.id] = 0

        if (resolve_operator(aggregation_value,
                             self.calculate_resolve_threshold())
                and self.active_incident):
            self.rule_resolve_counts += 1
            if self.rule_resolve_counts >= self.alert_rule.threshold_period:
                # TODO: Make sure we iterate over critical then warning in order.
                # Potentially also de-dupe actions that are identical. Maybe just
                # collect all actions, de-dupe and resolve all at once
                metrics.incr("incidents.alert_rules.threshold",
                             tags={"type": "resolve"})
                for trigger in self.triggers:
                    if self.check_trigger_status(trigger,
                                                 TriggerStatus.ACTIVE):
                        with transaction.atomic():
                            self.trigger_resolve_threshold(
                                trigger, aggregation_value)

                update_incident_status(
                    self.active_incident,
                    IncidentStatus.CLOSED,
                    status_method=IncidentStatusMethod.RULE_TRIGGERED,
                    date_closed=self.calculate_event_date_from_update_date(
                        self.last_update),
                )
                self.active_incident = None
                self.incident_triggers.clear()
                self.rule_resolve_counts = 0
        else:
            self.rule_resolve_counts = 0

        # We update the rule stats here after we commit the transaction. This guarantees
        # that we'll never miss an update, since we'll never roll back if the process
        # is killed here. The trade-off is that we might process an update twice. Mostly
        # this will have no effect, but if someone manages to close a triggered incident
        # before the next one then we might alert twice.
        self.update_alert_rule_stats()
Example #33
    def close(self):
        StacktraceProcessor.close(self)
        if self.sourcemaps_touched:
            metrics.incr("sourcemaps.processed",
                         amount=len(self.sourcemaps_touched),
                         skip_internal=True)
Example #34
    def forward_event(self, event, payload):
        if not (self.project_token and self.project_index
                and self.project_instance):
            metrics.incr(
                "integrations.splunk.forward-event.unconfigured",
                tags={
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "event_type": event.get_event_type(),
                },
            )
            return False

        if self.host:
            payload["host"] = self.host

        client = self.get_client()

        try:
            # https://docs.splunk.com/Documentation/Splunk/7.2.3/Data/TroubleshootHTTPEventCollector
            client.request(payload)

            metrics.incr(
                "integrations.splunk.forward-event.success",
                tags={
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "event_type": event.get_event_type(),
                },
            )
            return True
        except Exception as exc:
            metric = "integrations.splunk.forward-event.error"
            metrics.incr(
                metric,
                tags={
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "event_type": event.get_event_type(),
                },
            )
            logger.info(
                metric,
                extra={
                    "instance": self.project_instance,
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "error": str(exc),
                },
            )

            if isinstance(
                    exc,
                (
                    ApiHostError,
                    ApiTimeoutError,
                ),
            ):
                # The above errors are already handled by the API client.
                # Just log and return.
                return False

            if isinstance(exc, ApiError) and exc.code == 403:
                # 403s are not errors or actionable for us do not re-raise
                return False

            raise
Example #35
    def _request(
        self,
        method,
        path,
        headers=None,
        data=None,
        params=None,
        auth=None,
        json=True,
        allow_text=None,
        allow_redirects=None,
        timeout=None,
    ):

        if allow_text is None:
            allow_text = self.allow_text

        if allow_redirects is None:
            allow_redirects = self.allow_redirects

        if allow_redirects is None:  # is still None
            allow_redirects = method.upper() == "GET"

        if timeout is None:
            timeout = 30

        full_url = self.build_url(path)
        session = build_session()

        metrics.incr(
            u"%s.http_request" % self.datadog_prefix,
            sample_rate=1.0,
            tags={self.integration_type: self.name},
        )

        try:
            with sentry_sdk.configure_scope() as scope:
                parent_span_id = scope.span.span_id
                trace_id = scope.span.trace_id
        except AttributeError:
            parent_span_id = None
            trace_id = None

        with sentry_sdk.start_transaction(
            op=u"{}.http".format(self.integration_type),
            name=u"{}.http_response.{}".format(self.integration_type, self.name),
            parent_span_id=parent_span_id,
            trace_id=trace_id,
            sampled=True,
        ) as span:
            try:
                resp = getattr(session, method.lower())(
                    url=full_url,
                    headers=headers,
                    json=data if json else None,
                    data=data if not json else None,
                    params=params,
                    auth=auth,
                    verify=self.verify_ssl,
                    allow_redirects=allow_redirects,
                    timeout=timeout,
                )
                resp.raise_for_status()
            except ConnectionError as e:
                self.track_response_data("connection_error", span, e)
                raise ApiHostError.from_exception(e)
            except Timeout as e:
                self.track_response_data("timeout", span, e)
                raise ApiTimeoutError.from_exception(e)
            except HTTPError as e:
                resp = e.response
                if resp is None:
                    self.track_response_data("unknown", span, e)
                    self.logger.exception(
                        "request.error", extra={self.integration_type: self.name, "url": full_url}
                    )
                    raise ApiError("Internal Error", url=full_url)
                self.track_response_data(resp.status_code, span, e)
                raise ApiError.from_response(resp, url=full_url)

            self.track_response_data(resp.status_code, span, None, resp)

            if resp.status_code == 204:
                return {}

            return BaseApiResponse.from_response(resp, allow_text=allow_text)
Example #36
    def build_handler(self, incident, project):
        type = AlertRuleTriggerAction.Type(self.type)
        if type in self._type_registrations:
            return self._type_registrations[type].handler(self, incident, project)
        else:
            metrics.incr("alert_rule_trigger.unhandled_type.{}".format(self.type))
Example #37
    def dispatch(self, request, project_id=None, *args, **kwargs):
        helper = self.helper_cls(
            agent=request.META.get('HTTP_USER_AGENT'),
            project_id=project_id,
            ip_address=request.META['REMOTE_ADDR'],
        )
        origin = None

        if kafka_publisher is not None:
            self._publish_to_kafka(request)

        try:
            origin = helper.origin_from_request(request)

            response = self._dispatch(request,
                                      helper,
                                      project_id=project_id,
                                      origin=origin,
                                      *args,
                                      **kwargs)
        except APIError as e:
            context = {
                'error': force_bytes(e.msg, errors='replace'),
            }
            if e.name:
                context['error_name'] = e.name

            response = HttpResponse(json.dumps(context),
                                    content_type='application/json',
                                    status=e.http_status)
            # Set X-Sentry-Error as in many cases it is easier to inspect the headers
            response['X-Sentry-Error'] = context['error']

            if isinstance(e, APIRateLimited) and e.retry_after is not None:
                response['Retry-After'] = six.text_type(
                    int(math.ceil(e.retry_after)))

        except Exception as e:
            # TODO(dcramer): test failures are not outputting the log message
            # here
            if settings.DEBUG:
                content = traceback.format_exc()
            else:
                content = ''
            logger.exception(e)
            response = HttpResponse(content,
                                    content_type='text/plain',
                                    status=500)

        # TODO(dcramer): it'd be nice if we had an incr_multi method so
        # tsdb could optimize this
        metrics.incr('client-api.all-versions.requests')
        metrics.incr('client-api.all-versions.responses.%s' %
                     (response.status_code, ))
        metrics.incr('client-api.all-versions.responses.%sxx' %
                     (six.text_type(response.status_code)[0], ))

        if helper.context.version:
            metrics.incr('client-api.v%s.requests' %
                         (helper.context.version, ))
            metrics.incr('client-api.v%s.responses.%s' %
                         (helper.context.version, response.status_code))
            metrics.incr('client-api.v%s.responses.%sxx' %
                         (helper.context.version,
                          six.text_type(response.status_code)[0]))

        if response.status_code != 200 and origin:
            # We allow all origins on errors
            response['Access-Control-Allow-Origin'] = '*'

        if origin:
            response['Access-Control-Allow-Headers'] = \
                'X-Sentry-Auth, X-Requested-With, Origin, Accept, ' \
                'Content-Type, Authentication'
            response['Access-Control-Allow-Methods'] = \
                ', '.join(self._allowed_methods())
            response['Access-Control-Expose-Headers'] = \
                'X-Sentry-Error, Retry-After'

        return response
Example #38
    def process(self,
                request,
                project,
                key,
                auth,
                helper,
                data,
                attachments=None,
                **kwargs):
        metrics.incr('events.total')

        if not data:
            raise APIError('No JSON data was found')

        remote_addr = request.META['REMOTE_ADDR']

        event_mgr = EventManager(
            data,
            project=project,
            key=key,
            auth=auth,
            client_ip=remote_addr,
            user_agent=helper.context.agent,
            content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''),
        )
        del data

        self.pre_normalize(event_mgr, helper)
        event_mgr.normalize()

        event_received.send_robust(ip=remote_addr,
                                   project=project,
                                   sender=type(self))

        start_time = time()
        tsdb_start_time = to_datetime(start_time)
        should_filter, filter_reason = event_mgr.should_filter()
        if should_filter:
            increment_list = [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_received,
                 project.organization_id),
                (tsdb.models.organization_total_blacklisted,
                 project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_blacklisted, key.id),
            ]
            try:
                increment_list.append(
                    (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id))
            # should error when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES
            except KeyError:
                pass

            tsdb.incr_multi(
                increment_list,
                timestamp=tsdb_start_time,
            )

            metrics.incr('events.blacklisted', tags={'reason': filter_reason})
            event_filtered.send_robust(
                ip=remote_addr,
                project=project,
                sender=type(self),
            )
            raise APIForbidden('Event dropped due to filter: %s' %
                               (filter_reason, ))

        # TODO: improve this API (e.g. make RateLimit act on __ne__)
        rate_limit = safe_execute(quotas.is_rate_limited,
                                  project=project,
                                  key=key,
                                  _with_transaction=False)
        if isinstance(rate_limit, bool):
            rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

        # XXX(dcramer): when the rate limiter fails we drop events to ensure
        # it cannot cascade
        if rate_limit is None or rate_limit.is_limited:
            if rate_limit is None:
                api_logger.debug(
                    'Dropped event due to error with rate limiter')
            tsdb.incr_multi(
                [
                    (tsdb.models.project_total_received, project.id),
                    (tsdb.models.project_total_rejected, project.id),
                    (tsdb.models.organization_total_received,
                     project.organization_id),
                    (tsdb.models.organization_total_rejected,
                     project.organization_id),
                    (tsdb.models.key_total_received, key.id),
                    (tsdb.models.key_total_rejected, key.id),
                ],
                timestamp=tsdb_start_time,
            )
            metrics.incr(
                'events.dropped',
                tags={
                    'reason':
                    rate_limit.reason_code if rate_limit else 'unknown',
                })
            event_dropped.send_robust(
                ip=remote_addr,
                project=project,
                sender=type(self),
                reason_code=rate_limit.reason_code if rate_limit else None,
            )
            if rate_limit is not None:
                raise APIRateLimited(rate_limit.retry_after)
        else:
            tsdb.incr_multi(
                [
                    (tsdb.models.project_total_received, project.id),
                    (tsdb.models.organization_total_received,
                     project.organization_id),
                    (tsdb.models.key_total_received, key.id),
                ],
                timestamp=tsdb_start_time,
            )

        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)

        data = event_mgr.get_data()
        del event_mgr

        event_id = data['event_id']

        # TODO(dcramer): ideally we'd only validate this if the event_id was
        # supplied by the user
        cache_key = 'ev:%s:%s' % (
            project.id,
            event_id,
        )

        if cache.get(cache_key) is not None:
            raise APIForbidden(
                'An event with the same ID already exists (%s)' % (event_id, ))

        scrub_ip_address = (
            org_options.get('sentry:require_scrub_ip_address', False)
            or project.get_option('sentry:scrub_ip_address', False))
        scrub_data = (org_options.get('sentry:require_scrub_data', False)
                      or project.get_option('sentry:scrub_data', True))

        if scrub_data:
            # We filter data immediately before it ever gets into the queue
            sensitive_fields_key = 'sentry:sensitive_fields'
            sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                                project.get_option(sensitive_fields_key, []))

            exclude_fields_key = 'sentry:safe_fields'
            exclude_fields = (org_options.get(exclude_fields_key, []) +
                              project.get_option(exclude_fields_key, []))

            scrub_defaults = (
                org_options.get('sentry:require_scrub_defaults', False)
                or project.get_option('sentry:scrub_defaults', True))

            SensitiveDataFilter(
                fields=sensitive_fields,
                include_defaults=scrub_defaults,
                exclude_fields=exclude_fields,
            ).apply(data)

        if scrub_ip_address:
            # We filter data immediately before it ever gets into the queue
            helper.ensure_does_not_have_ip(data)

        # mutates data (strips a lot of context if not queued)
        helper.insert_data_to_database(data,
                                       start_time=start_time,
                                       attachments=attachments)

        cache.set(cache_key, '', 60 * 5)

        api_logger.debug('New event received (%s)', event_id)

        event_accepted.send_robust(
            ip=remote_addr,
            data=data,
            project=project,
            sender=type(self),
        )

        return event_id
Example #39
def process_event(event_manager, project, key, remote_addr, helper,
                  attachments, project_config):
    event_received.send_robust(ip=remote_addr,
                               project=project,
                               sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]
    data_category = DataCategory.from_event_type(data.get("type"))

    if should_filter:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.blacklisted",
                     tags={"reason": filter_reason},
                     skip_internal=False)

        # relay will no longer be able to provide information about filter
        # status so to see the impact we're adding a way to turn on relay
        # like behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" %
                           (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited,
                              project=project,
                              key=key,
                              _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.dropped",
                     tags={"reason": reason or "unknown"},
                     skip_internal=False)

        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
            category=data_category,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" %
                           (event_id, ))

    data = scrub_data(project, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data,
                                   start_time=start_time,
                                   attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr,
                               data=data,
                               project=project,
                               sender=process_event)

    return event_id
Example #40
def assemble_download(
    data_export_id,
    export_limit=EXPORTED_ROWS_LIMIT,
    batch_size=SNUBA_MAX_RESULTS,
    offset=0,
    bytes_written=0,
    environment_id=None,
    **kwargs,
):
    with sentry_sdk.start_transaction(
            op="task.data_export.assemble",
            name="DataExportAssemble",
            sampled=True,
    ):
        first_page = offset == 0

        try:
            if first_page:
                logger.info("dataexport.start",
                            extra={"data_export_id": data_export_id})
            data_export = ExportedData.objects.get(id=data_export_id)
            if first_page:
                metrics.incr("dataexport.start",
                             tags={"success": True},
                             sample_rate=1.0)
            logger.info("dataexport.run",
                        extra={
                            "data_export_id": data_export_id,
                            "offset": offset
                        })
        except ExportedData.DoesNotExist as error:
            if first_page:
                metrics.incr("dataexport.start",
                             tags={"success": False},
                             sample_rate=1.0)
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type",
                          ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        try:
            # ensure that the export limit is set and capped at EXPORTED_ROWS_LIMIT
            if export_limit is None:
                export_limit = EXPORTED_ROWS_LIMIT
            else:
                export_limit = min(export_limit, EXPORTED_ROWS_LIMIT)

            processor = get_processor(data_export, environment_id)

            with tempfile.TemporaryFile(mode="w+b") as tf:
                # XXX(python3):
                #
                # In Python 3 the csv module only writes unicode strings (it
                # will NOT write bytes like in py2). Because of this we use
                # codecs.getwriter to wrap our file handle in a stream writer
                # that encodes to utf-8.
                tfw = codecs.getwriter("utf-8")(tf)

                writer = csv.DictWriter(tfw,
                                        processor.header_fields,
                                        extrasaction="ignore")
                if first_page:
                    writer.writeheader()

                # the position in the file at the end of the headers
                starting_pos = tf.tell()

                # the row offset relative to the start of the current task
                # this offset tells you the number of rows written during this batch fragment
                fragment_offset = 0

                # the absolute row offset from the beginning of the export
                next_offset = offset + fragment_offset

                while True:
                    # the number of rows to export in the next batch fragment
                    fragment_row_count = min(
                        batch_size, max(export_limit - next_offset, 1))

                    rows = process_rows(processor, data_export,
                                        fragment_row_count, next_offset)
                    writer.writerows(rows)

                    fragment_offset += len(rows)
                    next_offset = offset + fragment_offset

                    if (not rows or len(rows) < batch_size
                            # the batch file may exceed MAX_BATCH_SIZE, but we stop writing as soon as it does
                            or tf.tell() - starting_pos >= MAX_BATCH_SIZE):
                        break

                tf.seek(0)
                new_bytes_written = store_export_chunk_as_blob(
                    data_export, bytes_written, tf)
                bytes_written += new_bytes_written
        except ExportError as error:
            return data_export.email_failure(message=six.text_type(error))
        except Exception as error:
            metrics.incr("dataexport.error",
                         tags={"error": six.text_type(error)},
                         sample_rate=1.0)
            logger.error(
                "dataexport.error: %s",
                six.text_type(error),
                extra={
                    "query": data_export.payload,
                    "org": data_export.organization_id
                },
            )
            capture_exception(error)

            try:
                current.retry()
            except MaxRetriesExceededError:
                metrics.incr(
                    "dataexport.end",
                    tags={
                        "success": False,
                        "error": six.text_type(error)
                    },
                    sample_rate=1.0,
                )
                return data_export.email_failure(
                    message="Internal processing failure")
        else:
            if (rows and len(rows) >= batch_size and new_bytes_written
                    and next_offset < export_limit):
                assemble_download.delay(
                    data_export_id,
                    export_limit=export_limit,
                    batch_size=batch_size,
                    offset=next_offset,
                    bytes_written=bytes_written,
                    environment_id=environment_id,
                )
            else:
                metrics.timing("dataexport.row_count",
                               next_offset,
                               sample_rate=1.0)
                metrics.timing("dataexport.file_size",
                               bytes_written,
                               sample_rate=1.0)
                merge_export_blobs.delay(data_export_id)
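
The batching arithmetic in assemble_download (fragment_row_count, next_offset, and the stop condition) can be exercised on its own. A rough sketch in plain Python that ignores the MAX_BATCH_SIZE byte cutoff; fetch_rows is a hypothetical stand-in for process_rows:

def paginate_export(fetch_rows, export_limit, batch_size, offset=0):
    """Yield (next_offset, rows) batches the same way the loop above would."""
    next_offset = offset
    while True:
        # never ask for more rows than the export limit still allows
        fragment_row_count = min(batch_size, max(export_limit - next_offset, 1))
        rows = fetch_rows(fragment_row_count, next_offset)
        next_offset += len(rows)
        yield next_offset, rows
        if not rows or len(rows) < batch_size:
            break

# usage: simulate a 25-row data set exported with a limit of 10 and batches of 4
# data = list(range(25))
# batches = list(paginate_export(lambda n, off: data[off:off + n], 10, 4))
# -> offsets 4, 8, 10; exactly 10 rows exported in total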
Пример #41
0
def merge_export_blobs(data_export_id, **kwargs):
    with sentry_sdk.start_transaction(
            op="task.data_export.merge",
            name="DataExportMerge",
            sampled=True,
    ):
        try:
            data_export = ExportedData.objects.get(id=data_export_id)
        except ExportedData.DoesNotExist as error:
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type",
                          ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        # adapted from `putfile` in `src/sentry/models/file.py`
        try:
            with transaction.atomic():
                file = File.objects.create(
                    name=data_export.file_name,
                    type="export.csv",
                    headers={"Content-Type": "text/csv"},
                )
                size = 0
                file_checksum = sha1(b"")

                for export_blob in ExportedDataBlob.objects.filter(
                        data_export=data_export).order_by("offset"):
                    blob = export_blob.blob
                    FileBlobIndex.objects.create(file=file,
                                                 blob=blob,
                                                 offset=size)
                    size += blob.size
                    blob_checksum = sha1(b"")

                    for chunk in blob.getfile().chunks():
                        blob_checksum.update(chunk)
                        file_checksum.update(chunk)

                    if blob.checksum != blob_checksum.hexdigest():
                        raise AssembleChecksumMismatch("Checksum mismatch")

                file.size = size
                file.checksum = file_checksum.hexdigest()
                file.save()
                data_export.finalize_upload(file=file)

                time_elapsed = (timezone.now() -
                                data_export.date_added).total_seconds()
                metrics.timing("dataexport.duration",
                               time_elapsed,
                               sample_rate=1.0)
                logger.info("dataexport.end",
                            extra={"data_export_id": data_export_id})
                metrics.incr("dataexport.end",
                             tags={"success": True},
                             sample_rate=1.0)
        except Exception as error:
            metrics.incr("dataexport.error",
                         tags={"error": six.text_type(error)},
                         sample_rate=1.0)
            metrics.incr(
                "dataexport.end",
                tags={
                    "success": False,
                    "error": six.text_type(error)
                },
                sample_rate=1.0,
            )
            logger.error(
                "dataexport.error: %s",
                six.text_type(error),
                extra={
                    "query": data_export.payload,
                    "org": data_export.organization_id
                },
            )
            capture_exception(error)
            if isinstance(error, IntegrityError):
                message = "Failed to save the assembled file."
            else:
                message = "Internal processing failure."
            return data_export.email_failure(message=message)
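
merge_export_blobs verifies each blob against its stored checksum while also building a checksum for the merged file. A small stdlib-only sketch of that double pass (verify_and_merge is a hypothetical name; the real code raises AssembleChecksumMismatch rather than ValueError):

import hashlib

def verify_and_merge(chunks_per_blob, expected_blob_checksums):
    """Replicate the double-checksum pass: one sha1 per blob, one for the whole file."""
    file_checksum = hashlib.sha1(b"")
    for chunks, expected in zip(chunks_per_blob, expected_blob_checksums):
        blob_checksum = hashlib.sha1(b"")
        for chunk in chunks:
            blob_checksum.update(chunk)
            file_checksum.update(chunk)
        if blob_checksum.hexdigest() != expected:
            raise ValueError("Checksum mismatch")
    return file_checksum.hexdigest()

# usage:
# blobs = [[b"id,title\n"], [b"1,foo\n", b"2,bar\n"]]
# expected = [hashlib.sha1(b"".join(b)).hexdigest() for b in blobs]
# verify_and_merge(blobs, expected)  # returns the checksum of the merged file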
Пример #42
0
    def handle_basic_auth(self, request, **kwargs):
        can_register = self.can_register(request)

        op = request.POST.get("op")
        organization = kwargs.pop("organization", None)

        if not op:
            # Detect that we are on the register page by url /register/ and
            # then activate the register tab by default.
            if "/register" in request.path_info and can_register:
                op = "register"
            elif request.GET.get("op") == "sso":
                op = "sso"

        login_form = self.get_login_form(request)
        if can_register:
            register_form = self.get_register_form(
                request, initial={"username": request.session.get("invite_email", "")}
            )
        else:
            register_form = None

        if can_register and register_form.is_valid():
            user = register_form.save()
            user.send_confirm_emails(is_new_user=True)

            # HACK: grab whatever the first backend is and assume it works
            user.backend = settings.AUTHENTICATION_BACKENDS[0]

            auth.login(request, user, organization_id=organization.id if organization else None)

            # can_register should only allow a single registration
            request.session.pop("can_register", None)
            request.session.pop("invite_email", None)

            # Attempt to directly accept any pending invites
            invite_helper = ApiInviteHelper.from_cookie(request=request, instance=self)

            if invite_helper and invite_helper.valid_request:
                invite_helper.accept_invite()
                response = self.redirect_to_org(request)
                remove_invite_cookie(request, response)

                return response

            return self.redirect(auth.get_login_redirect(request))

        elif request.method == "POST":
            from sentry.app import ratelimiter
            from sentry.utils.hashlib import md5_text

            login_attempt = (
                op == "login" and request.POST.get("username") and request.POST.get("password")
            )

            if login_attempt and ratelimiter.is_limited(
                u"auth:login:username:{}".format(
                    md5_text(request.POST["username"].lower()).hexdigest()
                ),
                limit=10,
                window=60,  # 10 per minute should be enough for anyone
            ):
                login_form.errors["__all__"] = [
                    u"You have made too many login attempts. Please try again later."
                ]
                metrics.incr(
                    "login.attempt", instance="rate_limited", skip_internal=True, sample_rate=1.0
                )
            elif login_form.is_valid():
                user = login_form.get_user()

                auth.login(request, user, organization_id=organization.id if organization else None)
                metrics.incr(
                    "login.attempt", instance="success", skip_internal=True, sample_rate=1.0
                )

                if not user.is_active:
                    return self.redirect(reverse("sentry-reactivate-account"))

                return self.redirect(auth.get_login_redirect(request))
            else:
                metrics.incr(
                    "login.attempt", instance="failure", skip_internal=True, sample_rate=1.0
                )

        context = {
            "op": op or "login",
            "server_hostname": get_server_hostname(),
            "login_form": login_form,
            "organization": organization,
            "register_form": register_form,
            "CAN_REGISTER": can_register,
            "join_request_link": self.get_join_request_link(organization),
        }
        context.update(additional_context.run_callbacks(request))

        return self.respond_login(request, context, **kwargs)
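
The login rate limit above keys on the md5 of the lowercased username and allows 10 attempts per 60-second window. A minimal fixed-window sketch under those assumptions, with a plain dict standing in for sentry.app.ratelimiter:

import hashlib
import time

_attempts = {}  # key -> (window_start, count); stand-in for the ratelimiter backend

def login_is_rate_limited(username, limit=10, window=60, now=None):
    """Fixed-window counter keyed the same way as the view above."""
    now = time.time() if now is None else now
    key = "auth:login:username:%s" % hashlib.md5(
        username.lower().encode("utf-8")).hexdigest()
    window_start, count = _attempts.get(key, (now, 0))
    if now - window_start >= window:
        window_start, count = now, 0  # new window, reset the counter
    _attempts[key] = (window_start, count + 1)
    return count + 1 > limit

# usage: the 11th attempt within the same minute is rejected
# [login_is_rate_limited("User@Example.com", now=0) for _ in range(11)][-1]  # -> True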
Пример #43
0
    def put(self, request: Request, organization, member_id) -> Response:
        try:
            om = self._get_member(request, organization, member_id)
        except OrganizationMember.DoesNotExist:
            raise ResourceDoesNotExist

        serializer = OrganizationMemberSerializer(data=request.data, partial=True)

        if not serializer.is_valid():
            return Response(status=400)

        try:
            auth_provider = AuthProvider.objects.get(organization=organization)
            auth_provider = auth_provider.get_provider()
        except AuthProvider.DoesNotExist:
            auth_provider = None

        allowed_roles = None
        result = serializer.validated_data

        # XXX(dcramer): if/when this expands beyond reinvite we need to check
        # access level
        if result.get("reinvite"):
            if om.is_pending:
                if ratelimits.for_organization_member_invite(
                    organization=organization,
                    email=om.email,
                    user=request.user,
                    auth=request.auth,
                ):
                    metrics.incr(
                        "member-invite.attempt",
                        instance="rate_limited",
                        skip_internal=True,
                        sample_rate=1.0,
                    )
                    return Response({"detail": ERR_RATE_LIMITED}, status=429)

                if result.get("regenerate"):
                    if request.access.has_scope("member:admin"):
                        om.regenerate_token()
                        om.save()
                    else:
                        return Response({"detail": ERR_INSUFFICIENT_SCOPE}, status=400)
                if om.token_expired:
                    return Response({"detail": ERR_EXPIRED}, status=400)
                om.send_invite_email()
            elif auth_provider and not getattr(om.flags, "sso:linked"):
                om.send_sso_link_email(request.user, auth_provider)
            else:
                # TODO(dcramer): proper error message
                return Response({"detail": ERR_UNINVITABLE}, status=400)

        if "teams" in result:
            # dupe code from member_index
            # ensure listed teams are real teams
            teams = list(
                Team.objects.filter(
                    organization=organization, status=TeamStatus.VISIBLE, slug__in=result["teams"]
                )
            )

            if len(set(result["teams"])) != len(teams):
                return Response({"teams": "Invalid team"}, status=400)

            with transaction.atomic():
                # teams may be empty
                OrganizationMemberTeam.objects.filter(organizationmember=om).delete()
                OrganizationMemberTeam.objects.bulk_create(
                    [OrganizationMemberTeam(team=team, organizationmember=om) for team in teams]
                )

        if result.get("role"):
            _, allowed_roles = get_allowed_roles(request, organization)
            allowed_role_ids = {r.id for r in allowed_roles}

            # A user cannot promote others above themselves
            if result["role"] not in allowed_role_ids:
                return Response(
                    {"role": "You do not have permission to assign the given role."}, status=403
                )

            # A user cannot demote a superior
            if om.role not in allowed_role_ids:
                return Response(
                    {"role": "You do not have permission to assign a role to the given user."},
                    status=403,
                )

            if om.user == request.user and (result["role"] != om.role):
                return Response({"detail": "You cannot make changes to your own role."}, status=400)

            om.update(role=result["role"])

        self.create_audit_entry(
            request=request,
            organization=organization,
            target_object=om.id,
            target_user=om.user,
            event=AuditLogEntryEvent.MEMBER_EDIT,
            data=om.get_audit_log_data(),
        )

        context = self._serialize_member(om, request, allowed_roles)

        return Response(context)
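
The role-change branch enforces three rules: the new role must be one the actor may assign, the target's current role must also be within the actor's reach, and nobody may change their own role. A compact sketch of just that validation (validate_role_change is a hypothetical helper; the messages are copied from the endpoint):

def validate_role_change(actor_id, target_id, current_role, new_role, allowed_role_ids):
    """Mirror the three role checks above; return an error string or None."""
    if new_role not in allowed_role_ids:
        return "You do not have permission to assign the given role."
    if current_role not in allowed_role_ids:
        return "You do not have permission to assign a role to the given user."
    if actor_id == target_id and new_role != current_role:
        return "You cannot make changes to your own role."
    return None

# usage:
# validate_role_change(1, 2, "member", "owner", {"member", "admin"})
# -> "You do not have permission to assign the given role."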
Пример #44
0
    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = None
        for h in all_hashes:
            if h.group_id is not None:
                existing_group_id = h.group_id
                break
            if h.group_tombstone_id is not None:
                raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id)

        # XXX(dcramer): this has the opportunity to create duplicate groups
        # it should be resolved by the hash merging function later but this
        # should be better tested/reviewed
        if existing_group_id is None:
            # it's possible the release was deleted between
            # when we queried for the release and now, so
            # make sure it still exists
            first_release = kwargs.pop('first_release', None)

            with transaction.atomic():
                short_id = project.next_short_id()
                group, group_is_new = Group.objects.create(
                    project=project,
                    short_id=short_id,
                    first_release_id=Release.objects.filter(
                        id=first_release.id,
                    ).values_list('id', flat=True).first() if first_release else None,
                    **kwargs
                ), True

            metrics.incr(
                'group.created',
                skip_internal=True,
                tags={'platform': event.platform or 'unknown'}
            )

        else:
            group = Group.objects.get(id=existing_group_id)

            group_is_new = False

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h for h in all_hashes if h.group_id is None]
        if new_hashes:
            # XXX: There is a race condition here wherein another process could
            # create a new group that is associated with one of the new hashes,
            # add some event(s) to it, and then subsequently have the hash
            # "stolen" by this process. This then "orphans" those events from
            # their "siblings" in the group we've created here. We don't have a
            # way to fix this, since we can't update the group on those hashes
            # without filtering on `group_id` (which we can't do due to query
            # planner weirdness.) For more context, see 84c6f75a and d0e22787,
            # as well as GH-5085.
            GroupHash.objects.filter(
                id__in=[h.id for h in new_hashes],
            ).exclude(
                state=GroupHash.State.LOCKED_IN_MIGRATION,
            ).update(group=group)

            if group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        # XXX(dcramer): it's important this gets called **before** the aggregate
        # is processed as otherwise values like last_seen will get mutated
        can_sample = (
            features.has('projects:sample-events', project=project) and should_sample(
                event.data.get('received') or float(event.datetime.strftime('%s')),
                group.data.get('last_received') or float(group.last_seen.strftime('%s')),
                group.times_seen,
            )
        )

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group,
                event=event,
                data=kwargs,
                release=release,
            )
        else:
            is_regression = False

        # Determine if we've sampled enough data to store this event
        if is_new or is_regression:
            is_sample = False
        else:
            is_sample = can_sample

        if not is_sample:
            GroupHash.record_last_processed_event_id(
                all_hashes[0].id,
                event.event_id,
            )

        return group, is_new, is_regression, is_sample
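
Whether the event is stored in full or only sampled comes down to three booleans computed above. A tiny sketch of that decision, assuming the same semantics for is_new, is_regression, and can_sample:

def should_store_full_event(is_new, is_regression, can_sample):
    """An event is persisted in full unless it is neither new nor a regression
    and the project qualifies for sampling."""
    if is_new or is_regression:
        return True
    return not can_sample

# usage:
# should_store_full_event(False, False, True)  # -> False (event is sampled away)
# should_store_full_event(True, False, True)   # -> True  (new issues are always kept)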
Пример #45
0
    def query(
        self,
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        date_from,
        date_to,
        max_hits=None,
    ):

        now = timezone.now()
        end = None
        end_params = [
            _f for _f in
            [date_to, get_search_filter(search_filters, "date", "<")] if _f
        ]
        if end_params:
            end = min(end_params)

        if not end:
            end = now + ALLOWED_FUTURE_DELTA

            metrics.incr("snuba.search.postgres_only")

            # This search is for some time window that ends with "now",
            # so if the requested sort is `date` (`last_seen`) and there
            # are no other Snuba-based search predicates, we can simply
            # return the results from Postgres.
            if (cursor is None and sort_by == "date" and
                    # This handles tags and date parameters for search filters.
                    not [
                        sf for sf in search_filters if sf.key.name not in
                        self.postgres_only_fields.union(["date"])
                    ]):
                group_queryset = group_queryset.order_by("-last_seen")
                paginator = DateTimePaginator(group_queryset, "-last_seen",
                                              **paginator_options)
                # When it's a simple Django-only search, we count hits like normal
                return paginator.get_result(limit,
                                            cursor,
                                            count_hits=count_hits,
                                            max_hits=max_hits)

        # TODO: Presumably we only want to search back to the project's max
        # retention date, which may be closer than 90 days in the past, but
        # apparently `retention_window_start` can be None(?), so we need a
        # fallback.
        retention_date = max([
            _f for _f in [retention_window_start, now - timedelta(days=90)]
            if _f
        ])
        start_params = [
            date_from, retention_date,
            get_search_filter(search_filters, "date", ">")
        ]
        start = max([_f for _f in start_params if _f])
        end = max([retention_date, end])

        if start == retention_date and end == retention_date:
            # Both `start` and `end` must have been trimmed to `retention_date`,
            # so this entire search was against a time range that is outside of
            # retention. We'll return empty results to maintain backwards compatibility
            # with Django search (for now).
            return self.empty_result

        if start >= end:
            # TODO: This maintains backwards compatibility with Django search, but
            # in the future we should find a way to notify the user that their search
            # is invalid.
            return self.empty_result

        # This search is specific to Inbox. If we're using inbox sort and only querying
        # postgres then we can use this sort method. Otherwise if we need to go to Snuba,
        # fail.
        if (sort_by == "inbox"
                and get_search_filter(search_filters, "for_review", "=")
                # This handles tags and date parameters for search filters.
                and not [
                    sf for sf in search_filters if sf.key.name not in
                    self.postgres_only_fields.union(["date"])
                ]):
            # We just filter on `GroupInbox.date_added` here, and don't filter by date
            # on the group. This keeps the query simpler and faster in some edge cases,
            # and date_added is a good enough proxy when we're using this sort.
            group_queryset = group_queryset.filter(
                groupinbox__date_added__gte=start,
                groupinbox__date_added__lte=end,
            )
            group_queryset = group_queryset.extra(select={
                "inbox_date":
                "sentry_groupinbox.date_added"
            }, ).order_by("-inbox_date")
            paginator = DateTimePaginator(group_queryset, "-inbox_date",
                                          **paginator_options)
            return paginator.get_result(limit,
                                        cursor,
                                        count_hits=count_hits,
                                        max_hits=max_hits)

        if sort_by == "inbox":
            raise InvalidSearchQuery(
                f"Sort key '{sort_by}' only supported for inbox search")

        # Here we check if all the django filters reduce the set of groups down
        # to something that we can send down to Snuba in a `group_id IN (...)`
        # clause.
        max_candidates = options.get("snuba.search.max-pre-snuba-candidates")

        with sentry_sdk.start_span(op="snuba_group_query") as span:
            group_ids = list(
                group_queryset.values_list("id",
                                           flat=True)[:max_candidates + 1])
            span.set_data("Max Candidates", max_candidates)
            span.set_data("Result Size", len(group_ids))
        metrics.timing("snuba.search.num_candidates", len(group_ids))

        too_many_candidates = False
        if not group_ids:
            # no matches could possibly be found from this point on
            metrics.incr("snuba.search.no_candidates", skip_internal=False)
            return self.empty_result
        elif len(group_ids) > max_candidates:
            # If the pre-filter query didn't include anything to significantly
            # filter down the number of results (from 'first_release', 'query',
            # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
            # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
            # might have surpassed the `max_candidates`. In this case,
            # we *don't* want to pass candidates down to Snuba, and instead we
            # want Snuba to do all the filtering/sorting it can and *then* apply
            # this queryset to the results from Snuba, which we call
            # post-filtering.
            metrics.incr("snuba.search.too_many_candidates",
                         skip_internal=False)
            too_many_candidates = True
            group_ids = []

        sort_field = self.sort_strategies[sort_by]
        chunk_growth = options.get("snuba.search.chunk-growth-rate")
        max_chunk_size = options.get("snuba.search.max-chunk-size")
        chunk_limit = limit
        offset = 0
        num_chunks = 0
        hits = self.calculate_hits(
            group_ids,
            too_many_candidates,
            sort_field,
            projects,
            retention_window_start,
            group_queryset,
            environments,
            sort_by,
            limit,
            cursor,
            count_hits,
            paginator_options,
            search_filters,
            start,
            end,
        )
        if count_hits and hits == 0:
            return self.empty_result

        paginator_results = self.empty_result
        result_groups = []
        result_group_ids = set()

        max_time = options.get("snuba.search.max-total-chunk-time-seconds")
        time_start = time.time()

        # Do smaller searches in chunks until we have enough results
        # to answer the query (or hit the end of possible results). We do
        # this because a common case for search is to return 100 groups
        # sorted by `last_seen`, and we want to avoid returning all of
        # a project's groups and then post-sorting them all in Postgres
        # when typically the first N results will do.
        while (time.time() - time_start) < max_time:
            num_chunks += 1

            # grow the chunk size on each iteration to account for huge projects
            # and weird queries, up to a max size
            chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
            # but if we have group_ids always query for at least that many items
            chunk_limit = max(chunk_limit, len(group_ids))

            # {group_id: group_score, ...}
            snuba_groups, total = self.snuba_search(
                start=start,
                end=end,
                project_ids=[p.id for p in projects],
                environment_ids=environments
                and [environment.id for environment in environments],
                sort_field=sort_field,
                cursor=cursor,
                group_ids=group_ids,
                limit=chunk_limit,
                offset=offset,
                search_filters=search_filters,
            )
            metrics.timing("snuba.search.num_snuba_results", len(snuba_groups))
            count = len(snuba_groups)
            more_results = count >= limit and (offset + limit) < total
            offset += len(snuba_groups)

            if not snuba_groups:
                break

            if group_ids:
                # pre-filtered candidates were passed down to Snuba, so we're
                # finished with filtering and these are the only results. Note
                # that because we set the chunk size to at least the size of
                # the group_ids, we know we got all of them (ie there are
                # no more chunks after the first)
                result_groups = snuba_groups
                if count_hits and hits is None:
                    hits = len(snuba_groups)
            else:
                # pre-filtered candidates were *not* passed down to Snuba,
                # so we need to do post-filtering to verify Sentry DB predicates
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid
                            for gid, _ in snuba_groups]).values_list("id",
                                                                     flat=True)

                group_to_score = dict(snuba_groups)
                for group_id in filtered_group_ids:
                    if group_id in result_group_ids:
                        # because we're doing multiple Snuba queries, which
                        # happen outside of a transaction, there is a small possibility
                        # of groups moving around in the sort scoring underneath us,
                        # so we at least want to protect against duplicates
                        continue

                    group_score = group_to_score[group_id]
                    result_group_ids.add(group_id)
                    result_groups.append((group_id, group_score))

            # break the query loop for one of three reasons:
            # * we started with Postgres candidates and so only do one Snuba query max
            # * the paginator is returning enough results to satisfy the query (>= the limit)
            # * there are no more groups in Snuba to post-filter
            # TODO do we actually have to rebuild this SequencePaginator every time
            # or can we just make it after we've broken out of the loop?
            paginator_results = SequencePaginator(
                [(score, id) for (id, score) in result_groups],
                reverse=True,
                **paginator_options).get_result(limit,
                                                cursor,
                                                known_hits=hits,
                                                max_hits=max_hits)

            if group_ids or len(
                    paginator_results.results) >= limit or not more_results:
                break

        # HACK: We're using the SequencePaginator to mask the complexities of going
        # back and forth between two databases. This causes a problem with pagination
        # because we're 'lying' to the SequencePaginator (it thinks it has the entire
        # result set in memory when it does not). For this reason we need to make some
        # best guesses as to whether the `prev` and `next` cursors have more results.

        if len(paginator_results.results) == limit and more_results:
            # Because we are going back and forth between DBs there is a small
            # chance that we will hand the SequencePaginator exactly `limit`
            # items. In this case the paginator will assume there are no more
            # results, so we need to override the `next` cursor's results.
            paginator_results.next.has_results = True

        if cursor is not None and (not cursor.is_prev
                                   or len(paginator_results.results) > 0):
            # If the user passed a cursor, and it isn't already a 0 result `is_prev`
            # cursor, then it's worth allowing them to go back a page to check for
            # more results.
            paginator_results.prev.has_results = True

        metrics.timing("snuba.search.num_chunks", num_chunks)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [
            groups[k] for k in paginator_results.results if k in groups
        ]

        return paginator_results
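
The chunked Snuba loop grows its page size geometrically, capped by snuba.search.max-chunk-size and floored at the number of Postgres candidates. A small sketch of that growth schedule (chunk_sizes is a hypothetical helper; the option values in the usage note are made-up examples):

def chunk_sizes(initial_limit, growth_rate, max_chunk_size, num_candidates=0, max_chunks=5):
    """Reproduce how the search loop grows its Snuba page size on each iteration."""
    sizes = []
    chunk_limit = initial_limit
    for _ in range(max_chunks):
        chunk_limit = min(int(chunk_limit * growth_rate), max_chunk_size)
        chunk_limit = max(chunk_limit, num_candidates)  # never page below the candidate count
        sizes.append(chunk_limit)
    return sizes

# usage:
# chunk_sizes(100, 1.5, 10000)  # -> [150, 225, 337, 505, 757]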
Пример #46
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags
        data = self.data

        project = Project.objects.get_from_cache(id=project)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')
        transaction_name = data.pop('transaction', None)
        culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        dist = data.pop('dist', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            if transaction_name:
                culprit = transaction_name
            else:
                culprit = generate_culprit(data, platform=platform)

        culprit = force_text(culprit)
        if transaction_name:
            transaction_name = force_text(transaction_name)

        recorded_timestamp = data.pop('timestamp')
        date = datetime.fromtimestamp(recorded_timestamp)
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(
            project_id=project.id,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )
        event._project_cache = project
        data = event.data.data

        # convert this to a dict to ensure we're only storing one value per key
        # as most parts of Sentry don't currently play well with multiple values
        tags = dict(data.get('tags') or [])
        tags['level'] = LOG_LEVELS[level]
        if logger_name:
            tags['logger'] = logger_name
        if server_name:
            tags['server_name'] = server_name
        if site:
            tags['site'] = site
        if environment:
            tags['environment'] = environment
        if transaction_name:
            tags['transaction'] = transaction_name

        if release:
            # don't allow a conflicting 'release' tag
            if 'release' in tags:
                del tags['release']
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            tags['sentry:release'] = release.version

        if dist and release:
            dist = release.add_dist(dist, date)
            tags['sentry:dist'] = dist.name
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            if 'user' in tags:
                del tags['user']
            tags['sentry:user'] = event_user.tag_value

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        normalize_in_app(data)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    tags.setdefault(key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                tags[k] = v
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # tags are stored as a tuple
        tags = tags.items()

        data['tags'] = tags
        data['fingerprint'] = fingerprint or ['{{ default }}']

        # prioritize fingerprint over checksum as it's likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
        elif checksum:
            if HASH_RE.match(checksum):
                hashes = [checksum]
            else:
                hashes = [md5_from_hash([checksum]), checksum]
            data['checksum'] = checksum
        else:
            hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, six.string_types):
            message = force_text(message)

        for value in six.itervalues(event_metadata):
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))
        group_kwargs = kwargs.copy()
        group_kwargs.update(
            {
                'culprit': culprit,
                'logger': logger_name,
                'level': level,
                'last_seen': date,
                'first_seen': date,
                'active_at': date,
                'data': {
                    'last_received': received_timestamp,
                    'type': event_type.key,
                    # we cache the event's metadata on the group to ensure it's
                    # accessible in the stream
                    'metadata': event_metadata,
                },
            }
        )

        if release:
            group_kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **group_kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                self.logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release_id': release.id if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            index_event_tags.delay(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(Group.objects.add_tags, group, environment, tags, _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.publish(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        return event
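
Tag precedence in save() is: user-supplied tags win over plugin tags (added via setdefault), while interface tags always overwrite. A short sketch of that merge order with hypothetical inputs:

def merge_tags(user_tags, plugin_tags, interface_tags):
    """Same precedence as above: plugin tags never override user tags,
    interface tags always win."""
    tags = dict(user_tags or [])
    for key, value in plugin_tags:
        tags.setdefault(key, value)
    tags.update(interface_tags)
    return tags

# usage:
# merge_tags({"level": "error"},
#            [("level", "warning"), ("browser", "Firefox")],
#            {"url": "https://example.com"})
# -> {"level": "error", "browser": "Firefox", "url": "https://example.com"}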
Пример #47
0
    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = None
        for h in all_hashes:
            if h.group_id is not None:
                existing_group_id = h.group_id
                break
            if h.group_tombstone_id is not None:
                raise HashDiscarded("Matches group tombstone %s" % h.group_tombstone_id)

        # XXX(dcramer): this has the opportunity to create duplicate groups
        # it should be resolved by the hash merging function later but this
        # should be better tested/reviewed
        if existing_group_id is None:
            # it's possible the release was deleted between
            # when we queried for the release and now, so
            # make sure it still exists
            first_release = kwargs.pop("first_release", None)

            with transaction.atomic():
                short_id = project.next_short_id()
                group, group_is_new = (
                    Group.objects.create(
                        project=project,
                        short_id=short_id,
                        first_release_id=Release.objects.filter(id=first_release.id)
                        .values_list("id", flat=True)
                        .first()
                        if first_release
                        else None,
                        **kwargs
                    ),
                    True,
                )

            metrics.incr(
                "group.created", skip_internal=True, tags={"platform": event.platform or "unknown"}
            )

        else:
            group = Group.objects.get(id=existing_group_id)

            group_is_new = False

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h for h in all_hashes if h.group_id is None]
        if new_hashes:
            # XXX: There is a race condition here wherein another process could
            # create a new group that is associated with one of the new hashes,
            # add some event(s) to it, and then subsequently have the hash
            # "stolen" by this process. This then "orphans" those events from
            # their "siblings" in the group we've created here. We don't have a
            # way to fix this, since we can't update the group on those hashes
            # without filtering on `group_id` (which we can't do due to query
            # planner weirdness.) For more context, see 84c6f75a and d0e22787,
            # as well as GH-5085.
            GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
                state=GroupHash.State.LOCKED_IN_MIGRATION
            ).update(group=group)

            if group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group, event=event, data=kwargs, release=release
            )
        else:
            is_regression = False

        return group, is_new, is_regression
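
The hash walk at the top of _save_aggregate either reuses the first hash that is already bound to a group or aborts on a tombstone. A minimal sketch of that resolution over plain dicts (RuntimeError stands in for HashDiscarded):

def resolve_group(hash_rows):
    """Walk the ordered hashes: the first bound group wins; a tombstone aborts."""
    for h in hash_rows:
        if h.get("group_id") is not None:
            return h["group_id"]
        if h.get("group_tombstone_id") is not None:
            raise RuntimeError("Matches group tombstone %s" % h["group_tombstone_id"])
    return None  # no existing group: the caller creates a new one

# usage:
# resolve_group([{"group_id": None}, {"group_id": 7}])  # -> 7
# resolve_group([{"group_id": None}])                   # -> None (new group)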
Пример #48
0
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    set_current_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")
            set_current_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id,
                                     assume_normalized=True,
                                     start_time=start_time,
                                     cache_key=cache_key)

        except HashDiscarded:
            pass

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)

                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    # In the unlikely case that we did not manage to persist
                    # the event, we also always delete the key.
                    if event is None or features.has(
                            "organizations:event-attachments",
                            event.project.organization,
                            actor=None):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])
Example #49
    def get(self, _, project, event_id):
        """
        Retrieve Committer information for an event
        ```````````````````````````````````````````

        Return committers on an individual event, plus a per-frame breakdown.

        :pparam string project_slug: the slug of the project the event
                                     belongs to.
        :pparam string event_id: the hexadecimal ID of the event to
                                 retrieve (as reported by the raven client).
        :auth: required
        """
        try:
            event = Event.objects.get(
                id=event_id,
                project_id=project.id,
            )
        except Event.DoesNotExist:
            return Response({'detail': 'Event not found'}, status=404)

        # populate event data
        Event.objects.bind_nodes([event], 'data')

        group = Group.objects.get(id=event.group_id)

        first_release_version = group.get_first_release()

        if not first_release_version:
            return Response({'detail': 'Release not found'}, status=404)

        releases = Release.get_closest_releases(project, first_release_version)

        if not releases:
            return Response({'detail': 'Release not found'}, status=404)

        commits = self._get_commits(releases)

        if not commits:
            return Response({'detail': 'No Commits found for Release'},
                            status=404)

        frames = self._get_frame_paths(event)
        frame_limit = 25
        app_frames = [frame for frame in frames
                      if frame['in_app']][-frame_limit:]

        # TODO(maxbittker) return this set instead of annotated frames
        path_set = {frame['abs_path'] for frame in app_frames}

        file_changes = []
        if path_set:
            file_changes = self._get_commit_file_changes(commits, path_set)

        commit_path_matches = {
            path: self._match_commits_path(file_changes, path)
            for path in path_set
        }

        annotated_frames = [{
            'frame': frame,
            'commits': commit_path_matches[frame['abs_path']]
        } for frame in app_frames]

        relevant_commits = list({
            commit
            for match in commit_path_matches
            for commit in commit_path_matches[match]
        })

        committers = self._get_committers(annotated_frames, relevant_commits)
        metrics.incr('feature.owners.has-committers',
                     instance='hit' if committers else 'miss')

        # serialize the commit objects
        serialized_annotated_frames = [{
            'frame': frame['frame'],
            'commits': serialize(frame['commits'])
        } for frame in annotated_frames]

        data = {
            # map author ids to sentry user dicts
            'committers': committers,
            'annotatedFrames': serialized_annotated_frames
        }
        return Response(data)
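
The core of the committer lookup is matching commit file changes against frame paths. Below is a simplified, self-contained sketch of the `commit_path_matches` / `annotated_frames` comprehensions, using hypothetical plain-dict data instead of Sentry's models and exact path comparison in place of `_match_commits_path`.

# Hypothetical, simplified data; not the actual Sentry models.
app_frames = [
    {'abs_path': 'src/app/views.py', 'in_app': True},
    {'abs_path': 'src/app/models.py', 'in_app': True},
]
file_changes = [
    {'commit_id': 'abc123', 'filename': 'src/app/views.py'},
    {'commit_id': 'def456', 'filename': 'src/other/util.py'},
]

path_set = {frame['abs_path'] for frame in app_frames}

# map each frame path to the commits whose file changes touched it
commit_path_matches = {
    path: [change['commit_id'] for change in file_changes
           if change['filename'] == path]
    for path in path_set
}

annotated_frames = [
    {'frame': frame, 'commits': commit_path_matches[frame['abs_path']]}
    for frame in app_frames
]
print(annotated_frames)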
Example #50
def create_failed_event(cache_key,
                        data,
                        project_id,
                        issues,
                        event_id,
                        start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    set_current_project(project_id)

    # We can only create failed events for events that support
    # reprocessing.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT)

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (reprocessing_active and reprocessing.get_reprocessing_revision(
            project_id, cached=False) != reprocessing_rev):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False)
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={
                "reprocessing_active": reprocessing_active,
                "issues": issues
            },
        ).send_notification()
        ProjectOption.objects.set_value(project,
                                        "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = event_processing_store.get(cache_key)
    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "raw"
                     },
                     skip_internal=False)
        error_logger.error("process.failed_raw.empty",
                           extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    event_processing_store.delete_by_key(cache_key)

    return True
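
The `sentry:sent_failed_event_hint` option above acts as a send-once flag: the notification activity is created only the first time a failed event is seen. A standalone sketch of that pattern, with a plain dict standing in for `ProjectOption`, might look like this.

# stand-in for ProjectOption, keyed by (project_id, option_name)
_options = {}

def notify_failed_event_once(project_id, send):
    key = (project_id, "sentry:sent_failed_event_hint")
    if not _options.get(key, False):
        send(project_id)          # e.g. create the Activity and notify
        _options[key] = True      # flag so subsequent failures stay quiet

sent = []
notify_failed_event_once(42, sent.append)
notify_failed_event_once(42, sent.append)
print(sent)  # [42] -- the second call is a no-op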
Example #51
    def get_send_to(self, project, event=None):
        """
        Returns a list of user IDs for the users that should receive
        notifications for the provided project.

        This result may come from cached data.
        """
        if not (project and project.teams.exists()):
            logger.debug('Tried to send notification to invalid project: %r',
                         project)
            return []

        if event:
            owners, _ = ProjectOwnership.get_owners(project.id, event.data)
            if owners != ProjectOwnership.Everyone:
                if not owners:
                    metrics.incr(
                        'features.owners.send_to',
                        tags={
                            'organization': project.organization_id,
                            'outcome': 'empty',
                        },
                        skip_internal=True,
                    )
                    return []

                metrics.incr(
                    'features.owners.send_to',
                    tags={
                        'organization': project.organization_id,
                        'outcome': 'match',
                    },
                    skip_internal=True,
                )
                send_to_list = []
                teams_to_resolve = []
                for owner in owners:
                    if owner.type == User:
                        send_to_list.append(owner.id)
                    else:
                        teams_to_resolve.append(owner.id)

                # get all users in teams
                if teams_to_resolve:
                    send_to_list += User.objects.filter(
                        is_active=True,
                        sentry_orgmember_set__organizationmemberteam__team__id__in=teams_to_resolve,
                    ).values_list('id', flat=True)
                return send_to_list
            else:
                metrics.incr(
                    'features.owners.send_to',
                    tags={
                        'organization': project.organization_id,
                        'outcome': 'everyone',
                    },
                    skip_internal=True,
                )

        cache_key = '%s:send_to:%s' % (self.get_conf_key(), project.pk)
        send_to_list = cache.get(cache_key)
        if send_to_list is None:
            send_to_list = [s for s in self.get_sendable_users(project) if s]
            cache.set(cache_key, send_to_list, 60)  # 1 minute cache

        return send_to_list
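
The fallback path of `get_send_to` is a plain cache-aside lookup: read the cached recipient list, recompute it on a miss, and store it with a short TTL. A minimal in-memory sketch of that pattern (not Sentry's cache backend) is shown below; `compute_sendable_users` is a hypothetical stand-in for `get_sendable_users`.

import time

_cache = {}

def cache_get(key):
    entry = _cache.get(key)
    if entry is None:
        return None
    value, expires_at = entry
    return value if time.monotonic() < expires_at else None

def cache_set(key, value, ttl):
    _cache[key] = (value, time.monotonic() + ttl)

def get_send_to_cached(project_pk, compute_sendable_users):
    cache_key = 'mail:send_to:%s' % project_pk
    send_to_list = cache_get(cache_key)
    if send_to_list is None:
        send_to_list = [u for u in compute_sendable_users() if u]
        cache_set(cache_key, send_to_list, 60)  # 1 minute cache
    return send_to_list

print(get_send_to_cached(7, lambda: [1, None, 3]))  # [1, 3]
print(get_send_to_cached(7, lambda: [999]))         # [1, 3] (served from cache)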
Example #52
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer(
            "tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer("tasks.store.process_event.stacktraces",
                           tags={"from_symbolicate": from_symbolicate}):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (has_changed and options.get("processing.can-use-scrubbers")
            and features.has("organizations:datascrubbers-v2",
                             project.organization,
                             actor=None)):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer("tasks.store.datascrubbers.scrub",
                               tags={"from_symbolicate": from_symbolicate}):
                project_config = get_project_config(project)

                new_data = safe_execute(scrub_data,
                                        project_config=project_config,
                                        event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(
                op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                    "tasks.store.process_event.preprocessors",
                    tags={
                        "plugin": plugin.slug,
                        "from_symbolicate": from_symbolicate
                    },
            ):
                processors = safe_execute(plugin.get_event_preprocessors,
                                          data=data,
                                          _with_transaction=False)
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception(
                            "tasks.store.preprocessors.error")
                        data.setdefault("_metrics",
                                        {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id,
                                 process_task, project)
            return

        cache_key = event_processing_store.store(data)

    submit_save_event(project, cache_key, event_id, start_time, data)
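
The plugin loop in `_do_process_event` treats each preprocessor as a function that either returns modified event data or a falsy value meaning "no change", while tracking whether anything changed and flagging processor crashes on the event. The sketch below reproduces only that control flow with two made-up processors; it is not Sentry's plugin API.

def strip_large_extra(data):
    # hypothetical processor: drop oversized "extra" payloads
    if len(data.get("extra", {})) > 1:
        return dict(data, extra={})
    return None  # no change

def tag_platform(data):
    # hypothetical processor: default the platform field
    if "platform" not in data:
        return dict(data, platform="other")
    return None

def run_preprocessors(data, processors):
    has_changed = False
    for processor in processors:
        try:
            result = processor(data)
        except Exception:
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            has_changed = True
        else:
            if result:
                data = result
                has_changed = True
    return data, has_changed

event = {"event_id": "a" * 32, "extra": {"a": 1, "b": 2}}
print(run_preprocessors(event, [strip_large_extra, tag_platform]))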
Example #53
def update_groups(request, projects, organization_id, search_fn):
    group_ids = request.GET.getlist('id')
    if group_ids:
        group_list = Group.objects.filter(
            project__organization_id=organization_id,
            project__in=projects,
            id__in=group_ids,
        )
        # filter down group ids to only valid matches
        group_ids = [g.id for g in group_list]
        if not group_ids:
            return Response(status=204)
    else:
        group_list = None

    # TODO(jess): We may want to look into refactoring GroupValidator
    # to support multiple projects, but this is pretty complicated
    # because of the assignee validation. Punting on this for now.
    for project in projects:
        serializer = GroupValidator(
            data=request.DATA,
            partial=True,
            context={'project': project},
        )
        if not serializer.is_valid():
            return Response(serializer.errors, status=400)

    result = dict(serializer.object)

    # so we won't have to requery for each group
    project_lookup = {p.id: p for p in projects}

    acting_user = request.user if request.user.is_authenticated() else None

    if not group_ids:
        try:
            # bulk mutations are limited to 1000 items
            # TODO(dcramer): it'd be nice to support more than this, but it's
            # a bit too complicated right now
            cursor_result, _ = search_fn({
                'limit': 1000,
                'paginator_options': {'max_limit': 1000},
            })
        except ValidationError as exc:
            return Response({'detail': six.text_type(exc)}, status=400)

        group_list = list(cursor_result)
        group_ids = [g.id for g in group_list]

    is_bulk = len(group_ids) > 1

    group_project_ids = {g.project_id for g in group_list}
    # filter projects down to only those that have groups in the search results
    projects = [p for p in projects if p.id in group_project_ids]

    queryset = Group.objects.filter(
        id__in=group_ids,
    )

    discard = result.get('discard')
    if discard:
        return handle_discard(request, list(queryset), projects, acting_user)

    statusDetails = result.pop('statusDetails', result)
    status = result.get('status')
    release = None
    commit = None

    if status in ('resolved', 'resolvedInNextRelease'):
        if status == 'resolvedInNextRelease' or statusDetails.get('inNextRelease'):
            # TODO(jess): We may want to support this for multi project, but punting on it for now
            if len(projects) > 1:
                return Response({
                    'detail': 'Cannot set resolved in next release for multiple projects.'
                }, status=400)
            release = statusDetails.get('inNextRelease') or Release.objects.filter(
                projects=projects[0],
                organization_id=projects[0].organization_id,
            ).extra(select={
                'sort': 'COALESCE(date_released, date_added)',
            }).order_by('-sort')[0]
            activity_type = Activity.SET_RESOLVED_IN_RELEASE
            activity_data = {
                # no version yet
                'version': '',
            }
            status_details = {
                'inNextRelease': True,
                'actor': serialize(extract_lazy_object(request.user), request.user),
            }
            res_type = GroupResolution.Type.in_next_release
            res_type_str = 'in_next_release'
            res_status = GroupResolution.Status.pending
        elif statusDetails.get('inRelease'):
            # TODO(jess): We could update validation to check if release
            # applies to multiple projects, but I think we agreed to punt
            # on this for now
            if len(projects) > 1:
                return Response({
                    'detail': 'Cannot set resolved in release for multiple projects.'
                }, status=400)
            release = statusDetails['inRelease']
            activity_type = Activity.SET_RESOLVED_IN_RELEASE
            activity_data = {
                # no version yet
                'version': release.version,
            }
            status_details = {
                'inRelease': release.version,
                'actor': serialize(extract_lazy_object(request.user), request.user),
            }
            res_type = GroupResolution.Type.in_release
            res_type_str = 'in_release'
            res_status = GroupResolution.Status.resolved
        elif statusDetails.get('inCommit'):
            # TODO(jess): Same here, this is probably something we could do, but
            # punting for now.
            if len(projects) > 1:
                return Response({
                    'detail': 'Cannot set resolved in commit for multiple projects.'
                }, status=400)
            commit = statusDetails['inCommit']
            activity_type = Activity.SET_RESOLVED_IN_COMMIT
            activity_data = {
                'commit': commit.id,
            }
            status_details = {
                'inCommit': serialize(commit, request.user),
                'actor': serialize(extract_lazy_object(request.user), request.user),
            }
            res_type_str = 'in_commit'
        else:
            res_type_str = 'now'
            activity_type = Activity.SET_RESOLVED
            activity_data = {}
            status_details = {}

        now = timezone.now()
        metrics.incr('group.resolved', instance=res_type_str, skip_internal=True)

        # if we've specified a commit, let's see if it's already been released
        # this will allow us to associate the resolution to a release as if we
        # were simply using 'inRelease' above
        # Note: this is different than the way commit resolution works on deploy
        # creation, as a given deploy is connected to an explicit release, and
        # in this case we're simply choosing the most recent release which contains
        # the commit.
        if commit and not release:
            # TODO(jess): If we support multiple projects for release / commit resolution,
            # we need to update this to find the release for each project (we shouldn't assume
            # it's the same)
            try:
                release = Release.objects.filter(
                    projects__in=projects,
                    releasecommit__commit=commit,
                ).extra(select={
                    'sort': 'COALESCE(date_released, date_added)',
                }).order_by('-sort')[0]
                res_type = GroupResolution.Type.in_release
                res_status = GroupResolution.Status.resolved
            except IndexError:
                release = None

        for group in group_list:
            with transaction.atomic():
                resolution = None
                if release:
                    resolution_params = {
                        'release': release,
                        'type': res_type,
                        'status': res_status,
                        'actor_id': request.user.id
                        if request.user.is_authenticated() else None,
                    }
                    resolution, created = GroupResolution.objects.get_or_create(
                        group=group,
                        defaults=resolution_params,
                    )
                    if not created:
                        resolution.update(
                            datetime=timezone.now(), **resolution_params)

                if commit:
                    GroupLink.objects.create(
                        group_id=group.id,
                        project_id=group.project_id,
                        linked_type=GroupLink.LinkedType.commit,
                        relationship=GroupLink.Relationship.resolves,
                        linked_id=commit.id,
                    )

                affected = Group.objects.filter(
                    id=group.id,
                ).update(
                    status=GroupStatus.RESOLVED,
                    resolved_at=now,
                )
                if not resolution:
                    created = affected

                group.status = GroupStatus.RESOLVED
                group.resolved_at = now

                assigned_to = self_subscribe_and_assign_issue(acting_user, group)
                if assigned_to is not None:
                    result['assignedTo'] = assigned_to

                if created:
                    activity = Activity.objects.create(
                        project=project_lookup[group.project_id],
                        group=group,
                        type=activity_type,
                        user=acting_user,
                        ident=resolution.id if resolution else None,
                        data=activity_data,
                    )
                    # TODO(dcramer): we need a solution for activity rollups
                    # before sending notifications on bulk changes
                    if not is_bulk:
                        activity.send_notification()

            if release:
                issue_resolved_in_release.send_robust(
                    group=group,
                    project=project_lookup[group.project_id],
                    user=acting_user,
                    resolution_type=res_type_str,
                    sender=update_groups,
                )
            elif commit:
                resolved_with_commit.send_robust(
                    organization_id=organization_id,
                    user=request.user,
                    group=group,
                    sender=update_groups,
                )
            else:
                issue_resolved.send_robust(
                    project=project_lookup[group.project_id],
                    group=group,
                    user=acting_user,
                    sender=update_groups,
                )

            kick_off_status_syncs.apply_async(kwargs={
                'project_id': group.project_id,
                'group_id': group.id,
            })

        result.update({
            'status': 'resolved',
            'statusDetails': status_details,
        })

    elif status:
        new_status = STATUS_CHOICES[result['status']]

        with transaction.atomic():
            happened = queryset.exclude(
                status=new_status,
            ).update(
                status=new_status,
            )

            GroupResolution.objects.filter(
                group__in=group_ids,
            ).delete()

            if new_status == GroupStatus.IGNORED:
                metrics.incr('group.ignored', skip_internal=True)

                ignore_duration = (
                    statusDetails.pop('ignoreDuration', None) or
                    statusDetails.pop('snoozeDuration', None)
                ) or None
                ignore_count = statusDetails.pop(
                    'ignoreCount', None) or None
                ignore_window = statusDetails.pop(
                    'ignoreWindow', None) or None
                ignore_user_count = statusDetails.pop(
                    'ignoreUserCount', None) or None
                ignore_user_window = statusDetails.pop(
                    'ignoreUserWindow', None) or None
                if ignore_duration or ignore_count or ignore_user_count:
                    if ignore_duration:
                        ignore_until = timezone.now() + timedelta(
                            minutes=ignore_duration,
                        )
                    else:
                        ignore_until = None
                    for group in group_list:
                        state = {}
                        if ignore_count and not ignore_window:
                            state['times_seen'] = group.times_seen
                        if ignore_user_count and not ignore_user_window:
                            state['users_seen'] = group.count_users_seen()
                        GroupSnooze.objects.create_or_update(
                            group=group,
                            values={
                                'until': ignore_until,
                                'count': ignore_count,
                                'window': ignore_window,
                                'user_count': ignore_user_count,
                                'user_window': ignore_user_window,
                                'state': state,
                                'actor_id': request.user.id if request.user.is_authenticated() else None,
                            }
                        )
                        result['statusDetails'] = {
                            'ignoreCount': ignore_count,
                            'ignoreUntil': ignore_until,
                            'ignoreUserCount': ignore_user_count,
                            'ignoreUserWindow': ignore_user_window,
                            'ignoreWindow': ignore_window,
                            'actor': serialize(extract_lazy_object(request.user), request.user),
                        }
                else:
                    GroupSnooze.objects.filter(
                        group__in=group_ids,
                    ).delete()
                    ignore_until = None
                    result['statusDetails'] = {}
            else:
                result['statusDetails'] = {}

        if group_list and happened:
            if new_status == GroupStatus.UNRESOLVED:
                activity_type = Activity.SET_UNRESOLVED
                activity_data = {}
            elif new_status == GroupStatus.IGNORED:
                activity_type = Activity.SET_IGNORED
                activity_data = {
                    'ignoreCount': ignore_count,
                    'ignoreDuration': ignore_duration,
                    'ignoreUntil': ignore_until,
                    'ignoreUserCount': ignore_user_count,
                    'ignoreUserWindow': ignore_user_window,
                    'ignoreWindow': ignore_window,
                }

            groups_by_project_id = defaultdict(list)
            for group in group_list:
                groups_by_project_id[group.project_id].append(group)

            for project in projects:
                project_groups = groups_by_project_id.get(project.id)
                if project_groups:
                    issue_ignored.send_robust(
                        project=project,
                        user=acting_user,
                        group_list=project_groups,
                        activity_data=activity_data,
                        sender=update_groups)

            for group in group_list:
                group.status = new_status

                activity = Activity.objects.create(
                    project=project_lookup[group.project_id],
                    group=group,
                    type=activity_type,
                    user=acting_user,
                    data=activity_data,
                )
                # TODO(dcramer): we need a solution for activity rollups
                # before sending notifications on bulk changes
                if not is_bulk:
                    if acting_user:
                        GroupSubscription.objects.subscribe(
                            user=acting_user,
                            group=group,
                            reason=GroupSubscriptionReason.status_change,
                        )
                    activity.send_notification()

                if new_status == GroupStatus.UNRESOLVED:
                    kick_off_status_syncs.apply_async(kwargs={
                        'project_id': group.project_id,
                        'group_id': group.id,
                    })

    if 'assignedTo' in result:
        assigned_actor = result['assignedTo']
        if assigned_actor:
            for group in group_list:
                resolved_actor = assigned_actor.resolve()

                GroupAssignee.objects.assign(group, resolved_actor, acting_user)
            result['assignedTo'] = serialize(
                assigned_actor.resolve(), acting_user, ActorSerializer())
        else:
            for group in group_list:
                GroupAssignee.objects.deassign(group, acting_user)

    is_member_map = {
        project.id: project.member_set.filter(user=acting_user).exists() for project in projects
    }
    if result.get('hasSeen'):
        for group in group_list:
            if is_member_map.get(group.project_id):
                instance, created = create_or_update(
                    GroupSeen,
                    group=group,
                    user=acting_user,
                    project=project_lookup[group.project_id],
                    values={
                        'last_seen': timezone.now(),
                    }
                )
    elif result.get('hasSeen') is False:
        GroupSeen.objects.filter(
            group__in=group_ids,
            user=acting_user,
        ).delete()

    if result.get('isBookmarked'):
        for group in group_list:
            GroupBookmark.objects.get_or_create(
                project=project_lookup[group.project_id],
                group=group,
                user=acting_user,
            )
            GroupSubscription.objects.subscribe(
                user=acting_user,
                group=group,
                reason=GroupSubscriptionReason.bookmark,
            )
    elif result.get('isBookmarked') is False:
        GroupBookmark.objects.filter(
            group__in=group_ids,
            user=acting_user,
        ).delete()

    # TODO(dcramer): we could make these more efficient by first
    # querying for which rows are present (if N > 2), flipping the flag
    # on those rows, and then creating the missing rows
    if result.get('isSubscribed') in (True, False):
        is_subscribed = result['isSubscribed']
        for group in group_list:
            # NOTE: Subscribing without an initiating event (assignment,
            # commenting, etc.) clears out the previous subscription reason
            # to avoid showing confusing messaging as a result of this
            # action. It'd be jarring to go directly from "you are not
            # subscribed" to "you were subscribed due since you were
            # assigned" just by clicking the "subscribe" button (and you
            # may no longer be assigned to the issue anyway.)
            GroupSubscription.objects.create_or_update(
                user=acting_user,
                group=group,
                project=project_lookup[group.project_id],
                values={
                    'is_active': is_subscribed,
                    'reason': GroupSubscriptionReason.unknown,
                },
            )

        result['subscriptionDetails'] = {
            'reason': SUBSCRIPTION_REASON_MAP.get(
                GroupSubscriptionReason.unknown,
                'unknown',
            ),
        }

    if 'isPublic' in result:
        # We always want to delete an existing share, because triggering
        # isPublic=True, even when the issue is already public, should
        # regenerate the share.
        for group in group_list:
            if GroupShare.objects.filter(group=group).delete():
                result['shareId'] = None
                Activity.objects.create(
                    project=project_lookup[group.project_id],
                    group=group,
                    type=Activity.SET_PRIVATE,
                    user=acting_user,
                )

    if result.get('isPublic'):
        for group in group_list:
            share, created = GroupShare.objects.get_or_create(
                project=project_lookup[group.project_id],
                group=group,
                user=acting_user,
            )
            if created:
                result['shareId'] = share.uuid
                Activity.objects.create(
                    project=project_lookup[group.project_id],
                    group=group,
                    type=Activity.SET_PUBLIC,
                    user=acting_user,
                )

    # XXX(dcramer): this feels a bit shady like it should be its own
    # endpoint
    if result.get('merge') and len(group_list) > 1:
        # don't allow merging cross project
        if len(projects) > 1:
            return Response({'detail': 'Merging across multiple projects is not supported'})
        group_list_by_times_seen = sorted(
            group_list,
            key=lambda g: (g.times_seen, g.id),
            reverse=True,
        )
        primary_group, groups_to_merge = group_list_by_times_seen[0], group_list_by_times_seen[1:]

        group_ids_to_merge = [g.id for g in groups_to_merge]
        eventstream_state = eventstream.start_merge(
            primary_group.project_id,
            group_ids_to_merge,
            primary_group.id
        )

        Group.objects.filter(
            id__in=group_ids_to_merge
        ).update(
            status=GroupStatus.PENDING_MERGE
        )

        transaction_id = uuid4().hex
        merge_groups.delay(
            from_object_ids=group_ids_to_merge,
            to_object_id=primary_group.id,
            transaction_id=transaction_id,
            eventstream_state=eventstream_state,
        )

        Activity.objects.create(
            project=project_lookup[primary_group.project_id],
            group=primary_group,
            type=Activity.MERGE,
            user=acting_user,
            data={
                'issues': [{
                    'id': c.id
                } for c in groups_to_merge],
            },
        )

        result['merge'] = {
            'parent': six.text_type(primary_group.id),
            'children': [six.text_type(g.id) for g in groups_to_merge],
        }

    return Response(result)
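
The merge branch at the end of `update_groups` picks the most-seen group as the merge target by sorting on `(times_seen, id)` in descending order. A tiny sketch of just that selection, with a namedtuple standing in for the `Group` model:

from collections import namedtuple

Group = namedtuple('Group', ['id', 'times_seen'])

group_list = [Group(10, 5), Group(11, 42), Group(12, 42), Group(13, 1)]

group_list_by_times_seen = sorted(
    group_list,
    key=lambda g: (g.times_seen, g.id),
    reverse=True,
)
primary_group, groups_to_merge = (
    group_list_by_times_seen[0],
    group_list_by_times_seen[1:],
)

print(primary_group.id)                 # 12 (most seen; highest id wins the tie)
print([g.id for g in groups_to_merge])  # [11, 10, 13]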
Example #54
def _do_symbolicate_event(cache_key,
                          start_time,
                          event_id,
                          symbolicate_task,
                          data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "symbolicate"
                     },
                     skip_internal=False)
        error_logger.error("symbolicate.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    try:
        with sentry_sdk.start_span(
                op="tasks.store.symbolicate_event.symbolication") as span:
            span.set_data("symbolicaton_function",
                          symbolication_function.__name__)

            with metrics.timer("tasks.store.symbolicate_event.symbolication"):
                symbolicated_data = symbolication_function(data)

            span.set_data("symbolicated_data", bool(symbolicated_data))
            if symbolicated_data:
                data = symbolicated_data
                has_changed = True

    except RetrySymbolication as e:
        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning("symbolicate.slow",
                                 extra={
                                     "project_id": project_id,
                                     "event_id": event_id
                                 })

        if start_time and (
                time() -
                start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop the event, but continue with the rest of the
            # pipeline (persisting the unsymbolicated event)
            error_logger.exception(
                "symbolicate.failed.infinite_retry",
                extra={
                    "project_id": project_id,
                    "event_id": event_id
                },
            )
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            data.setdefault("_metrics", {})["flag.processing.fatal"] = True
            has_changed = True
        else:
            # Requeue the task in the "sleep" queue
            retry_symbolicate_event.apply_async(
                args=(),
                kwargs={
                    "symbolicate_task_name": symbolicate_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return
    except Exception:
        error_logger.exception("tasks.store.symbolicate_event.symbolication")
        data.setdefault("_metrics", {})["flag.processing.error"] = True
        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
        has_changed = True

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
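
The `RetrySymbolication` handler above distinguishes a soft timeout (log a warning, keep retrying) from a hard timeout (give up and persist the unsymbolicated event). The sketch below captures only that decision logic; the timeout values and the `requeue` callable are assumptions, not Sentry's settings or Celery task API.

import time

SOFT_TIMEOUT = 60    # warn once symbolication has been pending this long (seconds)
HARD_TIMEOUT = 600   # stop retrying and continue unsymbolicated after this long

def handle_retry(start_time, retry_after, requeue, log):
    elapsed = time.time() - start_time
    if elapsed > SOFT_TIMEOUT:
        log("symbolicate.slow")
    if elapsed > HARD_TIMEOUT:
        log("symbolicate.failed.infinite_retry")
        return "continue_unsymbolicated"
    requeue(countdown=retry_after)
    return "requeued"

messages = []
print(handle_retry(time.time() - 5, retry_after=3,
                   requeue=lambda countdown: messages.append(countdown),
                   log=messages.append))  # 'requeued'
print(messages)                           # [3]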
Example #55
    def _query(self, project, retention_window_start, group_queryset, tags, environment,
               sort_by, limit, cursor, count_hits, paginator_options, **parameters):

        # TODO: Product decision: we currently search Group.message to handle
        # the `query` parameter, because that's what we've always done. We could
        # do that search against every event in Snuba instead, but results may
        # differ.

        # TODO: It's possible `first_release` could be handled by Snuba.
        if environment is not None:
            group_queryset = ds.QuerySetBuilder({
                'first_release': ds.CallbackCondition(
                    lambda queryset, version: queryset.extra(
                        where=[
                            '{} = {}'.format(
                                ds.get_sql_column(GroupEnvironment, 'first_release_id'),
                                ds.get_sql_column(Release, 'id'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'organization'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'version'),
                            ),
                        ],
                        params=[project.organization_id, version],
                        tables=[Release._meta.db_table],
                    ),
                ),
            }).build(
                group_queryset.extra(
                    where=[
                        u'{} = {}'.format(
                            ds.get_sql_column(Group, 'id'),
                            ds.get_sql_column(GroupEnvironment, 'group_id'),
                        ),
                        u'{} = %s'.format(
                            ds.get_sql_column(GroupEnvironment, 'environment_id'),
                        ),
                    ],
                    params=[environment.id],
                    tables=[GroupEnvironment._meta.db_table],
                ),
                parameters,
            )
        else:
            group_queryset = ds.QuerySetBuilder({
                'first_release': ds.CallbackCondition(
                    lambda queryset, version: queryset.filter(
                        first_release__organization_id=project.organization_id,
                        first_release__version=version,
                    ),
                ),
            }).build(
                group_queryset,
                parameters,
            )

        now = timezone.now()
        # TODO: Presumably we want to search back to the project's full retention,
        #       which may be higher than 90 days in the past, but apparently
        #       `retention_window_start` can be None(?), so we need a fallback.
        start = max(
            filter(None, [
                retention_window_start,
                parameters.get('date_from'),
                now - timedelta(days=90)
            ])
        )

        end = parameters.get('date_to')
        if not end:
            end = now + ALLOWED_FUTURE_DELTA

            # This search is for some time window that ends with "now",
            # so if the requested sort is `date` (`last_seen`) and there
            # are no other Snuba-based search predicates, we can simply
            # return the results from Postgres.
            if sort_by == 'date' \
                    and not tags \
                    and not environment \
                    and not any(param in parameters for param in [
                        'age_from', 'age_to', 'last_seen_from',
                        'last_seen_to', 'times_seen', 'times_seen_lower',
                        'times_seen_upper'
                    ]):
                group_queryset = group_queryset.order_by('-last_seen')
                paginator = DateTimePaginator(group_queryset, '-last_seen', **paginator_options)
                return paginator.get_result(limit, cursor, count_hits=count_hits)

        assert start < end

        # maximum number of Group IDs to send down to Snuba,
        # if more Group ID candidates are found, a "bare" Snuba
        # search is performed and the result groups are then
        # post-filtered via queries to the Sentry DB
        max_pre_snuba_candidates = options.get('snuba.search.max-pre-snuba-candidates')

        # pre-filter query
        candidate_ids = None
        if max_pre_snuba_candidates and limit <= max_pre_snuba_candidates:
            candidate_ids = list(
                group_queryset.values_list('id', flat=True)[:max_pre_snuba_candidates + 1]
            )
            metrics.timing('snuba.search.num_candidates', len(candidate_ids))

            if not candidate_ids:
                # no matches could possibly be found from this point on
                metrics.incr('snuba.search.no_candidates')
                return Paginator(Group.objects.none()).get_result()
            elif len(candidate_ids) > max_pre_snuba_candidates:
                # If the pre-filter query didn't include anything to significantly
                # filter down the number of results (from 'first_release', 'query',
                # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
                # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
                # might have surpassed the `max_pre_snuba_candidates`. In this case,
                # we *don't* want to pass candidates down to Snuba, and instead we
                # want Snuba to do all the filtering/sorting it can and *then* apply
                # this queryset to the results from Snuba, which we call
                # post-filtering.
                metrics.incr('snuba.search.too_many_candidates')
                candidate_ids = None

        sort, extra_aggregations, score_fn = sort_strategies[sort_by]

        chunk_growth = options.get('snuba.search.chunk-growth-rate')
        max_chunk_size = options.get('snuba.search.max-chunk-size')
        chunk_limit = limit
        offset = 0
        num_chunks = 0

        paginator_results = Paginator(Group.objects.none()).get_result()
        result_groups = []
        result_group_ids = set()
        min_score = float('inf')
        max_score = -1

        max_time = options.get('snuba.search.max-total-chunk-time-seconds')
        time_start = time.time()

        # Do smaller searches in chunks until we have enough results
        # to answer the query (or hit the end of possible results). We do
        # this because a common case for search is to return 100 groups
        # sorted by `last_seen`, and we want to avoid returning all of
        # a project's groups and then post-sorting them all in Postgres
        # when typically the first N results will do.
        while (time.time() - time_start) < max_time:
            num_chunks += 1

            # grow the chunk size on each iteration to account for huge projects
            # and weird queries, up to a max size
            chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
            # but if we have candidate_ids always query for at least that many items
            chunk_limit = max(chunk_limit, len(candidate_ids) if candidate_ids else 0)

            # {group_id: group_score, ...}
            snuba_groups, more_results = snuba_search(
                start=start,
                end=end,
                project_id=project.id,
                environment_id=environment and environment.id,
                tags=tags,
                sort=sort,
                extra_aggregations=extra_aggregations,
                score_fn=score_fn,
                candidate_ids=candidate_ids,
                limit=chunk_limit,
                offset=offset,
                **parameters
            )
            metrics.timing('snuba.search.num_snuba_results', len(snuba_groups))
            offset += len(snuba_groups)

            if not snuba_groups:
                break

            if candidate_ids:
                # pre-filtered candidates were passed down to Snuba,
                # so we're finished with filtering and these are the
                # only results
                result_groups = snuba_groups
            else:
                # pre-filtered candidates were *not* passed down to Snuba,
                # so we need to do post-filtering to verify Sentry DB predicates
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid for gid, _ in snuba_groups]
                ).values_list('id', flat=True)

                group_to_score = dict(snuba_groups)
                for group_id in filtered_group_ids:
                    if group_id in result_group_ids:
                        # because we're doing multiple Snuba queries, which
                        # happen outside of a transaction, there is a small possibility
                        # of groups moving around in the sort scoring underneath us,
                        # so we at least want to protect against duplicates
                        continue

                    group_score = group_to_score[group_id]
                    result_group_ids.add(group_id)
                    result_groups.append((group_id, group_score))

                    # used for cursor logic
                    min_score = min(min_score, group_score)
                    max_score = max(max_score, group_score)

            # HACK: If a cursor is being used and there may be more results available
            # in Snuba, we need to detect whether the cursor's value will be
            # found in the result groups. If it isn't in the results yet we need to
            # continue querying before we hand off to the paginator to decide whether
            # enough results are found or not, otherwise the paginator will happily
            # return `limit` worth of results that don't take the cursor into account
            # at all, since it can't know there are more results available.
            # TODO: If chunked search works in practice we should probably extend the
            # paginator to throw something if the cursor value is never found, or do
            # something other than partially leak internal paginator logic up to here.
            # Or make separate Paginator implementation just for Snuba search?
            if cursor is not None \
                    and not candidate_ids \
                    and more_results:
                if cursor.is_prev and min_score < cursor.value:
                    continue
                elif not cursor.is_prev and max_score > cursor.value:
                    continue

            paginator_results = SequencePaginator(
                [(score, id) for (id, score) in result_groups],
                reverse=True,
                **paginator_options
            ).get_result(limit, cursor, count_hits=False)

            # break the query loop for one of three reasons:
            # * we started with Postgres candidates and so only do one Snuba query max
            # * the paginator is returning enough results to satisfy the query (>= the limit)
            # * there are no more groups in Snuba to post-filter
            if candidate_ids \
                    or len(paginator_results.results) >= limit \
                    or not more_results:
                break

        metrics.timing('snuba.search.num_chunks', num_chunks)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [groups[k] for k in paginator_results.results if k in groups]

        return paginator_results
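
Each pass of the search loop grows the chunk size geometrically up to a cap, and never asks Snuba for fewer rows than the candidate list it passes down. A standalone sketch of that sizing rule, with illustrative constants in place of the real `snuba.search.*` options:

def next_chunk_limit(chunk_limit, chunk_growth, max_chunk_size, candidate_ids):
    # grow the chunk size on each iteration, up to a maximum
    chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
    # but always query for at least as many items as there are candidates
    return max(chunk_limit, len(candidate_ids) if candidate_ids else 0)

limit = 100
for _ in range(4):
    limit = next_chunk_limit(limit, chunk_growth=1.5,
                             max_chunk_size=2000, candidate_ids=None)
    print(limit)  # 150, 225, 337, 505 -- eventually capped at 2000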
Example #56
    def _process_single_incr(self, key):
        client = self.cluster.get_routing_client()
        lock_key = self._make_lock_key(key)
        # prevent a stampede due to the way we use celery etas + duplicate
        # tasks
        if not client.set(lock_key, "1", nx=True, ex=10):
            metrics.incr("buffer.revoked",
                         tags={"reason": "locked"},
                         skip_internal=False)
            self.logger.debug("buffer.revoked.locked",
                              extra={"redis_key": key})
            return

        pending_key = self._make_pending_key_from_key(key)

        try:
            conn = self.cluster.get_local_client_for_key(key)
            pipe = conn.pipeline()
            pipe.hgetall(key)
            pipe.zrem(pending_key, key)
            pipe.delete(key)
            values = pipe.execute()[0]

            # XXX(python3): In python2 this isn't as important since redis will
            # return string types (i.e., byte strings), but in py3 we get bytes
            # back, and really we just want to deal with keys as strings.
            values = {force_text(k): v for k, v in values.items()}

            if not values:
                metrics.incr("buffer.revoked",
                             tags={"reason": "empty"},
                             skip_internal=False)
                self.logger.debug("buffer.revoked.empty",
                                  extra={"redis_key": key})
                return

            # XXX(py3): Note that ``import_string`` explicitly wants a str in
            # python2, so we'll decode (for python3) and then translate back to
            # a byte string (in python2) for import_string.
            model = import_string(str(values.pop("m").decode("utf-8")))  # NOQA

            if values["f"].startswith(b"{"):
                filters = self._load_values(
                    json.loads(values.pop("f").decode("utf-8")))
            else:
                # TODO(dcramer): legacy pickle support - remove in Sentry 9.1
                filters = pickle.loads(values.pop("f"))

            incr_values = {}
            extra_values = {}
            signal_only = None
            for k, v in values.items():
                if k.startswith("i+"):
                    incr_values[k[2:]] = int(v)
                elif k.startswith("e+"):
                    if v.startswith(b"["):
                        extra_values[k[2:]] = self._load_value(
                            json.loads(v.decode("utf-8")))
                    else:
                        # TODO(dcramer): legacy pickle support - remove in Sentry 9.1
                        extra_values[k[2:]] = pickle.loads(v)
                elif k == "s":
                    signal_only = bool(int(v))  # Should be 1 if set

            super().process(model, incr_values, filters, extra_values,
                            signal_only)
        finally:
            client.delete(lock_key)
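
The method above guards its flush with a best-effort Redis lock (SET with nx/ex) so duplicated Celery tasks bail out instead of double-processing the key. A minimal sketch of just that guard, assuming a redis-py style client and a hypothetical do_flush callback; metrics is the same module used in the example:

# Minimal sketch of the SET NX EX lock guard used above (assumes redis-py).
def flush_once(client, key, do_flush, ttl=10):
    lock_key = 'l:%s' % key
    # only one worker wins the lock; everyone else drops the task
    if not client.set(lock_key, '1', nx=True, ex=ttl):
        metrics.incr('buffer.revoked', tags={'reason': 'locked'}, skip_internal=False)
        return
    try:
        do_flush(key)
    finally:
        client.delete(lock_key)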
Example #57
0
    def filter(self, record):
        # TODO(mattrobenolt): handle an upstream Sentry
        metrics.incr("internal.uncaptured.logs", skip_internal=False)
        return is_current_event_safe()
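
This filter counts every internal log record and only lets a record through when it is safe to capture. A sketch of how such a filter is typically wired up via the standard logging API; the SafeLogFilter class name and the 'sentry.errors' logger are illustrative only, and metrics/is_current_event_safe come from the example above:

import logging

class SafeLogFilter(logging.Filter):
    def filter(self, record):
        # count every internal record, even the ones that end up dropped
        metrics.incr('internal.uncaptured.logs', skip_internal=False)
        return is_current_event_safe()

logger = logging.getLogger('sentry.errors')   # hypothetical logger name
logger.addFilter(SafeLogFilter())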
Example #58
0
def process_event(event_manager, project, key, remote_addr, helper,
                  attachments):
    event_received.send_robust(ip=remote_addr,
                               project=project,
                               sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data['event_id']

    if should_filter:
        track_outcome(project.organization_id,
                      project.id,
                      key.id,
                      Outcome.FILTERED,
                      filter_reason,
                      event_id=event_id)
        metrics.incr('events.blacklisted',
                     tags={'reason': filter_reason},
                     skip_internal=False)
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' %
                           (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited,
                              project=project,
                              key=key,
                              _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(project.organization_id,
                      project.id,
                      key.id,
                      Outcome.RATE_LIMITED,
                      reason,
                      event_id=event_id)
        metrics.incr(
            'events.dropped',
            tags={
                'reason': reason or 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=reason,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (
        project.id,
        event_id,
    )

    if cache.get(cache_key) is not None:
        track_outcome(project.organization_id,
                      project.id,
                      key.id,
                      Outcome.INVALID,
                      'duplicate',
                      event_id=event_id)
        raise APIForbidden('An event with the same ID already exists (%s)' %
                           (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False)
        or project.get_option('sentry:scrub_ip_address', False))
    scrub_data = (org_options.get('sentry:require_scrub_data', False)
                  or project.get_option('sentry:scrub_data', True))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                            project.get_option(sensitive_fields_key, []))

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (org_options.get(exclude_fields_key, []) +
                          project.get_option(exclude_fields_key, []))

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults',
                                          False)
                          or project.get_option('sentry:scrub_defaults', True))

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data,
                                   start_time=start_time,
                                   attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
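
One detail worth calling out: the duplicate-event guard above is a plain cache lookup keyed on project and event id, and the key is only written after the event has been queued. A minimal sketch of that guard as a hypothetical helper, assuming a Django-style cache object like the one used in the example:

# Hypothetical helper mirroring the duplicate-event check above.
def check_duplicate(cache, project_id, event_id, ttl=60 * 5):
    cache_key = 'ev:%s:%s' % (project_id, event_id)
    if cache.get(cache_key) is not None:
        # same semantics as the APIForbidden branch above
        return True
    # the real code only sets the key after the event has been queued
    cache.set(cache_key, '', ttl)
    return False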
Example #59
0
    def post(self, request, organization):
        """
        Add a Member to Organization
        ````````````````````````````

        Invite a member to the organization.

        :pparam string organization_slug: the slug of the organization the member will belong to
        :param string email: the email address to invite
        :param string role: the role of the new member
        :param array teams: the slugs of the teams the member should belong to.

        :auth: required
        """
        if not features.has("organizations:invite-members",
                            organization,
                            actor=request.user):
            return Response(
                {
                    "organization":
                    "Your organization is not allowed to invite members"
                },
                status=403)

        _, allowed_roles = get_allowed_roles(request, organization)

        serializer = OrganizationMemberSerializer(
            data=request.data,
            context={
                "organization": organization,
                "allowed_roles": allowed_roles,
                "allow_existing_invite_request": True,
            },
        )

        if not serializer.is_valid():
            return Response(serializer.errors, status=400)

        result = serializer.validated_data

        if ratelimits.for_organization_member_invite(
                organization=organization,
                email=result["email"],
                user=request.user,
                auth=request.auth,
        ):
            metrics.incr(
                "member-invite.attempt",
                instance="rate_limited",
                skip_internal=True,
                sample_rate=1.0,
            )
            return Response({"detail": ERR_RATE_LIMITED}, status=429)

        with transaction.atomic():
            # remove any invitation requests for this email before inviting
            OrganizationMember.objects.filter(
                Q(invite_status=InviteStatus.REQUESTED_TO_BE_INVITED.value)
                | Q(invite_status=InviteStatus.REQUESTED_TO_JOIN.value),
                email=result["email"],
                organization=organization,
            ).delete()

            om = OrganizationMember(
                organization=organization,
                email=result["email"],
                role=result["role"],
                inviter=request.user,
            )

            if settings.SENTRY_ENABLE_INVITES:
                om.token = om.generate_token()
            om.save()

        if result["teams"]:
            lock = locks.get(f"org:member:{om.id}", duration=5)
            with TimedRetryPolicy(10)(lock.acquire):
                save_team_assignments(om, result["teams"])

        if settings.SENTRY_ENABLE_INVITES and result.get("sendInvite"):
            om.send_invite_email()
            member_invited.send_robust(member=om,
                                       user=request.user,
                                       sender=self,
                                       referrer=request.data.get("referrer"))

        self.create_audit_entry(
            request=request,
            organization_id=organization.id,
            target_object=om.id,
            data=om.get_audit_log_data(),
            event=AuditLogEntryEvent.MEMBER_INVITE if
            settings.SENTRY_ENABLE_INVITES else AuditLogEntryEvent.MEMBER_ADD,
        )

        return Response(serialize(om), status=201)
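
The rate-limited branch above records a single counter and encodes the outcome in the metric's instance, sampled at 100%. A hypothetical helper mirroring that pattern (record_invite_attempt and the 'success' outcome are illustrative, not part of the endpoint):

# Hypothetical helper: one counter, outcome encoded in `instance`, always sampled.
def record_invite_attempt(outcome):
    metrics.incr(
        "member-invite.attempt",
        instance=outcome,  # e.g. 'rate_limited' as in the example; 'success' is assumed
        skip_internal=True,
        sample_rate=1.0,
    )

record_invite_attempt("rate_limited")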
Example #60
0
    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # don't allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # don't allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # client did not already set them appropriately.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step earlier.  We now also make sure that the
        # fingerprint was set to `'{{ default }}'` just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data['grouping_config'] = get_grouping_config_dict_for_project(project)
            hashes = event.get_hashes()

        data['hashes'] = hashes

        # we want to freeze not just the metadata and type but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage,
        # however, the data is dynamically overridden by Event.title and
        # Event.location (see Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # Update any event attachment that arrived before the event group was defined.
        EventAttachment.objects.filter(
            project_id=project.id,
            event_id=event_id,
        ).update(
            group_id=group.id,
        )

        # save the event unless it's been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event
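
The save path ends by emitting two timings: end-to-end latency (received minus recorded timestamp) and the post-save payload size, both tagged with the project id. A minimal sketch of those calls pulled out into a hypothetical helper, using the same metrics API as above:

# Hypothetical helper wrapping the two timing metrics emitted at the end of save().
def emit_save_metrics(project_id, event_size, recorded_timestamp, received_timestamp):
    # how long the event sat between the SDK timestamp and being saved (unix seconds)
    metrics.timing(
        'events.latency',
        received_timestamp - recorded_timestamp,
        tags={'project_id': project_id},
    )
    # payload size after all of the mutation and materialization above
    metrics.timing(
        'events.size.data.post_save',
        event_size,
        tags={'project_id': project_id},
    )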