def after(self, event, state):
    service = self.get_option('service')

    extra = {'event_id': event.id}

    if not service:
        self.logger.info('rules.fail.is_configured', extra=extra)
        return

    app = None
    try:
        app = SentryApp.objects.get(slug=service)
    except SentryApp.DoesNotExist:
        pass

    if app:
        kwargs = {'sentry_app': app}
        metrics.incr('notifications.sent', instance=app.slug, skip_internal=False)
        yield self.future(notify_sentry_app, **kwargs)
    else:
        plugin = plugins.get(service)
        if not plugin.is_enabled(self.project):
            extra['project_id'] = self.project.id
            self.logger.info('rules.fail.is_enabled', extra=extra)
            return

        group = event.group

        if not plugin.should_notify(group=group, event=event):
            extra['group_id'] = group.id
            self.logger.info('rule.fail.should_notify', extra=extra)
            return

        metrics.incr('notifications.sent', instance=plugin.slug, skip_internal=False)
        yield self.future(plugin.rule_notify)
def get_interfaces(self):
    was_renormalized = _should_skip_to_python(self.event_id)
    metrics.incr('event.get_interfaces', tags={'rust_renormalized': was_renormalized})
    return CanonicalKeyView(get_interfaces(self.data, rust_renormalized=was_renormalized))
def _request(self, method, path, headers=None, data=None, params=None,
             auth=None, json=True, allow_text=None, allow_redirects=None,
             timeout=None):
    if allow_text is None:
        allow_text = self.allow_text

    if allow_redirects is None:
        allow_redirects = self.allow_redirects

    if allow_redirects is None:  # is still None
        allow_redirects = method.upper() == 'GET'

    if timeout is None:
        timeout = 30

    full_url = self.build_url(path)
    host = urlparse(full_url).netloc
    session = build_session()
    try:
        resp = getattr(session, method.lower())(
            url=full_url,
            headers=headers,
            json=data if json else None,
            data=data if not json else None,
            params=params,
            auth=auth,
            verify=self.verify_ssl,
            allow_redirects=allow_redirects,
            timeout=timeout,
        )
        resp.raise_for_status()
    except ConnectionError as e:
        metrics.incr('integrations.http_response', tags={
            'host': host,
            'status': 'connection_error',
        })
        raise ApiHostError.from_exception(e)
    except Timeout as e:
        metrics.incr('integrations.http_response', tags={
            'host': host,
            'status': 'timeout',
        })
        raise ApiTimeoutError.from_exception(e)
    except HTTPError as e:
        resp = e.response
        if resp is None:
            track_response_code(host, 'unknown')
            self.logger.exception('request.error', extra={
                'url': full_url,
            })
            raise ApiError('Internal Error')
        track_response_code(host, resp.status_code)
        raise ApiError.from_response(resp)

    track_response_code(host, resp.status_code)

    if resp.status_code == 204:
        return {}

    return BaseApiResponse.from_response(resp, allow_text=allow_text)
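# `track_response_code` is used above but not defined in this snippet. A minimal
# sketch, assuming it wraps the same 'integrations.http_response' counter that
# the connection_error/timeout branches emit inline (the helper body is an
# assumption; only the call signature is taken from the code above):
def track_response_code(host, code):
    metrics.incr(
        'integrations.http_response',
        tags={'host': host, 'status': code},
    )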
def after(self, event, state):
    service = self.get_option('service')

    extra = {'event_id': event.id}

    if not service:
        self.logger.info('rules.fail.is_configured', extra=extra)
        return

    plugin = plugins.get(service)
    if not plugin.is_enabled(self.project):
        extra['project_id'] = self.project.id
        self.logger.info('rules.fail.is_enabled', extra=extra)
        return

    group = event.group

    if not plugin.should_notify(group=group, event=event):
        extra['group_id'] = group.id
        self.logger.info('rule.fail.should_notify', extra=extra)
        return

    metrics.incr('notifications.sent', instance=plugin.slug)
    yield self.future(plugin.rule_notify)
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # Iterate over all plugins looking for processors based on the input data
    # plugins should yield a processor function only if it actually can operate
    # on the input data, otherwise it should yield nothing
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            # On the first processor found, we just defer to the process_event
            # queue to handle the actual work.
            process_event.delay(cache_key=cache_key, start_time=start_time)
            return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
def _do_preprocess_event(cache_key, data, start_time, event_id, process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'}, skip_internal=False)
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(
        cache_key=cache_key, data=data, start_time=start_time, event_id=event_id,
        project_id=project
    )
def task_revoked_handler(task, expired=False, **kwargs):
    if not isinstance(task, basestring):
        task = _get_task_name(task)

    if expired:
        metrics.incr('jobs.expired', instance=task)
    else:
        metrics.incr('jobs.revoked', instance=task)
def create_failed_event(cache_key, project, issues, event_id):
    """If processing failed we put the original data from the cache into a
    raw event.
    """
    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)
def process(self, key):
    client = self.cluster.get_routing_client()
    lock_key = self._make_lock_key(key)
    # prevent a stampede due to the way we use celery etas + duplicate
    # tasks
    if not client.set(lock_key, '1', nx=True, ex=10):
        metrics.incr('buffer.revoked', tags={'reason': 'locked'})
        self.logger.info('Skipped process on %s; unable to get lock', key)
        return

    conn = self.cluster.get_local_client_for_key(key)
    pipe = conn.pipeline()
    pipe.hgetall(key)
    pipe.zrem(self.pending_key, key)
    pipe.delete(key)
    values = pipe.execute()[0]

    if not values:
        metrics.incr('buffer.revoked', tags={'reason': 'empty'})
        self.logger.info('Skipped process on %s; no values found', key)
        return

    model = import_string(values['m'])
    filters = pickle.loads(values['f'])

    incr_values = {}
    extra_values = {}
    for k, v in values.iteritems():
        if k.startswith('i+'):
            incr_values[k[2:]] = int(v)
        elif k.startswith('e+'):
            extra_values[k[2:]] = pickle.loads(v)

    super(RedisBuffer, self).process(model, incr_values, filters, extra_values)
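# For context on the hash layout `process` decodes: 'm' holds the model import
# path, 'f' the pickled filters, and column fields are prefixed 'i+' (integer
# deltas) or 'e+' (pickled extras). A minimal sketch of a matching write path,
# assuming an `incr`-style method and a `_make_key` helper (both names are
# assumptions; `zadd` uses the redis-py 3 mapping form and `time` is imported):
def incr(self, model, columns, filters, extra=None):
    key = self._make_key(model, filters)  # hypothetical key builder
    conn = self.cluster.get_local_client_for_key(key)
    pipe = conn.pipeline()
    pipe.hsetnx(key, 'm', '%s.%s' % (model.__module__, model.__name__))
    pipe.hsetnx(key, 'f', pickle.dumps(filters))
    for column, amount in columns.items():
        pipe.hincrby(key, 'i+' + column, amount)  # read back as incr_values in process()
    for column, value in (extra or {}).items():
        pipe.hset(key, 'e+' + column, pickle.dumps(value))  # read back as extra_values
    pipe.zadd(self.pending_key, {key: time.time()})
    pipe.execute()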
def _do_preprocess_event(cache_key, data, start_time, event_id, process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    if should_process(data):
        process_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)
        return

    # If we get here, that means the event had no preprocessing needed to be done
    # so we can jump directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time, event_id=event_id)
def save_event(cache_key=None, data=None, start_time=None, event_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import EventManager

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    project = data.pop('project')

    delete_raw_event(project, event_id)

    Raven.tags_context({
        'project': project,
    })

    try:
        manager = EventManager(data)
        manager.save(project)
    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process', time() - start_time, instance=data['platform'])
def __init__(self, *args, **kwargs):
    StacktraceProcessor.__init__(self, *args, **kwargs)

    # If true, the project has been opted into using the symbolicator
    # service for native symbolication, which also means symbolic is not
    # used at all anymore.
    # The (iOS) symbolserver is still used regardless of this value.
    self.use_symbolicator = _is_symbolicator_enabled(self.project, self.data)

    metrics.incr('native.use_symbolicator', tags={'value': self.use_symbolicator})

    self.arch = cpu_name_from_data(self.data)
    self.signal = signal_from_data(self.data)

    self.sym = None
    self.difs_referenced = set()

    images = get_path(self.data, 'debug_meta', 'images', default=(), filter=self._is_valid_image)

    if images:
        self.available = True
        self.sdk_info = get_sdk_from_event(self.data)
        self.object_lookup = ObjectLookup(images)
        self.images = images
    else:
        self.available = False
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        logger.error('Data not available in preprocess_event (cache_key=%s)', cache_key)
        return

    project = data['project']

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        for processor in (safe_execute(plugin.get_event_preprocessors) or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed and cache_key:
        default_cache.set(cache_key, data, 3600)

    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
def send_async(self, to=None, cc=None, bcc=None):
    from sentry.tasks.email import send_email
    fmt = options.get('system.logging-format')
    messages = self.get_built_messages(to, cc=cc, bcc=bcc)
    extra = {'message_type': self.type}
    loggable = [v for k, v in six.iteritems(self.context) if hasattr(v, 'id')]
    for context in loggable:
        extra['%s_id' % type(context).__name__.lower()] = context.id

    log_mail_queued = partial(logger.info, 'mail.queued', extra=extra)
    for message in messages:
        safe_execute(
            send_email.delay,
            message=message,
            _with_transaction=False,
        )
        extra['message_id'] = message.extra_headers['Message-Id']
        metrics.incr('email.queued', instance=self.type)
        if fmt == LoggingFormat.HUMAN:
            extra['message_to'] = (self.format_to(message.to),)
            log_mail_queued()
        elif fmt == LoggingFormat.MACHINE:
            for recipient in message.to:
                extra['message_to'] = recipient
                log_mail_queued()
def process_event(cache_key, start_time=None, **kwargs):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key, data=None, start_time=start_time)
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr("email.sent", len(messages))
    for message in messages:
        logger.info(
            name="sentry.mail",
            event="mail.sent",
            message_id=message.extra_headers["Message-Id"],
        )
    return sent
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr('email.sent', len(messages))
    for message in messages:
        extra = {'message_id': message.extra_headers['Message-Id']}
        logger.info('mail.sent', extra=extra)
    return sent
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr("email.sent", len(messages))
    for message in messages:
        extra = {"message_id": message.extra_headers["Message-Id"]}
        logger.info("mail.sent", extra=extra)
    return sent
def close(self):
    StacktraceProcessor.close(self)
    if self.dsyms_referenced:
        metrics.incr('dsyms.processed', amount=len(self.dsyms_referenced), instance=self.project.id)
    if self.sym is not None:
        self.sym.close()
        self.sym = None
def after(self, event, state):
    group = event.group

    for plugin in self.get_plugins():
        if not safe_execute(plugin.should_notify, group=group, event=event):
            continue

        metrics.incr('notifications.sent', instance=plugin.slug)
        yield self.future(plugin.rule_notify)
def get_event_file_committers(project, event, frame_limit=25):
    # populate event data
    Event.objects.bind_nodes([event], 'data')

    group = Group.objects.get(id=event.group_id)

    first_release_version = group.get_first_release()
    if not first_release_version:
        raise Release.DoesNotExist

    releases = get_previous_releases(project, first_release_version)
    if not releases:
        raise Release.DoesNotExist

    commits = _get_commits(releases)
    if not commits:
        raise Commit.DoesNotExist

    frames = _get_frame_paths(event)
    app_frames = [frame for frame in frames if frame['in_app']][-frame_limit:]
    if not app_frames:
        app_frames = [frame for frame in frames][-frame_limit:]

    # TODO(maxbittker) return this set instead of annotated frames
    # XXX(dcramer): frames may not define a filepath. For example, in Java its common
    # to only have a module name/path
    path_set = {
        f for f in (frame.get('filename') or frame.get('abs_path') for frame in app_frames) if f
    }

    file_changes = []
    if path_set:
        file_changes = _get_commit_file_changes(commits, path_set)

    commit_path_matches = {path: _match_commits_path(file_changes, path) for path in path_set}

    annotated_frames = [
        {
            'frame': frame,
            'commits': commit_path_matches.get(frame.get('filename') or frame.get('abs_path')) or []
        } for frame in app_frames
    ]

    relevant_commits = list(
        {commit for path in commit_path_matches for commit in commit_path_matches[path]}
    )

    committers = _get_committers(annotated_frames, relevant_commits)
    metrics.incr('feature.owners.has-committers',
                 instance='hit' if committers else 'miss',
                 skip_internal=False)
    return committers
def close(self):
    StacktraceProcessor.close(self)
    if self.dsyms_referenced:
        metrics.incr(
            'dsyms.processed',
            amount=len(self.dsyms_referenced),
            skip_internal=True,
            tags={
                'project_id': self.project.id,
            },
        )
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr('email.sent', len(messages), skip_internal=False)
    for message in messages:
        extra = {
            'message_id': message.extra_headers['Message-Id'],
            'size': len(message.message().as_bytes()),
        }
        logger.info('mail.sent', extra=extra)
    return sent
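# Usage sketch: `send_messages` requires each message to carry a Message-Id in
# `extra_headers`, which Django populates from the `headers` constructor argument.
# A minimal hypothetical caller (recipient, subject, and body are placeholders):
from email.utils import make_msgid

from django.core.mail import EmailMultiAlternatives

message = EmailMultiAlternatives(
    subject='Test email',
    body='Hello',
    to=['user@example.com'],
    headers={'Message-Id': make_msgid()},  # surfaces as message.extra_headers['Message-Id']
)
send_messages([message])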
def send_messages(messages, fail_silently=False):
    connection = get_connection(fail_silently=fail_silently)
    sent = connection.send_messages(messages)
    metrics.incr('email.sent', len(messages))
    for message in messages:
        logger.info(
            name='sentry.mail',
            event='mail.sent',
            message_id=message.extra_headers['Message-Id'],
        )
    return sent
def close(self):
    StacktraceProcessor.close(self)
    if self.sourcemaps_touched:
        metrics.incr(
            'sourcemaps.processed',
            amount=len(self.sourcemaps_touched),
            skip_internal=True,
            tags={
                'project_id': self.project.id,
            },
        )
def after(self, event, state):
    group = event.group

    for plugin in self.get_plugins():
        # plugin is now wrapped in the LegacyPluginService object
        plugin = plugin.service
        if not safe_execute(
            plugin.should_notify, group=group, event=event, _with_transaction=False
        ):
            continue

        metrics.incr('notifications.sent', instance=plugin.slug, skip_internal=False)
        yield self.future(plugin.rule_notify)
def set(self, key, attachments, timeout=None):
    key = self.make_key(key)
    for index, attachment in enumerate(attachments):
        compressed = zlib.compress(attachment.data)
        self.inner.set(u'{}:{}'.format(key, index), compressed, timeout, raw=True)

        metrics_tags = {'type': attachment.type}
        metrics.incr('attachments.received', tags=metrics_tags, skip_internal=False)
        metrics.timing('attachments.blob-size.raw', len(attachment.data), tags=metrics_tags)
        metrics.timing('attachments.blob-size.compressed', len(compressed), tags=metrics_tags)

    meta = [attachment.meta() for attachment in attachments]
    self.inner.set(key, meta, timeout, raw=False)
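# The matching read path is not shown here. A minimal sketch of `get`, assuming
# the layout `set` writes above (a meta list under `key`, one zlib-compressed
# blob per attachment under `key:index`); the `raw` keyword on `inner.get`
# mirrors the one used by `set` and is an assumption:
def get(self, key):
    key = self.make_key(key)
    meta = self.inner.get(key, raw=False)
    if meta is None:
        return None

    attachments = []
    for index, attachment_meta in enumerate(meta):
        compressed = self.inner.get(u'{}:{}'.format(key, index), raw=True)
        if compressed is None:
            continue  # the blob may have expired independently of the meta entry
        attachment_meta['data'] = zlib.decompress(compressed)
        attachments.append(attachment_meta)
    return attachments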
def _record_time(self, request, status_code):
    if not hasattr(request, "_view_path"):
        return

    metrics.incr(
        "view.response",
        instance=request._view_path,
        tags={"method": request.method, "status_code": status_code},
    )

    if not hasattr(request, "_start_time"):
        return

    ms = int((time.time() - request._start_time) * 1000)
    metrics.timing("view.duration", ms, instance=request._view_path, tags={"method": request.method})
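# `_record_time` depends on `request._view_path` and `request._start_time` being
# stamped earlier in the request cycle. A minimal sketch of middleware hooks that
# could set them (the hook names follow Django's middleware API; stamping these
# exact attributes is an assumption inferred from the reads above):
def process_request(self, request):
    request._start_time = time.time()

def process_view(self, request, view_func, view_args, view_kwargs):
    if request.resolver_match:
        request._view_path = request.resolver_match.view_name

def process_response(self, request, response):
    self._record_time(request, response.status_code)
    return response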
def emit(self, record, logger=get_logger()):
    """
    Turn something like:

    > django.request.Forbidden (CSRF cookie not set.): /account

    into:

    > django.request.forbidden_csrf_cookie_not_set

    and track it as an incremented counter.
    """
    key = record.name + '.' + record.getMessage()
    key = key.lower()
    key = whitespace_re.sub("_", key)
    key = metrics_badchars_re.sub("", key)
    key = ".".join(key.split(".")[:3])
    metrics.incr(key)
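# Worked example of the normalization above on the docstring's input, with the
# two regexes defined the way the docstring implies (the exact patterns are
# assumptions: whitespace runs become "_", anything outside [a-z0-9._] is dropped):
import re

whitespace_re = re.compile(r'\s+')
metrics_badchars_re = re.compile(r'[^a-z0-9._]')

key = 'django.request' + '.' + 'Forbidden (CSRF cookie not set.): /account'
key = key.lower()
key = whitespace_re.sub('_', key)
key = metrics_badchars_re.sub('', key)
key = '.'.join(key.split('.')[:3])
assert key == 'django.request.forbidden_csrf_cookie_not_set'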
def process(self, request, project, auth, helper, data, **kwargs):
    metrics.incr('events.total')

    remote_addr = request.META['REMOTE_ADDR']
    event_received.send_robust(
        ip=remote_addr,
        sender=type(self),
    )

    if not is_valid_ip(remote_addr, project):
        app.tsdb.incr_multi([
            (app.tsdb.models.project_total_received, project.id),
            (app.tsdb.models.project_total_blacklisted, project.id),
            (app.tsdb.models.organization_total_received, project.organization_id),
            (app.tsdb.models.organization_total_blacklisted, project.organization_id),
        ])
        metrics.incr('events.blacklisted')
        raise APIForbidden('Blacklisted IP address: %s' % (remote_addr, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(app.quotas.is_rate_limited, project=project, _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            helper.log.debug('Dropped event due to error with rate limiter')
        app.tsdb.incr_multi([
            (app.tsdb.models.project_total_received, project.id),
            (app.tsdb.models.project_total_rejected, project.id),
            (app.tsdb.models.organization_total_received, project.organization_id),
            (app.tsdb.models.organization_total_rejected, project.organization_id),
        ])
        metrics.incr('events.dropped')
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)
    else:
        app.tsdb.incr_multi([
            (app.tsdb.models.project_total_received, project.id),
            (app.tsdb.models.organization_total_received, project.organization_id),
        ])

    content_encoding = request.META.get('HTTP_CONTENT_ENCODING', '')

    if isinstance(data, basestring):
        if content_encoding == 'gzip':
            data = helper.decompress_gzip(data)
        elif content_encoding == 'deflate':
            data = helper.decompress_deflate(data)
        elif not data.startswith('{'):
            data = helper.decode_and_decompress_data(data)
        data = helper.safely_load_json_string(data)

    # mutates data
    data = helper.validate_data(project, data)

    if 'sdk' not in data:
        sdk = helper.parse_client_as_sdk(auth.client)
        if sdk:
            data['sdk'] = sdk

    # mutates data
    manager = EventManager(data, version=auth.version)
    data = manager.normalize()

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    if org_options.get('sentry:require_scrub_ip_address', False):
        scrub_ip_address = True
    else:
        scrub_ip_address = project.get_option('sentry:scrub_ip_address', False)

    # insert IP address if not available and wanted
    if not scrub_ip_address:
        helper.ensure_has_ip(
            data,
            remote_addr,
            set_if_missing=auth.is_public or data.get('platform') in ('javascript', 'cocoa', 'objc'))

    event_id = data['event_id']

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (project.id, event_id, )

    if cache.get(cache_key) is not None:
        raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, ))

    if org_options.get('sentry:require_scrub_data', False):
        scrub_data = True
    else:
        scrub_data = project.get_option('sentry:scrub_data', True)

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, [])
        )

        if org_options.get('sentry:require_scrub_defaults', False):
            scrub_defaults = True
        else:
            scrub_defaults = project.get_option('sentry:scrub_defaults', True)

        inst = SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
        )
        inst.apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data)

    cache.set(cache_key, '', 60 * 5)

    helper.log.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=type(self),
    )

    return event_id
def process_update(self, subscription_update):
    dataset = self.subscription.snuba_query.dataset
    if dataset == "events" and not features.has(
        "organizations:incidents", self.subscription.project.organization
    ):
        # They have downgraded since these subscriptions have been created. So we just ignore updates for now.
        metrics.incr("incidents.alert_rules.ignore_update_missing_incidents")
        return
    elif dataset == "transactions" and not features.has(
        "organizations:performance-view", self.subscription.project.organization
    ):
        # They have downgraded since these subscriptions have been created. So we just ignore updates for now.
        metrics.incr("incidents.alert_rules.ignore_update_missing_incidents_performance")
        return

    if not hasattr(self, "alert_rule"):
        # If the alert rule has been removed then just skip
        metrics.incr("incidents.alert_rules.no_alert_rule_for_subscription")
        logger.error("Received an update for a subscription, but no associated alert rule exists")
        # TODO: Delete subscription here.
        return

    if subscription_update["timestamp"] <= self.last_update:
        metrics.incr("incidents.alert_rules.skipping_already_processed_update")
        return

    self.last_update = subscription_update["timestamp"]

    if len(subscription_update["values"]["data"]) > 1:
        logger.warning(
            "Subscription returned more than 1 row of data",
            extra={
                "subscription_id": self.subscription.id,
                "dataset": self.subscription.snuba_query.dataset,
                "snuba_subscription_id": self.subscription.subscription_id,
                "result": subscription_update,
            },
        )
    # dict.values() is not indexable in python 3, so materialize it first
    aggregation_value = list(subscription_update["values"]["data"][0].values())[0]

    alert_operator, resolve_operator = self.THRESHOLD_TYPE_OPERATORS[
        AlertRuleThresholdType(self.alert_rule.threshold_type)
    ]

    for trigger in self.triggers:
        if alert_operator(
            aggregation_value, trigger.alert_threshold
        ) and not self.check_trigger_status(trigger, TriggerStatus.ACTIVE):
            metrics.incr("incidents.alert_rules.threshold", tags={"type": "alert"})
            with transaction.atomic():
                self.trigger_alert_threshold(trigger, aggregation_value)
        else:
            self.trigger_alert_counts[trigger.id] = 0

    if (
        resolve_operator(aggregation_value, self.calculate_resolve_threshold())
        and self.active_incident
    ):
        self.rule_resolve_counts += 1
        if self.rule_resolve_counts >= self.alert_rule.threshold_period:
            # TODO: Make sure we iterate over critical then warning in order.
            # Potentially also de-dupe actions that are identical. Maybe just
            # collect all actions, de-dupe and resolve all at once
            metrics.incr("incidents.alert_rules.threshold", tags={"type": "resolve"})
            for trigger in self.triggers:
                if self.check_trigger_status(trigger, TriggerStatus.ACTIVE):
                    with transaction.atomic():
                        self.trigger_resolve_threshold(trigger, aggregation_value)

            update_incident_status(
                self.active_incident,
                IncidentStatus.CLOSED,
                status_method=IncidentStatusMethod.RULE_TRIGGERED,
                date_closed=self.calculate_event_date_from_update_date(self.last_update),
            )
            self.active_incident = None
            self.incident_triggers.clear()
            self.rule_resolve_counts = 0
    else:
        self.rule_resolve_counts = 0

    # We update the rule stats here after we commit the transaction. This guarantees
    # that we'll never miss an update, since we'll never roll back if the process
    # is killed here. The trade-off is that we might process an update twice. Mostly
    # this will have no effect, but if someone manages to close a triggered incident
    # before the next one then we might alert twice.
    self.update_alert_rule_stats()
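# Sketch of the THRESHOLD_TYPE_OPERATORS table consulted above: each threshold
# type maps to an (alert_operator, resolve_operator) pair. The operator.gt/lt
# pairing and the ABOVE/BELOW members are assumptions consistent with how the
# pair is unpacked and applied in process_update:
import operator

THRESHOLD_TYPE_OPERATORS = {
    AlertRuleThresholdType.ABOVE: (operator.gt, operator.lt),
    AlertRuleThresholdType.BELOW: (operator.lt, operator.gt),
}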
def close(self):
    StacktraceProcessor.close(self)
    if self.sourcemaps_touched:
        metrics.incr(
            "sourcemaps.processed", amount=len(self.sourcemaps_touched), skip_internal=True
        )
def forward_event(self, event, payload):
    if not (self.project_token and self.project_index and self.project_instance):
        metrics.incr(
            "integrations.splunk.forward-event.unconfigured",
            tags={
                "project_id": event.project_id,
                "organization_id": event.project.organization_id,
                "event_type": event.get_event_type(),
            },
        )
        return False

    if self.host:
        payload["host"] = self.host

    client = self.get_client()

    try:
        # https://docs.splunk.com/Documentation/Splunk/7.2.3/Data/TroubleshootHTTPEventCollector
        client.request(payload)

        metrics.incr(
            "integrations.splunk.forward-event.success",
            tags={
                "project_id": event.project_id,
                "organization_id": event.project.organization_id,
                "event_type": event.get_event_type(),
            },
        )
        return True
    except Exception as exc:
        metric = "integrations.splunk.forward-event.error"
        metrics.incr(
            metric,
            tags={
                "project_id": event.project_id,
                "organization_id": event.project.organization_id,
                "event_type": event.get_event_type(),
            },
        )
        logger.info(
            metric,
            extra={
                "instance": self.project_instance,
                "project_id": event.project_id,
                "organization_id": event.project.organization_id,
                "error": str(exc),
            },
        )
        if isinstance(exc, (ApiHostError, ApiTimeoutError)):
            # The above errors are already handled by the API client.
            # Just log and return.
            return False
        if isinstance(exc, ApiError) and exc.code == 403:
            # 403s are not errors or actionable for us do not re-raise
            return False
        raise
def _request(
    self,
    method,
    path,
    headers=None,
    data=None,
    params=None,
    auth=None,
    json=True,
    allow_text=None,
    allow_redirects=None,
    timeout=None,
):
    if allow_text is None:
        allow_text = self.allow_text

    if allow_redirects is None:
        allow_redirects = self.allow_redirects

    if allow_redirects is None:  # is still None
        allow_redirects = method.upper() == "GET"

    if timeout is None:
        timeout = 30

    full_url = self.build_url(path)

    session = build_session()

    metrics.incr(
        u"%s.http_request" % self.datadog_prefix,
        sample_rate=1.0,
        tags={self.integration_type: self.name},
    )

    try:
        with sentry_sdk.configure_scope() as scope:
            parent_span_id = scope.span.span_id
            trace_id = scope.span.trace_id
    except AttributeError:
        parent_span_id = None
        trace_id = None

    with sentry_sdk.start_transaction(
        op=u"{}.http".format(self.integration_type),
        name=u"{}.http_response.{}".format(self.integration_type, self.name),
        parent_span_id=parent_span_id,
        trace_id=trace_id,
        sampled=True,
    ) as span:
        try:
            resp = getattr(session, method.lower())(
                url=full_url,
                headers=headers,
                json=data if json else None,
                data=data if not json else None,
                params=params,
                auth=auth,
                verify=self.verify_ssl,
                allow_redirects=allow_redirects,
                timeout=timeout,
            )
            resp.raise_for_status()
        except ConnectionError as e:
            self.track_response_data("connection_error", span, e)
            raise ApiHostError.from_exception(e)
        except Timeout as e:
            self.track_response_data("timeout", span, e)
            raise ApiTimeoutError.from_exception(e)
        except HTTPError as e:
            resp = e.response
            if resp is None:
                self.track_response_data("unknown", span, e)
                self.logger.exception(
                    "request.error", extra={self.integration_type: self.name, "url": full_url}
                )
                raise ApiError("Internal Error", url=full_url)
            self.track_response_data(resp.status_code, span, e)
            raise ApiError.from_response(resp, url=full_url)

        self.track_response_data(resp.status_code, span, None, resp)

        if resp.status_code == 204:
            return {}

        return BaseApiResponse.from_response(resp, allow_text=allow_text)
def build_handler(self, incident, project):
    type = AlertRuleTriggerAction.Type(self.type)
    if type in self._type_registrations:
        return self._type_registrations[type].handler(self, incident, project)
    else:
        metrics.incr("alert_rule_trigger.unhandled_type.{}".format(self.type))
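# Sketch of how `_type_registrations` might be populated: a class-level registry
# mapping an AlertRuleTriggerAction.Type to a record exposing the `handler`
# factory looked up above. The decorator and record class are assumptions, not
# the verified registration API:
class TypeRegistration(object):
    def __init__(self, handler):
        self.handler = handler


def register_type_handler(type):
    """Hypothetical decorator: register a handler class for an action type."""
    def decorator(handler):
        AlertRuleTriggerAction._type_registrations[type] = TypeRegistration(handler)
        return handler
    return decorator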
def dispatch(self, request, project_id=None, *args, **kwargs):
    helper = self.helper_cls(
        agent=request.META.get('HTTP_USER_AGENT'),
        project_id=project_id,
        ip_address=request.META['REMOTE_ADDR'],
    )
    origin = None

    if kafka_publisher is not None:
        self._publish_to_kafka(request)

    try:
        origin = helper.origin_from_request(request)

        response = self._dispatch(request, helper, project_id=project_id, origin=origin, *args, **kwargs)
    except APIError as e:
        context = {
            'error': force_bytes(e.msg, errors='replace'),
        }
        if e.name:
            context['error_name'] = e.name

        response = HttpResponse(json.dumps(context), content_type='application/json', status=e.http_status)
        # Set X-Sentry-Error as in many cases it is easier to inspect the headers
        response['X-Sentry-Error'] = context['error']

        if isinstance(e, APIRateLimited) and e.retry_after is not None:
            response['Retry-After'] = six.text_type(int(math.ceil(e.retry_after)))
    except Exception as e:
        # TODO(dcramer): test failures are not outputting the log message
        # here
        if settings.DEBUG:
            content = traceback.format_exc()
        else:
            content = ''
        logger.exception(e)
        response = HttpResponse(content, content_type='text/plain', status=500)

    # TODO(dcramer): it'd be nice if we had an incr_multi method so
    # tsdb could optimize this
    metrics.incr('client-api.all-versions.requests')
    metrics.incr('client-api.all-versions.responses.%s' % (response.status_code, ))
    metrics.incr('client-api.all-versions.responses.%sxx' % (six.text_type(response.status_code)[0], ))

    if helper.context.version:
        metrics.incr('client-api.v%s.requests' % (helper.context.version, ))
        metrics.incr('client-api.v%s.responses.%s' % (helper.context.version, response.status_code))
        metrics.incr(
            'client-api.v%s.responses.%sxx' %
            (helper.context.version, six.text_type(response.status_code)[0])
        )

    if response.status_code != 200 and origin:
        # We allow all origins on errors
        response['Access-Control-Allow-Origin'] = '*'

    if origin:
        response['Access-Control-Allow-Headers'] = \
            'X-Sentry-Auth, X-Requested-With, Origin, Accept, ' \
            'Content-Type, Authentication'
        response['Access-Control-Allow-Methods'] = ', '.join(self._allowed_methods())
        response['Access-Control-Expose-Headers'] = 'X-Sentry-Error, Retry-After'

    return response
def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs):
    metrics.incr('events.total')

    if not data:
        raise APIError('No JSON data was found')

    remote_addr = request.META['REMOTE_ADDR']

    event_mgr = EventManager(
        data,
        project=project,
        key=key,
        auth=auth,
        client_ip=remote_addr,
        user_agent=helper.context.agent,
        content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''),
    )
    del data

    self.pre_normalize(event_mgr, helper)
    event_mgr.normalize()

    event_received.send_robust(ip=remote_addr, project=project, sender=type(self))

    start_time = time()
    tsdb_start_time = to_datetime(start_time)
    should_filter, filter_reason = event_mgr.should_filter()
    if should_filter:
        increment_list = [
            (tsdb.models.project_total_received, project.id),
            (tsdb.models.project_total_blacklisted, project.id),
            (tsdb.models.organization_total_received, project.organization_id),
            (tsdb.models.organization_total_blacklisted, project.organization_id),
            (tsdb.models.key_total_received, key.id),
            (tsdb.models.key_total_blacklisted, key.id),
        ]

        try:
            increment_list.append((FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id))
        # should error when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES
        except KeyError:
            pass

        tsdb.incr_multi(
            increment_list,
            timestamp=tsdb_start_time,
        )

        metrics.incr('events.blacklisted', tags={'reason': filter_reason})
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=type(self),
        )
        raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_rejected, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.organization_total_rejected, project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_rejected, key.id),
            ],
            timestamp=tsdb_start_time,
        )
        metrics.incr(
            'events.dropped',
            tags={
                'reason': rate_limit.reason_code if rate_limit else 'unknown',
            })
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            sender=type(self),
            reason_code=rate_limit.reason_code if rate_limit else None,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)
    else:
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.key_total_received, key.id),
            ],
            timestamp=tsdb_start_time,
        )

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    data = event_mgr.get_data()
    del event_mgr

    event_id = data['event_id']

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (project.id, event_id, )

    if cache.get(cache_key) is not None:
        raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False) or
        project.get_option('sentry:scrub_ip_address', False)
    )
    scrub_data = (
        org_options.get('sentry:require_scrub_data', False) or
        project.get_option('sentry:scrub_data', True)
    )

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, [])
        )

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (
            org_options.get(exclude_fields_key, []) +
            project.get_option(exclude_fields_key, [])
        )

        scrub_defaults = (
            org_options.get('sentry:require_scrub_defaults', False) or
            project.get_option('sentry:scrub_defaults', True)
        )

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=type(self),
    )

    return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]

    data_category = DataCategory.from_event_type(data.get("type"))

    if should_filter:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        # relay will no longer be able to provide information about filter
        # status so to see the impact we're adding a way to turn on relay
        # like behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)

        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
            category=data_category,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, ))

    data = scrub_data(project, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return event_id
def assemble_download(
    data_export_id,
    export_limit=EXPORTED_ROWS_LIMIT,
    batch_size=SNUBA_MAX_RESULTS,
    offset=0,
    bytes_written=0,
    environment_id=None,
    **kwargs
):
    with sentry_sdk.start_transaction(
        op="task.data_export.assemble",
        name="DataExportAssemble",
        sampled=True,
    ):
        first_page = offset == 0

        try:
            if first_page:
                logger.info("dataexport.start", extra={"data_export_id": data_export_id})
            data_export = ExportedData.objects.get(id=data_export_id)
            if first_page:
                metrics.incr("dataexport.start", tags={"success": True}, sample_rate=1.0)
            logger.info("dataexport.run", extra={"data_export_id": data_export_id, "offset": offset})
        except ExportedData.DoesNotExist as error:
            if first_page:
                metrics.incr("dataexport.start", tags={"success": False}, sample_rate=1.0)
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type", ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        try:
            # ensure that the export limit is set and capped at EXPORTED_ROWS_LIMIT
            if export_limit is None:
                export_limit = EXPORTED_ROWS_LIMIT
            else:
                export_limit = min(export_limit, EXPORTED_ROWS_LIMIT)

            processor = get_processor(data_export, environment_id)

            with tempfile.TemporaryFile(mode="w+b") as tf:
                # XXX(python3):
                #
                # In python3 we write unicode strings (which is all the csv
                # module is able to do, it will NOT write bytes like in py2).
                # Because of this we use the codec getwriter to transform our
                # file handle to a stream writer that will encode to utf8.
                tfw = codecs.getwriter("utf-8")(tf)

                writer = csv.DictWriter(tfw, processor.header_fields, extrasaction="ignore")
                if first_page:
                    writer.writeheader()

                # the position in the file at the end of the headers
                starting_pos = tf.tell()

                # the row offset relative to the start of the current task
                # this offset tells you the number of rows written during this batch fragment
                fragment_offset = 0

                # the absolute row offset from the beginning of the export
                next_offset = offset + fragment_offset

                while True:
                    # the number of rows to export in the next batch fragment
                    fragment_row_count = min(batch_size, max(export_limit - next_offset, 1))

                    rows = process_rows(processor, data_export, fragment_row_count, next_offset)
                    writer.writerows(rows)

                    fragment_offset += len(rows)
                    next_offset = offset + fragment_offset

                    if (
                        not rows
                        or len(rows) < batch_size
                        # the batch may exceed MAX_BATCH_SIZE but immediately stops
                        or tf.tell() - starting_pos >= MAX_BATCH_SIZE
                    ):
                        break

                tf.seek(0)
                new_bytes_written = store_export_chunk_as_blob(data_export, bytes_written, tf)
                bytes_written += new_bytes_written
        except ExportError as error:
            return data_export.email_failure(message=six.text_type(error))
        except Exception as error:
            metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
            logger.error(
                "dataexport.error: %s",
                six.text_type(error),
                extra={"query": data_export.payload, "org": data_export.organization_id},
            )
            capture_exception(error)

            try:
                current.retry()
            except MaxRetriesExceededError:
                metrics.incr(
                    "dataexport.end",
                    tags={"success": False, "error": six.text_type(error)},
                    sample_rate=1.0,
                )
                return data_export.email_failure(message="Internal processing failure")
        else:
            if rows and len(rows) >= batch_size and new_bytes_written and next_offset < export_limit:
                assemble_download.delay(
                    data_export_id,
                    export_limit=export_limit,
                    batch_size=batch_size,
                    offset=next_offset,
                    bytes_written=bytes_written,
                    environment_id=environment_id,
                )
            else:
                metrics.timing("dataexport.row_count", next_offset, sample_rate=1.0)
                metrics.timing("dataexport.file_size", bytes_written, sample_rate=1.0)
                merge_export_blobs.delay(data_export_id)
def merge_export_blobs(data_export_id, **kwargs):
    with sentry_sdk.start_transaction(
        op="task.data_export.merge",
        name="DataExportMerge",
        sampled=True,
    ):
        try:
            data_export = ExportedData.objects.get(id=data_export_id)
        except ExportedData.DoesNotExist as error:
            logger.exception(error)
            return

        with sentry_sdk.configure_scope() as scope:
            if data_export.user:
                user = {}
                if data_export.user.id:
                    user["id"] = data_export.user.id
                if data_export.user.username:
                    user["username"] = data_export.user.username
                if data_export.user.email:
                    user["email"] = data_export.user.email
                scope.user = user
            scope.set_tag("organization.slug", data_export.organization.slug)
            scope.set_tag("export.type", ExportQueryType.as_str(data_export.query_type))
            scope.set_extra("export.query", data_export.query_info)

        # adapted from `putfile` in `src/sentry/models/file.py`
        try:
            with transaction.atomic():
                file = File.objects.create(
                    name=data_export.file_name,
                    type="export.csv",
                    headers={"Content-Type": "text/csv"},
                )
                size = 0
                file_checksum = sha1(b"")

                for export_blob in ExportedDataBlob.objects.filter(
                    data_export=data_export
                ).order_by("offset"):
                    blob = export_blob.blob
                    FileBlobIndex.objects.create(file=file, blob=blob, offset=size)
                    size += blob.size
                    blob_checksum = sha1(b"")

                    for chunk in blob.getfile().chunks():
                        blob_checksum.update(chunk)
                        file_checksum.update(chunk)

                    if blob.checksum != blob_checksum.hexdigest():
                        raise AssembleChecksumMismatch("Checksum mismatch")

                file.size = size
                file.checksum = file_checksum.hexdigest()
                file.save()
                data_export.finalize_upload(file=file)

            time_elapsed = (timezone.now() - data_export.date_added).total_seconds()
            metrics.timing("dataexport.duration", time_elapsed, sample_rate=1.0)
            logger.info("dataexport.end", extra={"data_export_id": data_export_id})
            metrics.incr("dataexport.end", tags={"success": True}, sample_rate=1.0)
        except Exception as error:
            metrics.incr("dataexport.error", tags={"error": six.text_type(error)}, sample_rate=1.0)
            metrics.incr(
                "dataexport.end",
                tags={"success": False, "error": six.text_type(error)},
                sample_rate=1.0,
            )
            logger.error(
                "dataexport.error: %s",
                six.text_type(error),
                extra={"query": data_export.payload, "org": data_export.organization_id},
            )
            capture_exception(error)

            if isinstance(error, IntegrityError):
                message = "Failed to save the assembled file."
            else:
                message = "Internal processing failure."
            return data_export.email_failure(message=message)
def handle_basic_auth(self, request, **kwargs):
    can_register = self.can_register(request)

    op = request.POST.get("op")
    organization = kwargs.pop("organization", None)

    if not op:
        # Detect that we are on the register page by url /register/ and
        # then activate the register tab by default.
        if "/register" in request.path_info and can_register:
            op = "register"
        elif request.GET.get("op") == "sso":
            op = "sso"

    login_form = self.get_login_form(request)
    if can_register:
        register_form = self.get_register_form(
            request, initial={"username": request.session.get("invite_email", "")}
        )
    else:
        register_form = None

    if can_register and register_form.is_valid():
        user = register_form.save()
        user.send_confirm_emails(is_new_user=True)

        # HACK: grab whatever the first backend is and assume it works
        user.backend = settings.AUTHENTICATION_BACKENDS[0]

        auth.login(request, user, organization_id=organization.id if organization else None)

        # can_register should only allow a single registration
        request.session.pop("can_register", None)
        request.session.pop("invite_email", None)

        # Attempt to directly accept any pending invites
        invite_helper = ApiInviteHelper.from_cookie(request=request, instance=self)

        if invite_helper and invite_helper.valid_request:
            invite_helper.accept_invite()
            response = self.redirect_to_org(request)
            remove_invite_cookie(request, response)
            return response

        return self.redirect(auth.get_login_redirect(request))

    elif request.method == "POST":
        from sentry.app import ratelimiter
        from sentry.utils.hashlib import md5_text

        login_attempt = (
            op == "login" and request.POST.get("username") and request.POST.get("password")
        )

        if login_attempt and ratelimiter.is_limited(
            u"auth:login:username:{}".format(
                md5_text(request.POST["username"].lower()).hexdigest()
            ),
            limit=10,
            window=60,  # 10 per minute should be enough for anyone
        ):
            login_form.errors["__all__"] = [
                u"You have made too many login attempts. Please try again later."
            ]
            metrics.incr(
                "login.attempt", instance="rate_limited", skip_internal=True, sample_rate=1.0
            )
        elif login_form.is_valid():
            user = login_form.get_user()

            auth.login(request, user, organization_id=organization.id if organization else None)
            metrics.incr(
                "login.attempt", instance="success", skip_internal=True, sample_rate=1.0
            )

            if not user.is_active:
                return self.redirect(reverse("sentry-reactivate-account"))

            return self.redirect(auth.get_login_redirect(request))
        else:
            metrics.incr(
                "login.attempt", instance="failure", skip_internal=True, sample_rate=1.0
            )

    context = {
        "op": op or "login",
        "server_hostname": get_server_hostname(),
        "login_form": login_form,
        "organization": organization,
        "register_form": register_form,
        "CAN_REGISTER": can_register,
        "join_request_link": self.get_join_request_link(organization),
    }
    context.update(additional_context.run_callbacks(request))

    return self.respond_login(request, context, **kwargs)
def put(self, request: Request, organization, member_id) -> Response:
    try:
        om = self._get_member(request, organization, member_id)
    except OrganizationMember.DoesNotExist:
        raise ResourceDoesNotExist

    serializer = OrganizationMemberSerializer(data=request.data, partial=True)
    if not serializer.is_valid():
        return Response(status=400)

    try:
        auth_provider = AuthProvider.objects.get(organization=organization)
        auth_provider = auth_provider.get_provider()
    except AuthProvider.DoesNotExist:
        auth_provider = None

    allowed_roles = None
    result = serializer.validated_data

    # XXX(dcramer): if/when this expands beyond reinvite we need to check
    # access level
    if result.get("reinvite"):
        if om.is_pending:
            if ratelimits.for_organization_member_invite(
                organization=organization,
                email=om.email,
                user=request.user,
                auth=request.auth,
            ):
                metrics.incr(
                    "member-invite.attempt",
                    instance="rate_limited",
                    skip_internal=True,
                    sample_rate=1.0,
                )
                return Response({"detail": ERR_RATE_LIMITED}, status=429)

            if result.get("regenerate"):
                if request.access.has_scope("member:admin"):
                    om.regenerate_token()
                    om.save()
                else:
                    return Response({"detail": ERR_INSUFFICIENT_SCOPE}, status=400)

            if om.token_expired:
                return Response({"detail": ERR_EXPIRED}, status=400)

            om.send_invite_email()
        elif auth_provider and not getattr(om.flags, "sso:linked"):
            om.send_sso_link_email(request.user, auth_provider)
        else:
            # TODO(dcramer): proper error message
            return Response({"detail": ERR_UNINVITABLE}, status=400)

    if "teams" in result:
        # dupe code from member_index
        # ensure listed teams are real teams
        teams = list(
            Team.objects.filter(
                organization=organization, status=TeamStatus.VISIBLE, slug__in=result["teams"]
            )
        )

        if len(set(result["teams"])) != len(teams):
            return Response({"teams": "Invalid team"}, status=400)

        with transaction.atomic():
            # teams may be empty
            OrganizationMemberTeam.objects.filter(organizationmember=om).delete()
            OrganizationMemberTeam.objects.bulk_create(
                [OrganizationMemberTeam(team=team, organizationmember=om) for team in teams]
            )

    if result.get("role"):
        _, allowed_roles = get_allowed_roles(request, organization)
        allowed_role_ids = {r.id for r in allowed_roles}

        # A user cannot promote others above themselves
        if result["role"] not in allowed_role_ids:
            return Response(
                {"role": "You do not have permission to assign the given role."}, status=403
            )

        # A user cannot demote a superior
        if om.role not in allowed_role_ids:
            return Response(
                {"role": "You do not have permission to assign a role to the given user."},
                status=403,
            )

        if om.user == request.user and (result["role"] != om.role):
            return Response({"detail": "You cannot make changes to your own role."}, status=400)

        om.update(role=result["role"])

    self.create_audit_entry(
        request=request,
        organization=organization,
        target_object=om.id,
        target_user=om.user,
        event=AuditLogEntryEvent.MEMBER_EDIT,
        data=om.get_audit_log_data(),
    )

    context = self._serialize_member(om, request, allowed_roles)

    return Response(context)
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    existing_group_id = None
    for h in all_hashes:
        if h.group_id is not None:
            existing_group_id = h.group_id
            break
        if h.group_tombstone_id is not None:
            raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id)

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        # it's possible the release was deleted between
        # when we queried for the release and now, so
        # make sure it still exists
        first_release = kwargs.pop('first_release', None)

        with transaction.atomic():
            short_id = project.next_short_id()
            group, group_is_new = Group.objects.create(
                project=project,
                short_id=short_id,
                first_release_id=Release.objects.filter(
                    id=first_release.id,
                ).values_list('id', flat=True).first() if first_release else None,
                **kwargs
            ), True

        metrics.incr(
            'group.created',
            skip_internal=True,
            tags={'platform': event.platform or 'unknown'}
        )
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h for h in all_hashes if h.group_id is None]
    if new_hashes:
        # XXX: There is a race condition here wherein another process could
        # create a new group that is associated with one of the new hashes,
        # add some event(s) to it, and then subsequently have the hash
        # "stolen" by this process. This then "orphans" those events from
        # their "siblings" in the group we've created here. We don't have a
        # way to fix this, since we can't update the group on those hashes
        # without filtering on `group_id` (which we can't do due to query
        # planner weirdness.) For more context, see 84c6f75a and d0e22787,
        # as well as GH-5085.
        GroupHash.objects.filter(
            id__in=[h.id for h in new_hashes],
        ).exclude(
            state=GroupHash.State.LOCKED_IN_MIGRATION,
        ).update(group=group)

        if group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

    # XXX(dcramer): it's important this gets called **before** the aggregate
    # is processed as otherwise values like last_seen will get mutated
    can_sample = (
        features.has('projects:sample-events', project=project) and
        should_sample(
            event.data.get('received') or float(event.datetime.strftime('%s')),
            group.data.get('last_received') or float(group.last_seen.strftime('%s')),
            group.times_seen,
        )
    )

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group,
            event=event,
            data=kwargs,
            release=release,
        )
    else:
        is_regression = False

    # Determine if we've sampled enough data to store this event
    if is_new or is_regression:
        is_sample = False
    else:
        is_sample = can_sample

    if not is_sample:
        GroupHash.record_last_processed_event_id(
            all_hashes[0].id,
            event.event_id,
        )

    return group, is_new, is_regression, is_sample
def query(
    self,
    projects,
    retention_window_start,
    group_queryset,
    environments,
    sort_by,
    limit,
    cursor,
    count_hits,
    paginator_options,
    search_filters,
    date_from,
    date_to,
    max_hits=None,
):
    now = timezone.now()
    end = None
    end_params = [
        _f for _f in [date_to, get_search_filter(search_filters, "date", "<")] if _f
    ]
    if end_params:
        end = min(end_params)

    if not end:
        end = now + ALLOWED_FUTURE_DELTA

        metrics.incr("snuba.search.postgres_only")

        # This search is for some time window that ends with "now",
        # so if the requested sort is `date` (`last_seen`) and there
        # are no other Snuba-based search predicates, we can simply
        # return the results from Postgres.
        if (
            cursor is None
            and sort_by == "date"
            and
            # This handles tags and date parameters for search filters.
            not [
                sf
                for sf in search_filters
                if sf.key.name not in self.postgres_only_fields.union(["date"])
            ]
        ):
            group_queryset = group_queryset.order_by("-last_seen")
            paginator = DateTimePaginator(group_queryset, "-last_seen", **paginator_options)
            # When it's a simple django-only search, we count_hits like normal
            return paginator.get_result(limit, cursor, count_hits=count_hits, max_hits=max_hits)

    # TODO: Presumably we only want to search back to the project's max
    # retention date, which may be closer than 90 days in the past, but
    # apparently `retention_window_start` can be None(?), so we need a
    # fallback.
    retention_date = max(
        [_f for _f in [retention_window_start, now - timedelta(days=90)] if _f]
    )
    start_params = [date_from, retention_date, get_search_filter(search_filters, "date", ">")]
    start = max([_f for _f in start_params if _f])
    end = max([retention_date, end])

    if start == retention_date and end == retention_date:
        # Both `start` and `end` must have been trimmed to `retention_date`,
        # so this entire search was against a time range that is outside of
        # retention. We'll return empty results to maintain backwards compatibility
        # with Django search (for now).
        return self.empty_result

    if start >= end:
        # TODO: This maintains backwards compatibility with Django search, but
        # in the future we should find a way to notify the user that their search
        # is invalid.
        return self.empty_result

    # This search is specific to Inbox. If we're using inbox sort and only querying
    # postgres then we can use this sort method. Otherwise if we need to go to Snuba,
    # fail.
    if (
        sort_by == "inbox"
        and get_search_filter(search_filters, "for_review", "=")
        # This handles tags and date parameters for search filters.
        and not [
            sf
            for sf in search_filters
            if sf.key.name not in self.postgres_only_fields.union(["date"])
        ]
    ):
        # We just filter on `GroupInbox.date_added` here, and don't filter by date
        # on the group. This keeps the query simpler and faster in some edge cases,
        # and date_added is a good enough proxy when we're using this sort.
        group_queryset = group_queryset.filter(
            groupinbox__date_added__gte=start,
            groupinbox__date_added__lte=end,
        )
        group_queryset = group_queryset.extra(
            select={"inbox_date": "sentry_groupinbox.date_added"},
        ).order_by("-inbox_date")
        paginator = DateTimePaginator(group_queryset, "-inbox_date", **paginator_options)
        return paginator.get_result(limit, cursor, count_hits=count_hits, max_hits=max_hits)

    if sort_by == "inbox":
        raise InvalidSearchQuery(f"Sort key '{sort_by}' only supported for inbox search")

    # Here we check if all the django filters reduce the set of groups down
    # to something that we can send down to Snuba in a `group_id IN (...)`
    # clause.
    max_candidates = options.get("snuba.search.max-pre-snuba-candidates")

    with sentry_sdk.start_span(op="snuba_group_query") as span:
        group_ids = list(
            group_queryset.values_list("id", flat=True)[:max_candidates + 1]
        )
        span.set_data("Max Candidates", max_candidates)
        span.set_data("Result Size", len(group_ids))
    metrics.timing("snuba.search.num_candidates", len(group_ids))

    too_many_candidates = False
    if not group_ids:
        # no matches could possibly be found from this point on
        metrics.incr("snuba.search.no_candidates", skip_internal=False)
        return self.empty_result
    elif len(group_ids) > max_candidates:
        # If the pre-filter query didn't include anything to significantly
        # filter down the number of results (from 'first_release', 'query',
        # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
        # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
        # might have surpassed the `max_candidates`. In this case,
        # we *don't* want to pass candidates down to Snuba, and instead we
        # want Snuba to do all the filtering/sorting it can and *then* apply
        # this queryset to the results from Snuba, which we call
        # post-filtering.
        metrics.incr("snuba.search.too_many_candidates", skip_internal=False)
        too_many_candidates = True
        group_ids = []

    sort_field = self.sort_strategies[sort_by]
    chunk_growth = options.get("snuba.search.chunk-growth-rate")
    max_chunk_size = options.get("snuba.search.max-chunk-size")
    chunk_limit = limit
    offset = 0
    num_chunks = 0
    hits = self.calculate_hits(
        group_ids,
        too_many_candidates,
        sort_field,
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        start,
        end,
    )
    if count_hits and hits == 0:
        return self.empty_result

    paginator_results = self.empty_result
    result_groups = []
    result_group_ids = set()

    max_time = options.get("snuba.search.max-total-chunk-time-seconds")
    time_start = time.time()

    # Do smaller searches in chunks until we have enough results
    # to answer the query (or hit the end of possible results). We do
    # this because a common case for search is to return 100 groups
    # sorted by `last_seen`, and we want to avoid returning all of
    # a project's groups and then post-sorting them all in Postgres
    # when typically the first N results will do.
    while (time.time() - time_start) < max_time:
        num_chunks += 1

        # grow the chunk size on each iteration to account for huge projects
        # and weird queries, up to a max size
        chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
        # but if we have group_ids always query for at least that many items
        chunk_limit = max(chunk_limit, len(group_ids))

        # {group_id: group_score, ...}
        snuba_groups, total = self.snuba_search(
            start=start,
            end=end,
            project_ids=[p.id for p in projects],
            environment_ids=environments and [environment.id for environment in environments],
            sort_field=sort_field,
            cursor=cursor,
            group_ids=group_ids,
            limit=chunk_limit,
            offset=offset,
            search_filters=search_filters,
        )
        metrics.timing("snuba.search.num_snuba_results", len(snuba_groups))
        count = len(snuba_groups)
        more_results = count >= limit and (offset + limit) < total
        offset += len(snuba_groups)

        if not snuba_groups:
            break

        if group_ids:
            # pre-filtered candidates were passed down to Snuba, so we're
            # finished with filtering and these are the only results. Note
            # that because we set the chunk size to at least the size of
            # the group_ids, we know we got all of them (ie there are
            # no more chunks after the first)
            result_groups = snuba_groups
            if count_hits and hits is None:
                hits = len(snuba_groups)
        else:
            # pre-filtered candidates were *not* passed down to Snuba,
            # so we need to do post-filtering to verify Sentry DB predicates
            filtered_group_ids = group_queryset.filter(
                id__in=[gid for gid, _ in snuba_groups]
            ).values_list("id", flat=True)

            group_to_score = dict(snuba_groups)
            for group_id in filtered_group_ids:
                if group_id in result_group_ids:
                    # because we're doing multiple Snuba queries, which
                    # happen outside of a transaction, there is a small possibility
                    # of groups moving around in the sort scoring underneath us,
                    # so we at least want to protect against duplicates
                    continue

                group_score = group_to_score[group_id]
                result_group_ids.add(group_id)
                result_groups.append((group_id, group_score))

        # break the query loop for one of three reasons:
        # * we started with Postgres candidates and so only do one Snuba query max
        # * the paginator is returning enough results to satisfy the query (>= the limit)
        # * there are no more groups in Snuba to post-filter
        # TODO do we actually have to rebuild this SequencePaginator every time
        # or can we just make it after we've broken out of the loop?
        paginator_results = SequencePaginator(
            [(score, id) for (id, score) in result_groups],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, known_hits=hits, max_hits=max_hits)

        if group_ids or len(paginator_results.results) >= limit or not more_results:
            break

    # HACK: We're using the SequencePaginator to mask the complexities of going
    # back and forth between two databases. This causes a problem with pagination
    # because we're 'lying' to the SequencePaginator (it thinks it has the entire
    # result set in memory when it does not). For this reason we need to make some
    # best guesses as to whether the `prev` and `next` cursors have more results.
    if len(paginator_results.results) == limit and more_results:
        # Because we are going back and forth between DBs there is a small
        # chance that we will hand the SequencePaginator exactly `limit`
        # items. In this case the paginator will assume there are no more
        # results, so we need to override the `next` cursor's results.
        paginator_results.next.has_results = True

    if cursor is not None and (not cursor.is_prev or len(paginator_results.results) > 0):
        # If the user passed a cursor, and it isn't already a 0 result `is_prev`
        # cursor, then it's worth allowing them to go back a page to check for
        # more results.
        paginator_results.prev.has_results = True

    metrics.timing("snuba.search.num_chunks", num_chunks)

    groups = Group.objects.in_bulk(paginator_results.results)
    paginator_results.results = [groups[k] for k in paginator_results.results if k in groups]

    return paginator_results
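# Hedged sketch: how the chunk limit grows across iterations of the search
# loop above. `chunk_growth` and `max_chunk_size` mirror the options read via
# `options.get(...)`; the numbers here are illustrative, not production defaults.
def chunk_sizes(limit, chunk_growth=1.5, max_chunk_size=10000, num_chunks=5):
    sizes = []
    chunk_limit = limit
    for _ in range(num_chunks):
        chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
        sizes.append(chunk_limit)
    return sizes

print(chunk_sizes(100))  # -> [150, 225, 337, 505, 757]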
def save(self, project, raw=False):
    from sentry.tasks.post_process import index_event_tags

    data = self.data

    project = Project.objects.get_from_cache(id=project)

    # Check to make sure we're not about to do a bunch of work that's
    # already been done if we've processed an event with this ID. (This
    # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
    # there's a race condition between here and when the event is actually
    # saved, but it's an improvement. See GH-7677.)
    try:
        event = Event.objects.get(
            project_id=project.id,
            event_id=data['event_id'],
        )
    except Event.DoesNotExist:
        pass
    else:
        self.logger.info(
            'duplicate.found',
            exc_info=True,
            extra={
                'event_uuid': data['event_id'],
                'project_id': project.id,
                'model': Event.__name__,
            }
        )
        return event

    # First we pull out our top-level (non-data attr) kwargs
    event_id = data.pop('event_id')
    level = data.pop('level')
    transaction_name = data.pop('transaction', None)
    culprit = data.pop('culprit', None)
    logger_name = data.pop('logger', None)
    server_name = data.pop('server_name', None)
    site = data.pop('site', None)
    checksum = data.pop('checksum', None)
    fingerprint = data.pop('fingerprint', None)
    platform = data.pop('platform', None)
    release = data.pop('release', None)
    dist = data.pop('dist', None)
    environment = data.pop('environment', None)

    # unused
    time_spent = data.pop('time_spent', None)
    message = data.pop('message', '')

    if not culprit:
        if transaction_name:
            culprit = transaction_name
        else:
            culprit = generate_culprit(data, platform=platform)

    culprit = force_text(culprit)
    if transaction_name:
        transaction_name = force_text(transaction_name)

    recorded_timestamp = data.pop('timestamp')
    date = datetime.fromtimestamp(recorded_timestamp)
    date = date.replace(tzinfo=timezone.utc)

    kwargs = {
        'platform': platform,
    }

    event = Event(
        project_id=project.id,
        event_id=event_id,
        data=data,
        time_spent=time_spent,
        datetime=date,
        **kwargs
    )
    event._project_cache = project

    data = event.data.data

    # convert this to a dict to ensure we're only storing one value per key
    # as most parts of Sentry don't currently play well with multiple values
    tags = dict(data.get('tags') or [])
    tags['level'] = LOG_LEVELS[level]
    if logger_name:
        tags['logger'] = logger_name
    if server_name:
        tags['server_name'] = server_name
    if site:
        tags['site'] = site
    if environment:
        tags['environment'] = environment
    if transaction_name:
        tags['transaction'] = transaction_name

    if release:
        # don't allow a conflicting 'release' tag
        if 'release' in tags:
            del tags['release']
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )
        tags['sentry:release'] = release.version

    if dist and release:
        dist = release.add_dist(dist, date)
        tags['sentry:dist'] = dist.name
    else:
        dist = None

    event_user = self._get_event_user(project, data)
    if event_user:
        # don't allow a conflicting 'user' tag
        if 'user' in tags:
            del tags['user']
        tags['sentry:user'] = event_user.tag_value

    # At this point we want to normalize the in_app values in case the
    # clients did not set this appropriately so far.
    normalize_in_app(data)

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                tags.setdefault(key, value)

    for path, iface in six.iteritems(event.interfaces):
        for k, v in iface.iter_tags():
            tags[k] = v
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.get_path(), None)

    # tags are stored as a tuple
    tags = tags.items()

    data['tags'] = tags
    data['fingerprint'] = fingerprint or ['{{ default }}']

    # prioritize fingerprint over checksum as it's likely the client defaulted
    # a checksum whereas the fingerprint was explicit
    if fingerprint:
        hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
    elif checksum:
        if HASH_RE.match(checksum):
            hashes = [checksum]
        else:
            hashes = [md5_from_hash([checksum]), checksum]
        data['checksum'] = checksum
    else:
        hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

    # TODO(dcramer): temp workaround for complexity
    data['message'] = message
    event_type = eventtypes.get(data.get('type', 'default'))(data)
    event_metadata = event_type.get_metadata()
    # TODO(dcramer): temp workaround for complexity
    del data['message']

    data['type'] = event_type.key
    data['metadata'] = event_metadata

    # index components into ``Event.message``
    # See GH-3248
    if event_type.key != 'default':
        if 'sentry.interfaces.Message' in data and \
                data['sentry.interfaces.Message']['message'] != message:
            message = u'{} {}'.format(
                message,
                data['sentry.interfaces.Message']['message'],
            )

    if not message:
        message = ''
    elif not isinstance(message, six.string_types):
        message = force_text(message)

    for value in six.itervalues(event_metadata):
        value_u = force_text(value, errors='replace')
        if value_u not in message:
            message = u'{} {}'.format(message, value_u)

    if culprit and culprit not in message:
        culprit_u = force_text(culprit, errors='replace')
        message = u'{} {}'.format(message, culprit_u)

    message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    event.message = message
    kwargs['message'] = message

    received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))
    group_kwargs = kwargs.copy()
    group_kwargs.update(
        {
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': {
                'last_received': received_timestamp,
                'type': event_type.key,
                # we cache the event's metadata on the group to ensure it's
                # accessible in the stream
                'metadata': event_metadata,
            },
        }
    )

    if release:
        group_kwargs['first_release'] = release

    try:
        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs
        )
    except HashDiscarded:
        event_discarded.send_robust(
            project=project,
            sender=EventManager,
        )

        metrics.incr(
            'events.discarded',
            skip_internal=True,
            tags={
                'organization_id': project.organization_id,
                'platform': platform,
            },
        )
        raise
    else:
        event_saved.send_robust(
            project=project,
            event_size=event.size,
            sender=EventManager,
        )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    # When an event was sampled, the canonical source of truth
    # is the EventMapping table since we aren't going to be writing out an actual
    # Event row. Otherwise, if the Event isn't being sampled, we can safely
    # rely on the Event table itself as the source of truth and ignore
    # EventMapping since it's redundant information.
    if is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': EventMapping.__name__,
                }
            )
            return event

    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
        group_id=group.id,
        environment_id=environment.id,
        defaults={
            'first_release_id': release.id if release else None,
        },
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        ReleaseProjectEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        })
    ]

    if release:
        frequencies.append(
            (tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            })
        )

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(
        group=group,
        environment=environment,
    )

    # save the event unless it's been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': Event.__name__,
                }
            )
            return event

        index_event_tags.delay(
            organization_id=project.organization_id,
            project_id=project.id,
            group_id=group.id,
            environment_id=environment.id,
            event_id=event.id,
            tags=tags,
            date_added=event.datetime,
        )

    if event_user:
        tsdb.record_multi(
            (
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
            ),
            timestamp=event.datetime,
            environment_id=environment.id,
        )

    if release:
        if is_new:
            buffer.incr(
                ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                }
            )
        if is_new_group_environment:
            buffer.incr(
                ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                    'project_id': project.id,
                    'release_id': release.id,
                    'environment_id': environment.id,
                }
            )

    safe_execute(Group.objects.add_tags, group, environment, tags, _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send_robust(project=project, group=group, sender=Project)

    eventstream.publish(
        group=group,
        event=event,
        is_new=is_new,
        is_sample=is_sample,
        is_regression=is_regression,
        is_new_group_environment=is_new_group_environment,
        primary_hash=hashes[0],
        # We are choosing to skip consuming the event back
        # in the eventstream if it's flagged as raw.
        # This means that we want to publish the event
        # through the event stream, but we don't care
        # about post processing and handling the commit.
        skip_consume=raw,
    )

    metrics.timing(
        'events.latency',
        received_timestamp - recorded_timestamp,
        tags={
            'project_id': project.id,
        },
    )

    return event
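# Hedged sketch: the message-indexing step above appends event metadata values
# and the culprit to the searchable message. A standalone approximation with
# plain dict/str inputs; `trim()` is approximated by simple slicing here.
def build_search_message(message, event_metadata, culprit, max_length=1024):
    for value in event_metadata.values():
        if value not in message:
            message = u'{} {}'.format(message, value)
    if culprit and culprit not in message:
        message = u'{} {}'.format(message, culprit)
    return message.strip()[:max_length]

print(build_search_message('TypeError', {'type': 'TypeError', 'value': 'oops'}, 'app/views.py'))
# -> 'TypeError oops app/views.py'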
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    existing_group_id = None
    for h in all_hashes:
        if h.group_id is not None:
            existing_group_id = h.group_id
            break
        if h.group_tombstone_id is not None:
            raise HashDiscarded("Matches group tombstone %s" % h.group_tombstone_id)

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        # it's possible the release was deleted between
        # when we queried for the release and now, so
        # make sure it still exists
        first_release = kwargs.pop("first_release", None)

        with transaction.atomic():
            short_id = project.next_short_id()
            group, group_is_new = (
                Group.objects.create(
                    project=project,
                    short_id=short_id,
                    first_release_id=Release.objects.filter(id=first_release.id)
                    .values_list("id", flat=True)
                    .first()
                    if first_release
                    else None,
                    **kwargs
                ),
                True,
            )

        metrics.incr(
            "group.created", skip_internal=True, tags={"platform": event.platform or "unknown"}
        )
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h for h in all_hashes if h.group_id is None]
    if new_hashes:
        # XXX: There is a race condition here wherein another process could
        # create a new group that is associated with one of the new hashes,
        # add some event(s) to it, and then subsequently have the hash
        # "stolen" by this process. This then "orphans" those events from
        # their "siblings" in the group we've created here. We don't have a
        # way to fix this, since we can't update the group on those hashes
        # without filtering on `group_id` (which we can't do due to query
        # planner weirdness.) For more context, see 84c6f75a and d0e22787,
        # as well as GH-5085.
        GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
            state=GroupHash.State.LOCKED_IN_MIGRATION
        ).update(group=group)

        if group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group, event=event, data=kwargs, release=release
        )
    else:
        is_regression = False

    return group, is_new, is_regression
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """
    set_current_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # Only when we come from reprocessing do we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")
            set_current_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing. If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache. The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it. This causes the node store to delete the data and we end up
        # fetching an empty dict. We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI. So
        # to future-proof this correctly we just handle this case here.
        if not data:
            metrics.incr(
                "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
            )
            return

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(
                    project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key
                )
        except HashDiscarded:
            pass
        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)

                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also always delete the key.
                    if event is None or features.has(
                        "organizations:event-attachments", event.project.organization, actor=None
                    ):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )
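# Hedged sketch: a minimal stand-in for the `metrics.timer(...) as metric_tags`
# pattern used above, where the context manager yields a mutable tag dict that
# is attached to the timing when the block exits. Names are illustrative.
import time
from contextlib import contextmanager

@contextmanager
def timer(key, emit=print):
    tags = {}
    start = time.time()
    try:
        yield tags
    finally:
        # the tags dict may have been mutated inside the block
        emit(key, time.time() - start, tags)

with timer("tasks.store.do_save_event.get_cache") as metric_tags:
    metric_tags["event_type"] = "error"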
def get(self, _, project, event_id):
    """
    Retrieve Committer information for an event
    ```````````````````````````````````````````

    Return committers on an individual event, plus a per-frame breakdown.

    :pparam string project_slug: the slug of the project the event
                                 belongs to.
    :pparam string event_id: the hexadecimal ID of the event to
                             retrieve (as reported by the raven client).
    :auth: required
    """
    try:
        event = Event.objects.get(
            id=event_id,
            project_id=project.id,
        )
    except Event.DoesNotExist:
        return Response({'detail': 'Event not found'}, status=404)

    # populate event data
    Event.objects.bind_nodes([event], 'data')

    group = Group.objects.get(id=event.group_id)

    first_release_version = group.get_first_release()
    if not first_release_version:
        return Response({'detail': 'Release not found'}, status=404)

    releases = Release.get_closest_releases(project, first_release_version)
    if not releases:
        return Response({'detail': 'Release not found'}, status=404)

    commits = self._get_commits(releases)
    if not commits:
        return Response({'detail': 'No Commits found for Release'}, status=404)

    frames = self._get_frame_paths(event)
    frame_limit = 25
    app_frames = [frame for frame in frames if frame['in_app']][-frame_limit:]

    # TODO(maxbittker) return this set instead of annotated frames
    path_set = {frame['abs_path'] for frame in app_frames}

    file_changes = []
    if path_set:
        file_changes = self._get_commit_file_changes(commits, path_set)

    commit_path_matches = {
        path: self._match_commits_path(file_changes, path) for path in path_set
    }

    annotated_frames = [{
        'frame': frame,
        'commits': commit_path_matches[frame['abs_path']]
    } for frame in app_frames]

    relevant_commits = list({
        commit for match in commit_path_matches for commit in commit_path_matches[match]
    })

    committers = self._get_committers(annotated_frames, relevant_commits)
    metrics.incr('feature.owners.has-committers', instance='hit' if committers else 'miss')

    # serialize the commit objects
    serialized_annotated_frames = [{
        'frame': frame['frame'],
        'commits': serialize(frame['commits'])
    } for frame in annotated_frames]

    data = {
        # map author ids to sentry user dicts
        'committers': committers,
        'annotatedFrames': serialized_annotated_frames
    }
    return Response(data)
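# Hedged sketch: the frame-annotation step above, isolated. `commit_path_matches`
# maps abs_path -> list of commits; unknown paths get an empty list here.
def annotate_frames(app_frames, commit_path_matches):
    return [
        {'frame': frame, 'commits': commit_path_matches.get(frame['abs_path'], [])}
        for frame in app_frames
    ]

print(annotate_frames([{'abs_path': 'src/app.py'}], {'src/app.py': ['deadbeef']}))
# -> [{'frame': {'abs_path': 'src/app.py'}, 'commits': ['deadbeef']}]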
def create_failed_event(
    cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None
):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    set_current_project(project_id)

    # We can only create failed events for events that can potentially
    # create failed events.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event. This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (
        reprocessing_active
        and reprocessing.get_reprocessing_revision(project_id, cached=False) != reprocessing_rev
    ):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={"reprocessing_active": reprocessing_active, "issues": issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False
        )
        error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    event_processing_store.delete_by_key(cache_key)

    return True
def get_send_to(self, project, event=None):
    """
    Returns a list of user IDs for the users that should receive
    notifications for the provided project.

    This result may come from cached data.
    """
    if not (project and project.teams.exists()):
        logger.debug('Tried to send notification to invalid project: %r', project)
        return []

    if event:
        owners, _ = ProjectOwnership.get_owners(project.id, event.data)
        if owners != ProjectOwnership.Everyone:
            if not owners:
                metrics.incr(
                    'features.owners.send_to',
                    tags={
                        'organization': project.organization_id,
                        'outcome': 'empty',
                    },
                    skip_internal=True,
                )
                return []

            metrics.incr(
                'features.owners.send_to',
                tags={
                    'organization': project.organization_id,
                    'outcome': 'match',
                },
                skip_internal=True,
            )
            send_to_list = []
            teams_to_resolve = []
            for owner in owners:
                if owner.type == User:
                    send_to_list.append(owner.id)
                else:
                    teams_to_resolve.append(owner.id)

            # get all users in teams
            if teams_to_resolve:
                send_to_list += User.objects.filter(
                    is_active=True,
                    sentry_orgmember_set__organizationmemberteam__team__id__in=teams_to_resolve,
                ).values_list('id', flat=True)
            return send_to_list
        else:
            metrics.incr(
                'features.owners.send_to',
                tags={
                    'organization': project.organization_id,
                    'outcome': 'everyone',
                },
                skip_internal=True,
            )

    cache_key = '%s:send_to:%s' % (self.get_conf_key(), project.pk)
    send_to_list = cache.get(cache_key)
    if send_to_list is None:
        send_to_list = [s for s in self.get_sendable_users(project) if s]
        cache.set(cache_key, send_to_list, 60)  # 1 minute cache

    return send_to_list
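# Hedged sketch: partitioning resolved owners into user IDs and team IDs, as
# the loop above does. `Owner` is a hypothetical stand-in for the actor tuple;
# the real code compares `owner.type` against model classes.
from collections import namedtuple

Owner = namedtuple('Owner', ['type', 'id'])
USER, TEAM = 'user', 'team'

def partition_owners(owners):
    users, teams = [], []
    for owner in owners:
        (users if owner.type == USER else teams).append(owner.id)
    return users, teams

print(partition_owners([Owner(USER, 1), Owner(TEAM, 7), Owner(USER, 2)]))
# -> ([1, 2], [7])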
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (
        has_changed
        and options.get("processing.can-use-scrubbers")
        and features.has("organizations:datascrubbers-v2", project.organization, actor=None)
    ):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                project_config = get_project_config(project)

                new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception("tasks.store.preprocessors.error")
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        cache_key = event_processing_store.store(data)

    submit_save_event(project, cache_key, event_id, start_time, data)
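# Hedged sketch: the preprocessor loop above, isolated. Each processor is
# applied to the event data; errors are recorded on the event rather than
# aborting the pipeline, and a change flag is tracked throughout.
def run_processors(data, processors, log=print):
    has_changed = False
    for processor in processors or ():
        try:
            result = processor(data)
        except Exception:
            log("tasks.store.preprocessors.error")
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            has_changed = True
        else:
            if result:
                data = result
                has_changed = True
    return data, has_changed

print(run_processors({"event_id": "abc"}, [lambda d: dict(d, touched=True)]))
# -> ({'event_id': 'abc', 'touched': True}, True)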
def update_groups(request, projects, organization_id, search_fn):
    group_ids = request.GET.getlist('id')
    if group_ids:
        group_list = Group.objects.filter(
            project__organization_id=organization_id,
            project__in=projects,
            id__in=group_ids,
        )
        # filter down group ids to only valid matches
        group_ids = [g.id for g in group_list]
        if not group_ids:
            return Response(status=204)
    else:
        group_list = None

    # TODO(jess): We may want to look into refactoring GroupValidator
    # to support multiple projects, but this is pretty complicated
    # because of the assignee validation. Punting on this for now.
    for project in projects:
        serializer = GroupValidator(
            data=request.DATA,
            partial=True,
            context={'project': project},
        )
        if not serializer.is_valid():
            return Response(serializer.errors, status=400)

    result = dict(serializer.object)

    # so we won't have to requery for each group
    project_lookup = {p.id: p for p in projects}

    acting_user = request.user if request.user.is_authenticated() else None

    if not group_ids:
        try:
            # bulk mutations are limited to 1000 items
            # TODO(dcramer): it'd be nice to support more than this, but it's
            # a bit too complicated right now
            cursor_result, _ = search_fn({
                'limit': 1000,
                'paginator_options': {'max_limit': 1000},
            })
        except ValidationError as exc:
            return Response({'detail': six.text_type(exc)}, status=400)

        group_list = list(cursor_result)
        group_ids = [g.id for g in group_list]

    is_bulk = len(group_ids) > 1

    group_project_ids = {g.project_id for g in group_list}
    # filter projects down to only those that have groups in the search results
    projects = [p for p in projects if p.id in group_project_ids]

    queryset = Group.objects.filter(
        id__in=group_ids,
    )

    discard = result.get('discard')
    if discard:
        return handle_discard(request, list(queryset), projects, acting_user)

    statusDetails = result.pop('statusDetails', result)
    status = result.get('status')
    release = None
    commit = None

    if status in ('resolved', 'resolvedInNextRelease'):
        if status == 'resolvedInNextRelease' or statusDetails.get('inNextRelease'):
            # TODO(jess): We may want to support this for multi project, but punting on it for now
            if len(projects) > 1:
                return Response({
                    'detail': 'Cannot set resolved in next release for multiple projects.'
                }, status=400)
            release = statusDetails.get('inNextRelease') or Release.objects.filter(
                projects=projects[0],
                organization_id=projects[0].organization_id,
            ).extra(select={
                'sort': 'COALESCE(date_released, date_added)',
            }).order_by('-sort')[0]
            activity_type = Activity.SET_RESOLVED_IN_RELEASE
            activity_data = {
                # no version yet
                'version': '',
            }
            status_details = {
                'inNextRelease': True,
                'actor': serialize(extract_lazy_object(request.user), request.user),
            }
            res_type = GroupResolution.Type.in_next_release
            res_type_str = 'in_next_release'
            res_status = GroupResolution.Status.pending
        elif statusDetails.get('inRelease'):
            # TODO(jess): We could update validation to check if release
            # applies to multiple projects, but I think we agreed to punt
            # on this for now
            if len(projects) > 1:
                return Response({
                    'detail': 'Cannot set resolved in release for multiple projects.'
                }, status=400)
            release = statusDetails['inRelease']
            activity_type = Activity.SET_RESOLVED_IN_RELEASE
            activity_data = {
                # no version yet
                'version': release.version,
            }
            status_details = {
                'inRelease': release.version,
                'actor': serialize(extract_lazy_object(request.user), request.user),
            }
            res_type = GroupResolution.Type.in_release
            res_type_str = 'in_release'
            res_status = GroupResolution.Status.resolved
        elif statusDetails.get('inCommit'):
            # TODO(jess): Same here, this is probably something we could do, but
            # punting for now.
            if len(projects) > 1:
                return Response({
                    'detail': 'Cannot set resolved in commit for multiple projects.'
                }, status=400)
            commit = statusDetails['inCommit']
            activity_type = Activity.SET_RESOLVED_IN_COMMIT
            activity_data = {
                'commit': commit.id,
            }
            status_details = {
                'inCommit': serialize(commit, request.user),
                'actor': serialize(extract_lazy_object(request.user), request.user),
            }
            res_type_str = 'in_commit'
        else:
            res_type_str = 'now'
            activity_type = Activity.SET_RESOLVED
            activity_data = {}
            status_details = {}

        now = timezone.now()
        metrics.incr('group.resolved', instance=res_type_str, skip_internal=True)

        # if we've specified a commit, let's see if it's already been released
        # this will allow us to associate the resolution to a release as if we
        # were simply using 'inRelease' above
        # Note: this is different than the way commit resolution works on deploy
        # creation, as a given deploy is connected to an explicit release, and
        # in this case we're simply choosing the most recent release which contains
        # the commit.
        if commit and not release:
            # TODO(jess): If we support multiple projects for release / commit resolution,
            # we need to update this to find the release for each project (we shouldn't assume
            # it's the same)
            try:
                release = Release.objects.filter(
                    projects__in=projects,
                    releasecommit__commit=commit,
                ).extra(select={
                    'sort': 'COALESCE(date_released, date_added)',
                }).order_by('-sort')[0]
                res_type = GroupResolution.Type.in_release
                res_status = GroupResolution.Status.resolved
            except IndexError:
                release = None

        for group in group_list:
            with transaction.atomic():
                resolution = None
                if release:
                    resolution_params = {
                        'release': release,
                        'type': res_type,
                        'status': res_status,
                        'actor_id': request.user.id if request.user.is_authenticated() else None,
                    }
                    resolution, created = GroupResolution.objects.get_or_create(
                        group=group,
                        defaults=resolution_params,
                    )
                    if not created:
                        resolution.update(datetime=timezone.now(), **resolution_params)

                if commit:
                    GroupLink.objects.create(
                        group_id=group.id,
                        project_id=group.project_id,
                        linked_type=GroupLink.LinkedType.commit,
                        relationship=GroupLink.Relationship.resolves,
                        linked_id=commit.id,
                    )

                affected = Group.objects.filter(
                    id=group.id,
                ).update(
                    status=GroupStatus.RESOLVED,
                    resolved_at=now,
                )
                if not resolution:
                    created = affected

                group.status = GroupStatus.RESOLVED
                group.resolved_at = now

                assigned_to = self_subscribe_and_assign_issue(acting_user, group)
                if assigned_to is not None:
                    result['assignedTo'] = assigned_to

                if created:
                    activity = Activity.objects.create(
                        project=project_lookup[group.project_id],
                        group=group,
                        type=activity_type,
                        user=acting_user,
                        ident=resolution.id if resolution else None,
                        data=activity_data,
                    )
                    # TODO(dcramer): we need a solution for activity rollups
                    # before sending notifications on bulk changes
                    if not is_bulk:
                        activity.send_notification()

            if release:
                issue_resolved_in_release.send_robust(
                    group=group,
                    project=project_lookup[group.project_id],
                    user=acting_user,
                    resolution_type=res_type_str,
                    sender=update_groups,
                )
            elif commit:
                resolved_with_commit.send_robust(
                    organization_id=organization_id,
                    user=request.user,
                    group=group,
                    sender=update_groups,
                )
            else:
                issue_resolved.send_robust(
                    project=project_lookup[group.project_id],
                    group=group,
                    user=acting_user,
                    sender=update_groups,
                )

            kick_off_status_syncs.apply_async(kwargs={
                'project_id': group.project_id,
                'group_id': group.id,
            })

        result.update({
            'status': 'resolved',
            'statusDetails': status_details,
        })

    elif status:
        new_status = STATUS_CHOICES[result['status']]

        with transaction.atomic():
            happened = queryset.exclude(
                status=new_status,
            ).update(
                status=new_status,
            )

            GroupResolution.objects.filter(
                group__in=group_ids,
            ).delete()

            if new_status == GroupStatus.IGNORED:
                metrics.incr('group.ignored', skip_internal=True)

                ignore_duration = (
                    statusDetails.pop('ignoreDuration', None) or
                    statusDetails.pop('snoozeDuration', None)
                ) or None
                ignore_count = statusDetails.pop('ignoreCount', None) or None
                ignore_window = statusDetails.pop('ignoreWindow', None) or None
                ignore_user_count = statusDetails.pop('ignoreUserCount', None) or None
                ignore_user_window = statusDetails.pop('ignoreUserWindow', None) or None
                if ignore_duration or ignore_count or ignore_user_count:
                    if ignore_duration:
                        ignore_until = timezone.now() + timedelta(
                            minutes=ignore_duration,
                        )
                    else:
                        ignore_until = None
                    for group in group_list:
                        state = {}
                        if ignore_count and not ignore_window:
                            state['times_seen'] = group.times_seen
                        if ignore_user_count and not ignore_user_window:
                            state['users_seen'] = group.count_users_seen()
                        GroupSnooze.objects.create_or_update(
                            group=group,
                            values={
                                'until': ignore_until,
                                'count': ignore_count,
                                'window': ignore_window,
                                'user_count': ignore_user_count,
                                'user_window': ignore_user_window,
                                'state': state,
                                'actor_id': request.user.id
                                if request.user.is_authenticated() else None,
                            }
                        )
                        result['statusDetails'] = {
                            'ignoreCount': ignore_count,
                            'ignoreUntil': ignore_until,
                            'ignoreUserCount': ignore_user_count,
                            'ignoreUserWindow': ignore_user_window,
                            'ignoreWindow': ignore_window,
                            'actor': serialize(extract_lazy_object(request.user), request.user),
                        }
                else:
                    GroupSnooze.objects.filter(
                        group__in=group_ids,
                    ).delete()
                    ignore_until = None
                    result['statusDetails'] = {}
            else:
                result['statusDetails'] = {}

        if group_list and happened:
            if new_status == GroupStatus.UNRESOLVED:
                activity_type = Activity.SET_UNRESOLVED
                activity_data = {}
            elif new_status == GroupStatus.IGNORED:
                activity_type = Activity.SET_IGNORED
                activity_data = {
                    'ignoreCount': ignore_count,
                    'ignoreDuration': ignore_duration,
                    'ignoreUntil': ignore_until,
                    'ignoreUserCount': ignore_user_count,
                    'ignoreUserWindow': ignore_user_window,
                    'ignoreWindow': ignore_window,
                }

                groups_by_project_id = defaultdict(list)
                for group in group_list:
                    groups_by_project_id[group.project_id].append(group)

                for project in projects:
                    project_groups = groups_by_project_id.get(project.id)
                    if project_groups:
                        issue_ignored.send_robust(
                            project=project,
                            user=acting_user,
                            group_list=project_groups,
                            activity_data=activity_data,
                            sender=update_groups,
                        )

            for group in group_list:
                group.status = new_status

                activity = Activity.objects.create(
                    project=project_lookup[group.project_id],
                    group=group,
                    type=activity_type,
                    user=acting_user,
                    data=activity_data,
                )
                # TODO(dcramer): we need a solution for activity rollups
                # before sending notifications on bulk changes
                if not is_bulk:
                    if acting_user:
                        GroupSubscription.objects.subscribe(
                            user=acting_user,
                            group=group,
                            reason=GroupSubscriptionReason.status_change,
                        )
                    activity.send_notification()

                if new_status == GroupStatus.UNRESOLVED:
                    kick_off_status_syncs.apply_async(kwargs={
                        'project_id': group.project_id,
                        'group_id': group.id,
                    })

    if 'assignedTo' in result:
        assigned_actor = result['assignedTo']
        if assigned_actor:
            for group in group_list:
                resolved_actor = assigned_actor.resolve()

                GroupAssignee.objects.assign(group, resolved_actor, acting_user)
            result['assignedTo'] = serialize(
                assigned_actor.resolve(), acting_user, ActorSerializer()
            )
        else:
            for group in group_list:
                GroupAssignee.objects.deassign(group, acting_user)

    is_member_map = {
        project.id: project.member_set.filter(user=acting_user).exists()
        for project in projects
    }
    if result.get('hasSeen'):
        for group in group_list:
            if is_member_map.get(group.project_id):
                instance, created = create_or_update(
                    GroupSeen,
                    group=group,
                    user=acting_user,
                    project=project_lookup[group.project_id],
                    values={
                        'last_seen': timezone.now(),
                    }
                )
    elif result.get('hasSeen') is False:
        GroupSeen.objects.filter(
            group__in=group_ids,
            user=acting_user,
        ).delete()

    if result.get('isBookmarked'):
        for group in group_list:
            GroupBookmark.objects.get_or_create(
                project=project_lookup[group.project_id],
                group=group,
                user=acting_user,
            )
            GroupSubscription.objects.subscribe(
                user=acting_user,
                group=group,
                reason=GroupSubscriptionReason.bookmark,
            )
    elif result.get('isBookmarked') is False:
        GroupBookmark.objects.filter(
            group__in=group_ids,
            user=acting_user,
        ).delete()

    # TODO(dcramer): we could make these more efficient by first
    # querying for which rows are present (if N > 2), flipping the flag
    # on those rows, and then creating the missing rows
    if result.get('isSubscribed') in (True, False):
        is_subscribed = result['isSubscribed']
        for group in group_list:
            # NOTE: Subscribing without an initiating event (assignment,
            # commenting, etc.) clears out the previous subscription reason
            # to avoid showing confusing messaging as a result of this
            # action. It'd be jarring to go directly from "you are not
            # subscribed" to "you were subscribed since you were
            # assigned" just by clicking the "subscribe" button (and you
            # may no longer be assigned to the issue anyway.)
            GroupSubscription.objects.create_or_update(
                user=acting_user,
                group=group,
                project=project_lookup[group.project_id],
                values={
                    'is_active': is_subscribed,
                    'reason': GroupSubscriptionReason.unknown,
                },
            )

        result['subscriptionDetails'] = {
            'reason': SUBSCRIPTION_REASON_MAP.get(
                GroupSubscriptionReason.unknown,
                'unknown',
            ),
        }

    if 'isPublic' in result:
        # We always want to delete an existing share, because triggering
        # an isPublic=True even when it's already public, should trigger
        # regenerating.
        for group in group_list:
            if GroupShare.objects.filter(group=group).delete():
                result['shareId'] = None
                Activity.objects.create(
                    project=project_lookup[group.project_id],
                    group=group,
                    type=Activity.SET_PRIVATE,
                    user=acting_user,
                )

        if result.get('isPublic'):
            for group in group_list:
                share, created = GroupShare.objects.get_or_create(
                    project=project_lookup[group.project_id],
                    group=group,
                    user=acting_user,
                )
                if created:
                    result['shareId'] = share.uuid
                    Activity.objects.create(
                        project=project_lookup[group.project_id],
                        group=group,
                        type=Activity.SET_PUBLIC,
                        user=acting_user,
                    )

    # XXX(dcramer): this feels a bit shady like it should be its own
    # endpoint
    if result.get('merge') and len(group_list) > 1:
        # don't allow merging cross project
        if len(projects) > 1:
            return Response({'detail': 'Merging across multiple projects is not supported'})

        group_list_by_times_seen = sorted(
            group_list,
            key=lambda g: (g.times_seen, g.id),
            reverse=True,
        )
        primary_group, groups_to_merge = \
            group_list_by_times_seen[0], group_list_by_times_seen[1:]

        group_ids_to_merge = [g.id for g in groups_to_merge]
        eventstream_state = eventstream.start_merge(
            primary_group.project_id,
            group_ids_to_merge,
            primary_group.id,
        )

        Group.objects.filter(
            id__in=group_ids_to_merge,
        ).update(
            status=GroupStatus.PENDING_MERGE,
        )

        transaction_id = uuid4().hex
        merge_groups.delay(
            from_object_ids=group_ids_to_merge,
            to_object_id=primary_group.id,
            transaction_id=transaction_id,
            eventstream_state=eventstream_state,
        )

        Activity.objects.create(
            project=project_lookup[primary_group.project_id],
            group=primary_group,
            type=Activity.MERGE,
            user=acting_user,
            data={
                'issues': [{'id': c.id} for c in groups_to_merge],
            },
        )

        result['merge'] = {
            'parent': six.text_type(primary_group.id),
            'children': [six.text_type(g.id) for g in groups_to_merge],
        }

    return Response(result)
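# Hedged sketch (illustrative, not the production helper): the merge branch
# above picks the most-seen group as the primary and merges the rest into it.
# Assumes objects with `times_seen` and `id` attributes.
def split_merge_targets(group_list):
    ordered = sorted(group_list, key=lambda g: (g.times_seen, g.id), reverse=True)
    primary_group, groups_to_merge = ordered[0], ordered[1:]
    return primary_group, groups_to_merge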
def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    try:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
            span.set_data("symbolication_function", symbolication_function.__name__)
            with metrics.timer("tasks.store.symbolicate_event.symbolication"):
                symbolicated_data = symbolication_function(data)
            span.set_data("symbolicated_data", bool(symbolicated_data))

        if symbolicated_data:
            data = symbolicated_data
            has_changed = True
    except RetrySymbolication as e:
        if start_time and (
            time() - start_time
        ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "symbolicate.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (
            time() - start_time
        ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop the event but actually continue with the rest of
            # the pipeline (persisting the unsymbolicated event)
            error_logger.exception(
                "symbolicate.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            data.setdefault("_metrics", {})["flag.processing.fatal"] = True
            has_changed = True
        else:
            # Requeue the task in the "sleep" queue
            retry_symbolicate_event.apply_async(
                args=(),
                kwargs={
                    "symbolicate_task_name": symbolicate_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return
    except Exception:
        error_logger.exception("tasks.store.symbolicate_event.symbolication")
        data.setdefault("_metrics", {})["flag.processing.error"] = True
        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
        has_changed = True

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
def _query(self, project, retention_window_start, group_queryset, tags, environment,
           sort_by, limit, cursor, count_hits, paginator_options, **parameters):

    # TODO: Product decision: we currently search Group.message to handle
    # the `query` parameter, because that's what we've always done. We could
    # do that search against every event in Snuba instead, but results may
    # differ.

    # TODO: It's possible `first_release` could be handled by Snuba.
    if environment is not None:
        group_queryset = ds.QuerySetBuilder({
            'first_release': ds.CallbackCondition(
                lambda queryset, version: queryset.extra(
                    where=[
                        '{} = {}'.format(
                            ds.get_sql_column(GroupEnvironment, 'first_release_id'),
                            ds.get_sql_column(Release, 'id'),
                        ),
                        '{} = %s'.format(
                            ds.get_sql_column(Release, 'organization'),
                        ),
                        '{} = %s'.format(
                            ds.get_sql_column(Release, 'version'),
                        ),
                    ],
                    params=[project.organization_id, version],
                    tables=[Release._meta.db_table],
                ),
            ),
        }).build(
            group_queryset.extra(
                where=[
                    u'{} = {}'.format(
                        ds.get_sql_column(Group, 'id'),
                        ds.get_sql_column(GroupEnvironment, 'group_id'),
                    ),
                    u'{} = %s'.format(
                        ds.get_sql_column(GroupEnvironment, 'environment_id'),
                    ),
                ],
                params=[environment.id],
                tables=[GroupEnvironment._meta.db_table],
            ),
            parameters,
        )
    else:
        group_queryset = ds.QuerySetBuilder({
            'first_release': ds.CallbackCondition(
                lambda queryset, version: queryset.filter(
                    first_release__organization_id=project.organization_id,
                    first_release__version=version,
                ),
            ),
        }).build(
            group_queryset,
            parameters,
        )

    now = timezone.now()

    # TODO: Presumably we want to search back to the project's full retention,
    # which may be higher than 90 days in the past, but apparently
    # `retention_window_start` can be None(?), so we need a fallback.
    start = max(
        filter(None, [
            retention_window_start,
            parameters.get('date_from'),
            now - timedelta(days=90),
        ])
    )

    end = parameters.get('date_to')
    if not end:
        end = now + ALLOWED_FUTURE_DELTA

        # This search is for some time window that ends with "now",
        # so if the requested sort is `date` (`last_seen`) and there
        # are no other Snuba-based search predicates, we can simply
        # return the results from Postgres.
        if sort_by == 'date' \
                and not tags \
                and not environment \
                and not any(param in parameters for param in [
                    'age_from', 'age_to', 'last_seen_from',
                    'last_seen_to', 'times_seen', 'times_seen_lower',
                    'times_seen_upper'
                ]):
            group_queryset = group_queryset.order_by('-last_seen')
            paginator = DateTimePaginator(group_queryset, '-last_seen', **paginator_options)
            return paginator.get_result(limit, cursor, count_hits=count_hits)

    assert start < end

    # maximum number of Group IDs to send down to Snuba,
    # if more Group ID candidates are found, a "bare" Snuba
    # search is performed and the result groups are then
    # post-filtered via queries to the Sentry DB
    max_pre_snuba_candidates = options.get('snuba.search.max-pre-snuba-candidates')

    # pre-filter query
    candidate_ids = None
    if max_pre_snuba_candidates and limit <= max_pre_snuba_candidates:
        candidate_ids = list(
            group_queryset.values_list('id', flat=True)[:max_pre_snuba_candidates + 1]
        )

        metrics.timing('snuba.search.num_candidates', len(candidate_ids))

        if not candidate_ids:
            # no matches could possibly be found from this point on
            metrics.incr('snuba.search.no_candidates')
            return Paginator(Group.objects.none()).get_result()
        elif len(candidate_ids) > max_pre_snuba_candidates:
            # If the pre-filter query didn't include anything to significantly
            # filter down the number of results (from 'first_release', 'query',
            # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
            # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
            # might have surpassed the `max_pre_snuba_candidates`. In this case,
            # we *don't* want to pass candidates down to Snuba, and instead we
            # want Snuba to do all the filtering/sorting it can and *then* apply
            # this queryset to the results from Snuba, which we call
            # post-filtering.
            metrics.incr('snuba.search.too_many_candidates')
            candidate_ids = None

    sort, extra_aggregations, score_fn = sort_strategies[sort_by]

    chunk_growth = options.get('snuba.search.chunk-growth-rate')
    max_chunk_size = options.get('snuba.search.max-chunk-size')
    chunk_limit = limit
    offset = 0
    num_chunks = 0

    paginator_results = Paginator(Group.objects.none()).get_result()
    result_groups = []
    result_group_ids = set()
    min_score = float('inf')
    max_score = -1

    max_time = options.get('snuba.search.max-total-chunk-time-seconds')
    time_start = time.time()

    # Do smaller searches in chunks until we have enough results
    # to answer the query (or hit the end of possible results). We do
    # this because a common case for search is to return 100 groups
    # sorted by `last_seen`, and we want to avoid returning all of
    # a project's groups and then post-sorting them all in Postgres
    # when typically the first N results will do.
    while (time.time() - time_start) < max_time:
        num_chunks += 1

        # grow the chunk size on each iteration to account for huge projects
        # and weird queries, up to a max size
        chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
        # but if we have candidate_ids always query for at least that many items
        chunk_limit = max(chunk_limit, len(candidate_ids) if candidate_ids else 0)

        # {group_id: group_score, ...}
        snuba_groups, more_results = snuba_search(
            start=start,
            end=end,
            project_id=project.id,
            environment_id=environment and environment.id,
            tags=tags,
            sort=sort,
            extra_aggregations=extra_aggregations,
            score_fn=score_fn,
            candidate_ids=candidate_ids,
            limit=chunk_limit,
            offset=offset,
            **parameters
        )
        metrics.timing('snuba.search.num_snuba_results', len(snuba_groups))
        offset += len(snuba_groups)

        if not snuba_groups:
            break

        if candidate_ids:
            # pre-filtered candidates were passed down to Snuba,
            # so we're finished with filtering and these are the
            # only results
            result_groups = snuba_groups
        else:
            # pre-filtered candidates were *not* passed down to Snuba,
            # so we need to do post-filtering to verify Sentry DB predicates
            filtered_group_ids = group_queryset.filter(
                id__in=[gid for gid, _ in snuba_groups]
            ).values_list('id', flat=True)

            group_to_score = dict(snuba_groups)
            for group_id in filtered_group_ids:
                if group_id in result_group_ids:
                    # because we're doing multiple Snuba queries, which
                    # happen outside of a transaction, there is a small possibility
                    # of groups moving around in the sort scoring underneath us,
                    # so we at least want to protect against duplicates
                    continue

                group_score = group_to_score[group_id]
                result_group_ids.add(group_id)
                result_groups.append((group_id, group_score))

                # used for cursor logic
                min_score = min(min_score, group_score)
                max_score = max(max_score, group_score)

        # HACK: If a cursor is being used and there may be more results available
        # in Snuba, we need to detect whether the cursor's value will be
        # found in the result groups. If it isn't in the results yet we need to
        # continue querying before we hand off to the paginator to decide whether
        # enough results are found or not, otherwise the paginator will happily
        # return `limit` worth of results that don't take the cursor into account
        # at all, since it can't know there are more results available.
        # TODO: If chunked search works in practice we should probably extend the
        # paginator to throw something if the cursor value is never found, or do
        # something other than partially leak internal paginator logic up to here.
        # Or make separate Paginator implementation just for Snuba search?
        if cursor is not None \
                and not candidate_ids \
                and more_results:
            if cursor.is_prev and min_score < cursor.value:
                continue
            elif not cursor.is_prev and max_score > cursor.value:
                continue

        paginator_results = SequencePaginator(
            [(score, id) for (id, score) in result_groups],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=False)

        # break the query loop for one of three reasons:
        # * we started with Postgres candidates and so only do one Snuba query max
        # * the paginator is returning enough results to satisfy the query (>= the limit)
        # * there are no more groups in Snuba to post-filter
        if candidate_ids \
                or len(paginator_results.results) >= limit \
                or not more_results:
            break

    metrics.timing('snuba.search.num_chunks', num_chunks)

    groups = Group.objects.in_bulk(paginator_results.results)
    paginator_results.results = [groups[k] for k in paginator_results.results if k in groups]

    return paginator_results
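# Hedged sketch: the cursor check in the loop above, isolated. It returns True
# when another chunk should be fetched before paginating, mirroring the two
# `continue` conditions; `is_prev`, `cursor_value`, and the scores are plain
# values standing in for the paginator internals.
def needs_another_chunk(is_prev, cursor_value, min_score, max_score):
    if is_prev:
        return min_score < cursor_value
    return max_score > cursor_value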
def _process_single_incr(self, key):
    client = self.cluster.get_routing_client()
    lock_key = self._make_lock_key(key)
    # prevent a stampede due to the way we use celery etas + duplicate
    # tasks
    if not client.set(lock_key, "1", nx=True, ex=10):
        metrics.incr("buffer.revoked", tags={"reason": "locked"}, skip_internal=False)
        self.logger.debug("buffer.revoked.locked", extra={"redis_key": key})
        return

    pending_key = self._make_pending_key_from_key(key)

    try:
        conn = self.cluster.get_local_client_for_key(key)
        pipe = conn.pipeline()
        pipe.hgetall(key)
        pipe.zrem(pending_key, key)
        pipe.delete(key)
        values = pipe.execute()[0]

        # XXX(python3): In python2 this isn't as important since redis will
        # return string types (be it, byte strings), but in py3 we get bytes
        # back, and really we just want to deal with keys as strings.
        values = {force_text(k): v for k, v in values.items()}

        if not values:
            metrics.incr("buffer.revoked", tags={"reason": "empty"}, skip_internal=False)
            self.logger.debug("buffer.revoked.empty", extra={"redis_key": key})
            return

        # XXX(py3): Note that ``import_string`` explicitly wants a str in
        # python2, so we'll decode (for python3) and then translate back to
        # a byte string (in python2) for import_string.
        model = import_string(str(values.pop("m").decode("utf-8")))  # NOQA

        if values["f"].startswith(b"{"):
            filters = self._load_values(json.loads(values.pop("f").decode("utf-8")))
        else:
            # TODO(dcramer): legacy pickle support - remove in Sentry 9.1
            filters = pickle.loads(values.pop("f"))

        incr_values = {}
        extra_values = {}
        signal_only = None
        for k, v in values.items():
            if k.startswith("i+"):
                incr_values[k[2:]] = int(v)
            elif k.startswith("e+"):
                if v.startswith(b"["):
                    extra_values[k[2:]] = self._load_value(json.loads(v.decode("utf-8")))
                else:
                    # TODO(dcramer): legacy pickle support - remove in Sentry 9.1
                    extra_values[k[2:]] = pickle.loads(v)
            elif k == "s":
                signal_only = bool(int(v))  # Should be 1 if set

        super().process(model, incr_values, filters, extra_values, signal_only)
    finally:
        client.delete(lock_key)
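The `client.set(lock_key, "1", nx=True, ex=10)` call above is the standard Redis single-key lock: NX makes the write succeed only for the first caller, and EX guarantees the lock expires even if the worker dies before releasing it. A minimal standalone sketch of the same pattern using redis-py; the key name and TTL are arbitrary.

import redis

client = redis.Redis()

def run_exclusively(lock_key, work, ttl=10):
    # First caller wins the lock; concurrent duplicates get a falsy result
    # back and skip the work instead of stampeding.
    if not client.set(lock_key, "1", nx=True, ex=ttl):
        return False
    try:
        work()
        return True
    finally:
        # Release eagerly; the EX expiry is only the crash backstop.
        client.delete(lock_key)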
def filter(self, record):
    # TODO(mattrobenolt): handle an upstream Sentry
    metrics.incr("internal.uncaptured.logs", skip_internal=False)
    return is_current_event_safe()
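For context, `filter` here is the hook from the stdlib `logging.Filter` protocol: returning a falsy value drops the record from the handler it is attached to, which is how unsafe records are suppressed while still being counted. A minimal sketch of wiring such a filter; the class name, logger name, and `safe_to_emit` attribute are hypothetical.

import logging

class DropUnsafeRecords(logging.Filter):
    def filter(self, record):
        # Returning False silently drops the record from this handler.
        return getattr(record, "safe_to_emit", True)

handler = logging.StreamHandler()
handler.addFilter(DropUnsafeRecords())
logging.getLogger("example").addHandler(handler)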
def process_event(event_manager, project, key, remote_addr, helper, attachments):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data['event_id']

    if should_filter:
        track_outcome(project.organization_id, project.id, key.id,
                      Outcome.FILTERED, filter_reason, event_id=event_id)
        metrics.incr('events.blacklisted',
                     tags={'reason': filter_reason}, skip_internal=False)
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited, project=project,
                              key=key, _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(project.organization_id, project.id, key.id,
                      Outcome.RATE_LIMITED, reason, event_id=event_id)
        metrics.incr(
            'events.dropped',
            tags={
                'reason': reason or 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=reason,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (project.id, event_id, )

    if cache.get(cache_key) is not None:
        track_outcome(project.organization_id, project.id, key.id,
                      Outcome.INVALID, 'duplicate', event_id=event_id)
        raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False) or
        project.get_option('sentry:scrub_ip_address', False)
    )
    scrub_data = (
        org_options.get('sentry:require_scrub_data', False) or
        project.get_option('sentry:scrub_data', True)
    )

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, [])
        )

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (
            org_options.get(exclude_fields_key, []) +
            project.get_option(exclude_fields_key, [])
        )

        scrub_defaults = (
            org_options.get('sentry:require_scrub_defaults', False) or
            project.get_option('sentry:scrub_defaults', True)
        )

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
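The rate-limit handling above has to cope with three result shapes from `safe_execute`: a bare bool (legacy backends), a RateLimit-like object, or None when the limiter itself errored. A minimal sketch of that normalization, with `RateLimit` as a hypothetical stand-in type rather than the real class:

from collections import namedtuple

# Hypothetical stand-in for the real RateLimit type.
RateLimit = namedtuple('RateLimit', ['is_limited', 'retry_after', 'reason_code'])

def normalize_rate_limit(result):
    # bool -> wrap it; RateLimit -> pass through; None -> the limiter
    # errored, which the caller treats as "drop the event but don't 429".
    if isinstance(result, bool):
        return RateLimit(is_limited=result, retry_after=None, reason_code=None)
    return result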
def post(self, request, organization):
    """
    Add a Member to Organization
    ````````````````````````````

    Invite a member to the organization.

    :pparam string organization_slug: the slug of the organization the
                                      member will belong to
    :param string email: the email address to invite
    :param string role: the role of the new member
    :param array teams: the slugs of the teams the member should belong to.
    :auth: required
    """
    if not features.has("organizations:invite-members", organization, actor=request.user):
        return Response(
            {"organization": "Your organization is not allowed to invite members"},
            status=403,
        )

    _, allowed_roles = get_allowed_roles(request, organization)

    serializer = OrganizationMemberSerializer(
        data=request.data,
        context={
            "organization": organization,
            "allowed_roles": allowed_roles,
            "allow_existing_invite_request": True,
        },
    )
    if not serializer.is_valid():
        return Response(serializer.errors, status=400)

    result = serializer.validated_data

    if ratelimits.for_organization_member_invite(
        organization=organization,
        email=result["email"],
        user=request.user,
        auth=request.auth,
    ):
        metrics.incr(
            "member-invite.attempt",
            instance="rate_limited",
            skip_internal=True,
            sample_rate=1.0,
        )
        return Response({"detail": ERR_RATE_LIMITED}, status=429)

    with transaction.atomic():
        # remove any invitation requests for this email before inviting
        OrganizationMember.objects.filter(
            Q(invite_status=InviteStatus.REQUESTED_TO_BE_INVITED.value)
            | Q(invite_status=InviteStatus.REQUESTED_TO_JOIN.value),
            email=result["email"],
            organization=organization,
        ).delete()

        om = OrganizationMember(
            organization=organization,
            email=result["email"],
            role=result["role"],
            inviter=request.user,
        )

        if settings.SENTRY_ENABLE_INVITES:
            om.token = om.generate_token()

        om.save()

    if result["teams"]:
        lock = locks.get(f"org:member:{om.id}", duration=5)
        with TimedRetryPolicy(10)(lock.acquire):
            save_team_assignments(om, result["teams"])

    if settings.SENTRY_ENABLE_INVITES and result.get("sendInvite"):
        om.send_invite_email()
        member_invited.send_robust(
            member=om,
            user=request.user,
            sender=self,
            referrer=request.data.get("referrer"),
        )

    self.create_audit_entry(
        request=request,
        organization_id=organization.id,
        target_object=om.id,
        data=om.get_audit_log_data(),
        event=AuditLogEntryEvent.MEMBER_INVITE
        if settings.SENTRY_ENABLE_INVITES
        else AuditLogEntryEvent.MEMBER_ADD,
    )

    return Response(serialize(om), status=201)
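The `TimedRetryPolicy(10)(lock.acquire)` context above keeps retrying lock acquisition for up to ten seconds before giving up. A generic sketch of that retry-until-deadline idea in plain Python; `timed_retry` is a hypothetical stand-in, not the real policy class.

import time

def timed_retry(fn, timeout=10.0, delay=0.1):
    # Keep calling fn until it succeeds or the deadline passes,
    # re-raising the last failure once time runs out.
    deadline = time.monotonic() + timeout
    while True:
        try:
            return fn()
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(delay)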
def save(self, project_id, raw=False, assume_normalized=False):
    # Normalize if needed
    if not self._normalized:
        if not assume_normalized:
            self.normalize()
        self._normalized = True

    data = self._data

    project = Project.objects.get_from_cache(id=project_id)
    project._organization_cache = Organization.objects.get_from_cache(
        id=project.organization_id)

    # Check to make sure we're not about to do a bunch of work that's
    # already been done if we've processed an event with this ID. (This
    # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
    # there's a race condition between here and when the event is actually
    # saved, but it's an improvement. See GH-7677.)
    try:
        event = Event.objects.get(
            project_id=project.id,
            event_id=data['event_id'],
        )
    except Event.DoesNotExist:
        pass
    else:
        # Make sure we cache on the project before returning
        event._project_cache = project
        logger.info(
            'duplicate.found',
            exc_info=True,
            extra={
                'event_uuid': data['event_id'],
                'project_id': project.id,
                'model': Event.__name__,
            }
        )
        return event

    # Pull out the culprit
    culprit = self.get_culprit()

    # Pull the toplevel data we're interested in
    level = data.get('level')

    # TODO(mitsuhiko): this code path should be gone by July 2018.
    # This is going to be fine because no code actually still depends
    # on integers here. When we need an integer it will be converted
    # into one later. Old workers used to send integers here.
    if level is not None and isinstance(level, six.integer_types):
        level = LOG_LEVELS[level]

    transaction_name = data.get('transaction')
    logger_name = data.get('logger')
    release = data.get('release')
    dist = data.get('dist')
    environment = data.get('environment')
    recorded_timestamp = data.get('timestamp')

    # We need to swap out the data with the one internal to the newly
    # created event object
    event = self._get_event_instance(project_id=project_id)
    self._data = data = event.data.data

    event._project_cache = project

    date = event.datetime
    platform = event.platform
    event_id = event.event_id

    if transaction_name:
        transaction_name = force_text(transaction_name)

    # Some of the data that are toplevel attributes are duplicated
    # into tags (logger, level, environment, transaction). These are
    # different from legacy attributes which are normalized into tags
    # ahead of time (site, server_name).
    setdefault_path(data, 'tags', value=[])
    set_tag(data, 'level', level)
    if logger_name:
        set_tag(data, 'logger', logger_name)
    if environment:
        set_tag(data, 'environment', environment)
    if transaction_name:
        set_tag(data, 'transaction', transaction_name)

    if release:
        # don't allow a conflicting 'release' tag
        pop_tag(data, 'release')
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )
        set_tag(data, 'sentry:release', release.version)

    if dist and release:
        dist = release.add_dist(dist, date)
        # don't allow a conflicting 'dist' tag
        pop_tag(data, 'dist')
        set_tag(data, 'sentry:dist', dist.name)
    else:
        dist = None

    event_user = self._get_event_user(project, data)
    if event_user:
        # don't allow a conflicting 'user' tag
        pop_tag(data, 'user')
        set_tag(data, 'sentry:user', event_user.tag_value)

    # At this point we want to normalize the in_app values in case the
    # clients did not set this appropriately so far.
    grouping_config = load_grouping_config(
        get_grouping_config_dict_for_event_data(data, project))
    normalize_stacktraces_for_grouping(data, grouping_config)

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                if get_tag(data, key) is None:
                    set_tag(data, key, value)

    for path, iface in six.iteritems(event.interfaces):
        for k, v in iface.iter_tags():
            set_tag(data, k, v)
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.path, None)

    # The active grouping config was put into the event in the
    # normalize step before. We now also make sure that the
    # fingerprint was set to `'{{ default }}'` just in case someone
    # removed it from the payload. The call to get_hashes will then
    # look at `grouping_config` to pick the right parameters.
    data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
    apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))

    # Here we try to use the grouping config that was requested in the
    # event. If that config has since been deleted (because it was an
    # experimental grouping config) we fall back to the default.
    try:
        hashes = event.get_hashes()
    except GroupingConfigNotFound:
        data['grouping_config'] = get_grouping_config_dict_for_project(project)
        hashes = event.get_hashes()

    data['hashes'] = hashes

    # we want to freeze not just the metadata and type but also the
    # derived attributes. The reason for this is that we push this
    # data into kafka for snuba processing and our postprocessing
    # picks up the data right from the snuba topic. For most usage
    # however the data is dynamically overridden by Event.title and
    # Event.location (See Event.as_dict)
    materialized_metadata = self.materialize_metadata()
    event_metadata = materialized_metadata['metadata']
    data.update(materialized_metadata)
    data['culprit'] = culprit

    # index components into ``Event.message``
    # See GH-3248
    event.message = self.get_search_message(event_metadata, culprit)
    received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

    # The group gets the same metadata as the event when it's flushed but
    # additionally the `last_received` key is set. This key is used by
    # _save_aggregate.
    group_metadata = dict(materialized_metadata)
    group_metadata['last_received'] = received_timestamp
    kwargs = {
        'platform': platform,
        'message': event.message,
        'culprit': culprit,
        'logger': logger_name,
        'level': LOG_LEVELS_MAP.get(level),
        'last_seen': date,
        'first_seen': date,
        'active_at': date,
        'data': group_metadata,
    }

    if release:
        kwargs['first_release'] = release

    try:
        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event,
            hashes=hashes,
            release=release,
            **kwargs
        )
    except HashDiscarded:
        event_discarded.send_robust(
            project=project,
            sender=EventManager,
        )
        metrics.incr(
            'events.discarded',
            skip_internal=True,
            tags={
                'organization_id': project.organization_id,
                'platform': platform,
            },
        )
        raise
    else:
        event_saved.send_robust(
            project=project,
            event_size=event.size,
            sender=EventManager,
        )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    # When an event was sampled, the canonical source of truth
    # is the EventMapping table since we aren't going to be writing out an actual
    # Event row.
    # Otherwise, if the Event isn't being sampled, we can safely
    # rely on the Event table itself as the source of truth and ignore
    # EventMapping since it's redundant information.
    if is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': EventMapping.__name__,
                }
            )
            return event

    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
        group_id=group.id,
        environment_id=environment.id,
        defaults={
            'first_release': release if release else None,
        },
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        ReleaseProjectEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        })
    ]

    if release:
        frequencies.append(
            (tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            })
        )

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(
        group=group,
        environment=environment,
    )

    # Update any event attachment that arrived before the event group was defined.
    EventAttachment.objects.filter(
        project_id=project.id,
        event_id=event_id,
    ).update(
        group_id=group.id,
    )

    # save the event unless it's been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': Event.__name__,
                }
            )
            return event

        tagstore.delay_index_event_tags(
            organization_id=project.organization_id,
            project_id=project.id,
            group_id=group.id,
            environment_id=environment.id,
            event_id=event.id,
            tags=event.tags,
            date_added=event.datetime,
        )

    if event_user:
        tsdb.record_multi(
            (
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
            ),
            timestamp=event.datetime,
            environment_id=environment.id,
        )

    if release:
        if is_new:
            buffer.incr(
                ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                }
            )
        if is_new_group_environment:
            buffer.incr(
                ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                    'project_id': project.id,
                    'release_id': release.id,
                    'environment_id': environment.id,
                }
            )

    safe_execute(Group.objects.add_tags, group, environment,
                 event.get_tags(), _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send_robust(project=project, group=group, sender=Project)

    eventstream.insert(
        group=group,
        event=event,
        is_new=is_new,
        is_sample=is_sample,
        is_regression=is_regression,
        is_new_group_environment=is_new_group_environment,
        primary_hash=hashes[0],
        # We are choosing to skip consuming the event back
        # in the eventstream if it's flagged as raw.
        # This means that we want to publish the event
        # through the event stream, but we don't care
        # about post processing and handling the commit.
        skip_consume=raw,
    )

    metrics.timing(
        'events.latency',
        received_timestamp - recorded_timestamp,
        tags={
            'project_id': project.id,
        },
    )

    metrics.timing('events.size.data.post_save', event.size, tags={'project_id': project.id})

    return event
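Throughout save(), tags are manipulated with set_tag/pop_tag/get_tag, which treat data['tags'] as a list of key/value pairs so that, for example, a client-supplied 'release' tag cannot conflict with the normalized 'sentry:release'. Below is a plausible minimal sketch of those helpers under that assumption; the real implementations may differ in detail.

def pop_tag(data, key):
    # Drop every pair matching `key`.
    data['tags'] = [kv for kv in data.get('tags') or [] if kv[0] != key]

def set_tag(data, key, value):
    # Replace any existing value for `key` with the new pair.
    pop_tag(data, key)
    data['tags'].append((key, value))

def get_tag(data, key):
    for k, v in data.get('tags') or []:
        if k == key:
            return v
    return None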