def _parse_args(self, request, environment_id=None):
    resolution = request.GET.get('resolution')
    if resolution:
        resolution = self._parse_resolution(resolution)
        assert resolution in tsdb.get_rollups()

    end = request.GET.get('until')
    if end:
        end = to_datetime(float(end))
    else:
        end = datetime.utcnow().replace(tzinfo=utc)

    start = request.GET.get('since')
    if start:
        start = to_datetime(float(start))
        assert start <= end, 'start must be before or equal to end'
    else:
        start = end - timedelta(days=1, seconds=-1)

    return {
        'start': start,
        'end': end,
        'rollup': resolution,
        'environment_ids': environment_id and [environment_id],
    }
def make_group_generator(random, project):
    epoch = to_timestamp(datetime(2016, 6, 1, 0, 0, 0, tzinfo=timezone.utc))
    for id in itertools.count(1):
        first_seen = epoch + random.randint(0, 60 * 60 * 24 * 30)
        last_seen = random.randint(first_seen, first_seen + (60 * 60 * 24 * 30))

        culprit = make_culprit(random)
        level = random.choice(LOG_LEVELS.keys())
        message = make_message(random)

        group = Group(
            id=id,
            project=project,
            culprit=culprit,
            level=level,
            message=message,
            first_seen=to_datetime(first_seen),
            last_seen=to_datetime(last_seen),
            status=random.choice((GroupStatus.UNRESOLVED, GroupStatus.RESOLVED, )),
            data={
                'type': 'default',
                'metadata': {
                    'title': message,
                }
            }
        )

        if random.random() < 0.8:
            group.data = make_group_metadata(random, group)

        yield group
def test_clean_series_trims_extra():
    rollup = 60
    n = 5
    start = to_datetime(rollup * 0)
    stop = to_datetime(rollup * n)
    series = [(rollup * i, i) for i in xrange(0, n + 1)]
    assert clean_series(start, stop, rollup, series) == series[:n]
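# The examples here only make sense if `to_datetime` maps Unix seconds to a
# timezone-aware UTC datetime and `to_timestamp` is its inverse. Below is a
# minimal illustrative sketch of that pair, assuming UTC epoch semantics; it
# is not the project's actual helper, just enough to make the tests readable.
from datetime import datetime, timedelta

import pytz

epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)


def to_timestamp(value):
    # aware datetime -> float seconds since the Unix epoch
    return (value - epoch).total_seconds()


def to_datetime(value):
    # int/float seconds since the Unix epoch -> aware UTC datetime
    return epoch + timedelta(seconds=value)


# Round trip: five minutes past the epoch.
assert to_timestamp(to_datetime(60 * 5)) == 300.0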
def _parse_args(self, request):
    resolution = request.GET.get('resolution')
    if resolution:
        resolution = self._parse_resolution(resolution)
        assert any(r for r in tsdb.rollups if r[0] == resolution)

    end = request.GET.get('until')
    if end:
        end = to_datetime(float(end))
    else:
        end = datetime.utcnow().replace(tzinfo=utc)

    start = request.GET.get('since')
    if start:
        start = to_datetime(float(start))
        assert start <= end, 'start must be before or equal to end'
    else:
        start = end - timedelta(days=1, seconds=-1)

    return {
        'start': start,
        'end': end,
        'rollup': resolution,
    }
def test_clean_series_rejects_offset_timestamp():
    rollup = 60
    n = 5
    start = to_datetime(rollup * 0)
    stop = to_datetime(rollup * n)
    series = [(rollup * (i * 1.1), i) for i in xrange(0, n)]
    with pytest.raises(AssertionError):
        clean_series(start, stop, rollup, series)
def get_data(self, model, keys, start, end, rollup=None, environment_ids=None,
             aggregation='count()', group_on_model=True, group_on_time=False):
    """
    Normalizes all the TSDB parameters and sends a query to snuba.

    `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
    `group_on_model`: whether to add a GROUP BY clause on the primary model.
    """
    model_columns = self.model_columns.get(model)

    if model_columns is None:
        raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

    model_group, model_aggregate = model_columns

    groupby = []
    if group_on_model and model_group is not None:
        groupby.append(model_group)
    if group_on_time:
        groupby.append('time')
    if aggregation == 'count()' and model_aggregate is not None:
        # Special case, because count has different semantics, we change:
        # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
        groupby.append(model_aggregate)
        model_aggregate = None

    keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
    keys_map = {k: v for k, v in six.iteritems(keys_map)
                if k is not None and v is not None}
    if environment_ids is not None:
        keys_map['environment'] = environment_ids

    aggregations = [[aggregation, model_aggregate, 'aggregate']]

    # For historical compatibility with bucket-counted TSDB implementations
    # we grab the original bucketed series and add the rollup time to the
    # timestamp of the last bucket to get the end time.
    rollup, series = self.get_optimal_rollup_series(start, end, rollup)
    start = to_datetime(series[0])
    end = to_datetime(series[-1] + rollup)

    if keys:
        result = snuba.query(start, end, groupby, None, keys_map,
                             aggregations, rollup, referrer='tsdb',
                             is_grouprelease=(model == TSDBModel.frequent_releases_by_group))
    else:
        result = {}

    if group_on_time:
        keys_map['time'] = series

    self.zerofill(result, groupby, keys_map)
    self.trim(result, groupby, keys)

    return result
def test_clean_series():
    rollup = 60
    n = 5
    start = to_datetime(rollup * 0)
    stop = to_datetime(rollup * n)
    series = [(rollup * i, i) for i in xrange(0, n)]
    assert clean_series(
        start,
        stop,
        rollup,
        series,
    ) == series
def test_make_counter_key(self):
    result = self.db.make_counter_key(TSDBModel.project, 1, to_datetime(1368889980), 1, None)
    assert result == ('ts:1:1368889980:1', 1)

    result = self.db.make_counter_key(
        TSDBModel.project, 1, to_datetime(1368889980), 'foo', None)
    assert result == ('ts:1:1368889980:46', self.db.get_model_key('foo'))

    result = self.db.make_counter_key(TSDBModel.project, 1, to_datetime(1368889980), 1, 1)
    assert result == ('ts:1:1368889980:1', '1?e=1')

    result = self.db.make_counter_key(TSDBModel.project, 1, to_datetime(1368889980), 'foo', 1)
    assert result == ('ts:1:1368889980:46', self.db.get_model_key('foo') + '?e=1')
def remove_invalid_values(item):
    timestamp, value = item
    if timestamp < earliest:
        value = None
    elif to_datetime(timestamp) < project.date_added:
        value = None
    return (timestamp, value)
def test_hash_discarded_raised(self, mock_refund, mock_incr):
    project = self.create_project()

    data = {
        'project': project.id,
        'platform': 'NOTMATTLANG',
        'logentry': {
            'formatted': 'test',
        },
        'event_id': uuid.uuid4().hex,
        'extra': {
            'foo': 'bar'
        },
    }

    now = time()
    mock_save = mock.Mock()
    mock_save.side_effect = HashDiscarded
    with mock.patch.object(EventManager, 'save', mock_save):
        save_event(data=data, start_time=now)
        mock_incr.assert_called_with(
            [
                (tsdb.models.project_total_received_discarded, project.id),
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ],
            timestamp=to_datetime(now),
        )
def get_recent_mentions(tenant):
    client = cluster.get_routing_client()
    key = get_key(tenant)
    ids = [x for x in client.zrangebyscore(
        key, time.time() - (RECENT_HOURS * 60), '+inf')][-MAX_RECENT:]

    with cluster.map() as map_client:
        items = [map_client.get('%s:%s' % (key, id)) for id in ids]
    items = [json.loads(x.value) for x in items if x.value is not None]

    projects = items and dict(
        (x.id, x) for x in Project.objects.filter(
            pk__in=[x['project'] for x in items],
        )
    ) or {}
    groups = items and dict(
        (x.id, x) for x in Group.objects.filter(
            pk__in=[x['group'] for x in items],
        )
    ) or {}
    events = items and dict(
        (x.id, x) for x in Event.objects.filter(
            pk__in=[x['event'] for x in items if x['event'] is not None],
        )
    ) or {}

    for item in items:
        item['project'] = projects.get(item['project'])
        item['group'] = groups.get(item['group'])
        item['event'] = events.get(item['event'])
        if item['event'] is None and item['group'] is not None:
            item['event'] = item['group'].get_latest_event()
        item['last_mentioned'] = to_datetime(item['last_mentioned'])

    return items
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
def get(self, request, group, environment):
    try:
        environment = Environment.objects.get(
            project_id=group.project_id,
            # XXX(dcramer): we have no great way to pass the empty env
            name='' if environment == 'none' else environment,
        )
    except Environment.DoesNotExist:
        raise ResourceDoesNotExist

    first_release = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
    ).order_by('first_seen').first()

    last_release = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
    ).order_by('-first_seen').first()

    # the current release is the 'latest seen' release within the
    # environment even if it hasnt affected this issue
    current_release = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
        release_id=ReleaseEnvironment.objects.filter(
            project_id=group.project_id,
            environment_id=environment.id,
        ).order_by('-first_seen').values_list('release_id', flat=True).first(),
    ).first()

    last_seen = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
    ).order_by('-last_seen').values_list('last_seen', flat=True).first()

    until = request.GET.get('until')
    if until:
        until = to_datetime(float(until))

    context = {
        'environment': serialize(
            environment, request.user, GroupEnvironmentWithStatsSerializer(
                group=group,
                until=until,
            )
        ),
        'firstRelease': serialize(first_release, request.user),
        'lastRelease': serialize(last_release, request.user),
        'currentRelease': serialize(
            current_release, request.user, GroupReleaseWithStatsSerializer(
                until=until,
            )
        ),
        'lastSeen': last_seen,
        'firstSeen': first_release.first_seen if first_release else None,
    }
    return Response(context)
def _convert(x):
    return {
        'type': x['type'],
        'timestamp': to_datetime(x['timestamp']),
        'level': x.get('level', 'info'),
        'message': x.get('message'),
        'category': x.get('category'),
        'data': x.get('data') or None,
        'event_id': x.get('event_id'),
    }
def make_group_generator(random, project):
    epoch = to_timestamp(datetime(2016, 6, 1, 0, 0, 0, tzinfo=timezone.utc))
    for id in itertools.count(1):
        first_seen = epoch + random.randint(0, 60 * 60 * 24 * 30)
        last_seen = random.randint(first_seen, first_seen + (60 * 60 * 24 * 30))

        group = Group(
            id=id,
            project=project,
            culprit=make_culprit(random),
            level=random.choice(LOG_LEVELS.keys()),
            message=make_message(random),
            first_seen=to_datetime(first_seen),
            last_seen=to_datetime(last_seen),
        )

        if random.random() < 0.8:
            group.data = make_group_metadata(random, group)

        yield group
def merge_frequencies(self, model, destination, sources, timestamp=None):
    if not self.enable_frequency_sketches:
        return

    rollups = []
    for rollup, samples in self.rollups.items():
        _, series = self.get_optimal_rollup_series(
            to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
            end=None,
            rollup=rollup,
        )
        rollups.append((
            rollup,
            map(to_datetime, series),
        ))

    exports = defaultdict(list)

    for source in sources:
        for rollup, series in rollups:
            for timestamp in series:
                keys = self.make_frequency_table_keys(
                    model,
                    rollup,
                    to_timestamp(timestamp),
                    source,
                )
                arguments = ['EXPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS)
                exports[source].extend([
                    (CountMinScript, keys, arguments),
                    ('DEL',) + tuple(keys),
                ])

    imports = []

    for source, results in self.cluster.execute_commands(exports).items():
        results = iter(results)
        for rollup, series in rollups:
            for timestamp in series:
                imports.append((
                    CountMinScript,
                    self.make_frequency_table_keys(
                        model,
                        rollup,
                        to_timestamp(timestamp),
                        destination,
                    ),
                    ['IMPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS) + next(results).value,
                ))
                next(results)  # pop off the result of DEL

    self.cluster.execute_commands({
        destination: imports,
    })
def get_registered_devices(self):
    rv = []
    for device in self.config.get('devices') or ():
        rv.append({
            'timestamp': to_datetime(device['ts']),
            'name': device['name'],
            'key_handle': device['binding']['keyHandle'],
            'app_id': device['binding']['appId'],
        })
    rv.sort(key=lambda x: x['name'])
    return rv
def _parse_args(self, request): resolution = request.GET.get("resolution") if resolution: resolution = self._parse_resolution(resolution) assert resolution in tsdb.rollups end = request.GET.get("until") if end: end = to_datetime(float(end)) else: end = datetime.utcnow().replace(tzinfo=utc) start = request.GET.get("since") if start: start = to_datetime(float(start)) assert start <= end, "start must be before or equal to end" else: start = end - timedelta(days=1, seconds=-1) return {"start": start, "end": end, "rollup": resolution}
def get_registered_devices(self): rv = [] for device in self.config.get("devices") or (): rv.append( { "timestamp": to_datetime(device["ts"]), "name": device["name"], "key_handle": device["binding"]["keyHandle"], "app_id": device["binding"]["appId"], } ) rv.sort(key=lambda x: x["name"]) return rv
def build_calendar_data(project):
    start, stop = reports.get_calendar_query_range(interval, 3)
    rollup = 60 * 60 * 24
    series = []

    weekend = frozenset((5, 6))
    value = int(random.weibullvariate(5000, 3))
    for timestamp in tsdb.get_optimal_rollup_series(start, stop, rollup)[1]:
        # weekday() must be called; comparing the unbound method to the
        # weekend set would never match, so the damping would never apply.
        damping = random.uniform(0.2, 0.6) if to_datetime(timestamp).weekday() in weekend else 1
        jitter = random.paretovariate(1.2)
        series.append((timestamp, int(value * damping * jitter)))
        value = value * random.uniform(0.25, 2)

    return reports.clean_calendar_data(project, series, start, stop, rollup, stop)
def get_active_series(self, start=None, end=None, timestamp=None):
    rollups = {}
    for rollup, samples in self.rollups.items():
        _, series = self.get_optimal_rollup_series(
            start if start is not None else to_datetime(
                self.get_earliest_timestamp(
                    rollup,
                    timestamp=timestamp,
                ),
            ),
            end,
            rollup=rollup,
        )
        rollups[rollup] = map(to_datetime, series)
    return rollups
def make_release_generator():
    id_sequence = itertools.count(1)
    while True:
        dt = to_datetime(
            random.randint(
                timestamp - (30 * 24 * 60 * 60),
                timestamp,
            ),
        )
        p = random.choice(projects)
        yield Release(
            id=next(id_sequence),
            project=p,
            organization_id=p.organization_id,
            version=''.join([random.choice('0123456789abcdef') for _ in range(40)]),
            date_added=dt,
        )
def to_context(organization, interval, reports):
    report = reduce(merge_reports, reports.values())
    series = [(to_datetime(timestamp), Point(*values)) for timestamp, values in report.series]
    return {
        'series': {
            'points': series,
            'maximum': max(sum(point) for timestamp, point in series),
            'all': sum([sum(point) for timestamp, point in series]),
            'resolved': sum([point.resolved for timestamp, point in series]),
        },
        'distribution': {
            'types': list(
                zip(
                    (
                        DistributionType('New', '#8477e0'),
                        DistributionType('Reopened', '#6C5FC7'),
                        DistributionType('Existing', '#534a92'),
                    ),
                    report.issue_summaries,
                ),
            ),
            'total': sum(report.issue_summaries),
        },
        'comparisons': [
            ('last week', change(report.aggregates[-1], report.aggregates[-2])),
            (
                'four week average',
                change(
                    report.aggregates[-1],
                    mean(report.aggregates)
                    if all(v is not None for v in report.aggregates) else None,
                )
            ),
        ],
        'projects': {
            'series': build_project_breakdown_series(reports),
        },
        'calendar': to_calendar(
            interval,
            report.calendar_series,
        ),
    }
def _convert(x):
    return {
        'type': x['type'],
        'timestamp': to_datetime(x['timestamp']),
        'data': x['data'],
    }
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome from sentry.ingest.outcomes_consumer import mark_signal_sent if cache_key and data is None: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data["event_id"] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop("project") key_id = None if data is None else data.get("key_id") if key_id is not None: key_id = int(key_id) timestamp = to_datetime(start_time) if start_time is not None else None # We only need to delete raw events for events that support # reprocessing. If the data cannot be found we want to assume # that we need to delete the raw event. if not data or reprocessing.event_supports_reprocessing(data): delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr("events.failed", tags={ "reason": "cache", "stage": "post" }, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: manager = EventManager(data) # event.project.organization is populated after this statement. event = manager.save(project_id, assume_normalized=True) # This is where we can finally say that we have accepted the event. track_outcome( event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id, ) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) # There is no signal supposed to be sent for this particular # outcome-reason combination. Prevent the outcome consumer from # emitting it for now. # # XXX(markus): Revisit decision about signals once outcomes consumer is stable. mark_signal_sent(project_id, event_id) track_outcome( project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id, ) else: if cache_key: # Note that event is now a model, and no longer the data save_attachments(cache_key, event) finally: if cache_key: default_cache.delete(cache_key) # For the unlikely case that we did not manage to persist the # event we also delete the key always. 
if event is None or features.has("organizations:event-attachments", event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing("events.time-to-process", time() - start_time, instance=data["platform"])
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome if cache_key and data is None: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') key_id = None if data is None else data.get('key_id') if key_id is not None: key_id = int(key_id) timestamp = to_datetime(start_time) if start_time is not None else None delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr( 'events.failed', tags={ 'reason': 'cache', 'stage': 'post'}, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: manager = EventManager(data) event = manager.save(project_id, assume_normalized=True) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. if features.has('organizations:event-attachments', event.project.organization, actor=None): attachments = attachment_cache.get(cache_key) or [] for attachment in attachments: save_attachment(event, attachment) # This is where we can finally say that we have accepted the event. track_outcome( event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id ) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) track_outcome( project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id ) finally: if cache_key: default_cache.delete(cache_key) # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or \ features.has('organizations:event-attachments', event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing( 'events.time-to-process', time() - start_time, instance=data['platform'])
def alert(request): platform = request.GET.get('platform', 'python') org = Organization( id=1, slug='example', name='Example', ) project = Project( id=1, slug='example', name='Example', organization=org, ) random = get_random(request) group = next( make_group_generator(random, project), ) event = Event( id=1, event_id='44f1419e73884cd2b45c79918f4b6dc4', project=project, group=group, message=group.message, data=load_data(platform), datetime=to_datetime( random.randint( to_timestamp(group.first_seen), to_timestamp(group.last_seen), ), ), ) rule = Rule(label="An example rule") interface_list = [] for interface in six.itervalues(event.interfaces): body = interface.to_email_html(event) if not body: continue interface_list.append((interface.get_title(), mark_safe(body))) return MailPreview( html_template='sentry/emails/error.html', text_template='sentry/emails/error.txt', context={ 'rule': rule, 'group': group, 'event': event, 'link': 'http://example.com/link', 'interfaces': interface_list, 'tags': event.get_tags(), 'project_label': project.slug, 'tags': [ ('logger', 'javascript'), ('environment', 'prod'), ('level', 'error'), ('device', 'Other') ], 'commits': [{ # TODO(dcramer): change to use serializer "repository": {"status": "active", "name": "Example Repo", "url": "https://github.com/example/example", "dateCreated": "2018-02-28T23:39:22.402Z", "provider": {"id": "github", "name": "GitHub"}, "id": "1"}, "score": 2, "subject": "feat: Do something to raven/base.py", "message": "feat: Do something to raven/base.py\naptent vivamus vehicula tempus volutpat hac tortor", "id": "1b17483ffc4a10609e7921ee21a8567bfe0ed006", "shortId": "1b17483", "author": {"username": "******", "isManaged": False, "lastActive": "2018-03-01T18:25:28.149Z", "id": "1", "isActive": True, "has2fa": False, "name": "*****@*****.**", "avatarUrl": "https://secure.gravatar.com/avatar/51567a4f786cd8a2c41c513b592de9f9?s=32&d=mm", "dateJoined": "2018-02-27T22:04:32.847Z", "emails": [{"is_verified": False, "id": "1", "email": "*****@*****.**"}], "avatar": {"avatarUuid": None, "avatarType": "letter_avatar"}, "lastLogin": "******", "email": "*****@*****.**"} }], }, ).render(request)
def get_data( self, model, keys, start, end, rollup=None, environment_ids=None, aggregation="count()", group_on_model=True, group_on_time=False, ): """ Normalizes all the TSDB parameters and sends a query to snuba. `group_on_time`: whether to add a GROUP BY clause on the 'time' field. `group_on_model`: whether to add a GROUP BY clause on the primary model. """ # XXX: to counteract the hack in project_key_stats.py if model in [ TSDBModel.key_total_received, TSDBModel.key_total_blacklisted, TSDBModel.key_total_rejected, ]: keys = list(set(map(lambda x: int(x), keys))) # 10s is the only rollup under an hour that we support if rollup and rollup == 10 and model in self.lower_rollup_query_settings.keys( ): model_query_settings = self.lower_rollup_query_settings.get(model) else: model_query_settings = self.model_query_settings.get(model) if model_query_settings is None: raise Exception(u"Unsupported TSDBModel: {}".format(model.name)) model_group = model_query_settings.groupby model_aggregate = model_query_settings.aggregate groupby = [] if group_on_model and model_group is not None: groupby.append(model_group) if group_on_time: groupby.append("time") if aggregation == "count()" and model_aggregate is not None: # Special case, because count has different semantics, we change: # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate` groupby.append(model_aggregate) model_aggregate = None columns = (model_query_settings.groupby, model_query_settings.aggregate) keys_map = dict(zip(columns, self.flatten_keys(keys))) keys_map = { k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None } if environment_ids is not None: keys_map["environment"] = environment_ids aggregations = [[aggregation, model_aggregate, "aggregate"]] # For historical compatibility with bucket-counted TSDB implementations # we grab the original bucketed series and add the rollup time to the # timestamp of the last bucket to get the end time. rollup, series = self.get_optimal_rollup_series(start, end, rollup) start = to_datetime(series[0]) end = to_datetime(series[-1] + rollup) limit = min(10000, int(len(keys) * ((end - start).total_seconds() / rollup))) if keys: result = snuba.query( dataset=model_query_settings.dataset, start=start, end=end, groupby=groupby, conditions=deepcopy( model_query_settings.conditions ), # copy because we modify the conditions in snuba.query filter_keys=keys_map, aggregations=aggregations, rollup=rollup, limit=limit, referrer="tsdb", is_grouprelease=( model == TSDBModel.frequent_releases_by_group), ) else: result = {} if group_on_time: keys_map["time"] = series self.zerofill(result, groupby, keys_map) self.trim(result, groupby, keys) return result
def build_project_breakdown_series(reports): Key = namedtuple('Key', 'label url color data') def get_legend_data(report): filtered, rate_limited = report.usage_summary return { 'events': sum(sum(value) for timestamp, value in report.series), 'filtered': filtered, 'rate_limited': rate_limited, } # Find the reports with the most total events. (The number of reports to # keep is the same as the number of colors available to use in the legend.) instances = map( operator.itemgetter(0), sorted( reports.items(), key=lambda (instance, report): sum(sum(values) for timestamp, values in report[0]), reverse=True, ), )[:len(colors)] # Starting building the list of items to include in the report chart. This # is a list of [Key, Report] pairs, in *ascending* order of the total sum # of values in the series. (This is so when we render the series, the # largest color blocks are at the bottom and it feels appropriately # weighted.) selections = map( lambda (instance, color): ( Key( instance.slug, instance.get_absolute_url(), color, get_legend_data(reports[instance]), ), reports[instance], ), zip( instances, colors, ), )[::-1] # Collect any reports that weren't in the selection set, merge them # together and add it at the top (front) of the stack. overflow = set(reports) - set(instances) if overflow: overflow_report = reduce( merge_reports, [reports[instance] for instance in overflow], ) selections.insert( 0, (Key('Other', None, '#f2f0fa', get_legend_data(overflow_report)), overflow_report, ) ) def summarize(key, points): total = sum(points) return [(key, total)] if total else [] # Collect all of the independent series into a single series to make it # easier to render, resulting in a series where each value is a sequence of # (key, count) pairs. series = reduce( merge_series, [series_map( functools.partial(summarize, key), report[0], ) for key, report in selections], ) legend = [key for key, value in reversed(selections)] return { 'points': [(to_datetime(timestamp), value) for timestamp, value in series], 'maximum': max(sum(count for key, count in value) for timestamp, value in series), 'legend': { 'rows': legend, 'total': Key( 'Total', None, None, reduce(merge_mappings, [key.data for key in legend]), ), }, }
def datetime(self):
    return to_datetime(self.timestamp)
def save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas, tsdb from sentry.models import ProjectKey if cache_key: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'}) return with configure_scope() as scope: scope.set_tag("project", project_id) try: manager = EventManager(data) event = manager.save(project_id) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. if features.has('organizations:event-attachments', event.project.organization, actor=None): attachments = attachment_cache.get(cache_key) or [] for attachment in attachments: save_attachment(event, attachment) except HashDiscarded: increment_list = [ (tsdb.models.project_total_received_discarded, project_id), ] try: project = Project.objects.get_from_cache(id=project_id) except Project.DoesNotExist: pass else: increment_list.extend([ (tsdb.models.project_total_blacklisted, project.id), (tsdb.models.organization_total_blacklisted, project.organization_id), ]) project_key = None if data.get('key_id') is not None: try: project_key = ProjectKey.objects.get_from_cache(id=data['key_id']) except ProjectKey.DoesNotExist: pass else: increment_list.append((tsdb.models.key_total_blacklisted, project_key.id)) quotas.refund( project, key=project_key, timestamp=start_time, ) tsdb.incr_multi( increment_list, timestamp=to_datetime(start_time) if start_time is not None else None, ) finally: if cache_key: default_cache.delete(cache_key) attachment_cache.delete(cache_key) if start_time: metrics.timing( 'events.time-to-process', time() - start_time, instance=data['platform'])
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome if cache_key and data is None: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') key_id = None if data is None else data.get('key_id') if key_id is not None: key_id = int(key_id) timestamp = to_datetime(start_time) if start_time is not None else None delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr('events.failed', tags={ 'reason': 'cache', 'stage': 'post' }, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: manager = EventManager(data) event = manager.save(project_id, assume_normalized=True) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. if features.has('organizations:event-attachments', event.project.organization, actor=None): attachments = attachment_cache.get(cache_key) or [] for attachment in attachments: save_attachment(event, attachment) # This is where we can finally say that we have accepted the event. track_outcome(event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) track_outcome(project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id) finally: if cache_key: default_cache.delete(cache_key) # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or \ features.has('organizations:event-attachments', event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing('events.time-to-process', time() - start_time, instance=data['platform'])
def get_data( self, model, keys, start, end, rollup=None, environment_ids=None, aggregation="count()", group_on_model=True, group_on_time=False, ): """ Normalizes all the TSDB parameters and sends a query to snuba. `group_on_time`: whether to add a GROUP BY clause on the 'time' field. `group_on_model`: whether to add a GROUP BY clause on the primary model. """ model_columns = self.model_columns.get(model) if model_columns is None: raise Exception(u"Unsupported TSDBModel: {}".format(model.name)) model_group, model_aggregate = model_columns groupby = [] if group_on_model and model_group is not None: groupby.append(model_group) if group_on_time: groupby.append("time") if aggregation == "count()" and model_aggregate is not None: # Special case, because count has different semantics, we change: # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate` groupby.append(model_aggregate) model_aggregate = None keys_map = dict(zip(model_columns, self.flatten_keys(keys))) keys_map = {k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None} if environment_ids is not None: keys_map["environment"] = environment_ids aggregations = [[aggregation, model_aggregate, "aggregate"]] # For historical compatibility with bucket-counted TSDB implementations # we grab the original bucketed series and add the rollup time to the # timestamp of the last bucket to get the end time. rollup, series = self.get_optimal_rollup_series(start, end, rollup) start = to_datetime(series[0]) end = to_datetime(series[-1] + rollup) limit = min(10000, int(len(keys) * ((end - start).total_seconds() / rollup))) if keys: result = snuba.query( start=start, end=end, groupby=groupby, conditions=None, filter_keys=keys_map, aggregations=aggregations, rollup=rollup, limit=limit, referrer="tsdb", is_grouprelease=(model == TSDBModel.frequent_releases_by_group), ) else: result = {} if group_on_time: keys_map["time"] = series self.zerofill(result, groupby, keys_map) self.trim(result, groupby, keys) return result
def get(self, request, group, environment):
    project = group.project
    try:
        environment = Environment.objects.get(
            projects=project,
            organization_id=project.organization_id,
            # XXX(dcramer): we have no great way to pass the empty env
            name='' if environment == 'none' else environment,
        )
    except Environment.DoesNotExist:
        raise ResourceDoesNotExist

    first_release = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
    ).order_by('first_seen').first()

    last_release = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
    ).order_by('-first_seen').first()

    # the current release is the 'latest seen' release within the
    # environment even if it hasnt affected this issue
    current_release = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
        release_id=ReleaseEnvironment.objects.filter(
            release_id__in=ReleaseProject.objects.filter(
                project_id=group.project_id).values_list('release_id', flat=True),
            organization_id=group.project.organization_id,
            environment_id=environment.id,
        ).order_by('-first_seen').values_list('release_id', flat=True).first(),
    ).first()

    last_seen = GroupRelease.objects.filter(
        group_id=group.id,
        environment=environment.name,
    ).order_by('-last_seen').values_list('last_seen', flat=True).first()

    until = request.GET.get('until')
    if until:
        until = to_datetime(float(until))

    context = {
        'environment': serialize(
            environment, request.user, GroupEnvironmentWithStatsSerializer(
                group=group,
                until=until,
            )),
        'firstRelease': serialize(first_release, request.user),
        'lastRelease': serialize(last_release, request.user),
        'currentRelease': serialize(
            current_release, request.user, GroupReleaseWithStatsSerializer(
                until=until,
            )),
        'lastSeen': last_seen,
        'firstSeen': first_release.first_seen if first_release else None,
    }
    return Response(context)
def digest(request): random = get_random(request) # TODO: Refactor all of these into something more manageable. org = Organization( id=1, slug='example', name='Example Organization', ) team = Team( id=1, slug='example', name='Example Team', organization=org, ) project = Project( id=1, slug='example', name='Example Project', team=team, organization=org, ) rules = { i: Rule( id=i, project=project, label="Rule #%s" % (i, ), ) for i in range(1, random.randint(2, 4)) } state = { 'project': project, 'groups': {}, 'rules': rules, 'event_counts': {}, 'user_counts': {}, } records = [] event_sequence = itertools.count(1) group_generator = make_group_generator(random, project) for i in range(random.randint(1, 30)): group = next(group_generator) state['groups'][group.id] = group offset = timedelta(seconds=0) for i in range(random.randint(1, 10)): offset += timedelta(seconds=random.random() * 120) event = Event(id=next(event_sequence), event_id=uuid.uuid4().hex, project=project, group=group, message=group.message, data=load_data('python'), datetime=to_datetime( random.randint( to_timestamp(group.first_seen), to_timestamp(group.last_seen), ), )) records.append( Record( event.event_id, Notification( event, random.sample(state['rules'], random.randint(1, len(state['rules']))), ), to_timestamp(event.datetime), )) state['event_counts'][group.id] = random.randint(10, 1e4) state['user_counts'][group.id] = random.randint(10, 1e4) digest = build_digest(project, records, state) start, end, counts = get_digest_metadata(digest) context = { 'project': project, 'counts': counts, 'digest': digest, 'start': start, 'end': end, } add_unsubscribe_link(context) return MailPreview( html_template='sentry/emails/digests/body.html', text_template='sentry/emails/digests/body.txt', context=context, ).render(request)
def report(request): from sentry.tasks import reports random = get_random(request) duration = 60 * 60 * 24 * 7 timestamp = to_timestamp( reports.floor_to_utc_day( to_datetime( random.randint( to_timestamp( datetime(2015, 6, 1, 0, 0, 0, tzinfo=timezone.utc)), to_timestamp( datetime(2016, 7, 1, 0, 0, 0, tzinfo=timezone.utc)), )))) start, stop = interval = reports._to_interval(timestamp, duration) organization = Organization( id=1, slug='example', name='Example', ) team = Team( id=1, slug='example', name='Example', organization=organization, ) projects = [] for i in xrange(0, random.randint(1, 8)): name = ' '.join(random.sample(loremipsum.words, random.randint(1, 4))) projects.append( Project( id=i, organization=organization, team=team, slug=slugify(name), name=name, date_added=start - timedelta(days=random.randint(0, 120)), )) def make_release_generator(): id_sequence = itertools.count(1) while True: dt = to_datetime( random.randint( timestamp - (30 * 24 * 60 * 60), timestamp, ), ) p = random.choice(projects) yield Release( id=next(id_sequence), project=p, organization_id=p.organization_id, version=''.join( [random.choice('0123456789abcdef') for _ in range(40)]), date_added=dt, ) def build_issue_summaries(): summaries = [] for i in range(3): summaries.append( int(random.weibullvariate(10, 1) * random.paretovariate(0.5))) return summaries def build_usage_summary(): return ( int(random.weibullvariate(3, 1) * random.paretovariate(0.2)), int(random.weibullvariate(5, 1) * random.paretovariate(0.2)), ) def build_calendar_data(project): start, stop = reports.get_calendar_query_range(interval, 3) rollup = 60 * 60 * 24 series = [] weekend = frozenset((5, 6)) value = int(random.weibullvariate(5000, 3)) for timestamp in tsdb.get_optimal_rollup_series(start, stop, rollup)[1]: damping = random.uniform( 0.2, 0.6) if to_datetime(timestamp).weekday in weekend else 1 jitter = random.paretovariate(1.2) series.append((timestamp, int(value * damping * jitter))) value = value * random.uniform(0.25, 2) return reports.clean_calendar_data(project, series, start, stop, rollup, stop) def build_report(project): daily_maximum = random.randint(1000, 10000) rollup = 60 * 60 * 24 series = [(timestamp + (i * rollup), (random.randint(0, daily_maximum), random.randint(0, daily_maximum))) for i in xrange(0, 7)] aggregates = [ random.randint(0, daily_maximum * 7) if random.random() < 0.9 else None for _ in xrange(0, 4) ] return reports.Report( series, aggregates, build_issue_summaries(), build_usage_summary(), build_calendar_data(project), ) if random.random() < 0.85: personal = { 'resolved': random.randint(0, 100), 'users': int(random.paretovariate(0.2)), } else: personal = { 'resolved': 0, 'users': 0, } return MailPreview( html_template='sentry/emails/reports/body.html', text_template='sentry/emails/reports/body.txt', context={ 'duration': reports.durations[duration], 'interval': { 'start': reports.date_format(start), 'stop': reports.date_format(stop), }, 'report': reports.to_context( organization, interval, {project: build_report(project) for project in projects}), 'organization': organization, 'personal': personal, 'user': request.user, }, ).render(request)
def save_event(cache_key=None, data=None, start_time=None, event_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed', tags={
            'reason': 'cache',
            'stage': 'post'
        })
        return

    project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        manager.save(project_id)
    except HashDiscarded:
        tsdb.incr(
            tsdb.models.project_total_received_discarded,
            project_id,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )
    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process', time() - start_time,
                           instance=data['platform'])
def _to_interval(timestamp, duration):
    return (
        to_datetime(timestamp - duration),
        to_datetime(timestamp),
    )
def track_outcome(org_id, project_id, key_id, outcome, reason=None, timestamp=None): """ This is a central point to track org/project counters per incoming event. NB: This should only ever be called once per incoming event, which means it should only be called at the point we know the final outcome for the event (invalid, rate_limited, accepted, discarded, etc.) This increments all the relevant legacy RedisTSDB counters, as well as sending a single metric event to Kafka which can be used to reconstruct the counters with SnubaTSDB. """ global outcomes_publisher if outcomes_publisher is None: outcomes_publisher = QueuedPublisherService( KafkaPublisher(settings.KAFKA_CLUSTERS[outcomes['cluster']])) timestamp = timestamp or to_datetime(time.time()) increment_list = [] if outcome != 'invalid': # This simply preserves old behavior. We never counted invalid events # (too large, duplicate, CORS) toward regular `received` counts. increment_list.extend([ (tsdb.models.project_total_received, project_id), (tsdb.models.organization_total_received, org_id), (tsdb.models.key_total_received, key_id), ]) if outcome == 'filtered': increment_list.extend([ (tsdb.models.project_total_blacklisted, project_id), (tsdb.models.organization_total_blacklisted, org_id), (tsdb.models.key_total_blacklisted, key_id), ]) elif outcome == 'rate_limited': increment_list.extend([ (tsdb.models.project_total_rejected, project_id), (tsdb.models.organization_total_rejected, org_id), (tsdb.models.key_total_rejected, key_id), ]) if reason in FILTER_STAT_KEYS_TO_VALUES: increment_list.append((FILTER_STAT_KEYS_TO_VALUES[reason], project_id)) increment_list = [(model, key) for model, key in increment_list if key is not None] if increment_list: tsdb.incr_multi(increment_list, timestamp=timestamp) # Send a snuba metrics payload. if random.random() <= options.get('snuba.track-outcomes-sample-rate'): outcomes_publisher.publish( outcomes['topic'], json.dumps({ 'timestamp': timestamp, 'org_id': org_id, 'project_id': project_id, 'key_id': key_id, 'outcome': outcome, 'reason': reason, }))
def track_outcome( org_id, project_id, key_id, outcome, reason=None, timestamp=None, event_id=None, category=None, quantity=None, ): """ This is a central point to track org/project counters per incoming event. NB: This should only ever be called once per incoming event, which means it should only be called at the point we know the final outcome for the event (invalid, rate_limited, accepted, discarded, etc.) This increments all the relevant legacy RedisTSDB counters, as well as sending a single metric event to Kafka which can be used to reconstruct the counters with SnubaTSDB. """ global outcomes_publisher if outcomes_publisher is None: outcomes_publisher = KafkaPublisher(settings.KAFKA_CLUSTERS[outcomes["cluster"]]) if quantity is None: quantity = 1 assert isinstance(org_id, six.integer_types) assert isinstance(project_id, six.integer_types) assert isinstance(key_id, (type(None), six.integer_types)) assert isinstance(outcome, Outcome) assert isinstance(timestamp, (type(None), datetime)) assert isinstance(category, (type(None), DataCategory)) assert isinstance(quantity, int) timestamp = timestamp or to_datetime(time.time()) tsdb_in_consumer = decide_tsdb_in_consumer() if not tsdb_in_consumer: increment_list = list( tsdb_increments_from_outcome( org_id=org_id, project_id=project_id, key_id=key_id, outcome=outcome, reason=reason ) ) if increment_list: tsdb.incr_multi(increment_list, timestamp=timestamp) if project_id and event_id: mark_tsdb_incremented(project_id, event_id) # Send a snuba metrics payload. outcomes_publisher.publish( outcomes["topic"], json.dumps( { "timestamp": timestamp, "org_id": org_id, "project_id": project_id, "key_id": key_id, "outcome": outcome.value, "reason": reason, "event_id": event_id, "category": category, "quantity": quantity, } ), ) metrics.incr( "events.outcomes", skip_internal=True, tags={"outcome": outcome.name.lower(), "reason": reason}, )
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT)

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
            reprocessing.get_reprocessing_revision(project_id, cached=False) != \
            reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False)
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={
                'reprocessing_active': reprocessing_active,
                'issues': issues
            },
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data['timestamp']).replace(tzinfo=timezone.utc),
        data=data)

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
def track_outcome(org_id, project_id, key_id, outcome, reason=None, timestamp=None, event_id=None): """ This is a central point to track org/project counters per incoming event. NB: This should only ever be called once per incoming event, which means it should only be called at the point we know the final outcome for the event (invalid, rate_limited, accepted, discarded, etc.) This increments all the relevant legacy RedisTSDB counters, as well as sending a single metric event to Kafka which can be used to reconstruct the counters with SnubaTSDB. """ global outcomes_publisher if outcomes_publisher is None: outcomes_publisher = QueuedPublisherService( KafkaPublisher(settings.KAFKA_CLUSTERS[outcomes["cluster"]]) ) assert isinstance(org_id, six.integer_types) assert isinstance(project_id, six.integer_types) assert isinstance(key_id, (type(None), six.integer_types)) assert isinstance(outcome, Outcome) assert isinstance(timestamp, (type(None), datetime)) timestamp = timestamp or to_datetime(time.time()) increment_list = [] if outcome != Outcome.INVALID: # This simply preserves old behavior. We never counted invalid events # (too large, duplicate, CORS) toward regular `received` counts. increment_list.extend( [ (tsdb.models.project_total_received, project_id), (tsdb.models.organization_total_received, org_id), (tsdb.models.key_total_received, key_id), ] ) if outcome == Outcome.FILTERED: increment_list.extend( [ (tsdb.models.project_total_blacklisted, project_id), (tsdb.models.organization_total_blacklisted, org_id), (tsdb.models.key_total_blacklisted, key_id), ] ) elif outcome == Outcome.RATE_LIMITED: increment_list.extend( [ (tsdb.models.project_total_rejected, project_id), (tsdb.models.organization_total_rejected, org_id), (tsdb.models.key_total_rejected, key_id), ] ) if reason in FILTER_STAT_KEYS_TO_VALUES: increment_list.append((FILTER_STAT_KEYS_TO_VALUES[reason], project_id)) increment_list = [(model, key) for model, key in increment_list if key is not None] if increment_list: tsdb.incr_multi(increment_list, timestamp=timestamp) # Send a snuba metrics payload. outcomes_publisher.publish( outcomes["topic"], json.dumps( { "timestamp": timestamp, "org_id": org_id, "project_id": project_id, "key_id": key_id, "outcome": outcome.value, "reason": reason, "event_id": event_id, } ), ) metrics.incr( "events.outcomes", skip_internal=True, tags={"outcome": outcome.name.lower(), "reason": reason}, )
def digest(request): random = get_random(request) # TODO: Refactor all of these into something more manageable. org = Organization(id=1, slug="example", name="Example Organization") project = Project(id=1, slug="example", name="Example Project", organization=org) rules = { i: Rule(id=i, project=project, label="Rule #%s" % (i,)) for i in range(1, random.randint(2, 4)) } state = { "project": project, "groups": {}, "rules": rules, "event_counts": {}, "user_counts": {}, } records = [] event_sequence = itertools.count(1) group_generator = make_group_generator(random, project) for i in range(random.randint(1, 30)): group = next(group_generator) state["groups"][group.id] = group offset = timedelta(seconds=0) for i in range(random.randint(1, 10)): offset += timedelta(seconds=random.random() * 120) event = Event( id=next(event_sequence), event_id=uuid.uuid4().hex, project=project, group=group, message=group.message, data=load_data("python"), datetime=to_datetime( random.randint(to_timestamp(group.first_seen), to_timestamp(group.last_seen)) ), ) records.append( Record( event.event_id, Notification( event, random.sample(state["rules"], random.randint(1, len(state["rules"]))) ), to_timestamp(event.datetime), ) ) state["event_counts"][group.id] = random.randint(10, 1e4) state["user_counts"][group.id] = random.randint(10, 1e4) digest = build_digest(project, records, state) start, end, counts = get_digest_metadata(digest) context = { "project": project, "counts": counts, "digest": digest, "start": start, "end": end, "referrer": "digest_email", } add_unsubscribe_link(context) return MailPreview( html_template="sentry/emails/digests/body.html", text_template="sentry/emails/digests/body.txt", context=context, ).render(request)
def datetime(self) -> Optional[datetime]: return to_datetime(self.timestamp)
def process_event(event_manager, project, key, remote_addr, helper, attachments): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() tsdb_start_time = to_datetime(start_time) should_filter, filter_reason = event_manager.should_filter() if should_filter: increment_list = [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_blacklisted, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_blacklisted, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_blacklisted, key.id), ] try: increment_list.append( (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id)) # should error when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES except KeyError: pass tsdb.incr_multi( increment_list, timestamp=tsdb_start_time, ) metrics.incr('events.blacklisted', tags={'reason': filter_reason}, skip_internal=False) event_filtered.send_robust( ip=remote_addr, project=project, sender=process_event, ) raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, )) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug('Dropped event due to error with rate limiter') tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_rejected, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_rejected, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_rejected, key.id), ], timestamp=tsdb_start_time, ) metrics.incr( 'events.dropped', tags={ 'reason': rate_limit.reason_code if rate_limit else 'unknown', }, skip_internal=False, ) event_dropped.send_robust( ip=remote_addr, project=project, reason_code=rate_limit.reason_code if rate_limit else None, sender=process_event, ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.key_total_received, key.id), ], timestamp=tsdb_start_time, ) org_options = OrganizationOption.objects.get_all_values( project.organization_id) data = event_manager.get_data() del event_manager event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % ( project.id, event_id, ) if cache.get(cache_key) is not None: raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, )) scrub_ip_address = ( org_options.get('sentry:require_scrub_ip_address', False) or project.get_option('sentry:scrub_ip_address', False)) scrub_data = (org_options.get('sentry:require_scrub_data', False) or project.get_option('sentry:scrub_data', True)) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = (org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, [])) exclude_fields_key = 'sentry:safe_fields' exclude_fields = (org_options.get(exclude_fields_key, []) + project.get_option(exclude_fields_key, [])) scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or project.get_option('sentry:scrub_defaults', True)) SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields, ).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, '', 60 * 5) api_logger.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=process_event, ) return event_id
def merge_frequencies(self, model, destination, sources, timestamp=None, environment_ids=None): environment_ids = list((set(environment_ids) if environment_ids is not None else set()).union([None])) self.validate_arguments([model], environment_ids) if not self.enable_frequency_sketches: return rollups = [] for rollup, samples in self.rollups.items(): _, series = self.get_optimal_rollup_series( to_datetime( self.get_earliest_timestamp(rollup, timestamp=timestamp)), end=None, rollup=rollup, ) rollups.append((rollup, map(to_datetime, series))) for (cluster, durable ), environment_ids in self.get_cluster_groups(environment_ids): exports = defaultdict(list) for source in sources: for rollup, series in rollups: for timestamp in series: keys = [] for environment_id in environment_ids: keys.extend( self.make_frequency_table_keys( model, rollup, to_timestamp(timestamp), source, environment_id)) arguments = ["EXPORT"] + list( self.DEFAULT_SKETCH_PARAMETERS) exports[source].extend([(CountMinScript, keys, arguments), ["DEL"] + keys]) try: responses = cluster.execute_commands(exports) except Exception: if durable: raise else: continue imports = [] for source, results in responses.items(): results = iter(results) for rollup, series in rollups: for timestamp in series: for environment_id, payload in zip( environment_ids, next(results).value): imports.append(( CountMinScript, self.make_frequency_table_keys( model, rollup, to_timestamp(timestamp), destination, environment_id, ), ["IMPORT"] + list(self.DEFAULT_SKETCH_PARAMETERS) + [payload], )) next(results) # pop off the result of DEL try: cluster.execute_commands({destination: imports}) except Exception: if durable: raise
def get_constrained_date_range( params, allowed_resolution: AllowedResolution = AllowedResolution.one_hour, max_points=MAX_POINTS, ) -> Tuple[datetime, datetime, int]: interval = parse_stats_period(params.get("interval", "1h")) interval = int(3600 if interval is None else interval.total_seconds()) smallest_interval, interval_str = allowed_resolution.value if interval % smallest_interval != 0 or interval < smallest_interval: raise InvalidParams( f"The interval has to be a multiple of the minimum interval of {interval_str}." ) if interval > ONE_DAY: raise InvalidParams("The interval has to be less than one day.") if ONE_DAY % interval != 0: raise InvalidParams( "The interval should divide one day without a remainder.") using_minute_resolution = interval % ONE_HOUR != 0 start, end = get_date_range_from_params(params) now = get_now() # if `end` is explicitly given, we add a second to it, so it is treated as # inclusive. the rounding logic down below will take care of the rest. if params.get("end"): end += timedelta(seconds=1) date_range = end - start # round the range up to a multiple of the interval. # the minimum is 1h so the "totals" will not go out of sync, as they will # use the materialized storage due to no grouping on the `started` column. # NOTE: we can remove the difference between `interval` / `rounding_interval` # as soon as snuba can provide us with grouped totals in the same query # as the timeseries (using `WITH ROLLUP` in clickhouse) rounding_interval = int(math.ceil(interval / ONE_HOUR) * ONE_HOUR) # Hack to disable the rounding interval for metrics-based queries: if interval < ONE_MINUTE: rounding_interval = interval date_range = timedelta( seconds=int(rounding_interval * math.ceil(date_range.total_seconds() / rounding_interval))) if using_minute_resolution: if date_range.total_seconds() > 6 * ONE_HOUR: raise InvalidParams( "The time-range when using one-minute resolution intervals is restricted to 6 hours." ) if (now - start).total_seconds() > 30 * ONE_DAY: raise InvalidParams( "The time-range when using one-minute resolution intervals is restricted to the last 30 days." ) if date_range.total_seconds() / interval > max_points: raise InvalidParams( "Your interval and date range would create too many results. " "Use a larger interval, or a smaller date range.") end_ts = int(rounding_interval * math.ceil(to_timestamp(end) / rounding_interval)) end = to_datetime(end_ts) # when expanding the rounding interval, we would adjust the end time too far # into the future, in which case the start time would not actually contain our # desired date range. adjust for this by extending the range by another interval. # for example, when "45m" means the range from 08:49:00-09:34:00, our rounding # has to go from 08:00:00 to 10:00:00. if rounding_interval > interval and (end - date_range) > start: date_range += timedelta(seconds=rounding_interval) start = end - date_range # snuba <-> sentry has a 5 minute cache for *exact* queries, which these # are because of the way we do our rounding. For that reason we round the end # of "realtime" queries to one minute into the future to get a one-minute cache instead. if end > now: end = to_datetime(ONE_MINUTE * (math.floor(to_timestamp(now) / ONE_MINUTE) + 1)) return start, end, interval
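# A small sketch of the rounding performed above, mirroring the "45m" example in
# the comments: the requested interval is bumped to a one-hour rounding interval
# and the end timestamp is rounded up to that boundary. Plain stdlib calls are
# used here instead of to_timestamp/to_datetime to keep the sketch self-contained.
import math
from datetime import datetime, timezone

ONE_HOUR = 3600
interval = 45 * 60                                                  # requested interval in seconds
rounding_interval = int(math.ceil(interval / ONE_HOUR) * ONE_HOUR)  # -> 3600

end = datetime(2016, 6, 1, 9, 34, tzinfo=timezone.utc)
end_ts = int(rounding_interval * math.ceil(end.timestamp() / rounding_interval))
print(datetime.fromtimestamp(end_ts, timezone.utc))                 # 2016-06-01 10:00:00+00:00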
def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs): metrics.incr('events.total') if not data: raise APIError('No JSON data was found') remote_addr = request.META['REMOTE_ADDR'] data = LazyData( data=data, content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''), helper=helper, project=project, key=key, auth=auth, client_ip=remote_addr, ) event_received.send_robust( ip=remote_addr, project=project, sender=type(self), ) start_time = time() tsdb_start_time = to_datetime(start_time) should_filter, filter_reason = helper.should_filter( project, data, ip_address=remote_addr) if should_filter: increment_list = [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_blacklisted, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_blacklisted, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_blacklisted, key.id), ] try: increment_list.append( (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id)) # should error when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES except KeyError: pass tsdb.incr_multi( increment_list, timestamp=tsdb_start_time, ) metrics.incr('events.blacklisted', tags={ 'reason': filter_reason}) event_filtered.send_robust( ip=remote_addr, project=project, sender=type(self), ) raise APIForbidden('Event dropped due to filter: %s' % (filter_reason,)) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute( quotas.is_rate_limited, project=project, key=key, _with_transaction=False ) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: helper.log.debug( 'Dropped event due to error with rate limiter') tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_rejected, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_rejected, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_rejected, key.id), ], timestamp=tsdb_start_time, ) metrics.incr( 'events.dropped', tags={ 'reason': rate_limit.reason_code if rate_limit else 'unknown', } ) event_dropped.send_robust( ip=remote_addr, project=project, sender=type(self), reason_code=rate_limit.reason_code if rate_limit else None, ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.key_total_received, key.id), ], timestamp=tsdb_start_time, ) org_options = OrganizationOption.objects.get_all_values( project.organization_id) event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % (project.id, event_id, ) if cache.get(cache_key) is not None: raise APIForbidden( 'An event with the same ID already exists (%s)' % (event_id, )) scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or project.get_option('sentry:scrub_ip_address', False)) scrub_data = (org_options.get('sentry:require_scrub_data', False) or project.get_option('sentry:scrub_data', True)) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = ( org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, []) ) exclude_fields_key = 'sentry:safe_fields' exclude_fields = ( org_options.get(exclude_fields_key, []) + project.get_option(exclude_fields_key, []) ) scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or project.get_option('sentry:scrub_defaults', True)) SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields, ).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, '', 60 * 5) helper.log.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=type(self), ) return event_id
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None, reprocessing_rev=None): """If processing failed we put the original data from the cache into a raw event. Returns `True` if a failed event was inserted """ reprocessing_active = ProjectOption.objects.get_value( project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT ) # In case there is reprocessing active but the current reprocessing # revision is already different than when we started, we want to # immediately retry the event. This resolves the problem when # otherwise a concurrent change of debug symbols might leave a # reprocessing issue stuck in the project forever. if reprocessing_active and \ reprocessing.get_reprocessing_revision(project_id, cached=False) != \ reprocessing_rev: raise RetryProcessing() # The first time we encounter a failed event and the hint was cleared # we send a notification. sent_notification = ProjectOption.objects.get_value( project_id, 'sentry:sent_failed_event_hint', False ) if not sent_notification: project = Project.objects.get_from_cache(id=project_id) Activity.objects.create( type=Activity.NEW_PROCESSING_ISSUES, project=project, datetime=to_datetime(start_time), data={'reprocessing_active': reprocessing_active, 'issues': issues}, ).send_notification() ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True) # If reprocessing is not active we bail now without creating the # processing issues if not reprocessing_active: return False # We need to get the original data here instead of passing the data in # from the last processing step because we do not want any # modifications to take place. delete_raw_event(project_id, event_id) data = default_cache.get(cache_key) if data is None: metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'}, skip_internal=False) error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key}) return True data = CanonicalKeyDict(data) from sentry.models import RawEvent, ProcessingIssue raw_event = RawEvent.objects.create( project_id=project_id, event_id=event_id, datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc), data=data ) for issue in issues: ProcessingIssue.objects.record_processing_issue( raw_event=raw_event, scope=issue['scope'], object=issue['object'], type=issue['type'], data=issue['data'], ) default_cache.delete(cache_key) return True
def merge_frequencies(self, model, destination, sources, timestamp=None, environment_ids=None): environment_ids = list( (set(environment_ids) if environment_ids is not None else set()).union( [None])) self.validate_arguments([model], environment_ids) if not self.enable_frequency_sketches: return rollups = [] for rollup, samples in self.rollups.items(): _, series = self.get_optimal_rollup_series( to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)), end=None, rollup=rollup, ) rollups.append((rollup, map(to_datetime, series), )) for (cluster, durable), environment_ids in self.get_cluster_groups(environment_ids): exports = defaultdict(list) for source in sources: for rollup, series in rollups: for timestamp in series: keys = [] for environment_id in environment_ids: keys.extend( self.make_frequency_table_keys( model, rollup, to_timestamp(timestamp), source, environment_id, ) ) arguments = ['EXPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS) exports[source].extend( [ (CountMinScript, keys, arguments), ['DEL'] + keys, ] ) try: responses = cluster.execute_commands(exports) except Exception: if durable: raise else: continue imports = [] for source, results in responses.items(): results = iter(results) for rollup, series in rollups: for timestamp in series: for environment_id, payload in zip(environment_ids, next(results).value): imports.append( ( CountMinScript, self.make_frequency_table_keys( model, rollup, to_timestamp(timestamp), destination, environment_id, ), ['IMPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS) + [payload], ), ) next(results) # pop off the result of DEL try: cluster.execute_commands({ destination: imports, }) except Exception: if durable: raise
def get_project_release_stats(project_id, release, stat, rollup, start, end, environments=None): assert stat in ("users", "sessions") # since snuba end queries are exclusive of the time and we're bucketing to # a full hour, we need to round to the next hour since snuba is exclusive # on the end. end = to_datetime((to_timestamp(end) // DATASET_BUCKET + 1) * DATASET_BUCKET) filter_keys = {"project_id": [project_id]} conditions = [["release", "=", release]] if environments is not None: conditions.append(["environment", "IN", environments]) buckets = int((end - start).total_seconds() / rollup) stats = _make_stats(start, rollup, buckets, default=None) # Due to the nature of the probabilistic data structures some # subtractions can become negative. As such we're making sure a number # never goes below zero to avoid confusion. totals = { stat: 0, stat + "_healthy": 0, stat + "_crashed": 0, stat + "_abnormal": 0, stat + "_errored": 0, } for rv in raw_query( dataset=Dataset.Sessions, selected_columns=[ "bucketed_started", stat, stat + "_crashed", stat + "_abnormal", stat + "_errored", "duration_quantiles", ], groupby=["bucketed_started"], start=start, end=end, rollup=rollup, conditions=conditions, filter_keys=filter_keys, referrer="sessions.release-stats-details", )["data"]: ts = parse_snuba_datetime(rv["bucketed_started"]) bucket = int((ts - start).total_seconds() / rollup) stats[bucket][1] = { stat: rv[stat], stat + "_healthy": max(0, rv[stat] - rv[stat + "_errored"]), stat + "_crashed": rv[stat + "_crashed"], stat + "_abnormal": rv[stat + "_abnormal"], stat + "_errored": max( 0, rv[stat + "_errored"] - rv[stat + "_crashed"] - rv[stat + "_abnormal"] ), } stats[bucket][1].update(extract_duration_quantiles(rv)) # Session stats we can sum up directly without another query # as the data becomes available. if stat == "sessions": for k in totals: totals[k] += stats[bucket][1][k] for idx, bucket in enumerate(stats): if bucket[1] is None: stats[idx][1] = { stat: 0, stat + "_healthy": 0, stat + "_crashed": 0, stat + "_abnormal": 0, stat + "_errored": 0, "duration_p50": None, "duration_p90": None, } # For users we need a secondary query over the entire time range if stat == "users": rows = raw_query( dataset=Dataset.Sessions, selected_columns=["users", "users_crashed", "users_abnormal", "users_errored"], start=start, end=end, conditions=conditions, filter_keys=filter_keys, referrer="sessions.crash-free-breakdown-users", )["data"] if rows: rv = rows[0] totals = { "users": rv["users"], "users_healthy": max(0, rv["users"] - rv["users_errored"]), "users_crashed": rv["users_crashed"], "users_abnormal": rv["users_abnormal"], "users_errored": max( 0, rv["users_errored"] - rv["users_crashed"] - rv["users_abnormal"] ), } return stats, totals
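# Sketch of the end-time rounding at the top of get_project_release_stats,
# assuming DATASET_BUCKET is 3600 (one-hour buckets) purely for illustration:
# any end time that falls inside a bucket is pushed to the start of the next
# bucket, because snuba treats the end of the range as exclusive.
DATASET_BUCKET = 3600
end_ts = 1464773640                                       # 2016-06-01 09:34:00 UTC
rounded = (end_ts // DATASET_BUCKET + 1) * DATASET_BUCKET
assert rounded == 1464775200                              # 2016-06-01 10:00:00 UTC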
def merge_distinct_counts(self, model, destination, sources, timestamp=None): rollups = {} for rollup, samples in self.rollups.items(): _, series = self.get_optimal_rollup_series( to_datetime( self.get_earliest_timestamp(rollup, timestamp=timestamp)), end=None, rollup=rollup, ) rollups[rollup] = map(to_datetime, series) temporary_id = uuid.uuid1().hex def make_temporary_key(key): return '{}{}:{}'.format(self.prefix, temporary_id, key) data = {} for rollup, series in rollups.items(): data[rollup] = {timestamp: [] for timestamp in series} with self.cluster.fanout() as client: for source in sources: c = client.target_key(source) for rollup, series in data.items(): for timestamp, results in series.items(): key = self.make_key( model, rollup, to_timestamp(timestamp), source, ) results.append(c.get(key)) c.delete(key) with self.cluster.fanout() as client: c = client.target_key(destination) temporary_key_sequence = itertools.count() for rollup, series in data.items(): for timestamp, results in series.items(): values = {} for result in results: if result.value is None: continue k = make_temporary_key(next(temporary_key_sequence)) values[k] = result.value if values: key = self.make_key( model, rollup, to_timestamp(timestamp), destination, ) c.mset(values) c.pfmerge(key, key, *values.keys()) c.delete(*values.keys()) c.expireat( key, self.calculate_expiry( rollup, self.rollups[rollup], timestamp, ), )
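# Minimal redis-py sketch of the HyperLogLog merge trick used above: the raw HLL
# payload of a source key is copied under a temporary key, merged into the
# destination with PFMERGE, then deleted. The key names and the local `redis`
# client are illustrative assumptions, not the real TSDB key scheme.
import uuid
import redis

r = redis.Redis()
source = "ts:users:3600:1464775200:41"
destination = "ts:users:3600:1464775200:42"
r.pfadd(source, "user-1", "user-2")        # pretend these counters already exist
r.pfadd(destination, "user-2", "user-3")

payload = r.get(source)                    # serialized HLL string
r.delete(source)

temp = "tmp:%s" % uuid.uuid1().hex
r.set(temp, payload)                       # place the payload where PFMERGE can see it
r.pfmerge(destination, destination, temp)  # union the two distinct-count sketches
r.delete(temp)

assert r.pfcount(destination) == 3         # user-1, user-2, user-3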
def create_failed_event( cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None ): """If processing failed we put the original data from the cache into a raw event. Returns `True` if a failed event was inserted """ # We can only create failed events for events that can potentially # create failed events. if not reprocessing.event_supports_reprocessing(data): return False reprocessing_active = ProjectOption.objects.get_value( project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT ) # In case there is reprocessing active but the current reprocessing # revision is already different than when we started, we want to # immediately retry the event. This resolves the problem when # otherwise a concurrent change of debug symbols might leave a # reprocessing issue stuck in the project forever. if ( reprocessing_active and reprocessing.get_reprocessing_revision(project_id, cached=False) != reprocessing_rev ): raise RetryProcessing() # The first time we encounter a failed event and the hint was cleared # we send a notification. sent_notification = ProjectOption.objects.get_value( project_id, "sentry:sent_failed_event_hint", False ) if not sent_notification: project = Project.objects.get_from_cache(id=project_id) Activity.objects.create( type=Activity.NEW_PROCESSING_ISSUES, project=project, datetime=to_datetime(start_time), data={"reprocessing_active": reprocessing_active, "issues": issues}, ).send_notification() ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True) # If reprocessing is not active we bail now without creating the # processing issues if not reprocessing_active: return False # We need to get the original data here instead of passing the data in # from the last processing step because we do not want any # modifications to take place. delete_raw_event(project_id, event_id) data = default_cache.get(cache_key) if data is None: metrics.incr("events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False) error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key}) return True data = CanonicalKeyDict(data) from sentry.models import RawEvent, ProcessingIssue raw_event = RawEvent.objects.create( project_id=project_id, event_id=event_id, datetime=datetime.utcfromtimestamp(data["timestamp"]).replace(tzinfo=timezone.utc), data=data, ) for issue in issues: ProcessingIssue.objects.record_processing_issue( raw_event=raw_event, scope=issue["scope"], object=issue["object"], type=issue["type"], data=issue["data"], ) default_cache.delete(cache_key) return True
def alert(request): platform = request.GET.get('platform', 'python') org = Organization( id=1, slug='example', name='Example', ) team = Team( id=1, slug='example', name='Example', organization=org, ) project = Project( id=1, slug='example', name='Example', team=team, organization=org, ) random = get_random(request) group = next(make_group_generator(random, project)) event = Event( id=1, event_id='44f1419e73884cd2b45c79918f4b6dc4', project=project, group=group, message=group.message, data=load_data(platform), datetime=to_datetime( random.randint( to_timestamp(group.first_seen), to_timestamp(group.last_seen), ), ), ) rule = Rule(label="An example rule") interface_list = [] for interface in six.itervalues(event.interfaces): body = interface.to_email_html(event) if not body: continue interface_list.append((interface.get_title(), mark_safe(body))) return MailPreview( html_template='sentry/emails/error.html', text_template='sentry/emails/error.txt', context={ 'rule': rule, 'group': group, 'event': event, 'link': 'http://example.com/link', 'interfaces': interface_list, 'project_label': project.name, 'tags': [('logger', 'javascript'), ('environment', 'prod'), ('level', 'error'), ('device', 'Other')] }, ).render(request)
def build_project_breakdown_series(reports): def get_legend_data(report): accepted_errors, accepted_transactions, filtered, rate_limited = report.series_outcomes return { "accepted_errors": accepted_errors, "accepted_transactions": accepted_transactions, "filtered": filtered, "rate_limited": rate_limited, } # Find the reports with the most total events. (The number of reports to # keep is the same as the number of colors available to use in the legend.) instances = map( operator.itemgetter(0), sorted( reports.items(), key=lambda instance__report: sum( sum(values) for timestamp, values in instance__report[1][0]), reverse=True, ), )[:len(project_breakdown_colors)] # Start building the list of items to include in the report chart. This # is a list of [Key, Report] pairs, in *ascending* order of the total sum # of values in the series. (This is so when we render the series, the # largest color blocks are at the bottom and it feels appropriately # weighted.) selections = map( lambda instance__color: ( Key( instance__color[0].slug, instance__color[0].get_absolute_url(), instance__color[1], get_legend_data(reports[instance__color[0]]), ), reports[instance__color[0]], ), zip(instances, project_breakdown_colors), )[::-1] # Collect any reports that weren't in the selection set, merge them # together, and add the merged report at the top (front) of the stack. overflow = set(reports) - set(instances) if overflow: overflow_report = reduce(merge_reports, [reports[instance] for instance in overflow]) selections.insert( 0, (Key("Other", None, "#f2f0fa", get_legend_data(overflow_report)), overflow_report)) def summarize(key, points): total = sum(points) return [(key, total)] if total else [] # Collect all of the independent series into a single series to make it # easier to render, resulting in a series where each value is a sequence of # (key, count) pairs. series = reduce( merge_series, [ series_map(partial(summarize, key), report[0]) for key, report in selections ], ) legend = [key for key, value in reversed(selections)] return { "points": [(to_datetime(timestamp), value) for timestamp, value in series], "maximum": max(sum(count for key, count in value) for timestamp, value in series), "legend": { "rows": legend, "total": Key("Total", None, total_color, reduce(merge_mappings, [key.data for key in legend])), }, }
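# A small illustration of the `summarize` step defined above (repeated here so it
# runs standalone): each time bucket's raw values collapse to a single
# (key, total) pair, and empty buckets disappear so the stacked series only
# carries keys that actually saw events. A plain string stands in for the Key.
def summarize(key, points):
    total = sum(points)
    return [(key, total)] if total else []

print(summarize("example-project", [3, 0, 2]))  # [('example-project', 5)]
print(summarize("example-project", [0, 0, 0]))  # []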
def get_data(self, model, keys, start, end, rollup=None, environment_id=None, aggregation='count()', group_on_model=True, group_on_time=False): """ Normalizes all the TSDB parameters and sends a query to snuba. `group_on_time`: whether to add a GROUP BY clause on the 'time' field. `group_on_model`: whether to add a GROUP BY clause on the primary model. """ model_columns = self.model_columns.get(model) if model_columns is None: raise Exception("Unsupported TSDBModel: {}".format(model.name)) model_group, model_aggregate = model_columns groupby = [] if group_on_model and model_group is not None: groupby.append(model_group) if group_on_time: groupby.append('time') if aggregation == 'count()' and model_aggregate is not None: # Special case, because count has different semantics, we change: # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate` groupby.append(model_aggregate) model_aggregate = None keys_map = dict(zip(model_columns, self.flatten_keys(keys))) keys_map = { k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None } if environment_id is not None: keys_map['environment'] = [environment_id] aggregations = [[aggregation, model_aggregate, 'aggregate']] # For historical compatibility with bucket-counted TSDB implementations # we grab the original bucketed series and add the rollup time to the # timestamp of the last bucket to get the end time. rollup, series = self.get_optimal_rollup_series(start, end, rollup) start = to_datetime(series[0]) end = to_datetime(series[-1] + rollup) result = snuba.query(start, end, groupby, None, keys_map, aggregations, rollup, referrer='tsdb') if group_on_time: keys_map['time'] = series self.zerofill(result, groupby, keys_map) self.trim(result, groupby, keys) return result
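# Hedged usage sketch for the snuba-backed get_data above. `tsdb` (an instance of
# the class this method belongs to), `TSDBModel.project` and `project_id` are
# illustrative assumptions.
from datetime import datetime, timedelta

end = datetime.utcnow()
start = end - timedelta(hours=24)
series_by_project = tsdb.get_data(
    TSDBModel.project,   # primary model to group on
    [project_id],        # keys: the project ids we care about
    start,
    end,
    rollup=3600,         # hourly buckets
    group_on_time=True,  # return a bucketed time series per project
)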
def _to_interval(timestamp, duration): return ( to_datetime(timestamp - duration), to_datetime(timestamp), )
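# Tiny usage note for _to_interval above: given an epoch timestamp and a duration
# in seconds, it returns the (start, end) datetimes of the window ending at that
# timestamp.
start, end = _to_interval(1464775200, 3600)
# start -> 2016-06-01 09:00:00+00:00, end -> 2016-06-01 10:00:00+00:00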
def digest(request): random = get_random(request) # TODO: Refactor all of these into something more manageable. org = Organization(id=1, slug="example", name="Example Organization") project = Project(id=1, slug="example", name="Example Project", organization=org) rules = { i: Rule(id=i, project=project, label="Rule #%s" % (i, )) for i in range(1, random.randint(2, 4)) } state = { "project": project, "groups": {}, "rules": rules, "event_counts": {}, "user_counts": {}, } records = [] group_generator = make_group_generator(random, project) for i in range(random.randint(1, 30)): group = next(group_generator) state["groups"][group.id] = group offset = timedelta(seconds=0) for i in range(random.randint(1, 10)): offset += timedelta(seconds=random.random() * 120) data = dict(load_data("python")) data["message"] = group.message data.pop("logentry", None) event_manager = EventManager(data) event_manager.normalize() data = event_manager.get_data() timestamp = to_datetime( random.randint(to_timestamp(group.first_seen), to_timestamp(group.last_seen))) event = SnubaEvent({ "event_id": uuid.uuid4().hex, "project_id": project.id, "group_id": group.id, "message": group.message, "data": data.data, "timestamp": timestamp.strftime("%Y-%m-%dT%H:%M:%S"), }) event.group = group records.append( Record( event.event_id, Notification( event, random.sample(state["rules"], random.randint(1, len(state["rules"])))), to_timestamp(event.datetime), )) state["event_counts"][group.id] = random.randint(10, 1e4) state["user_counts"][group.id] = random.randint(10, 1e4) digest = build_digest(project, records, state) start, end, counts = get_digest_metadata(digest) context = { "project": project, "counts": counts, "digest": digest, "start": start, "end": end, "referrer": "digest_email", } add_unsubscribe_link(context) return MailPreview( html_template="sentry/emails/digests/body.html", text_template="sentry/emails/digests/body.txt", context=context, ).render(request)