def test_integration(self):
    # End-to-end check: one project with one recorded data point should
    # result in exactly one report email, addressed per team member.
    Project.objects.all().delete()

    current = datetime(2016, 9, 12, tzinfo=pytz.utc)

    project = self.create_project(
        organization=self.organization,
        team=self.team,
        date_added=current - timedelta(days=90),
    )

    tsdb.incr(tsdb.models.project, project.id, current - timedelta(days=1))

    recipients = set(project.team.member_set.all())

    with self.tasks():
        with mock.patch.object(tsdb, 'get_earliest_timestamp') as earliest_mock:
            # Pin ``get_earliest_timestamp`` relative to the fixed "current"
            # timestamp so the data point above is not treated as expired
            # relative to the *actual* wall-clock time.
            earliest_mock.return_value = to_timestamp(current - timedelta(days=60))

            prepare_reports(timestamp=to_timestamp(current))

            assert len(mail.outbox) == len(recipients) == 1

            report = mail.outbox[0]
            assert self.organization.name in report.subject
def test_get_optimal_rollup_series_aligned_intervals(self, now):
    # With "now" sitting exactly on a rollup boundary, the returned series
    # must start at ``start`` and step forward one rollup at a time.
    now.return_value = datetime(2016, 8, 1, tzinfo=pytz.utc)

    # (lookback window, expected rollup, step between points, point count)
    scenarios = (
        (timedelta(seconds=30), 10, timedelta(seconds=10), 4),
        (timedelta(minutes=30), ONE_MINUTE, timedelta(minutes=1), 31),
        (timedelta(hours=5), ONE_HOUR, timedelta(hours=1), 6),
        (timedelta(days=7), ONE_DAY, timedelta(hours=24), 8),
    )

    for window, expected_rollup, step, point_count in scenarios:
        start = now() - window
        actual = self.tsdb.get_optimal_rollup_series(start)
        expected_series = [
            to_timestamp(start + step * index) for index in xrange(point_count)
        ]
        assert actual == (expected_rollup, expected_series)
def test_get_optimal_rollup_series_offset_intervals(self, now):
    # This test is a funny one (notice it doesn't return a range that
    # includes the start position.) This occurs because the algorithm for
    # determining the series to be returned will attempt to return the same
    # duration of time as represented by the start and end timestamps, but
    # doesn't necessarily return data *from that specific interval* (the
    # end timestamp is always included.)

    def utc(*parts):
        # Shorthand for a timezone-aware UTC datetime.
        return datetime(*parts, tzinfo=pytz.utc)

    midnight = to_timestamp(utc(2016, 8, 1, 0, 0, 0))

    # Ten second rollup, "now" is 15 seconds past midnight.
    now.return_value = utc(2016, 8, 1, 0, 0, 15)
    start = now() - timedelta(seconds=19)
    result = self.tsdb.get_optimal_rollup_series(start, rollup=10)
    assert result == (
        10,
        [
            midnight,
            to_timestamp(utc(2016, 8, 1, 0, 0, 10)),
        ],
    )

    # One minute rollup, "now" is 30 seconds past midnight.
    now.return_value = utc(2016, 8, 1, 0, 0, 30)
    start = now() - timedelta(seconds=ONE_MINUTE - 1)
    result = self.tsdb.get_optimal_rollup_series(start, rollup=ONE_MINUTE)
    assert result == (ONE_MINUTE, [midnight])

    # One day rollup, "now" is noon.
    now.return_value = utc(2016, 8, 1, 12)
    start = now() - timedelta(seconds=ONE_DAY - 1)
    result = self.tsdb.get_optimal_rollup_series(start, rollup=ONE_DAY)
    assert result == (ONE_DAY, [to_timestamp(utc(2016, 8, 1, 0))])
def merge_distinct_counts(self, model, destination, sources, timestamp=None):
    """
    Fold the distinct counters of every key in ``sources`` into the counter
    stored for ``destination``, deleting the source counters as they are read.
    """
    active_series = self.get_active_series(timestamp=timestamp)

    batch_id = uuid.uuid1().hex

    # rollup -> {series timestamp -> [pending results read from sources]}
    pending = {
        rollup: {point: [] for point in series}
        for rollup, series in active_series.items()
    }

    # Phase one: read and remove every source counter.
    with self.cluster.fanout() as client:
        for source in sources:
            source_client = client.target_key(source)
            for rollup, buckets in pending.items():
                for point, fetched in buckets.items():
                    source_key = self.make_key(
                        model,
                        rollup,
                        to_timestamp(point),
                        source,
                    )
                    fetched.append(source_client.get(source_key))
                    source_client.delete(source_key)

    # Phase two: stage the fetched values under temporary keys next to the
    # destination, merge them into the destination counter, then clean up.
    with self.cluster.fanout() as client:
        destination_client = client.target_key(destination)

        sequence = itertools.count()

        for rollup, buckets in pending.items():
            for point, fetched in buckets.items():
                staged = {}
                for item in fetched:
                    if item.value is not None:
                        temporary_key = '{}{}:{}'.format(
                            self.prefix, batch_id, next(sequence))
                        staged[temporary_key] = item.value

                if not staged:
                    continue

                destination_key = self.make_key(
                    model,
                    rollup,
                    to_timestamp(point),
                    destination,
                )
                staged_keys = staged.keys()
                destination_client.mset(staged)
                destination_client.pfmerge(
                    destination_key, destination_key, *staged_keys)
                destination_client.delete(*staged_keys)
                destination_client.expireat(
                    destination_key,
                    self.calculate_expiry(
                        rollup,
                        self.rollups[rollup],
                        point,
                    ),
                )
def merge_frequencies(self, model, destination, sources, timestamp=None):
    """
    Move the frequency sketches stored for each key in ``sources`` into the
    sketches stored for ``destination``.

    Does nothing when ``self.enable_frequency_sketches`` is falsy. Source
    tables are exported and deleted in one batch, then the exported payloads
    are imported into the destination tables in a second batch.
    """
    if not self.enable_frequency_sketches:
        return

    rollups = []
    for rollup, _samples in self.rollups.items():
        _, series = self.get_optimal_rollup_series(
            to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
            end=None,
            rollup=rollup,
        )
        # The series is walked once per source below and once more per
        # result set when importing, so it has to be a real list. A bare
        # ``map`` is a one-shot iterator on Python 3 and would be empty on
        # every pass after the first.
        rollups.append((
            rollup,
            [to_datetime(item) for item in series],
        ))

    exports = defaultdict(list)

    for source in sources:
        for rollup, series in rollups:
            for point in series:
                keys = self.make_frequency_table_keys(
                    model,
                    rollup,
                    to_timestamp(point),
                    source,
                )
                arguments = ['EXPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS)
                # Each table contributes two commands: the export itself and
                # the deletion of the source keys.
                exports[source].extend([
                    (CountMinScript, keys, arguments),
                    ('DEL',) + tuple(keys),
                ])

    imports = []

    for source, results in self.cluster.execute_commands(exports).items():
        results = iter(results)
        # Results come back in the order the commands were queued above, so
        # walk the same (rollup, point) sequence to pair them up.
        for rollup, series in rollups:
            for point in series:
                imports.append((
                    CountMinScript,
                    self.make_frequency_table_keys(
                        model,
                        rollup,
                        to_timestamp(point),
                        destination,
                    ),
                    ['IMPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS) + next(results).value,
                ))
                next(results)  # pop off the result of DEL

    self.cluster.execute_commands({
        destination: imports,
    })
def get_optimal_rollup(self, start_timestamp, end_timestamp):
    """
    Pick a rollup for the given time range.

    ``self.rollups`` is scanned in order and the first rollup whose total
    coverage (``rollup * samples``) spans the whole range is returned. If no
    rollup covers the range, the rollup of the last entry is used.
    """
    end_epoch = int(to_timestamp(end_timestamp))
    start_epoch = int(to_timestamp(start_timestamp))
    span = end_epoch - start_epoch

    first_covering = next(
        (
            granularity
            for granularity, sample_count in self.rollups
            if granularity * sample_count >= span
        ),
        None,
    )
    if first_covering is not None:
        return first_covering

    last_entry = self.rollups[-1]
    return last_entry[0]
def delete_distinct_counts(self, models, keys, start=None, end=None,
                           timestamp=None, environment_ids=None):
    """
    Delete the distinct counters for every combination of model, key,
    active series point and environment.

    ``environment_ids`` may be ``None`` or any iterable of ids. The ``None``
    environment is always included in the set of environments deleted.
    """
    # Normalize *before* validating so ``validate_arguments`` always gets a
    # real collection, never the ``None`` default. This is the same order
    # ``delete_frequencies`` uses.
    # NOTE(review): ``validate_arguments`` is not visible here; the other
    # call sites pass it lists that contain ``None``, so a set containing
    # ``None`` is assumed to be acceptable -- confirm.
    environment_ids = (
        set(environment_ids) if environment_ids is not None else set()
    ).union([None])

    self.validate_arguments(models, environment_ids)

    rollups = self.get_active_series(start, end, timestamp)

    for cluster, cluster_environment_ids in self.get_cluster_groups(environment_ids):
        with cluster.fanout() as client:
            for rollup, series in rollups.items():
                for point in series:
                    for model in models:
                        for key in keys:
                            c = client.target_key(key)
                            for environment_id in cluster_environment_ids:
                                c.delete(
                                    self.make_key(
                                        model,
                                        rollup,
                                        to_timestamp(point),
                                        key,
                                        environment_id,
                                    )
                                )
def normalize_crumb(cls, crumb):
    """
    Build the normalized form of a single breadcrumb.

    Raises ``InterfaceValidationError`` when no timestamp can be parsed.
    Optional attributes are only copied over when present; ``level`` is
    omitted when it is ``None`` or ``'info'``.
    """
    parsed = parse_new_timestamp(crumb.get('timestamp'))
    if parsed is None:
        raise InterfaceValidationError('Unable to determine timestamp for crumb')

    normalized = {
        'type': crumb.get('type') or 'default',
        'timestamp': to_timestamp(parsed),
    }

    level = crumb.get('level')
    if not (level is None or level == 'info'):
        normalized['level'] = level

    # Free-form text attributes are coerced to text and capped in size.
    for field, limit in (('message', 4096), ('category', 256)):
        text = crumb.get(field)
        if text is not None:
            normalized[field] = trim(unicode(text), limit)

    event_id = crumb.get('event_id')
    if event_id is not None:
        normalized['event_id'] = event_id

    if 'data' in crumb:
        normalized['data'] = trim(crumb['data'], 4096)

    return normalized
def normalize_crumb(cls, crumb):
    """
    Build the normalized form of a single breadcrumb.

    Raises ``InterfaceValidationError`` when no timestamp can be parsed.
    Optional attributes are only copied over when present; ``level`` is
    omitted when it is ``None`` or ``'info'``. Values in ``data`` that are
    not strings are serialized with ``json.dumps``.

    The incoming ``crumb`` is left untouched: the stringified ``data`` is
    built as a new mapping instead of rewriting ``crumb['data']`` in place.
    """
    ty = crumb.get('type') or 'default'
    ts = parse_timestamp(crumb.get('timestamp'))
    if ts is None:
        raise InterfaceValidationError('Unable to determine timestamp for crumb')

    rv = {
        'type': ty,
        'timestamp': to_timestamp(ts),
    }

    level = crumb.get('level')
    if level not in (None, 'info'):
        rv['level'] = level

    msg = crumb.get('message')
    if msg is not None:
        rv['message'] = trim(six.text_type(msg), 4096)

    category = crumb.get('category')
    if category is not None:
        rv['category'] = trim(six.text_type(category), 256)

    event_id = crumb.get('event_id')
    if event_id is not None:
        rv['event_id'] = event_id

    data = crumb.get('data')
    if data:
        # Build a fresh dict so the caller's payload is not mutated as a
        # side effect of normalization.
        stringified = {
            key: value if isinstance(value, six.string_types) else json.dumps(value)
            for key, value in six.iteritems(data)
        }
        rv['data'] = trim(stringified, 4096)

    return rv
def delete_frequencies(self, models, keys, start=None, end=None,
                       timestamp=None, environment_ids=None):
    """
    Delete the frequency tables for every combination of model, key, active
    series point and environment. The ``None`` environment is always
    included. Clusters reported as non-durable are wrapped in
    ``SuppressionWrapper`` before use.
    """
    requested = set() if environment_ids is None else set(environment_ids)
    environment_ids = requested.union([None])

    self.validate_arguments(models, environment_ids)

    active_series = self.get_active_series(start, end, timestamp)

    for (cluster, durable), group_environment_ids in self.get_cluster_groups(environment_ids):
        fanout = cluster.fanout()
        manager = fanout if durable else SuppressionWrapper(fanout)

        with manager as client:
            for rollup, series in active_series.items():
                for point in series:
                    for model in models:
                        for key in keys:
                            target = client.target_key(key)
                            for environment_id in group_environment_ids:
                                table_keys = self.make_frequency_table_keys(
                                    model,
                                    rollup,
                                    to_timestamp(point),
                                    key,
                                    environment_id,
                                )
                                for table_key in table_keys:
                                    target.delete(table_key)
def record_multi(self, items, timestamp=None):
    """
    Record an occurence of an item in a distinct counter.

    ``items`` is an iterable of ``(model, key, values)`` triples; the values
    are added to the counter of every configured rollup and the counter's
    expiry is refreshed.
    """
    # ``timestamp`` is not actually a timestamp :( -- it is whatever
    # ``to_timestamp`` accepts; the epoch form is derived from it below.
    when = timezone.now() if timestamp is None else timestamp
    epoch = int(to_timestamp(when))

    with self.cluster.fanout() as client:
        for model, key, values in items:
            target = client.target_key(key)
            for rollup, max_values in six.iteritems(self.rollups):
                counter_key = self.make_key(model, rollup, epoch, key)
                target.pfadd(counter_key, *values)
                expires_at = self.calculate_expiry(rollup, max_values, when)
                target.expireat(counter_key, expires_at)
def test_releases_request(self):
    # A release-model range query must group by release and time, and gaps
    # in the backend response must be zero-filled.
    now = parse_datetime('2018-03-09T01:00:00Z')
    project = self.create_project()
    release = Release.objects.create(
        organization_id=self.organization.id,
        version='version X',
        date_added=now,
    )
    release.add_project(project)

    hours = [now + timedelta(hours=offset) for offset in range(4)]

    def snuba_response(request):
        # Validate the outgoing query, then answer with a single data point.
        body = json.loads(request.body)
        assert body['aggregations'] == [['count()', None, 'aggregate']]
        assert body['project'] == [project.id]
        assert body['groupby'] == ['release', 'time']
        assert ['release', 'IN', ['version X']] in body['conditions']

        payload = {
            'data': [
                {
                    'release': 'version X',
                    'time': '2018-03-09T01:00:00Z',
                    'aggregate': 100,
                },
            ],
            'meta': [{'name': 'release'}, {'name': 'time'}, {'name': 'aggregate'}],
        }
        return (200, {}, json.dumps(payload))

    with responses.RequestsMock() as rsps:
        rsps.add_callback(
            responses.POST,
            settings.SENTRY_SNUBA + '/query',
            callback=snuba_response,
        )

        results = self.db.get_range(
            TSDBModel.release, [release.id], hours[0], hours[-1], rollup=3600)

        expected_points = [
            (int(to_timestamp(hour)), 100 if hour == now else 0)
            for hour in hours
        ]
        assert results == {release.id: expected_points}
def test_environment_request(self):
    # Passing ``environment_id`` must add an environment-name condition to
    # the outgoing query; gaps in the response must be zero-filled.
    now = parse_datetime('2018-03-09T01:00:00Z')
    project = self.create_project()
    env = self.create_environment(project=project, name="prod")

    hours = [now + timedelta(hours=offset) for offset in range(4)]

    def snuba_response(request):
        # Validate the outgoing query, then answer with a single data point.
        body = json.loads(request.body)
        assert body['aggregations'] == [['count()', None, 'aggregate']]
        assert body['project'] == [project.id]
        assert body['groupby'] == ['project_id', 'time']
        assert ['environment', 'IN', ['prod']] in body['conditions']

        payload = {
            'data': [
                {
                    'project_id': project.id,
                    'time': '2018-03-09T01:00:00Z',
                    'aggregate': 100,
                },
            ],
            'meta': [{'name': 'project_id'}, {'name': 'time'}, {'name': 'aggregate'}],
        }
        return (200, {}, json.dumps(payload))

    with responses.RequestsMock() as rsps:
        rsps.add_callback(
            responses.POST,
            settings.SENTRY_SNUBA + '/query',
            callback=snuba_response,
        )

        results = self.db.get_range(
            TSDBModel.project,
            [project.id],
            hours[0],
            hours[-1],
            environment_id=env.id,
            rollup=3600,
        )

        expected_points = [
            (int(to_timestamp(hour)), 100 if hour == now else 0)
            for hour in hours
        ]
        assert results == {project.id: expected_points}
def get_range(self, model, keys, start, end, rollup=None, environment_id=None):
    """
    Return the counter values of each key over the given time range.

    The result maps every key to a list of ``(epoch, count)`` pairs sorted
    by epoch; points with no stored value are reported as ``0``.

    To get a range of data for group ID=[1, 2, 3]:

    >>> now = timezone.now()
    >>> get_range(TimeSeriesModel.group, [1, 2, 3],
    >>>           start=now - timedelta(days=1),
    >>>           end=now)
    """
    self.validate_arguments([model], [environment_id])

    rollup, series = self.get_optimal_rollup_series(start, end, rollup)
    # The series is walked once per key, so it has to be a real list. A bare
    # ``map`` is a one-shot iterator on Python 3, which would leave every
    # key after the first without any points.
    series = [to_datetime(item) for item in series]

    results = []
    cluster, _ = self.get_cluster(environment_id)
    with cluster.map() as client:
        for key in keys:
            for point in series:
                hash_key, hash_field = self.make_counter_key(
                    model, rollup, point, key, environment_id)
                results.append(
                    (to_timestamp(point), key, client.hget(hash_key, hash_field)))

    # ``count.value`` is read only after the ``map`` block has exited.
    results_by_key = defaultdict(dict)
    for epoch, key, count in results:
        results_by_key[key][epoch] = int(count.value or 0)

    return {
        key: sorted(points.items())
        for key, points in six.iteritems(results_by_key)
    }
def test_integration(self):
    # End-to-end check: one project with one recorded data point should
    # result in exactly one report email, addressed per team member.
    Project.objects.all().delete()

    current = datetime(2016, 9, 12, tzinfo=pytz.utc)
    created_at = current - timedelta(days=90)
    activity_at = current - timedelta(days=1)

    project = self.create_project(
        organization=self.organization,
        team=self.team,
        date_added=created_at,
    )

    tsdb.incr(tsdb.models.project, project.id, activity_at)

    recipients = set(project.team.member_set.all())

    with self.tasks():
        prepare_reports(timestamp=to_timestamp(current))

    assert len(mail.outbox) == len(recipients) == 1

    report = mail.outbox[0]
    assert self.organization.name in report.subject
def make_group_generator(random, project):
    """
    Yield an endless sequence of ``Group`` objects with randomized
    attributes, using ids 1, 2, 3, ...

    ``random`` is the random source every value is drawn from; passing a
    seeded instance makes the sequence reproducible. The groups are only
    constructed here, nothing in this function saves them.
    """
    epoch = to_timestamp(datetime(2016, 6, 1, 0, 0, 0, tzinfo=timezone.utc))

    # ``random.choice`` needs an indexable sequence; a dict keys view is not
    # indexable on Python 3, so take a list of the level keys.
    levels = list(LOG_LEVELS)

    thirty_days = 60 * 60 * 24 * 30

    for group_id in itertools.count(1):
        # NOTE: the order of the draws below determines the generated
        # sequence for a given seed -- keep it stable.
        first_seen = epoch + random.randint(0, thirty_days)
        last_seen = random.randint(first_seen, first_seen + thirty_days)

        culprit = make_culprit(random)
        level = random.choice(levels)
        message = make_message(random)

        group = Group(
            id=group_id,
            project=project,
            culprit=culprit,
            level=level,
            message=message,
            first_seen=to_datetime(first_seen),
            last_seen=to_datetime(last_seen),
            status=random.choice((GroupStatus.UNRESOLVED, GroupStatus.RESOLVED, )),
            data={
                'type': 'default',
                'metadata': {
                    'title': message,
                }
            }
        )

        # Most groups (80%) get generated metadata in place of the default.
        if random.random() < 0.8:
            group.data = make_group_metadata(random, group)

        yield group
def record_multi(self, items, timestamp=None, environment_id=None):
    """
    Record an occurence of an item in a distinct counter.

    ``items`` is an iterable of ``(model, key, values)`` triples. Each value
    set is recorded for the ``None`` environment as well as for
    ``environment_id``, in every configured rollup.
    """
    models = [model for model, _key, _values in items]
    self.validate_arguments(models, [environment_id])

    # ``timestamp`` is not actually a timestamp :( -- it is whatever
    # ``to_timestamp`` accepts; the epoch form is derived from it below.
    when = timezone.now() if timestamp is None else timestamp
    epoch = int(to_timestamp(when))

    cluster_groups = self.get_cluster_groups(set([None, environment_id]))
    for cluster, group_environment_ids in cluster_groups:
        with cluster.fanout() as client:
            for model, key, values in items:
                target = client.target_key(key)
                for rollup, max_values in six.iteritems(self.rollups):
                    for group_environment_id in group_environment_ids:
                        counter_key = self.make_key(
                            model,
                            rollup,
                            epoch,
                            key,
                            group_environment_id,
                        )
                        target.pfadd(counter_key, *values)
                        target.expireat(
                            counter_key,
                            self.calculate_expiry(rollup, max_values, when),
                        )
def serialize(self):
    """
    Return this object as a plain dict: base64-encoded uuid bytes, the
    datetime converted through ``to_timestamp``, plus ``type`` and ``data``.
    """
    encoded_uuid = b64encode(self.uuid.bytes)
    epoch = to_timestamp(self.datetime)

    payload = {}
    payload['uuid'] = encoded_uuid
    payload['timestamp'] = epoch
    payload['type'] = self.type
    payload['data'] = self.data
    return payload
def record_frequency_multi(self, requests, timestamp=None):
    """
    Increment frequency tables in bulk.

    ``requests`` is an iterable of ``(model, request)`` pairs where each
    ``request`` maps a key to a ``{member: score}`` mapping. All commands
    are grouped by key and sent to the cluster in a single call.
    """
    # ``timestamp`` is not actually a timestamp :( -- it is whatever
    # ``to_timestamp`` accepts; the epoch form is derived from it below.
    when = timezone.now() if timestamp is None else timestamp
    epoch = int(to_timestamp(when))

    commands = {}

    for model, request in requests:
        for key, items in request.iteritems():
            table_keys = []
            expirations = {}

            # Collect every table key that has to be incremented, along with
            # the expiry that applies to it.
            for rollup, max_values in self.rollups:
                chunk = self.make_frequency_table_keys(model, rollup, epoch, key)
                table_keys.extend(chunk)

                expiry = self.calculate_expiry(rollup, max_values, when)
                expirations.update((table_key, expiry) for table_key in chunk)

            arguments = ['INCR'] + list(self.DEFAULT_SKETCH_PARAMETERS)
            for member, score in items.items():
                arguments.append(score)
                arguments.append(member)

            # The same key may show up in several requests, so append to
            # whatever has been queued for it already.
            queued = commands.setdefault(key, [])
            queued.append((CountMinScript, table_keys, arguments))
            queued.extend(
                ('EXPIREAT', table_key, expires_at)
                for table_key, expires_at in expirations.items()
            )

    self.cluster.execute_commands(commands)
def test_range_rollups(self):
    project_id = self.proj1.id

    # Daily -- day buckets start on day boundaries
    day_start = self.now.replace(hour=0)
    days = [day_start + timedelta(days=offset) for offset in range(2)]
    daily = self.db.get_range(
        TSDBModel.project,
        [project_id],
        days[0],
        days[-1],
        rollup=86400,
    )
    assert daily == {
        project_id: [
            (timestamp(days[0]), 24),
            (timestamp(days[1]), 0),
        ],
    }

    # Minutely -- expect every 10th minute to have a 1, else 0
    minutes = [self.now + timedelta(minutes=offset) for offset in range(120)]
    expected = [
        (to_timestamp(minute), int(index % 10 == 0))
        for index, minute in enumerate(minutes)
    ]
    minutely = self.db.get_range(
        TSDBModel.project,
        [project_id],
        minutes[0],
        minutes[-1],
        rollup=60,
    )
    assert minutely == {project_id: expected}
def test_integration(self, has_feature):
    # End-to-end check with both report feature flags switched on: one
    # project with one recorded data point should produce one report email.
    Project.objects.all().delete()

    current = datetime(2016, 9, 12, tzinfo=pytz.utc)

    enabled_features = frozenset([
        'organizations:reports:deliver',
        'organizations:reports:prepare',
    ])

    def feature_enabled(name, *args, **kwargs):
        # Only the two report features are on; everything else is off.
        return name in enabled_features

    has_feature.side_effect = feature_enabled

    project = self.create_project(
        organization=self.organization,
        team=self.team,
    )

    tsdb.incr(tsdb.models.project, project.id, current - timedelta(days=1))

    recipients = set(project.team.member_set.all())

    with self.tasks():
        prepare_reports(timestamp=to_timestamp(current))

    assert len(mail.outbox) == len(recipients) == 1

    report = mail.outbox[0]
    assert self.organization.name in report.subject
def zerofill(data, start, end, rollup):
    """
    Pad a sorted series with empty buckets.

    ``data`` is a sequence of ``(epoch, value)`` pairs ordered by epoch.
    Both bounds are aligned down to a multiple of ``rollup``, and the range
    is then widened by one bucket before ``start`` and one bucket past
    ``end``. Every bucket in that range with no matching entry in ``data``
    is emitted as ``(epoch, [])``.
    """
    rv = []
    # Floor division keeps the bounds integral on Python 3 as well; true
    # division would produce floats, which are neither valid range
    # arguments nor aligned to the rollup.
    start = ((int(to_timestamp(start)) // rollup) * rollup) - rollup
    end = ((int(to_timestamp(end)) // rollup) * rollup) + rollup

    i = 0
    for key in six.moves.xrange(start, end, rollup):
        try:
            point = data[i]
            matched = point[0] == key
        except IndexError:
            # Ran out of data points: everything that remains is a gap.
            matched = False

        if matched:
            rv.append(point)
            i += 1
        else:
            rv.append((key, []))
    return rv
def test_get_group_backfill_attributes(self):
    # Backfilling two older events into a group should take the "first"
    # attributes from the oldest event and bump ``times_seen`` by two.
    now = datetime(2017, 5, 3, 6, 6, 6, tzinfo=pytz.utc)
    one_hour_ago = now - timedelta(hours=1)
    two_hours_ago = now - timedelta(hours=2)

    caches = get_caches()

    group = Group(
        active_at=now,
        first_seen=now,
        last_seen=now,
        platform='javascript',
        message='Hello from JavaScript',
        level=logging.INFO,
        score=Group.calculate_score(3, now),
        logger='javascript',
        times_seen=1,
        first_release=None,
        culprit='',
        data={
            'type': 'default',
            'last_received': to_timestamp(now),
            'metadata': {},
        },
    )

    python_event = Event(
        platform='python',
        message='Hello from Python',
        datetime=one_hour_ago,
        data={
            'type': 'default',
            'metadata': {},
            'tags': [
                ['level', 'error'],
                ['logger', 'python'],
            ],
        },
    )

    java_event = Event(
        platform='java',
        message='Hello from Java',
        datetime=two_hours_ago,
        data={
            'type': 'default',
            'metadata': {},
            'tags': [
                ['level', 'debug'],
                ['logger', 'java'],
            ],
        },
    )

    attributes = get_group_backfill_attributes(
        caches,
        group,
        [python_event, java_event],
    )

    assert attributes == {
        'active_at': two_hours_ago,
        'first_seen': two_hours_ago,
        'platform': 'java',
        'score': Group.calculate_score(3, now),
        'logger': 'java',
        'times_seen': 3,
        'first_release': None,
    }
def _fill_default_parameters(timestamp=None, rollup=None): if timestamp is None: timestamp = to_timestamp(floor_to_utc_day(timezone.now())) if rollup is None: rollup = 60 * 60 * 24 * 7 return (timestamp, rollup)
def get_data_for_date(date):
    """
    Look up the series value and display color for a calendar date.

    Returns ``(midnight_utc, {'value': ..., 'color': ...})`` where the value
    is ``None`` when the series has no entry for that day.
    """
    midnight = datetime(date.year, date.month, date.day, tzinfo=pytz.utc)
    value = series_value_map.get(to_timestamp(midnight))
    details = {
        'value': value,
        'color': value_color_map[value],
    }
    return (midnight, details)
def get_tag_value_paginator_for_projects(self, projects, environments, key, start, end,
                                         query=None, order_by='-last_seen'):
    """
    Return a ``SequencePaginator`` over the values of tag ``key`` in the
    given projects, optionally restricted to ``environments`` and to values
    containing ``query``.

    Only ``order_by='-last_seen'`` is supported; anything else raises
    ``ValueError``.
    """
    from sentry.api.paginator import SequencePaginator

    if order_by != '-last_seen':
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)
    if snuba_key in BLACKLISTED_COLUMNS:
        snuba_key = 'tags[%s]' % (key,)

    # Either match values containing the query text, or any non-empty value.
    if query:
        value_condition = [snuba_key, 'LIKE', u'%{}%'.format(query)]
    else:
        value_condition = [snuba_key, '!=', '']
    conditions = [value_condition]

    filters = {'project_id': projects}
    if environments:
        filters['environment'] = environments

    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ['count()', '', 'times_seen'],
            ['min', 'timestamp', 'first_seen'],
            ['max', 'timestamp', 'last_seen'],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer='tagstore.get_tag_value_paginator_for_projects',
    )

    tag_values = []
    for value, data in six.iteritems(results):
        tag_values.append(TagValue(key=key, value=value, **fix_tag_value_data(data)))

    descending = order_by.startswith('-')
    score_field = order_by.lstrip('-')

    # Entries are scored by the ordering field, in milliseconds.
    scored = []
    for tag_value in tag_values:
        score = int(to_timestamp(getattr(tag_value, score_field)) * 1000)
        scored.append((score, tag_value))

    return SequencePaginator(scored, reverse=descending)
def normalize_to_epoch(self, timestamp, seconds):
    """
    Round ``timestamp`` (a datetime object) down to a multiple of
    ``seconds`` and return it as an integer epoch timestamp.

    For example, with a one minute rollup the seconds and microseconds of
    the input are dropped.
    """
    whole_seconds = int(to_timestamp(timestamp))
    overshoot = whole_seconds % seconds
    return whole_seconds - overshoot
def event_to_record(event, rules):
    """
    Wrap ``event`` and the ids of ``rules`` in a ``Record`` keyed by the
    event id and timestamped with the event's datetime.

    An empty ``rules`` is logged as a warning but still produces a record.
    """
    if not rules:
        logger.warning('Creating record for %r that does not contain any rules!', event)

    record_key = event.event_id
    rule_ids = [rule.id for rule in rules]
    notification = Notification(strip_for_serialization(event), rule_ids)
    recorded_at = to_timestamp(event.datetime)

    return Record(record_key, notification, recorded_at)
def clean_series(start, stop, rollup, series):
    """
    Validate a series, ensuring that it follows the specified rollup and
    boundaries. The start bound is inclusive, while the stop bound is
    exclusive (similar to the slice operation.)

    Returns the leading ``(timestamp, value)`` pairs that fall before
    ``stop``. An ``AssertionError`` is raised if a point is not exactly
    ``rollup`` seconds after the previous one, counting from ``start``.
    """
    first = to_timestamp(start)
    limit = to_timestamp(stop)

    kept = []
    for position, point in enumerate(series):
        point_timestamp, point_value = point
        expected_timestamp = first + rollup * position
        assert point_timestamp == expected_timestamp
        if not point_timestamp < limit:
            break
        kept.append((point_timestamp, point_value))

    return kept
def scoreclause_sql(sc, connection):
    """
    Render the score expression for ``sc`` as a ``(sql, params)`` pair.

    On PostgreSQL and MySQL the score is computed in SQL, either from the
    values carried by ``sc`` or, when they are missing, from the row's own
    columns. On any other backend the score is computed in Python.
    """
    db = getattr(connection, 'alias', 'default')
    has_values = sc.last_seen is not None and sc.times_seen is not None

    if is_postgres(db):
        column_based = 'log(times_seen) * 600 + last_seen::abstime::int'
    elif is_mysql(db):
        column_based = 'log(times_seen) * 600 + unix_timestamp(last_seen)'
    else:
        # XXX: if we cant do it atomically let's do it the best we can
        return (int(sc), [])

    if not has_values:
        return (column_based, [])

    value_based = 'log(times_seen + %d) * 600 + %d' % (
        sc.times_seen,
        to_timestamp(sc.last_seen),
    )
    return (value_based, [])
def query(start, end, groupby, conditions=None, filter_keys=None,
          aggregation=None, aggregateby=None, rollup=None, arrayjoin=None):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar). The list passed in is not modified.

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    Returns the response rows nested by the `groupby` columns. Raises
    `Exception` when no project id is given or can be inferred.
    """
    groupby = groupby or []
    # Copy: ``IN`` conditions are appended below and must not leak into the
    # list object the caller handed in.
    conditions = list(conditions) if conditions else []
    filter_keys = filter_keys or {}

    # Forward and reverse translation maps from model ids to snuba keys, per column
    snuba_map = {
        col: get_snuba_map(col, keys) for col, keys in six.iteritems(filter_keys)
    }
    snuba_map = {
        k: v for k, v in six.iteritems(snuba_map) if k is not None and v is not None
    }
    rev_snuba_map = {
        col: dict(reversed(i) for i in keys.items())
        for col, keys in six.iteritems(snuba_map)
    }

    for col, keys in six.iteritems(filter_keys):
        if col in snuba_map:
            keys = [snuba_map[col][k] for k in keys]
        conditions.append((col, 'IN', keys))

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = filter_keys['project_id']
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
        project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    if not project_ids:
        raise Exception(
            "No project_id filter, or none could be inferred from other filters."
        )

    # If the grouping, aggregation, or any of the conditions reference `issue`
    # we need to fetch the issue definitions (issue -> fingerprint hashes)
    get_issues = 'issue' in groupby + [aggregateby] + [c[0] for c in conditions]
    issues = get_project_issues(
        project_ids, filter_keys.get('issue')) if get_issues else None

    url = '{0}/query'.format(SNUBA)
    # Parameters left as ``None`` are dropped from the request entirely.
    request = {
        k: v for k, v in six.iteritems({
            'from_date': start.isoformat(),
            'to_date': end.isoformat(),
            'conditions': conditions,
            'groupby': groupby,
            'project': project_ids,
            'aggregation': aggregation,
            'aggregateby': aggregateby,
            'granularity': rollup,
            'issues': issues,
            'arrayjoin': arrayjoin,
        }) if v is not None
    }

    response = requests.post(url, data=json.dumps(request))
    # TODO handle error responses
    response = json.loads(response.text)

    # Validate and scrub response, and translate snuba keys back to IDs
    expected_cols = groupby + ['aggregate']
    assert all(c['name'] in expected_cols for c in response['meta'])
    for d in response['data']:
        if 'time' in d:
            d['time'] = int(to_timestamp(parse_datetime(d['time'])))
        if d['aggregate'] is None:
            d['aggregate'] = 0
        for col in rev_snuba_map:
            if col in d:
                d[col] = rev_snuba_map[col][d[col]]

    return nest_groups(response['data'], groupby)
def normalize_crumb(cls, crumb, rust_renormalized):
    """
    Build the normalized form of a single breadcrumb.

    When ``rust_renormalized`` is truthy the crumb is only copied, its
    timestamp converted (or set to ``None``) and missing attributes filled
    with ``None``. Otherwise each attribute is validated and trimmed here,
    and ``InterfaceValidationError`` is raised if no timestamp can be parsed.
    """
    if rust_renormalized:
        normalized = dict(crumb)

        parsed = parse_timestamp(normalized.get('timestamp'))
        normalized['timestamp'] = to_timestamp(parsed) if parsed else None

        for field in ('type', 'level', 'message', 'category', 'event_id', 'data'):
            normalized.setdefault(field, None)

        return normalized

    crumb_type = crumb.get('type') or 'default'

    # Anything that is not a known level name is recorded as 'info'.
    level = crumb.get('level')
    level_is_known = isinstance(level, six.string_types) and (
        level in LOG_LEVELS_MAP or level == 'critical'
    )
    if not level_is_known:
        level = 'info'

    parsed = parse_timestamp(crumb.get('timestamp'))
    if parsed is None:
        raise InterfaceValidationError(
            'Unable to determine timestamp for crumb')
    timestamp = to_timestamp(parsed)

    message = crumb.get('message')
    if message is not None:
        message = trim(six.text_type(message), 4096)

    category = crumb.get('category')
    if category is not None:
        category = trim(six.text_type(category), 256)

    event_id = crumb.get('event_id')

    # TODO(dcramer): we dont want to discard the the rest of the crumb when
    # ``data`` is not a mapping, but it'd be nice if we could record an error
    # instead of silently dropping the value.
    data = crumb.get('data')
    data = trim(data, 4096) if isinstance(data, dict) else None

    return {
        'type': crumb_type,
        'level': level,
        'timestamp': timestamp,
        'message': message,
        'category': category,
        'event_id': event_id,
        'data': data
    }
def load_data(
    platform,
    default=None,
    sample_name=None,
    timestamp=None,
    start_timestamp=None,
    trace=None,
    span_id=None,
    spans=None,
):
    """
    Load a sample event payload from the ``samples`` directory under
    ``DATA_ROOT`` and fill in defaults.

    The sample file is chosen by trying, in order, ``platform``, the
    platform's ``language`` (looked up in ``INTEGRATION_ID_TO_PLATFORM_DATA``
    when the platform entry is not itself of type ``"language"``) and
    ``default``. The first name with a matching ``<name>.json`` file wins.

    Returns ``None`` when no sample file matches. For the platforms
    ``csp``, ``hkpk``, ``expectct`` and ``expectstaple`` the loaded payload
    is returned as-is. Otherwise the payload gets a timestamp, the
    transaction-specific fields below (only when its ``type`` is
    ``"transaction"``), and default ``message``, ``user``, ``extra``,
    ``modules`` and ``request`` entries where those are not already set.

    ``timestamp`` and ``start_timestamp`` are datetimes; their tzinfo is
    replaced with UTC. ``trace``, ``span_id`` and ``spans`` only apply to
    transaction samples.
    """
    # NOTE: Before editing this data, make sure you understand the context
    # in which its being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data["type"] != "language":
        language = platform_data["language"]

    samples_root = os.path.join(DATA_ROOT, "samples")
    all_samples = {f for f in os.listdir(samples_root) if f.endswith(".json")}

    # NOTE: the loop rebinds ``platform``. When a sample is found, the loop
    # breaks with ``platform`` set to the candidate that matched, and that
    # value is what the rest of the function uses.
    for platform in (platform, language, default):
        if not platform:
            continue

        # Verify by checking if the file is within our folder explicitly
        # avoids being able to have a name that invokes traversing directories.
        json_path = f"{platform}.json"

        if json_path not in all_samples:
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]["name"]
            except KeyError:
                pass

        # XXX: At this point, it's assumed that `json_path` was safely found
        # within `samples_root` due to the check above and cannot traverse
        # into paths.
        with open(os.path.join(samples_root, json_path)) as fp:
            data = json.load(fp)
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    # These payloads are returned without any of the defaults applied below.
    if platform in ("csp", "hkpk", "expectct", "expectstaple"):
        return data

    # Generate a timestamp in the present.
    if timestamp is None:
        timestamp = datetime.utcnow() - timedelta(minutes=1)
        # Drop the sub-millisecond part of the generated timestamp.
        timestamp = timestamp - timedelta(microseconds=timestamp.microsecond % 1000)
    timestamp = timestamp.replace(tzinfo=pytz.utc)
    data.setdefault("timestamp", to_timestamp(timestamp))

    if data.get("type") == "transaction":
        # Transactions default to starting three seconds before ``timestamp``.
        if start_timestamp is None:
            start_timestamp = timestamp - timedelta(seconds=3)
        else:
            start_timestamp = start_timestamp.replace(tzinfo=pytz.utc)
        data["start_timestamp"] = to_timestamp(start_timestamp)

        if trace is None:
            trace = uuid4().hex
        if span_id is None:
            span_id = uuid4().hex[:16]

        # Keep the trace tags in sync with the trace context written below.
        for tag in data["tags"]:
            if tag[0] == "trace":
                tag[1] = trace
            elif tag[0] == "trace.span":
                tag[1] = span_id
        data["contexts"]["trace"]["trace_id"] = trace
        data["contexts"]["trace"]["span_id"] = span_id
        if spans:
            data["spans"] = spans

        for span in data.get("spans", []):
            # Use data to generate span timestamps consistently and based
            # on event timestamp
            duration = span.get("data", {}).get("duration", 10.0)
            offset = span.get("data", {}).get("offset", 0)

            # Span doesn't have a parent, make it the transaction
            if span.get("parent_span_id") is None:
                span["parent_span_id"] = span_id
            if span.get("span_id") is None:
                span["span_id"] = uuid4().hex[:16]

            span_start = data["start_timestamp"] + offset
            span["trace_id"] = trace
            span.setdefault("start_timestamp", span_start)
            span.setdefault("timestamp", span_start + duration)

        measurements = data.get("measurements")

        if measurements:
            measurement_markers = {}
            for key, entry in measurements.items():
                if key in ["fp", "fcp", "lcp", "fid"]:
                    # ``entry["value"]`` is divided by 1000 before being added
                    # to the start timestamp -- presumably milliseconds to
                    # seconds; confirm against the sample data.
                    measurement_markers[f"mark.{key}"] = {
                        "value": round(data["start_timestamp"] + entry["value"] / 1000, 3)
                    }
            measurements.update(measurement_markers)

    data["platform"] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if "message" not in data:
        data["message"] = f"This is an example {sample_name or platform} exception"
    data.setdefault(
        "user",
        generate_user(ip_address="127.0.0.1", username="******", id=1, email="*****@*****.**"),
    )
    data.setdefault(
        "extra",
        {
            "session": {"foo": "bar"},
            "results": [1, 2, 3, 4, 5],
            "emptyList": [],
            "emptyMap": {},
            "length": 10837790,
            "unauthorized": False,
            "url": "http://example.org/foo/bar/",
        },
    )
    data.setdefault("modules", {"my.package": "1.0.0"})
    data.setdefault(
        "request",
        {
            "cookies": "foo=bar;biz=baz",
            "url": "http://example.com/foo",
            "headers": {
                "Referer": "http://example.com",
                "Content-Type": "application/json",
                "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
            },
            "env": {"ENV": "prod"},
            "query_string": "foo=bar",
            "data": '{"hello": "world"}',
            "method": "GET",
        },
    )

    return data
def get_project_release_stats(project_id, release, stat, rollup, start, end, environments=None):
    """Build a bucketed health series plus totals for one release.

    ``stat`` must be ``"users"`` or ``"sessions"``.  Returns a
    ``(stats, totals)`` pair: ``stats`` is the structure produced by
    ``_make_stats`` with the second element of every bucket filled in with a
    dict of counts (and duration quantiles), and ``totals`` is a dict keyed by
    ``stat``, ``stat_healthy``, ``stat_crashed``, ``stat_abnormal`` and
    ``stat_errored``.
    """
    assert stat in ("users", "sessions")

    healthy_key = stat + "_healthy"
    crashed_key = stat + "_crashed"
    abnormal_key = stat + "_abnormal"
    errored_key = stat + "_errored"

    # Snuba excludes the end of the queried range, and buckets are aligned to
    # DATASET_BUCKET, so push ``end`` forward to the next bucket boundary.
    end = to_datetime((to_timestamp(end) // DATASET_BUCKET + 1) * DATASET_BUCKET)

    filter_keys = {"project_id": [project_id]}
    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    bucket_count = int((end - start).total_seconds() / rollup)
    stats = _make_stats(start, rollup, bucket_count, default=None)

    totals = {
        stat: 0,
        healthy_key: 0,
        crashed_key: 0,
        abnormal_key: 0,
        errored_key: 0,
    }

    series_rows = raw_query(
        dataset=Dataset.Sessions,
        selected_columns=[
            "bucketed_started",
            stat,
            crashed_key,
            abnormal_key,
            errored_key,
            "duration_quantiles",
        ],
        groupby=["bucketed_started"],
        start=start,
        end=end,
        rollup=rollup,
        conditions=conditions,
        filter_keys=filter_keys,
        referrer="sessions.release-stats-details",
    )["data"]

    for row in series_rows:
        bucket_start = parse_snuba_datetime(row["bucketed_started"])
        position = int((bucket_start - start).total_seconds() / rollup)

        # The underlying counters are probabilistic, so a difference can come
        # out negative; clamp derived values at zero to avoid confusion.
        entry = {
            stat: row[stat],
            healthy_key: max(0, row[stat] - row[errored_key]),
            crashed_key: row[crashed_key],
            abnormal_key: row[abnormal_key],
            errored_key: max(0, row[errored_key] - row[crashed_key] - row[abnormal_key]),
        }
        stats[position][1] = entry
        entry.update(extract_duration_quantiles(row))

        # Session counts are additive, so totals can be accumulated here
        # without issuing a second query.
        if stat == "sessions":
            for name in totals:
                totals[name] += entry[name]

    # Give every bucket that received no row an explicit all-zero entry.
    for position, slot in enumerate(stats):
        if slot[1] is not None:
            continue
        stats[position][1] = {
            stat: 0,
            healthy_key: 0,
            crashed_key: 0,
            abnormal_key: 0,
            errored_key: 0,
            "duration_p50": None,
            "duration_p90": None,
        }

    # User totals come from a separate query over the whole time range rather
    # than from summing the buckets.
    if stat == "users":
        total_rows = raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["users", "users_crashed", "users_abnormal", "users_errored"],
            start=start,
            end=end,
            conditions=conditions,
            filter_keys=filter_keys,
            referrer="sessions.crash-free-breakdown-users",
        )["data"]
        if total_rows:
            overall = total_rows[0]
            totals = {
                "users": overall["users"],
                "users_healthy": max(0, overall["users"] - overall["users_errored"]),
                "users_crashed": overall["users_crashed"],
                "users_abnormal": overall["users_abnormal"],
                "users_errored": max(
                    0,
                    overall["users_errored"]
                    - overall["users_crashed"]
                    - overall["users_abnormal"],
                ),
            }

    return stats, totals
def setup_fixture(self):
    """Create the organizations, projects, members and sessions used by the tests.

    Ten sessions are stored across four projects; each one is the shared
    template with a fresh ``session_id`` plus the per-session overrides
    listed below.
    """
    self.timestamp = to_timestamp(datetime.datetime(2021, 1, 14, 12, 27, 28, tzinfo=pytz.utc))
    self.received = self.timestamp
    # Floor the start time to a whole minute.
    self.session_started = self.timestamp // 60 * 60

    self.organization1 = self.organization
    self.organization2 = self.create_organization()
    self.organization3 = self.create_organization()

    self.project1 = self.project
    self.project2 = self.create_project()
    self.project3 = self.create_project()
    self.project4 = self.create_project(organization=self.organization2)

    self.user2 = self.create_user(is_superuser=False)
    self.create_member(
        user=self.user2, organization=self.organization1, role="member", teams=[]
    )
    self.create_member(user=self.user, organization=self.organization3, role="admin", teams=[])

    self.create_environment(self.project2, name="development")

    template = {
        "distinct_id": "00000000-0000-0000-0000-000000000000",
        "status": "exited",
        "seq": 0,
        "release": "[email protected]",
        "environment": "production",
        "retention_days": 90,
        "duration": None,
        "errors": 0,
        "started": self.session_started,
        "received": self.received,
    }

    def make_session(project, **kwargs):
        # Template first, then identifiers, then per-session overrides.
        session = dict(template)
        session.update(
            session_id=uuid4().hex,
            org_id=project.organization_id,
            project_id=project.id,
        )
        session.update(kwargs)
        return session

    one_hour = 60 * 60
    other_user = "39887d89-13b2-4c84-8c23-5d13d2102664"

    # (project, overrides) pairs, stored in exactly this order.
    session_specs = [
        (self.project1, {}),
        (self.project1, {"release": "[email protected]"}),
        (self.project1, {"started": self.session_started - one_hour}),
        (self.project1, {"started": self.session_started - 12 * one_hour}),
        (self.project2, {"status": "crashed"}),
        (self.project2, {"environment": "development"}),
        (self.project3, {"errors": 1, "release": "[email protected]"}),
        (
            self.project3,
            {"distinct_id": other_user, "started": self.session_started - one_hour},
        ),
        (self.project3, {"distinct_id": other_user, "errors": 1}),
        (self.project4, {}),
    ]
    for project, overrides in session_specs:
        self.store_session(make_session(project, **overrides))
def convert_search_filter_to_snuba_query(
    search_filter: SearchFilter,
    key: Optional[str] = None,
    params: Optional[Mapping[str, Union[int, str, datetime]]] = None,
) -> Optional[Sequence[any]]:
    """Translate one ``SearchFilter`` into a legacy Snuba condition.

    :param search_filter: the parsed filter to convert.
    :param key: optional field name to use instead of ``search_filter.key.name``.
    :param params: passed through to per-key converters and to field alias
        expressions.
    :returns: ``None`` for fields listed in ``NO_CONVERSION_FIELDS``; otherwise
        a single ``[lhs, operator, rhs]`` condition, or a list of two such
        conditions (null check plus comparison) which the caller ORs together.
    :raises InvalidSearchQuery: for wildcard or non-event-id values on ``id``.

    NOTE(review): the return annotation uses the builtin ``any`` function,
    not ``typing.Any`` -- presumably ``Any`` was intended; confirm before
    changing, since ``Any`` must be imported at file level.
    """
    name = search_filter.key.name if key is None else key
    value = search_filter.value.value

    # We want to use group_id elsewhere so shouldn't be removed from the dataset
    # but if a user has a tag with the same name we want to make sure that works
    if name in {"group_id"}:
        name = f"tags[{name}]"

    if name in NO_CONVERSION_FIELDS:
        return
    elif name in key_conversion_map:
        # Fields with a dedicated converter are delegated entirely.
        return key_conversion_map[name](search_filter, name, params)
    elif name in ARRAY_FIELDS and search_filter.value.is_wildcard():
        # Escape and convert meta characters for LIKE expressions.
        raw_value = search_filter.value.raw_value
        like_value = raw_value.replace("%", "\\%").replace("_", "\\_").replace("*", "%")
        operator = "LIKE" if search_filter.operator == "=" else "NOT LIKE"
        return [name, operator, like_value]
    elif name in ARRAY_FIELDS and search_filter.is_in_filter:
        operator = "=" if search_filter.operator == "IN" else "!="
        # XXX: This `arrayConcat` usage is unnecessary, but we need it in place to
        # trick the legacy Snuba language into not treating `name` as a
        # function. Once we switch over to snql it can be removed.
        return [
            ["hasAny", [["arrayConcat", [name]], ["array", [f"'{v}'" for v in value]]]],
            operator,
            1,
        ]
    elif name in ARRAY_FIELDS and search_filter.value.raw_value == "":
        # Empty value on an array field: "!=" means "has elements",
        # anything else means "is empty".
        return [["notEmpty", [name]], "=", 1 if search_filter.operator == "!=" else 0]
    else:
        # timestamp{,.to_{hour,day}} need a datetime string
        # last_seen needs an integer
        if isinstance(value, datetime) and name not in {
            "timestamp",
            "timestamp.to_hour",
            "timestamp.to_day",
        }:
            # Seconds are truncated to an int first, then scaled by 1000.
            value = int(to_timestamp(value)) * 1000

        # Validate event ids are uuids
        if name == "id":
            if search_filter.value.is_wildcard():
                raise InvalidSearchQuery("Wildcard conditions are not permitted on `id` field.")
            elif not search_filter.value.is_event_id():
                raise InvalidSearchQuery(INVALID_EVENT_DETAILS.format("Filter"))

        # most field aliases are handled above but timestamp.to_{hour,day} are
        # handled here
        if name in FIELD_ALIASES:
            name = FIELD_ALIASES[name].get_expression(params)

        # Tags are never null, but promoted tags are columns and so can be null.
        # To handle both cases, use `ifNull` to convert to an empty string and
        # compare so we need to check for empty values.
        if search_filter.key.is_tag:
            name = ["ifNull", [name, "''"]]

        # Handle checks for existence
        if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
            if search_filter.key.is_tag:
                return [name, search_filter.operator, value]
            else:
                # If not a tag, we can just check that the column is null.
                return [["isNull", [name]], search_filter.operator, 1]

        is_null_condition = None
        # TODO(wmak): Skip this for all non-nullable keys not just event.type
        if (
            search_filter.operator in ("!=", "NOT IN")
            and not search_filter.key.is_tag
            and name != "event.type"
        ):
            # Handle null columns on inequality comparisons. Any comparison
            # between a value and a null will result to null, so we need to
            # explicitly check for whether the condition is null, and OR it
            # together with the inequality check.
            # We don't need to apply this for tags, since if they don't exist
            # they'll always be an empty string.
            is_null_condition = [["isNull", [name]], "=", 1]

        if search_filter.value.is_wildcard():
            # The value is embedded in a case-insensitive `match` pattern.
            condition = [["match", [name, f"'(?i){value}'"]], search_filter.operator, 1]
        else:
            condition = [name, search_filter.operator, value]

        # We only want to return as a list if we have the check for null
        # present. Returning as a list causes these conditions to be ORed
        # together. Otherwise just return the raw condition, so that it can be
        # used correctly in aggregates.
        if is_null_condition:
            return [is_null_condition, condition]
        else:
            return condition
def build_incident_attachment(incident):
    """Build the Slack attachment payload describing a metric alert incident.

    :param incident: the incident to describe; its ``alert_rule``, ``status``,
        ``date_started``, ``organization`` and ``identifier`` are read.
    :returns: a dict in Slack attachment format (title, text, colour,
        timestamp, footer).
    """
    logo_url = absolute_uri(get_asset_url("sentry", "images/sentry-email-avatar.png"))
    alert_rule = incident.alert_rule

    # Use the most recently modified trigger to decide which time range the
    # aggregate shown in the message is computed over.
    incident_trigger = (
        IncidentTrigger.objects.filter(incident=incident).order_by("-date_modified").first()
    )
    if incident_trigger:
        alert_rule_trigger = incident_trigger.alert_rule_trigger
        # TODO: If we're relying on this and expecting possible delays between a trigger fired and this function running,
        # then this could actually be incorrect if they changed the trigger's time window in this time period. Should we store it?
        start = incident_trigger.date_modified - timedelta(
            seconds=alert_rule_trigger.alert_rule.snuba_query.time_window
        )
        end = incident_trigger.date_modified
    else:
        start, end = None, None

    # NOTE(review): any status other than the three handled here leaves
    # ``status`` and ``color`` unbound and fails below -- confirm callers only
    # pass closed/warning/critical incidents.
    if incident.status == IncidentStatus.CLOSED.value:
        status = "Resolved"
        color = RESOLVED_COLOR
    elif incident.status == IncidentStatus.WARNING.value:
        status = "Warning"
        color = LEVEL_TO_COLOR["warning"]
    elif incident.status == IncidentStatus.CRITICAL.value:
        status = "Critical"
        color = LEVEL_TO_COLOR["fatal"]

    agg_text = QUERY_AGGREGATION_DISPLAY.get(
        alert_rule.snuba_query.aggregate, alert_rule.snuba_query.aggregate
    )
    agg_value = get_incident_aggregates(incident, start, end, use_alert_aggregate=True)["count"]
    # Floor division keeps the minute count an integer on Python 3; true
    # division rendered the message as e.g. "in the last 10.0 minutes".
    time_window = alert_rule.snuba_query.time_window // 60

    text = "{} {} in the last {} minutes".format(agg_value, agg_text, time_window)
    if alert_rule.snuba_query.query != "":
        text = text + "\nFilter: {}".format(alert_rule.snuba_query.query)

    ts = incident.date_started
    title = u"{}: {}".format(status, alert_rule.name)

    return {
        "fallback": title,
        "title": title,
        "title_link": absolute_uri(
            reverse(
                "sentry-metric-alert",
                kwargs={
                    "organization_slug": incident.organization.slug,
                    "incident_id": incident.identifier,
                },
            )
        ),
        "text": text,
        "fields": [],
        "mrkdwn_in": ["text"],
        "footer_icon": logo_url,
        "footer": "Sentry Incident",
        "ts": to_timestamp(ts),
        "color": color,
        "actions": [],
    }
def _get_ts(ts):
    """Coerce ``ts`` to integer seconds.

    ``None`` yields the current ``time.time()``, a ``datetime`` is converted
    through ``to_timestamp``, and anything else is passed to ``int``.
    """
    if isinstance(ts, datetime):
        return int(to_timestamp(ts))
    return int(time.time()) if ts is None else int(ts)
def timestamp(d):
    """Return ``to_timestamp(d)`` as an int, rounded down to a multiple of 3600."""
    seconds = int(to_timestamp(d))
    return (seconds // 3600) * 3600
def get_tag_value_paginator_for_projects(self, projects, environments, key, start=None, end=None, query=None, order_by="-last_seen"):
    """Return a ``SequencePaginator`` over the values seen for tag ``key``.

    Only ``order_by="-last_seen"`` is supported; anything else raises
    ``ValueError``.  For keys in ``FUZZY_NUMERIC_KEYS`` a numeric ``query``
    matches values within ``FUZZY_NUMERIC_DISTANCE`` of it; for other keys
    ``query`` is matched as a substring, and without a query only non-empty
    values are returned.
    """
    from sentry.api.paginator import SequencePaginator

    if order_by != "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)
    conditions = []

    if key in FUZZY_NUMERIC_KEYS:
        # A non-numeric (or missing) query adds no condition at all here.
        if query is not None and query.isdigit():
            target = int(query)
            conditions.append([snuba_key, ">=", target - FUZZY_NUMERIC_DISTANCE])
            conditions.append([snuba_key, "<=", target + FUZZY_NUMERIC_DISTANCE])
    else:
        if snuba_key in BLACKLISTED_COLUMNS:
            snuba_key = "tags[%s]" % (key,)

        if query:
            condition = [snuba_key, "LIKE", u"%{}%".format(query)]
        else:
            condition = [snuba_key, "!=", ""]
        conditions.append(condition)

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    tag_values = []
    for value, data in six.iteritems(results):
        tag_values.append(
            TagValue(key=key, value=six.text_type(value), **fix_tag_value_data(data))
        )

    # The score is the ordering field in integer milliseconds.
    descending = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    scored = [
        (int(to_timestamp(getattr(tag_value, score_field)) * 1000), tag_value)
        for tag_value in tag_values
    ]
    return SequencePaginator(scored, reverse=descending)
def floor_to_10s_epoch(value):
    """Return ``value`` floored to a 10-second boundary, as integer seconds.

    :param value: a ``datetime``; its ``second`` is rounded down to a multiple
        of ten and its ``microsecond`` is cleared.
    :returns: ``int(to_timestamp(...))`` of the floored datetime.
    """
    # Floor division is required: on Python 3 ``seconds / 10`` is a float, so
    # the value was not floored and ``datetime.replace`` rejected it.
    floored_second = 10 * (value.second // 10)
    value = value.replace(second=floored_second, microsecond=0)
    return int(to_timestamp(value))
def test_frequency_tables(self):
    """Exercise the frequency-table API, including environment filtering.

    Records frequencies at ``now`` and one hour earlier (once without an
    environment and once with ``environment_id=1``), then checks the
    most-frequent, series, totals, merge and delete operations.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.UTC)
    model = TSDBModel.frequent_projects_by_organization

    # None of the registered frequency tables actually support
    # environments, so we have to pretend like one actually does
    self.db.models_with_environment_support = self.db.models_with_environment_support | set(
        [model]
    )

    rollup = 3600
    hour_ago = now - timedelta(hours=1)
    orgs = ('organization:1', 'organization:2')
    four_projects = ("project:1", "project:2", "project:3", "project:4")
    five_projects = ("project:1", "project:2", "project:3", "project:4", "project:5")

    self.db.record_frequency_multi(
        (
            (
                model,
                {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                    },
                },
            ),
        ),
        now,
    )

    self.db.record_frequency_multi(
        (
            (
                model,
                {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 1,
                        "project:3": 1,
                        "project:4": 1,
                    },
                    "organization:2": {
                        "project:5": 1,
                    },
                },
            ),
        ),
        hour_ago,
    )

    self.db.record_frequency_multi(
        (
            (
                model,
                {
                    'organization:1': {
                        "project:2": 1,
                        "project:3": 2,
                        "project:4": 3,
                    },
                    "organization:2": {
                        "project:5": 0.5,
                    },
                },
            ),
        ),
        hour_ago,
        environment_id=1,
    )

    # Most frequent items in the current bucket only.
    current = self.db.get_most_frequent(model, orgs, now, rollup=rollup)
    assert current == {
        'organization:1': [
            ('project:3', 3.0),
            ('project:2', 2.0),
            ('project:1', 1.0),
        ],
        'organization:2': [],
    }

    # Both buckets, restricted to environment 1.
    env_one = self.db.get_most_frequent(
        model, orgs, hour_ago, now, rollup=rollup, environment_id=1
    )
    assert env_one == {
        'organization:1': [
            ('project:4', 3.0),
            ('project:3', 2.0),
            ('project:2', 1.0),
        ],
        'organization:2': [
            ('project:5', 0.5),
        ],
    }

    limited = self.db.get_most_frequent(model, orgs, now, limit=1, rollup=rollup)
    assert limited == {
        'organization:1': [
            ('project:3', 3.0),
        ],
        'organization:2': [],
    }

    # Both buckets, all environments.
    combined = self.db.get_most_frequent(model, orgs, hour_ago, now, rollup=rollup)
    assert combined == {
        'organization:1': [
            ('project:3', 3.0 + 3.0),
            ('project:2', 2.0 + 2.0),
            ('project:4', 4.0),
            ('project:1', 1.0 + 1.0),
        ],
        'organization:2': [
            ('project:5', 1.5),
        ],
    }

    env_zero = self.db.get_most_frequent(
        model, orgs, hour_ago, now, rollup=rollup, environment_id=0
    )
    assert env_zero == {
        'organization:1': [],
        'organization:2': [],
    }

    timestamp = int(to_timestamp(now) // rollup) * rollup
    previous = timestamp - rollup

    most_frequent_series = self.db.get_most_frequent_series(
        model,
        ('organization:1', 'organization:2', 'organization:3'),
        hour_ago,
        now,
        rollup=rollup,
    )
    assert most_frequent_series == {
        'organization:1': [
            (
                previous,
                {
                    'project:1': 1.0,
                    'project:2': 2.0,
                    'project:3': 3.0,
                    'project:4': 4.0,
                },
            ),
            (
                timestamp,
                {
                    'project:1': 1.0,
                    'project:2': 2.0,
                    'project:3': 3.0,
                },
            ),
        ],
        'organization:2': [
            (previous, {'project:5': 1.5}),
            (timestamp, {}),
        ],
        'organization:3': [
            (previous, {}),
            (timestamp, {}),
        ],
    }

    frequency_series = self.db.get_frequency_series(
        model,
        {
            'organization:1': four_projects,
            'organization:2': ("project:5",),
        },
        hour_ago,
        now,
        rollup=rollup,
    )
    assert frequency_series == {
        'organization:1': [
            (
                previous,
                {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                },
            ),
            (
                timestamp,
                {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 0.0,
                },
            ),
        ],
        'organization:2': [
            (previous, {"project:5": 1.5}),
            (timestamp, {"project:5": 0.0}),
        ],
    }

    env_frequency_series = self.db.get_frequency_series(
        model,
        {
            'organization:1': four_projects,
            'organization:2': ("project:5",),
        },
        hour_ago,
        now,
        rollup=rollup,
        environment_id=1,
    )
    assert env_frequency_series == {
        'organization:1': [
            (
                previous,
                {
                    "project:1": 0.0,
                    "project:2": 1.0,
                    "project:3": 2.0,
                    "project:4": 3.0,
                },
            ),
            (
                timestamp,
                {
                    "project:1": 0.0,
                    "project:2": 0.0,
                    "project:3": 0.0,
                    "project:4": 0.0,
                },
            ),
        ],
        'organization:2': [
            (previous, {"project:5": 0.5}),
            (timestamp, {"project:5": 0.0}),
        ],
    }

    totals_before_merge = self.db.get_frequency_totals(
        model,
        {
            'organization:1': five_projects,
            'organization:2': five_projects,
        },
        hour_ago,
        now,
        rollup=rollup,
    )
    assert totals_before_merge == {
        'organization:1': {
            "project:1": 1.0 + 1.0,
            "project:2": 2.0 + 2.0,
            "project:3": 3.0 + 3.0,
            "project:4": 4.0,
            "project:5": 0.0,
        },
        'organization:2': {
            "project:1": 0.0,
            "project:2": 0.0,
            "project:3": 0.0,
            "project:4": 0.0,
            "project:5": 1.5,
        },
    }

    # Fold organization:2 into organization:1 for both environments.
    self.db.merge_frequencies(
        model,
        'organization:1',
        ['organization:2'],
        now,
        environment_ids=[0, 1],
    )

    totals_after_merge = self.db.get_frequency_totals(
        model,
        {
            'organization:1': five_projects,
            'organization:2': five_projects,
        },
        hour_ago,
        now,
        rollup=rollup,
    )
    assert totals_after_merge == {
        'organization:1': {
            "project:1": 1.0 + 1.0,
            "project:2": 2.0 + 2.0,
            "project:3": 3.0 + 3.0,
            "project:4": 4.0,
            "project:5": 1.5,
        },
        'organization:2': {
            "project:1": 0.0,
            "project:2": 0.0,
            "project:3": 0.0,
            "project:4": 0.0,
            "project:5": 0.0,
        },
    }

    env_totals_after_merge = self.db.get_frequency_totals(
        model,
        {
            'organization:1': five_projects,
            'organization:2': five_projects,
        },
        hour_ago,
        now,
        rollup=rollup,
        environment_id=1,
    )
    assert env_totals_after_merge == {
        'organization:1': {
            "project:1": 0.0,
            "project:2": 1.0,
            "project:3": 2.0,
            "project:4": 3.0,
            "project:5": 0.5,
        },
        'organization:2': {
            "project:1": 0.0,
            "project:2": 0.0,
            "project:3": 0.0,
            "project:4": 0.0,
            "project:5": 0.0,
        },
    }

    self.db.delete_frequencies(
        [model],
        ['organization:1', 'organization:2'],
        hour_ago,
        now,
        environment_ids=[0, 1],
    )

    # After deletion nothing is left in either environment.
    assert self.db.get_most_frequent(model, orgs, now) == {
        'organization:1': [],
        'organization:2': [],
    }

    assert self.db.get_most_frequent(model, orgs, now, environment_id=1) == {
        'organization:1': [],
        'organization:2': [],
    }
def floor_to_hour_epoch(value):
    """Return ``value`` with minute, second and microsecond zeroed, as integer seconds."""
    top_of_hour = value.replace(minute=0, second=0, microsecond=0)
    return int(to_timestamp(top_of_hour))
def build_group_attachment(group, event=None, tags=None, identity=None, actions=None, rules=None):
    """Build the Slack attachment payload for an issue.

    :param group: the issue to describe.
    :param event: optional event; when given, its level tag, tags, title and
        timestamp are used in preference to the group's.
    :param tags: optional collection of standardized tag keys to show as fields.
    :param identity: passed to ``build_action_text`` for each action.
    :param actions: optional list of actions already taken; when non-empty the
        attachment shows their text and no buttons.
    :param rules: optional list of rules; the first one is linked in the footer.
    :returns: a dict in Slack attachment format.
    """
    # XXX(dcramer): options are limited to 100 choices, even when nested
    status = group.get_status()

    members = get_member_assignees(group)
    teams = get_team_assignees(group)

    logo_url = absolute_uri(get_asset_url("sentry", "images/sentry-email-avatar.png"))

    # Fall back to the "error" colour itself when the level is unknown; the
    # fallback used to be the bare string "error" rather than a colour value.
    if event:
        color = LEVEL_TO_COLOR.get(event.get_tag("level"), LEVEL_TO_COLOR["error"])
    else:
        color = LEVEL_TO_COLOR["error"]

    text = build_attachment_text(group, event) or ""

    if actions is None:
        actions = []

    assignee = get_assignee(group)

    resolve_button = {
        "name": "resolve_dialog",
        "value": "resolve_dialog",
        "type": "button",
        "text": "Resolve...",
    }

    ignore_button = {"name": "status", "value": "ignored", "type": "button", "text": "Ignore"}

    project = Project.objects.get_from_cache(id=group.project_id)

    # Whether the project has releases is cached: a positive answer for an
    # hour, a negative one for a minute.
    cache_key = "has_releases:2:%s" % (project.id)
    has_releases = cache.get(cache_key)
    if has_releases is None:
        has_releases = ReleaseProject.objects.filter(project_id=project.id).exists()
        if has_releases:
            cache.set(cache_key, True, 3600)
        else:
            cache.set(cache_key, False, 60)

    # Without releases the resolve dialog is replaced by a plain button.
    if not has_releases:
        resolve_button.update({"name": "status", "text": "Resolve", "value": "resolved"})

    if status == GroupStatus.RESOLVED:
        resolve_button.update({"name": "status", "text": "Unresolve", "value": "unresolved"})

    if status == GroupStatus.IGNORED:
        ignore_button.update({"text": "Stop Ignoring", "value": "unresolved"})

    option_groups = []

    if teams:
        option_groups.append({"text": "Teams", "options": teams})

    if members:
        option_groups.append({"text": "People", "options": members})

    payload_actions = [
        resolve_button,
        ignore_button,
        {
            "name": "assign",
            "text": "Select Assignee...",
            "type": "select",
            "selected_options": [assignee],
            "option_groups": option_groups,
        },
    ]

    fields = []

    if tags:
        event_tags = event.tags if event else group.get_latest_event().tags

        for key, value in event_tags:
            std_key = tagstore.get_standardized_key(key)
            if std_key not in tags:
                continue

            labeled_value = tagstore.get_tag_value_label(key, value)
            fields.append(
                {
                    "title": std_key.encode("utf-8"),
                    "value": labeled_value.encode("utf-8"),
                    "short": True,
                }
            )

    # Once actions have been taken, show their text and drop the buttons.
    if actions:
        action_texts = [_f for _f in [build_action_text(group, identity, a) for a in actions] if _f]
        text += "\n" + "\n".join(action_texts)

        color = ACTIONED_ISSUE_COLOR
        payload_actions = []

    # Use whichever is later: the group's last_seen or the event's time.
    ts = group.last_seen

    if event:
        event_ts = event.datetime
        ts = max(ts, event_ts)

    footer = u"{}".format(group.qualified_short_id)

    if rules:
        rule_url = build_rule_url(rules[0], group, project)
        footer += u" via <{}|{}>".format(rule_url, rules[0].label)

        if len(rules) > 1:
            footer += u" (+{} other)".format(len(rules) - 1)

    obj = event if event is not None else group
    return {
        "fallback": u"[{}] {}".format(project.slug, obj.title),
        "title": build_attachment_title(obj),
        "title_link": group.get_absolute_url(params={"referrer": "slack"}),
        "text": text,
        "fields": fields,
        "mrkdwn_in": ["text"],
        "callback_id": json.dumps({"issue": group.id}),
        "footer_icon": logo_url,
        "footer": footer,
        "ts": to_timestamp(ts),
        "color": color,
        "actions": payload_actions,
    }
def test_frequency_tables(self):
    """Exercise the frequency-table API without environments.

    Records frequencies at ``now`` and one hour earlier, then checks the
    most-frequent, series and totals queries against them.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.UTC)
    model = TSDBModel.frequent_projects_by_organization

    rollup = 3600
    hour_ago = now - timedelta(hours=1)
    orgs = ('organization:1', 'organization:2')

    self.db.record_frequency_multi(
        (
            (
                model,
                {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                    },
                },
            ),
        ),
        now,
    )

    self.db.record_frequency_multi(
        (
            (
                model,
                {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                        "project:4": 4,
                    },
                    "organization:2": {
                        "project:5": 1.5,
                    },
                },
            ),
        ),
        hour_ago,
    )

    # Current bucket only.
    current = self.db.get_most_frequent(model, orgs, now, rollup=rollup)
    assert current == {
        'organization:1': [
            ('project:3', 3.0),
            ('project:2', 2.0),
            ('project:1', 1.0),
        ],
        'organization:2': [],
    }

    limited = self.db.get_most_frequent(model, orgs, now, limit=1, rollup=rollup)
    assert limited == {
        'organization:1': [
            ('project:3', 3.0),
        ],
        'organization:2': [],
    }

    # Both buckets combined.
    combined = self.db.get_most_frequent(model, orgs, hour_ago, now, rollup=rollup)
    assert combined == {
        'organization:1': [
            ('project:3', 3.0 + 3.0),
            ('project:2', 2.0 + 2.0),
            ('project:4', 4.0),
            ('project:1', 1.0 + 1.0),
        ],
        'organization:2': [
            ('project:5', 1.5),
        ],
    }

    timestamp = int(to_timestamp(now) // rollup) * rollup
    previous = timestamp - rollup

    most_frequent_series = self.db.get_most_frequent_series(
        model,
        ('organization:1', 'organization:2', 'organization:3'),
        hour_ago,
        now,
        rollup=rollup,
    )
    assert most_frequent_series == {
        'organization:1': [
            (
                previous,
                {
                    'project:1': 1.0,
                    'project:2': 2.0,
                    'project:3': 3.0,
                    'project:4': 4.0,
                },
            ),
            (
                timestamp,
                {
                    'project:1': 1.0,
                    'project:2': 2.0,
                    'project:3': 3.0,
                },
            ),
        ],
        'organization:2': [
            (previous, {'project:5': 1.5}),
            (timestamp, {}),
        ],
        'organization:3': [
            (previous, {}),
            (timestamp, {}),
        ],
    }

    frequency_series = self.db.get_frequency_series(
        model,
        {
            'organization:1': ("project:1", "project:2", "project:3", "project:4"),
            'organization:2': ("project:5",),
        },
        hour_ago,
        now,
        rollup=rollup,
    )
    assert frequency_series == {
        'organization:1': [
            (
                previous,
                {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                },
            ),
            (
                timestamp,
                {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 0.0,
                },
            ),
        ],
        'organization:2': [
            (previous, {"project:5": 1.5}),
            (timestamp, {"project:5": 0.0}),
        ],
    }

    frequency_totals = self.db.get_frequency_totals(
        model,
        {
            'organization:1': (
                "project:1",
                "project:2",
                "project:3",
                "project:4",
                "project:5",
            ),
            'organization:2': ("project:1",),
        },
        hour_ago,
        now,
        rollup=rollup,
    )
    assert frequency_totals == {
        'organization:1': {
            "project:1": 1.0 + 1.0,
            "project:2": 2.0 + 2.0,
            "project:3": 3.0 + 3.0,
            "project:4": 4.0,
            "project:5": 0.0,
        },
        'organization:2': {
            "project:1": 0.0,
        },
    }
def get_snuba_translators(filter_keys, is_grouprelease=False):
    """
    Some models are stored differently in snuba, eg. as the environment
    name instead of the environment ID. Here we create and return forward()
    and reverse() translation functions that perform all the required changes.

    forward() is designed to work on the filter_keys and so should be called
    with a map of {column: [key1, key2], ...} and should return an updated map
    with the filter keys replaced with the ones that Snuba expects.

    reverse() is designed to work on result rows, so should be called with a row
    in the form {column: value, ...} and will return a translated result row.

    Because translation can potentially rely on combinations of different parts
    of the result row, I decided to implement them as composable functions over the
    row to be translated. This should make it simpler to add any other needed
    translations as long as you can express them as forward(filters) and reverse(row)
    functions.

    Both returned functions modify the dict they are given in place and
    return that same dict.
    """

    # Helper lambdas to compose translator functions
    identity = (lambda x: x)
    # compose(f, g)(x) == f(g(x)): the second argument is applied first.
    compose = (lambda f, g: lambda x: f(g(x)))
    # dict.update() returns None, so ``or d`` hands back the mutated dict.
    replace = (lambda d, key, val: d.update({key: val}) or d)

    forward = identity
    reverse = identity

    # column -> (model holding the value, field to read, formatter for the value)
    map_columns = {
        'environment': (Environment, 'name', lambda name: None if name == '' else name),
        'tags[sentry:release]': (Release, 'version', identity),
    }

    for col, (model, field, fmt) in six.iteritems(map_columns):
        fwd, rev = None, None
        ids = filter_keys.get(col)
        # Columns that are not being filtered on need no translation.
        if not ids:
            continue
        if is_grouprelease and col == "tags[sentry:release]":
            # GroupRelease -> Release translation is a special case because the
            # translation relies on both the Group and Release value in the result row.
            #
            # We create a map of {grouprelease_id: (group_id, version), ...} and the corresponding
            # reverse map of {(group_id, version): grouprelease_id, ...}
            # NB this does depend on `issue` being defined in the query result, and the correct
            # set of issues being resolved, which is outside the control of this function.
            gr_map = GroupRelease.objects.filter(id__in=ids).values_list(
                "id", "group_id", "release_id"
            )
            ver = dict(Release.objects.filter(id__in=[x[2] for x in gr_map]).values_list(
                "id", "version"
            ))
            fwd_map = {gr: (group, ver[release]) for (gr, group, release) in gr_map}
            rev_map = dict(reversed(t) for t in six.iteritems(fwd_map))
            # The outer lambdas are called immediately so each translator
            # captures this iteration's ``col`` and map, not the loop variables.
            fwd = (
                lambda col, trans: lambda filters: replace(
                    filters, col, [trans[k][1] for k in filters[col]]
                )
            )(col, fwd_map)
            rev = (
                lambda col, trans: lambda row: replace(
                    # The translate map may not have every combination of issue/release
                    # returned by the query.
                    row, col, trans.get((row["issue"], row[col]))
                )
            )(col, rev_map)
        else:
            fwd_map = {
                k: fmt(v) for k, v in model.objects.filter(id__in=ids).values_list("id", field)
            }
            rev_map = dict(reversed(t) for t in six.iteritems(fwd_map))
            # Falsy filter keys are dropped by the ``if k`` rather than translated.
            fwd = (
                lambda col, trans: lambda filters: replace(
                    filters, col, [trans[k] for k in filters[col] if k]
                )
            )(col, fwd_map)
            rev = (
                lambda col, trans: lambda row: replace(
                    row, col, trans[row[col]]) if col in row else row
            )(col, rev_map)

        # Each new translator runs before the ones composed earlier.
        if fwd:
            forward = compose(forward, fwd)
        if rev:
            reverse = compose(reverse, rev)

    # Extra reverse translator for time column.
    reverse = compose(
        reverse,
        lambda row: replace(row, "time", int(to_timestamp(parse_datetime(row["time"]))))
        if "time" in row
        else row,
    )

    return (forward, reverse)
def normalize_to_rollup(self, timestamp, seconds):
    """
    Convert ``timestamp`` (a datetime object) into a rollup bucket number:
    its integer timestamp divided by ``seconds``, truncated to an int.
    """
    epoch_seconds = int(to_timestamp(timestamp))
    bucket = epoch_seconds / seconds
    return int(bucket)
def digest(request):
    """Render a preview of the digest e-mail from randomly generated data.

    The generator is seeded from the ``seed`` query parameter, defaulting to
    the current time, so a given seed reproduces the same preview content.

    :param request: the HTTP request; only ``request.GET`` is read.
    :returns: the rendered ``MailPreview`` response.
    """
    seed = request.GET.get('seed', str(time.time()))
    # The seed was previously left out of the call, so the log line showed a
    # literal "%s" and the preview could not be reproduced from the logs.
    logger.debug('Using random seed value: %s', seed)
    random = Random(seed)

    now = datetime.utcnow().replace(tzinfo=pytz.utc)

    # TODO: Refactor all of these into something more manageable.
    org = Organization(
        id=1,
        slug='example',
        name='Example Organization',
    )

    team = Team(
        id=1,
        slug='example',
        name='Example Team',
        organization=org,
    )

    project = Project(
        id=1,
        slug='example',
        name='Example Project',
        team=team,
        organization=org,
    )

    rules = {
        i: Rule(
            id=i,
            project=project,
            label="Rule #%s" % (i, ),
        )
        for i in xrange(1, random.randint(2, 4))
    }

    state = {
        'project': project,
        'groups': {},
        'rules': rules,
        'event_counts': {},
        'user_counts': {},
    }

    records = []

    group_sequence = itertools.count(1)
    event_sequence = itertools.count(1)

    # NOTE: the order of calls on ``random`` below determines the output for
    # a given seed; keep it stable.
    for _group_index in xrange(random.randint(1, 30)):
        group_id = next(group_sequence)

        # Three dotted segments, each made of one or more sampled words.
        culprit = '{module} in {function}'.format(
            module='.'.join(
                ''.join(random.sample(WORDS, random.randint(1, int(random.paretovariate(2.2)))))
                for _segment in xrange(1, 4)
            ),
            function=random.choice(WORDS),
        )
        group = state['groups'][group_id] = Group(
            id=group_id,
            project=project,
            message=words(int(random.weibullvariate(8, 4)), common=False),
            culprit=culprit,
            level=random.choice(LOG_LEVELS.keys()),
        )

        # Each event is placed up to two minutes before the previous one.
        offset = timedelta(seconds=0)
        for _event_index in xrange(random.randint(1, 10)):
            offset += timedelta(seconds=random.random() * 120)
            event = Event(
                id=next(event_sequence),
                event_id=uuid.uuid4().hex,
                project=project,
                group=group,
                message=group.message,
                data=load_data('python'),
                datetime=now - offset,
            )

            records.append(
                Record(
                    event.event_id,
                    Notification(
                        event,
                        random.sample(state['rules'], random.randint(1, len(state['rules']))),
                    ),
                    to_timestamp(event.datetime),
                )
            )

            # Integer bounds: ``randint`` is documented for integers, and
            # newer Python versions reject a float such as 1e4.
            state['event_counts'][group_id] = random.randint(10, 10000)
            state['user_counts'][group_id] = random.randint(10, 10000)

    digest = build_digest(project, records, state)
    start, end, counts = get_digest_metadata(digest)

    return MailPreview(
        html_template='sentry/emails/digests/body.html',
        text_template='sentry/emails/digests/body.txt',
        context={
            'project': project,
            'counts': counts,
            'digest': digest,
            'start': start,
            'end': end,
        },
    ).render()
def get_data_for_date(date):
    """Return ``(midnight_utc, {"value": ..., "color": ...})`` for ``date``.

    The value is looked up in ``series_value_map`` by the timestamp of UTC
    midnight on that date (``None`` when absent) and the colour is taken
    from ``value_color_map`` for that value.
    """
    midnight = datetime(date.year, date.month, date.day, tzinfo=pytz.utc)
    value = series_value_map.get(to_timestamp(midnight))
    point = {"value": value, "color": value_color_map[value]}
    return (midnight, point)
def test_build_group_attachment(self):
    # Checks the payload returned by ``build_group_attachment`` for three
    # calls: the group alone, the group with an event, and the group with an
    # event and ``link_to_event=True``.
    #
    # NOTE(review): in each of the three expected dicts the "People" option
    # entry contains the span ``"user:"******"text"``, which is not valid
    # Python. It looks like text was masked or lost there (presumably the
    # user id concatenation and the closing brackets of the option lists) --
    # restore it from version control before relying on this test. The
    # e-mail literals look masked in the same way.
    self.user = self.create_user("*****@*****.**")
    self.org = self.create_organization(name="Rowdy Tiger", owner=None)
    self.team = self.create_team(organization=self.org, name="Mariachi Band")
    self.project = self.create_project(
        organization=self.org,
        teams=[self.team],
        name="Bengal-Elephant-Giraffe-Tree-House")
    self.create_member(user=self.user, organization=self.org, role="owner", teams=[self.team])
    group = self.create_group(project=self.project)
    # Without an event the expected "ts" is derived from the group's last_seen.
    ts = group.last_seen
    assert build_group_attachment(group) == {
        "color": "#E03E2F",
        "text": "",
        "actions": [
            { "name": "status", "text": "Resolve", "type": "button", "value": "resolved" },
            { "text": "Ignore", "type": "button", "name": "status", "value": "ignored" },
            {
                "option_groups": [
                    {
                        "text": "Teams",
                        "options": [{
                            "text": "#mariachi-band",
                            "value": "team:" + str(self.team.id),
                        }],
                    },
                    {
                        "text": "People",
                        "options": [{
                            "text": "*****@*****.**",
                            # NOTE(review): damaged span on the next line.
                            "value": "user:"******"text": "Select Assignee...",
                "selected_options": [None],
                "type": "select",
                "name": "assign",
            },
        ],
        "mrkdwn_in": ["text"],
        "title": group.title,
        "fields": [],
        "footer": "BENGAL-ELEPHANT-GIRAFFE-TREE-HOUSE-1",
        "ts": to_timestamp(ts),
        "title_link": "http://testserver/organizations/rowdy-tiger/issues/" + str(group.id) + "/?referrer=slack",
        "callback_id": '{"issue":' + str(group.id) + "}",
        "fallback": f"[{self.project.slug}] {group.title}",
        # Plain string, not an f-string: "{version}" is compared literally.
        "footer_icon": "http://testserver/_static/{version}/sentry/images/sentry-email-avatar.png",
    }
    event = self.store_event(data={}, project_id=self.project.id)
    # With an event, the expected "ts", "title" and "fallback" come from the event.
    ts = event.datetime
    assert build_group_attachment(group, event) == {
        "color": "#E03E2F",
        "text": "",
        "actions": [
            { "name": "status", "text": "Resolve", "type": "button", "value": "resolved" },
            { "text": "Ignore", "type": "button", "name": "status", "value": "ignored" },
            {
                "option_groups": [
                    {
                        "text": "Teams",
                        "options": [{
                            "text": "#mariachi-band",
                            "value": "team:" + str(self.team.id),
                        }],
                    },
                    {
                        "text": "People",
                        "options": [{
                            "text": "*****@*****.**",
                            # NOTE(review): damaged span on the next line.
                            "value": "user:"******"text": "Select Assignee...",
                "selected_options": [None],
                "type": "select",
                "name": "assign",
            },
        ],
        "mrkdwn_in": ["text"],
        "title": event.title,
        "fields": [],
        "footer": "BENGAL-ELEPHANT-GIRAFFE-TREE-HOUSE-1",
        "ts": to_timestamp(ts),
        "title_link": "http://testserver/organizations/rowdy-tiger/issues/" + str(group.id) + "/?referrer=slack",
        "callback_id": '{"issue":' + str(group.id) + "}",
        "fallback": f"[{self.project.slug}] {event.title}",
        "footer_icon": "http://testserver/_static/{version}/sentry/images/sentry-email-avatar.png",
    }
    # With link_to_event=True only the expected "title_link" differs: it
    # points at the specific event under the issue.
    assert build_group_attachment(group, event, link_to_event=True) == {
        "color": "#E03E2F",
        "text": "",
        "actions": [
            { "name": "status", "text": "Resolve", "type": "button", "value": "resolved" },
            { "text": "Ignore", "type": "button", "name": "status", "value": "ignored" },
            {
                "option_groups": [
                    {
                        "text": "Teams",
                        "options": [{
                            "text": "#mariachi-band",
                            "value": "team:" + str(self.team.id),
                        }],
                    },
                    {
                        "text": "People",
                        "options": [{
                            "text": "*****@*****.**",
                            # NOTE(review): damaged span on the next line.
                            "value": "user:"******"text": "Select Assignee...",
                "selected_options": [None],
                "type": "select",
                "name": "assign",
            },
        ],
        "mrkdwn_in": ["text"],
        "title": event.title,
        "fields": [],
        "footer": "BENGAL-ELEPHANT-GIRAFFE-TREE-HOUSE-1",
        "ts": to_timestamp(ts),
        "title_link": f"http://testserver/organizations/rowdy-tiger/issues/{group.id}/events/{event.event_id}/" + "?referrer=slack",
        "callback_id": '{"issue":' + str(group.id) + "}",
        "fallback": f"[{self.project.slug}] {event.title}",
        "footer_icon": "http://testserver/_static/{version}/sentry/images/sentry-email-avatar.png",
    }
def build_attachment(group, event=None, tags=None, identity=None, actions=None, rules=None):
    """Build the Slack attachment payload describing ``group``.

    :param group: the issue the attachment is about.
    :param event: optional event; used for the text/title, as the tag source
        and, when later than ``group.last_seen``, for the timestamp.
    :param tags: optional collection of tag keys to show as short fields.
    :param identity: passed through to ``build_action_text``.
    :param actions: optional list of already-taken actions. When non-empty
        their texts are appended to the body, the actioned color is used and
        no buttons are offered.
    :param rules: optional list of rules; the first label (and a count of
        the others) is appended to the footer.
    :returns: a dict with the attachment fields.
    """
    # XXX(dcramer): options are limited to 100 choices, even when nested
    status = group.get_status()
    members = get_member_assignees(group)
    teams = get_team_assignees(group)

    logo_url = absolute_uri(
        get_asset_url('sentry', 'images/sentry-email-avatar.png'))
    text = build_attachment_text(group, event) or ''

    if actions is None:
        actions = []

    assignee = get_assignee(group)

    # The buttons flip to their "undo" variant once the issue already has
    # the corresponding status.
    is_resolved = status == GroupStatus.RESOLVED
    is_ignored = status == GroupStatus.IGNORED

    resolve_button = {
        'name': 'status' if is_resolved else 'resolve_dialog',
        'value': 'unresolved' if is_resolved else 'resolve_dialog',
        'type': 'button',
        'text': 'Unresolve' if is_resolved else 'Resolve...',
    }
    ignore_button = {
        'name': 'status',
        'value': 'unresolved' if is_ignored else 'ignored',
        'type': 'button',
        'text': 'Stop Ignoring' if is_ignored else 'Ignore',
    }

    # Only non-empty assignee sections are offered.
    option_groups = [
        {'text': label, 'options': options}
        for label, options in (('Teams', teams), ('People', members))
        if options
    ]

    assign_action = {
        'name': 'assign',
        'text': 'Select Assignee...',
        'type': 'select',
        'selected_options': [assignee],
    }
    # TODO(epurkhiser): Remove when teams are no longer early adopter
    if features.has('organizations:new-teams', group.organization):
        assign_action['option_groups'] = option_groups
    else:
        assign_action['options'] = members

    fields = []
    if tags:
        source = event if event else group.get_latest_event()
        fields = [
            {
                'title': key.encode('utf-8'),
                'value': val.encode('utf-8'),
                'short': True,
            }
            for key, val in source.tags
            if key in tags
        ]

    if actions:
        # Once something was actioned, describe it and drop the buttons.
        described = [build_action_text(group, identity, a) for a in actions]
        text += '\n' + '\n'.join([line for line in described if line])
        color = ACTIONED_ISSUE_COLOR
        payload_actions = []
    else:
        color = NEW_ISSUE_COLOR
        payload_actions = [resolve_button, ignore_button, assign_action]

    ts = group.last_seen
    if event:
        ts = max(ts, event.datetime)

    footer_parts = [u'{}'.format(group.qualified_short_id)]
    if rules:
        footer_parts.append(u' via {}'.format(rules[0].label))
        extra = len(rules) - 1
        if extra > 0:
            footer_parts.append(u' (+{} other)'.format(extra))
    footer = u''.join(footer_parts)

    return {
        'fallback': u'[{}] {}'.format(group.project.slug, group.title),
        'title': build_attachment_title(group, event),
        'title_link': add_notification_referrer_param(group.get_absolute_url(), 'slack'),
        'text': text,
        'fields': fields,
        'mrkdwn_in': ['text'],
        'callback_id': json.dumps({'issue': group.id}),
        'footer_icon': logo_url,
        'footer': footer,
        'ts': to_timestamp(ts),
        'color': color,
        'actions': payload_actions,
    }
def test_frequency_tables(self):
    """Exercise recording, querying, merging and deleting frequency data.

    Frequencies are recorded for two organizations at ``now`` and one hour
    earlier (once more with ``environment_id=1``). The test then checks
    ``get_most_frequent``, ``get_most_frequent_series``,
    ``get_frequency_series`` and ``get_frequency_totals``, before and after
    ``merge_frequencies``, and finally that ``delete_frequencies`` leaves
    nothing behind.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.UTC)
    model = TSDBModel.frequent_issues_by_project

    # None of the registered frequency tables actually support
    # environments, so we have to pretend like one actually does
    self.db.models_with_environment_support = (
        self.db.models_with_environment_support | {model}
    )

    rollup = 3600
    hour_ago = now - timedelta(hours=1)

    org_1, org_2, org_3 = "organization:1", "organization:2", "organization:3"
    both_orgs = (org_1, org_2)
    p1, p2, p3, p4, p5 = ["project:%d" % n for n in (1, 2, 3, 4, 5)]
    series_request = {org_1: (p1, p2, p3, p4), org_2: (p5,)}
    totals_request = {
        org_1: (p1, p2, p3, p4, p5),
        org_2: (p1, p2, p3, p4, p5),
    }

    # Current hour: organization 1 only.
    self.db.record_frequency_multi(
        ((model, {org_1: {p1: 1, p2: 2, p3: 3}}),),
        now,
    )
    # Previous hour, no environment.
    self.db.record_frequency_multi(
        ((model, {org_1: {p1: 1, p2: 1, p3: 1, p4: 1}, org_2: {p5: 1}}),),
        hour_ago,
    )
    # Previous hour, environment 1.
    self.db.record_frequency_multi(
        ((model, {org_1: {p2: 1, p3: 2, p4: 3}, org_2: {p5: 0.5}}),),
        hour_ago,
        environment_id=1,
    )

    current_only = self.db.get_most_frequent(model, both_orgs, now, rollup=rollup)
    assert current_only == {
        org_1: [(p3, 3.0), (p2, 2.0), (p1, 1.0)],
        org_2: [],
    }

    env_one = self.db.get_most_frequent(
        model, both_orgs, hour_ago, now, rollup=rollup, environment_id=1)
    assert env_one == {
        org_1: [(p4, 3.0), (p3, 2.0), (p2, 1.0)],
        org_2: [(p5, 0.5)],
    }

    top_one = self.db.get_most_frequent(
        model, both_orgs, now, limit=1, rollup=rollup)
    assert top_one == {org_1: [(p3, 3.0)], org_2: []}

    both_hours = self.db.get_most_frequent(
        model, both_orgs, hour_ago, now, rollup=rollup)
    assert both_hours == {
        org_1: [
            (p3, 3.0 + 3.0),
            (p2, 2.0 + 2.0),
            (p4, 4.0),
            (p1, 1.0 + 1.0),
        ],
        org_2: [(p5, 1.5)],
    }

    env_zero = self.db.get_most_frequent(
        model, both_orgs, hour_ago, now, rollup=rollup, environment_id=0)
    assert env_zero == {org_1: [], org_2: []}

    # Start of the rollup bucket containing ``now``.
    timestamp = int(to_timestamp(now) // rollup) * rollup
    previous = timestamp - rollup

    most_frequent_series = self.db.get_most_frequent_series(
        model, (org_1, org_2, org_3), hour_ago, now, rollup=rollup)
    assert most_frequent_series == {
        org_1: [
            (previous, {p1: 1.0, p2: 2.0, p3: 3.0, p4: 4.0}),
            (timestamp, {p1: 1.0, p2: 2.0, p3: 3.0}),
        ],
        org_2: [(previous, {p5: 1.5}), (timestamp, {})],
        org_3: [(previous, {}), (timestamp, {})],
    }

    frequency_series = self.db.get_frequency_series(
        model, dict(series_request), hour_ago, now, rollup=rollup)
    assert frequency_series == {
        org_1: [
            (previous, {p1: 1.0, p2: 2.0, p3: 3.0, p4: 4.0}),
            (timestamp, {p1: 1.0, p2: 2.0, p3: 3.0, p4: 0.0}),
        ],
        org_2: [
            (previous, {p5: 1.5}),
            (timestamp, {p5: 0.0}),
        ],
    }

    frequency_series_env = self.db.get_frequency_series(
        model, dict(series_request), hour_ago, now,
        rollup=rollup, environment_id=1)
    assert frequency_series_env == {
        org_1: [
            (previous, {p1: 0.0, p2: 1.0, p3: 2.0, p4: 3.0}),
            (timestamp, {p1: 0.0, p2: 0.0, p3: 0.0, p4: 0.0}),
        ],
        org_2: [
            (previous, {p5: 0.5}),
            (timestamp, {p5: 0.0}),
        ],
    }

    totals = self.db.get_frequency_totals(
        model, dict(totals_request), hour_ago, now, rollup=rollup)
    assert totals == {
        org_1: {
            p1: 1.0 + 1.0,
            p2: 2.0 + 2.0,
            p3: 3.0 + 3.0,
            p4: 4.0,
            p5: 0.0,
        },
        org_2: {p1: 0.0, p2: 0.0, p3: 0.0, p4: 0.0, p5: 1.5},
    }

    self.db.merge_frequencies(
        model, org_1, [org_2], now, environment_ids=[0, 1])

    # After the merge the expected totals show organization 2's count on
    # organization 1 and zeros for organization 2.
    merged_totals = self.db.get_frequency_totals(
        model, dict(totals_request), hour_ago, now, rollup=rollup)
    assert merged_totals == {
        org_1: {
            p1: 1.0 + 1.0,
            p2: 2.0 + 2.0,
            p3: 3.0 + 3.0,
            p4: 4.0,
            p5: 1.5,
        },
        org_2: {p1: 0.0, p2: 0.0, p3: 0.0, p4: 0.0, p5: 0.0},
    }

    merged_totals_env = self.db.get_frequency_totals(
        model, dict(totals_request), hour_ago, now,
        rollup=rollup, environment_id=1)
    assert merged_totals_env == {
        org_1: {p1: 0.0, p2: 1.0, p3: 2.0, p4: 3.0, p5: 0.5},
        org_2: {p1: 0.0, p2: 0.0, p3: 0.0, p4: 0.0, p5: 0.0},
    }

    self.db.delete_frequencies(
        [model],
        [org_1, org_2],
        hour_ago,
        now,
        environment_ids=[0, 1],
    )

    after_delete = self.db.get_most_frequent(model, both_orgs, now)
    assert after_delete == {org_1: [], org_2: []}

    after_delete_env = self.db.get_most_frequent(
        model, both_orgs, now, environment_id=1)
    assert after_delete_env == {org_1: [], org_2: []}
def build_incident_attachment(incident):
    """Build the Slack attachment payload describing ``incident``.

    A closed incident is reported as "Resolved" with the resolved color;
    any other status is reported as "Fired" with the error color. The event
    and user counts come from ``get_incident_aggregates``.

    :returns: a dict with the attachment fields.
    """
    logo_url = absolute_uri(
        get_asset_url("sentry", "images/sentry-email-avatar.png"))
    aggregates = get_incident_aggregates(incident)

    is_closed = incident.status == IncidentStatus.CLOSED.value
    status, color = (
        ("Resolved", RESOLVED_COLOR)
        if is_closed
        else ("Fired", LEVEL_TO_COLOR["error"])
    )

    field_values = (
        ("Status", status),
        ("Events", aggregates["count"]),
        ("Users", aggregates["unique_users"]),
    )
    fields = [
        {"title": label, "value": shown, "short": True}
        for label, shown in field_values
    ]

    title = u"INCIDENT: {} (#{})".format(incident.title, incident.identifier)
    incident_url = absolute_uri(
        reverse(
            "sentry-incident",
            kwargs={
                "organization_slug": incident.organization.slug,
                "incident_id": incident.identifier,
            },
        ))

    return {
        "fallback": title,
        "title": title,
        "title_link": incident_url,
        "text": " ",
        "fields": fields,
        "mrkdwn_in": ["text"],
        "footer_icon": logo_url,
        "footer": "Sentry Incident",
        "ts": to_timestamp(incident.date_started),
        "color": color,
        "actions": [],
    }
def report(request):
    """Render a preview of the weekly report email from random data.

    A random report timestamp between 2015-06-01 and 2016-07-01 (UTC) is
    floored to the start of its day, a random set of projects is created,
    and a randomly generated report is built for each of them.

    :param request: the incoming HTTP request; passed to ``get_random`` and
        to ``MailPreview.render``, and ``request.user`` is put in the context.
    :returns: whatever ``MailPreview(...).render(request)`` returns.
    """
    from sentry.tasks import reports

    random = get_random(request)

    duration = 60 * 60 * 24 * 7
    # ``randint`` is only specified for integer bounds, so the timestamps
    # are truncated to whole seconds before being used as a range.
    timestamp = to_timestamp(
        reports.floor_to_utc_day(
            to_datetime(
                random.randint(
                    int(to_timestamp(datetime(2015, 6, 1, 0, 0, 0, tzinfo=timezone.utc))),
                    int(to_timestamp(datetime(2016, 7, 1, 0, 0, 0, tzinfo=timezone.utc))),
                )
            )
        )
    )

    start, stop = interval = reports._to_interval(timestamp, duration)

    organization = Organization(id=1, slug="example", name="Example")

    projects = []
    for i in xrange(0, random.randint(1, 8)):
        name = " ".join(random.sample(loremipsum.words, random.randint(1, 4)))
        projects.append(
            Project(
                id=i,
                organization=organization,
                slug=slugify(name),
                name=name,
                date_added=start - timedelta(days=random.randint(0, 120)),
            )
        )

    # NOTE(review): not referenced anywhere else in this function.
    def make_release_generator():
        """Yield releases for random projects, dated within the 30 days
        before the report timestamp."""
        id_sequence = itertools.count(1)
        while True:
            dt = to_datetime(
                random.randint(
                    int(timestamp - (30 * 24 * 60 * 60)),
                    int(timestamp),
                )
            )
            p = random.choice(projects)
            yield Release(
                id=next(id_sequence),
                project=p,
                organization_id=p.organization_id,
                version="".join(
                    [random.choice("0123456789abcdef") for _ in range(40)]),
                date_added=dt,
            )

    def build_issue_summaries():
        """Return three random issue counts."""
        return [
            int(random.weibullvariate(10, 1) * random.paretovariate(0.5))
            for _ in range(3)
        ]

    def build_usage_summary():
        """Return a pair of random usage counts."""
        return (
            int(random.weibullvariate(3, 1) * random.paretovariate(0.2)),
            int(random.weibullvariate(5, 1) * random.paretovariate(0.2)),
        )

    def build_calendar_data(project):
        """Return random daily calendar data, damped on weekends."""
        start, stop = reports.get_calendar_query_range(interval, 3)
        rollup = 60 * 60 * 24
        series = []

        weekend = frozenset((5, 6))
        value = int(random.weibullvariate(5000, 3))
        for point in tsdb.get_optimal_rollup_series(start, stop, rollup)[1]:
            # ``weekday`` must be *called*: testing the bound method itself
            # against the set is always false, so the weekend damping was
            # never applied.
            is_weekend = to_datetime(point).weekday() in weekend
            damping = random.uniform(0.2, 0.6) if is_weekend else 1
            jitter = random.paretovariate(1.2)
            series.append((point, int(value * damping * jitter)))
            value = value * random.uniform(0.25, 2)

        return reports.clean_calendar_data(project, series, start, stop, rollup, stop)

    def build_report(project):
        """Return a ``reports.Report`` filled with random data."""
        daily_maximum = random.randint(1000, 10000)

        rollup = 60 * 60 * 24
        series = [
            (
                timestamp + (i * rollup),
                (random.randint(0, daily_maximum), random.randint(0, daily_maximum)),
            )
            for i in xrange(0, 7)
        ]

        # Each aggregate is missing (``None``) roughly 10% of the time.
        aggregates = [
            random.randint(0, daily_maximum * 7) if random.random() < 0.9 else None
            for _ in xrange(0, 4)
        ]

        return reports.Report(
            series,
            aggregates,
            build_issue_summaries(),
            build_usage_summary(),
            build_calendar_data(project),
        )

    if random.random() < 0.85:
        personal = {
            "resolved": random.randint(0, 100),
            "users": int(random.paretovariate(0.2)),
        }
    else:
        personal = {"resolved": 0, "users": 0}

    return MailPreview(
        html_template="sentry/emails/reports/body.html",
        text_template="sentry/emails/reports/body.txt",
        context={
            "duration": reports.durations[duration],
            "interval": {
                "start": reports.date_format(start),
                "stop": reports.date_format(stop),
            },
            "report": reports.to_context(
                organization,
                interval,
                {project: build_report(project) for project in projects},
            ),
            "organization": organization,
            "personal": personal,
            "user": request.user,
        },
    ).render(request)
def digest(request):
    """Render a preview of the digest email from randomly generated data.

    Random groups are taken from ``make_group_generator``; for each of them
    between one and ten ``SnubaEvent`` instances are built from the
    normalized "python" sample payload and attached to a random subset of
    the generated rules.

    :param request: the incoming HTTP request; passed to ``get_random`` and
        to ``MailPreview.render``.
    :returns: whatever ``MailPreview(...).render(request)`` returns.
    """
    random = get_random(request)

    # TODO: Refactor all of these into something more manageable.
    org = Organization(id=1, slug="example", name="Example Organization")

    project = Project(id=1, slug="example", name="Example Project", organization=org)

    rules = {
        i: Rule(id=i, project=project, label="Rule #%s" % (i, ))
        for i in range(1, random.randint(2, 4))
    }

    state = {
        "project": project,
        "groups": {},
        "rules": rules,
        "event_counts": {},
        "user_counts": {},
    }

    # ``random.sample`` requires a sequence; sampling the dict directly
    # raises ``TypeError`` on Python 3. The population is the rule ids.
    rule_ids = list(state["rules"])

    records = []

    group_generator = make_group_generator(random, project)

    for _ in range(random.randint(1, 30)):
        group = next(group_generator)
        state["groups"][group.id] = group

        # ``offset`` is not read below; the ``random.random()`` call is kept
        # so the sequence of values drawn from the generator is unchanged.
        offset = timedelta(seconds=0)
        for _ in range(random.randint(1, 10)):
            offset += timedelta(seconds=random.random() * 120)

            data = dict(load_data("python"))
            data["message"] = group.message
            data.pop("logentry", None)

            event_manager = EventManager(data)
            event_manager.normalize()
            data = event_manager.get_data()

            # ``randint`` is only specified for integer bounds.
            timestamp = to_datetime(
                random.randint(
                    int(to_timestamp(group.first_seen)),
                    int(to_timestamp(group.last_seen)),
                )
            )

            event = SnubaEvent({
                "event_id": uuid.uuid4().hex,
                "project_id": project.id,
                "group_id": group.id,
                "message": group.message,
                "data": data.data,
                "timestamp": timestamp.strftime("%Y-%m-%dT%H:%M:%S"),
            })
            event.group = group

            records.append(
                Record(
                    event.event_id,
                    Notification(
                        event,
                        random.sample(rule_ids, random.randint(1, len(rule_ids))),
                    ),
                    to_timestamp(event.datetime),
                )
            )

            state["event_counts"][group.id] = random.randint(10, 10000)
            state["user_counts"][group.id] = random.randint(10, 10000)

    digest = build_digest(project, records, state)
    start, end, counts = get_digest_metadata(digest)

    context = {
        "project": project,
        "counts": counts,
        "digest": digest,
        "start": start,
        "end": end,
        "referrer": "digest_email",
    }
    add_unsubscribe_link(context)

    return MailPreview(
        html_template="sentry/emails/digests/body.html",
        text_template="sentry/emails/digests/body.txt",
        context=context,
    ).render(request)
def digest(request):
    """Render a preview of the digest email from randomly generated data.

    Random groups are taken from ``make_group_generator``; for each of them
    between one and ten ``Event`` instances are created with a random time
    between the group's ``first_seen`` and ``last_seen`` and attached to a
    random subset of the generated rules.

    :param request: the incoming HTTP request; passed to ``get_random`` and
        to ``MailPreview.render``.
    :returns: whatever ``MailPreview(...).render(request)`` returns.
    """
    random = get_random(request)

    # TODO: Refactor all of these into something more manageable.
    org = Organization(
        id=1,
        slug='example',
        name='Example Organization',
    )

    team = Team(
        id=1,
        slug='example',
        name='Example Team',
        organization=org,
    )

    project = Project(
        id=1,
        slug='example',
        name='Example Project',
        team=team,
        organization=org,
    )

    rules = {
        i: Rule(
            id=i,
            project=project,
            label="Rule #%s" % (i,),
        )
        for i in range(1, random.randint(2, 4))
    }

    state = {
        'project': project,
        'groups': {},
        'rules': rules,
        'event_counts': {},
        'user_counts': {},
    }

    # ``random.sample`` requires a sequence; sampling the dict directly
    # raises ``TypeError`` on Python 3. The population is the rule ids.
    rule_ids = list(state['rules'])

    records = []

    event_sequence = itertools.count(1)
    group_generator = make_group_generator(random, project)

    for _ in range(random.randint(1, 30)):
        group = next(group_generator)
        state['groups'][group.id] = group

        # ``offset`` is not read below; the ``random.random()`` call is kept
        # so the sequence of values drawn from the generator is unchanged.
        offset = timedelta(seconds=0)
        for _ in range(random.randint(1, 10)):
            offset += timedelta(seconds=random.random() * 120)
            event = Event(
                id=next(event_sequence),
                event_id=uuid.uuid4().hex,
                project=project,
                group=group,
                message=group.message,
                data=load_data('python'),
                # ``randint`` is only specified for integer bounds.
                datetime=to_datetime(
                    random.randint(
                        int(to_timestamp(group.first_seen)),
                        int(to_timestamp(group.last_seen)),
                    ),
                )
            )

            records.append(
                Record(
                    event.event_id,
                    Notification(
                        event,
                        random.sample(rule_ids, random.randint(1, len(rule_ids))),
                    ),
                    to_timestamp(event.datetime),
                )
            )

            state['event_counts'][group.id] = random.randint(10, 10000)
            state['user_counts'][group.id] = random.randint(10, 10000)

    digest = build_digest(project, records, state)
    start, end, counts = get_digest_metadata(digest)

    context = {
        'project': project,
        'counts': counts,
        'digest': digest,
        'start': start,
        'end': end,
    }
    add_unsubscribe_link(context)

    return MailPreview(
        html_template='sentry/emails/digests/body.html',
        text_template='sentry/emails/digests/body.txt',
        context=context,
    ).render(request)
def convert_search_filter_to_snuba_query(search_filter):
    """Translate one parsed search filter into a snuba condition.

    :param search_filter: object exposing ``key`` (``name``, ``is_tag``),
        ``operator`` and ``value`` (``value``, ``raw_value``,
        ``is_wildcard()``).
    :returns: ``None`` for keys listed in ``no_conversion``; a list of
        conditions for ``environment`` and for ``!=`` on non-tag columns;
        otherwise a single ``[lhs, operator, rhs]`` condition.
    :raises InvalidSearchQuery: for a wildcard on ``id`` or an unknown
        ``transaction.status`` value.
    """
    name = search_filter.key.name
    value = search_filter.value.value

    if name in no_conversion:
        return

    if name == "id" and search_filter.value.is_wildcard():
        raise InvalidSearchQuery(
            "Wildcard conditions are not permitted on `id` field.")

    if name == "environment":
        is_equal = search_filter.operator == "="
        # conditions added to env_conditions are OR'd
        env_conditions = []

        if isinstance(value, (list, tuple)):
            values = set(value)
        else:
            values = set([value])

        # the "no environment" environment is null in snuba
        if "" in values:
            values.remove("")
            env_conditions.append(
                ["environment", "IS NULL" if is_equal else "IS NOT NULL", None])

        if len(values) == 1:
            env_conditions.append(
                ["environment", "=" if is_equal else "!=", values.pop()])
        elif values:
            env_conditions.append(
                ["environment", "IN" if is_equal else "NOT IN", values])

        return env_conditions

    if name == "message":
        if search_filter.value.is_wildcard():
            # XXX: We don't want the '^$' values at the beginning and end of
            # the regex since we want to find the pattern anywhere in the
            # message. Strip off here
            pattern = search_filter.value.value[1:-1]
            match_call = ["match", ["message", u"'(?i){}'".format(pattern)]]
            return [match_call, search_filter.operator, 1]

        # https://clickhouse.yandex/docs/en/query_language/functions/string_search_functions/#position-haystack-needle
        # positionCaseInsensitive returns 0 if not found and an index of 1 or more if found
        # so we should flip the operator here
        flipped = "=" if search_filter.operator == "!=" else "!="
        # make message search case insensitive
        position_call = [
            "positionCaseInsensitive",
            ["message", u"'{}'".format(value)],
        ]
        return [position_call, flipped, 0]

    if name.startswith(("stack.", "error.")) and search_filter.value.is_wildcard():
        # Escape and convert meta characters for LIKE expressions.
        raw_value = search_filter.value.raw_value
        like_value = raw_value.replace("%", "\\%")
        like_value = like_value.replace("_", "\\_")
        like_value = like_value.replace("*", "%")
        like_operator = "LIKE" if search_filter.operator == "=" else "NOT LIKE"
        return [name, like_operator, like_value]

    if name == "transaction.status":
        internal_value = SPAN_STATUS_NAME_TO_CODE.get(
            search_filter.value.raw_value)
        if internal_value is None:
            raise InvalidSearchQuery(
                u"Invalid value for transaction.status condition. Accepted values are {}"
                .format(", ".join(SPAN_STATUS_NAME_TO_CODE.keys())))
        return [name, search_filter.operator, internal_value]

    # Generic case. Datetimes become integer timestamps multiplied by 1000,
    # except on the ``timestamp`` column itself.
    if isinstance(value, datetime) and name != "timestamp":
        value = int(to_timestamp(value)) * 1000

    is_tag = search_filter.key.is_tag

    # Tags are never null, but promoted tags are columns and so can be null.
    # To handle both cases, use `ifNull` to convert to an empty string and
    # compare so we need to check for empty values.
    if is_tag:
        name = ["ifNull", [name, "''"]]

    # Handle checks for existence
    if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
        if is_tag:
            return [name, search_filter.operator, value]
        # If not a tag, we can just check that the column is null.
        return [["isNull", [name]], search_filter.operator, 1]

    # Handle null columns on inequality comparisons. Any comparison
    # between a value and a null will result to null, so we need to
    # explicitly check for whether the condition is null, and OR it
    # together with the inequality check.
    # We don't need to apply this for tags, since if they don't exist
    # they'll always be an empty string.
    needs_null_check = search_filter.operator == "!=" and not is_tag
    is_null_condition = [["isNull", [name]], "=", 1] if needs_null_check else None

    if search_filter.value.is_wildcard():
        lhs = ["match", [name, u"'(?i){}'".format(value)]]
        condition = [lhs, search_filter.operator, 1]
    else:
        condition = [name, search_filter.operator, value]

    # We only want to return as a list if we have the check for null
    # present. Returning as a list causes these conditions to be ORed
    # together. Otherwise just return the raw condition, so that it can be
    # used correctly in aggregates.
    if not is_null_condition:
        return condition
    return [is_null_condition, condition]
def alert(request):
    """Render a preview of the alert (error) email for a random group.

    The ``platform`` query parameter (default ``'python'``) selects which
    sample payload is loaded for the event.

    :param request: the incoming HTTP request.
    :returns: whatever ``MailPreview(...).render(request)`` returns.
    """
    platform = request.GET.get('platform', 'python')
    org = Organization(
        id=1,
        slug='example',
        name='Example',
    )
    team = Team(
        id=1,
        slug='example',
        name='Example',
        organization=org,
    )
    project = Project(
        id=1,
        slug='example',
        name='Example',
        team=team,
        organization=org,
    )

    random = get_random(request)
    group = next(
        make_group_generator(random, project),
    )

    event = Event(
        id=1,
        event_id='44f1419e73884cd2b45c79918f4b6dc4',
        project=project,
        group=group,
        message=group.message,
        data=load_data(platform),
        # ``randint`` is only specified for integer bounds.
        datetime=to_datetime(
            random.randint(
                int(to_timestamp(group.first_seen)),
                int(to_timestamp(group.last_seen)),
            ),
        ),
    )

    rule = Rule(label="An example rule")

    # Only interfaces that render a non-empty HTML body are shown.
    interface_list = []
    for interface in six.itervalues(event.interfaces):
        body = interface.to_email_html(event)
        if not body:
            continue
        interface_list.append((interface.get_title(), mark_safe(body)))

    return MailPreview(
        html_template='sentry/emails/error.html',
        text_template='sentry/emails/error.txt',
        context={
            'rule': rule,
            'group': group,
            'event': event,
            'link': 'http://example.com/link',
            'interfaces': interface_list,
            'project_label': project.name,
            # The context used to define 'tags' twice; the later, fixed
            # list always won, so it is the only one kept.
            'tags': [
                ('logger', 'javascript'),
                ('environment', 'prod'),
                ('level', 'error'),
                ('device', 'Other'),
            ],
        },
    ).render(request)
def query(start, end, groupby, conditions=None, filter_keys=None,
          aggregations=None, rollup=None, arrayjoin=None, limit=None,
          orderby=None, having=None, referrer=None):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar). The list is copied, so the caller's
    object is not modified when further conditions are added here.

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query. Defaults to a single ``count()`` aliased as
    ``aggregate``.

    Raises ``SnubaError`` when no project ids can be determined, when the
    HTTP request fails, when the response is not valid JSON, or when snuba
    answers with a non-200 status.

    Returns the result rows nested by ``groupby`` (via ``nest_groups``).
    """
    groupby = groupby or []
    # Copy instead of aliasing: conditions derived from ``filter_keys`` are
    # appended below and must not leak into the caller's list.
    conditions = list(conditions) if conditions else []
    having = having or []
    aggregations = aggregations or [['count()', '', 'aggregate']]
    filter_keys = filter_keys or {}

    # Forward and reverse translation maps from model ids to snuba keys, per column
    with timer('get_snuba_map'):
        snuba_map = {
            col: get_snuba_map(col, keys)
            for col, keys in six.iteritems(filter_keys)
        }
        snuba_map = {
            k: v
            for k, v in six.iteritems(snuba_map)
            if k is not None and v is not None
        }
        rev_snuba_map = {
            col: dict(reversed(i) for i in keys.items())
            for col, keys in six.iteritems(snuba_map)
        }

    for col, keys in six.iteritems(filter_keys):
        keys = [k for k in keys if k is not None]
        if col in snuba_map:
            # Keys without a translation are dropped.
            keys = [snuba_map[col][k] for k in keys if k in snuba_map[col]]
        if keys:
            # A translated key may itself be None, hence the IS NULL form.
            if len(keys) == 1 and keys[0] is None:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = filter_keys['project_id']
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [
                get_related_project_ids(k, filter_keys[k])
                for k in filter_keys
            ]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    if not project_ids:
        raise SnubaError(
            "No project_id filter, or none could be inferred from other filters."
        )

    # If the grouping, aggregation, or any of the conditions reference `issue`
    # we need to fetch the issue definitions (issue -> fingerprint hashes)
    aggregate_cols = [a[1] for a in aggregations]
    condition_cols = [c[0] for c in flat_conditions(conditions)]
    all_cols = groupby + aggregate_cols + condition_cols
    get_issues = 'issue' in all_cols
    with timer('get_project_issues'):
        issues = get_project_issues(
            project_ids, filter_keys.get('issue')) if get_issues else None

    # Parameters left as None are omitted from the request body.
    request = {
        k: v
        for k, v in six.iteritems({
            'from_date': start.isoformat(),
            'to_date': end.isoformat(),
            'conditions': conditions,
            'having': having,
            'groupby': groupby,
            'project': project_ids,
            'aggregations': aggregations,
            'granularity': rollup,
            'issues': issues,
            'arrayjoin': arrayjoin,
            'limit': limit,
            'orderby': orderby,
        })
        if v is not None
    }

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise SnubaError("Could not decode JSON response: {}".format(
            response.data))

    if response.status != 200:
        if body.get('error'):
            raise SnubaError(body['error'])
        else:
            raise SnubaError('HTTP {}'.format(response.status))

    # Validate and scrub response, and translate snuba keys back to IDs
    aggregate_cols = [a[2] for a in aggregations]
    expected_cols = set(groupby + aggregate_cols)
    got_cols = set(c['name'] for c in body['meta'])

    assert expected_cols == got_cols

    with timer('process_result'):
        for d in body['data']:
            if 'time' in d:
                d['time'] = int(to_timestamp(parse_datetime(d['time'])))
            for col in rev_snuba_map:
                if col in d:
                    d[col] = rev_snuba_map[col][d[col]]
        return nest_groups(body['data'], groupby, aggregate_cols)
def test_get_group_creation_attributes(self):
    """Check the attributes derived from three events of one group.

    The events are listed newest first. The expected attributes take
    ``last_seen``, ``message`` and ``level`` from the newest (JavaScript)
    event and ``active_at``, ``first_seen``, ``platform`` and ``logger``
    from the oldest (Java) event.
    """
    now = datetime(2017, 5, 3, 6, 6, 6, tzinfo=pytz.utc)
    oldest = now - timedelta(hours=2)

    def build_event(platform, message, hours_ago, level, logger_name):
        return Event(
            platform=platform,
            message=message,
            datetime=now - timedelta(hours=hours_ago),
            data={
                'type': 'default',
                'metadata': {},
                'tags': [
                    ['level', level],
                    ['logger', logger_name],
                ],
            },
        )

    events = [
        build_event('javascript', 'Hello from JavaScript', 0, 'info', 'javascript'),
        build_event('python', 'Hello from Python', 1, 'error', 'python'),
        build_event('java', 'Hello from Java', 2, 'debug', 'java'),
    ]

    attributes = get_group_creation_attributes(get_caches(), events)

    assert attributes == {
        'active_at': oldest,
        'first_seen': oldest,
        'last_seen': now,
        'platform': 'java',
        'message': 'Hello from JavaScript',
        'level': logging.INFO,
        'score': Group.calculate_score(3, now),
        'logger': 'java',
        'times_seen': 3,
        'first_release': None,
        'culprit': '',
        'data': {
            'type': 'default',
            'last_received': to_timestamp(now),
            'metadata': {},
        },
    }