def _post_by_key(self, request, full_config_requested):
    """
    Build the project config response for every public key in the request.

    The requested keys are read from ``request.relay_request_data["publicKeys"]``.
    Each requested key always gets an entry in the response: keys that are
    not active, whose project or organization is not returned by the cache
    lookups, or whose organization fails ``request.relay.has_org_access``
    are answered with ``{"disabled": True}``.

    :param request: request carrying ``relay_request_data`` and ``relay``.
    :param full_config_requested: forwarded as ``full_config`` to
        ``config.get_project_config``; when truthy the resulting configs are
        also written to ``projectconfig_cache``.
    :return: ``Response`` with body ``{"configs": {public_key: config}}``
        and status 200.
    """
    requested_keys = set(request.relay_request_data.get("publicKeys") or ())

    active_keys = {}  # type: dict[str, ProjectKey]
    project_ids = set()  # type: set[int]
    with start_span(op="relay_fetch_keys"), metrics.timer(
        "relay_project_configs.fetching_keys.duration"
    ):
        for project_key in ProjectKey.objects.get_many_from_cache(
            requested_keys, key="public_key"
        ):
            if project_key.status == ProjectKeyStatus.ACTIVE:
                active_keys[project_key.public_key] = project_key
                project_ids.add(project_key.project_id)

    projects = {}  # type: dict[int, Project]
    organization_ids = set()  # type: set[int]
    with start_span(op="relay_fetch_projects"), metrics.timer(
        "relay_project_configs.fetching_projects.duration"
    ):
        for fetched_project in Project.objects.get_many_from_cache(project_ids):
            projects[fetched_project.id] = fetched_project
            organization_ids.add(fetched_project.organization_id)

    # Load organizations (and then their options) up front so that computing
    # the individual project configs does not hit the database repeatedly.
    orgs = {}  # type: dict[int, Organization]
    with start_span(op="relay_fetch_orgs"), metrics.timer(
        "relay_project_configs.fetching_orgs.duration"
    ):
        for fetched_org in Organization.objects.get_many_from_cache(organization_ids):
            if request.relay.has_org_access(fetched_org):
                orgs[fetched_org.id] = fetched_org

    with start_span(op="relay_fetch_org_options"), metrics.timer(
        "relay_project_configs.fetching_org_options.duration"
    ):
        for org_id in orgs:
            OrganizationOption.objects.get_all_values(org_id)

    metrics.timing("relay_project_configs.projects_requested", len(project_ids))
    metrics.timing("relay_project_configs.projects_fetched", len(projects))
    metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

    configs = {}
    for public_key in requested_keys:
        # Start from the "disabled" answer; it is only replaced when the
        # whole key -> project -> organization chain resolves.
        configs[public_key] = {"disabled": True}

        project_key = active_keys.get(public_key)
        project = None if project_key is None else projects.get(project_key.project_id)
        organization = None if project is None else orgs.get(project.organization_id)
        if organization is None:
            continue

        # Attach the preloaded organization so quotas do not fetch it again.
        project.set_cached_field_value("organization", organization)

        with Hub.current.start_span(op="get_config"), metrics.timer(
            "relay_project_configs.get_config.duration"
        ):
            project_config = config.get_project_config(
                project,
                full_config=full_config_requested,
                project_keys=[project_key],
            )

        configs[public_key] = project_config.to_dict()

    if full_config_requested:
        projectconfig_cache.set_many(configs)

    return Response({"configs": configs}, status=200)
def get_event_stats_data(self, request, organization, get_event_stats, top_events=False):
    """
    Run a stats query described by the request and serialize its result.

    The ``yAxis`` GET values (default ``["count()"]``) are translated through
    a small table of legacy column aliases, handed to ``get_event_stats``
    together with the query string, filter params, rollup and reference
    event, and the result is serialized with ``SnubaTSResultSerializer``.

    :param request: request whose GET parameters describe the query.
    :param organization: organization the query is scoped to.
    :param get_event_stats: callable invoked as
        ``get_event_stats(query_columns, query, params, rollup, reference_event)``.
    :param top_events: when truthy the result is treated as a mapping and
        every value is serialized separately under its key.
    :return: ``{"data": []}`` when ``get_filter_params`` raises ``NoProjects``,
        otherwise the serialized result.
    """
    with self.handle_query_errors():
        with sentry_sdk.start_span(
            op="discover.endpoint", description="base.stats_query_creation"
        ):
            y_axes = request.GET.getlist("yAxis", ["count()"])
            search_query = request.GET.get("query")
            try:
                params = self.get_filter_params(request, organization)
            except NoProjects:
                return {"data": []}
            params = self.quantize_date_params(request, params)

            too_many_results = InvalidSearchQuery(
                "Your interval and date range would create too many results. "
                "Use a larger interval, or a smaller date range."
            )
            rollup = get_rollup_from_request(request, params, "1h", too_many_results)

            # Incidents still send the old column aliases because they
            # straddle both versions of events/discover. Keep translating
            # them until discover2 flags are enabled for all users.
            legacy_aliases = {
                "user_count": "count_unique(user)",
                "event_count": "count()",
                "epm()": "epm(%d)" % rollup,
                "eps()": "eps(%d)" % rollup,
            }
            query_columns = [legacy_aliases.get(axis, axis) for axis in y_axes]

            reference_event = self.reference_event(
                request, organization, params.get("start"), params.get("end")
            )

        with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_query"):
            result = get_event_stats(
                query_columns, search_query, params, rollup, reference_event
            )

    serializer = SnubaTSResultSerializer(organization, None, request.user)

    with sentry_sdk.start_span(
        op="discover.endpoint", description="base.stats_serialization"
    ):
        multiple_axes = len(query_columns) > 1

        if not top_events:
            if multiple_axes:
                return self.serialize_multiple_axis(
                    serializer, result, y_axes, query_columns
                )
            return serializer.serialize(result)

        serialized = {}
        for key, event_result in six.iteritems(result):
            if multiple_axes:
                serialized[key] = self.serialize_multiple_axis(
                    serializer, event_result, y_axes, query_columns
                )
            else:
                # The function alias is needed when count is a field but
                # not the axis.
                serialized[key] = serializer.serialize(
                    event_result, get_function_alias(query_columns[0])
                )
        return serialized
def handle_message(self, message):
    """
    Decode one Kafka message and hand its payload to the callback registered
    for the subscription's type.

    Nothing is dispatched (the method just records metrics/logs and returns)
    when the message cannot be parsed, the subscription is not active, the
    subscription does not exist (in which case a delete from snuba is also
    attempted), or no handler is registered for the subscription type.

    :param message: Kafka message; ``value()``, ``offset()``, ``partition()``
        and ``topic()`` are read from it.
    :return: None
    """

    def _kafka_context():
        # Built on demand at each log site so the message accessors are
        # called at the same points as before.
        return {
            "offset": message.offset(),
            "partition": message.partition(),
            "value": message.value(),
        }

    with sentry_sdk.push_scope() as scope:
        try:
            with metrics.timer("snuba_query_subscriber.parse_message_value"):
                contents = self.parse_message_value(message.value())
        except InvalidMessageError:
            # A malformed message is logged and skipped.
            logger.exception(
                "Subscription update could not be parsed", extra=_kafka_context()
            )
            return

        subscription_id = contents["subscription_id"]
        scope.set_tag("query_subscription_id", subscription_id)

        try:
            with metrics.timer("snuba_query_subscriber.fetch_subscription"):
                subscription = QuerySubscription.objects.get_from_cache(
                    subscription_id=subscription_id
                )
                if subscription.status != QuerySubscription.Status.ACTIVE.value:
                    metrics.incr("snuba_query_subscriber.subscription_inactive")
                    return
        except QuerySubscription.DoesNotExist:
            metrics.incr("snuba_query_subscriber.subscription_doesnt_exist")
            logger.error(
                "Received subscription update, but subscription does not exist",
                extra=_kafka_context(),
            )
            # Best effort clean-up of the orphaned subscription in snuba.
            try:
                _delete_from_snuba(
                    self.topic_to_dataset[message.topic()], subscription_id
                )
            except Exception:
                logger.exception("Failed to delete unused subscription from snuba.")
            return

        if subscription.type not in subscriber_registry:
            metrics.incr("snuba_query_subscriber.subscription_type_not_registered")
            logger.error(
                "Received subscription update, but no subscription handler registered",
                extra=_kafka_context(),
            )
            return

        snuba_query = subscription.snuba_query
        debug_extra = {
            "timestamp": contents["timestamp"],
            "query_subscription_id": subscription_id,
            "project_id": subscription.project_id,
            "subscription_dataset": snuba_query.dataset,
            "subscription_query": snuba_query.query,
            "subscription_aggregation": snuba_query.aggregate,
            "subscription_time_window": snuba_query.time_window,
            "subscription_resolution": snuba_query.resolution,
        }
        debug_extra.update(_kafka_context())
        logger.debug("query-subscription-consumer.handle_message", extra=debug_extra)

        callback = subscriber_registry[subscription.type]
        with sentry_sdk.start_span(op="process_message") as span:
            with metrics.timer(
                "snuba_query_subscriber.callback.duration", instance=subscription.type
            ):
                span.set_data("payload", contents)
                callback(contents, subscription)
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level entry point for running a user query against events.

    Works on the Discover public event schema; virtual fields and aggregate
    functions are supported both in the selected columns and in conditions.
    Results are passed through ``transform_results`` together with the
    translated column mapping before being returned.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Forwarded to ``resolve_field_list`` as ``auto_fields``.
    use_aggregate_conditions (bool) When falsy, the ``having`` clauses
                    produced by the filter are discarded.
    conditions (Sequence[any]) Extra conditions appended to the filter's
                    conditions as-is.

    Raises InvalidSearchQuery when no columns are selected, or when an
    aggregate used in a ``having`` clause is not among the aggregations.
    """

    def _as_list(value):
        # A single ordering may be given as a bare value; normalize to a list.
        return list(value) if isinstance(value, (list, tuple)) else [value]

    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # The list is modified below, so work on a copy of the caller's value.
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(
        op="discover.discover", description="query.filter_transform"
    ) as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            snuba_filter.having = []

    # A histogram column needs a separate query to compute its buckets. The
    # bucketed column replaces the original one before the event search runs.
    function_translations = {}
    for column_index, column in enumerate(selected_columns):
        if not column.startswith("histogram("):
            continue

        with sentry_sdk.start_span(
            op="discover.discover", description="query.histogram_calculation"
        ) as span:
            span.set_data("histogram", column)
            histogram_column = find_histogram_buckets(
                column, params, snuba_filter.conditions
            )
            selected_columns[column_index] = histogram_column

            snuba_name = get_function_alias(histogram_column)
            sentry_name = get_function_alias(column)
            function_translations[snuba_name] = sentry_name

            # The histogram function was renamed, so any ordering that
            # refers to it has to be renamed as well.
            if orderby is not None:
                orderby = _as_list(orderby)
                for position, ordering in enumerate(orderby):
                    if ordering.lstrip("-") == sentry_name:
                        direction = "-" if ordering.startswith("-") else ""
                        orderby[position] = "{}{}".format(direction, snuba_name)

        # Stop scanning after the first histogram column.
        break

    with sentry_sdk.start_span(
        op="discover.discover", description="query.field_translations"
    ):
        if orderby is not None:
            orderby = _as_list(orderby)
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        snuba_filter.update_with(
            resolve_field_list(selected_columns, snuba_filter, auto_fields=auto_fields)
        )

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations
        )

        # Every aggregate referenced by a having clause must also be one of
        # the aggregations.
        for having_clause in snuba_filter.having:
            head = having_clause[0]

            if not isinstance(head, (list, tuple)):
                # Plain alias.
                if not any(head == agg[-1] for agg in snuba_filter.aggregations):
                    raise InvalidSearchQuery(
                        u"Aggregate {} used in a condition but is not a selected column."
                        .format(head)
                    )
                continue

            # Nested functions of the form [fn, [args]]: walk them with an
            # explicit stack and collect every alias that is missing.
            pending = [[head]]
            missing_aliases = []
            while pending:
                for arg in pending.pop():
                    if arg[0] in [SNUBA_AND, SNUBA_OR]:
                        pending.extend(arg[1])
                        continue
                    alias = arg[1][0]
                    if not any(alias == agg[-1] for agg in snuba_filter.aggregations):
                        missing_aliases.append(alias)

            if missing_aliases:
                raise InvalidSearchQuery(
                    u"Aggregate(s) {} used in a condition but are not in the selected columns."
                    .format(", ".join(missing_aliases))
                )

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="query.transform_results"
    ) as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(
            result, translated_columns, snuba_filter, selected_columns
        )
def get_facets(query, params, limit=10, referrer=None):
    """
    High-level API for computing 'facet map' results.

    First the most frequent tag keys are fetched, then value counts are
    fetched for them: one query per "individual" tag and a single combined
    query for the trailing "aggregate" tags. When more than one project is
    in ``params`` a per-project facet is added as well. Counts are rescaled
    when a sample rate is applied.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of tag keys to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns a list of FacetResult (empty when no tag keys are found).
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, _ = resolve_discover_aliases(snuba_filter)

    # Tracing tags are noisy and generally not helpful, so leave them out.
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project"],
    ]

    # With many projects accuracy matters less, so let snuba sample.
    use_turbo = len(snuba_filter.filter_keys["project_id"]) > 2

    with sentry_sdk.start_span(
        op="discover.discover", description="facets.frequent_tags"
    ):
        # Most frequent tag keys.
        key_names = raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-count", "tags_key"],
            groupby="tags_key",
            having=[excluded_tags],
            dataset=Dataset.Discover,
            limit=limit,
            referrer=referrer,
            turbo=use_turbo,
        )
        top_tags = [row["tags_key"] for row in key_names["data"]]
        if not top_tags:
            return []

    # TODO(mark) Make the sampling rate scale based on the result size and scaling factor in
    # sentry.options. To test the lowest acceptable sampling rate, we use 0.1 which
    # is equivalent to turbo. We don't use turbo though as we need to re-scale data, and
    # using turbo could cause results to be wrong if the value of turbo is changed in snuba.
    sample_rate = None
    if (
        options.get("discover2.tags_facet_enable_sampling")
        and key_names["data"][0]["count"] > 10000
    ):
        sample_rate = 0.1

    # Counts are scaled back up when sampling is in effect.
    multiplier = 1 if sample_rate is None else 1 / sample_rate

    multiple_projects = len(params.get("project_id", [])) > 1
    if multiple_projects and len(top_tags) == limit:
        # Drop the last tag when the tag list is full.
        top_tags.pop()

    results = []
    if multiple_projects:
        with sentry_sdk.start_span(
            op="discover.discover", description="facets.projects"
        ):
            project_values = raw_query(
                aggregations=[["count", None, "count"]],
                start=snuba_filter.start,
                end=snuba_filter.end,
                conditions=snuba_filter.conditions,
                filter_keys=snuba_filter.filter_keys,
                groupby="project_id",
                orderby="-count",
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
            )
            for row in project_values["data"]:
                results.append(
                    FacetResult("project", row["project_id"], int(row["count"]) * multiplier)
                )

    # Get tag counts for our top tags. Fetching them individually
    # allows snuba to leverage promoted tags better and enables us to get
    # the value count we want.
    max_aggregate_tags = options.get("discover2.max_tags_to_combine")
    individual_tags = []
    aggregate_tags = []
    for position, tag_key in enumerate(top_tags):
        # "environment" is always fetched individually; add further tags
        # that should be individual to this check.
        if tag_key != "environment" and position >= len(top_tags) - max_aggregate_tags:
            aggregate_tags.append(tag_key)
        else:
            individual_tags.append(tag_key)

    with sentry_sdk.start_span(
        op="discover.discover", description="facets.individual_tags"
    ) as span:
        span.set_data("tag_count", len(individual_tags))
        for tag_name in individual_tags:
            tag_column = u"tags[{}]".format(tag_name)
            tag_values = raw_query(
                aggregations=[["count", None, "count"]],
                conditions=snuba_filter.conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=["-count"],
                groupby=[tag_column],
                limit=TOP_VALUES_DEFAULT_LIMIT,
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
            )
            for row in tag_values["data"]:
                results.append(
                    FacetResult(tag_name, row[tag_column], int(row["count"]) * multiplier)
                )

    if aggregate_tags:
        with sentry_sdk.start_span(
            op="discover.discover", description="facets.aggregate_tags"
        ):
            # NOTE: this appends to the filter's own condition list in place.
            snuba_filter.conditions.append(["tags_key", "IN", aggregate_tags])
            tag_values = raw_query(
                aggregations=[["count", None, "count"]],
                conditions=snuba_filter.conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=["tags_key", "-count"],
                groupby=["tags_key", "tags_value"],
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
                limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
            )
            for row in tag_values["data"]:
                results.append(
                    FacetResult(
                        row["tags_key"], row["tags_value"], int(row["count"]) * multiplier
                    )
                )

    return results
def post_process_group(is_new, is_regression, is_new_group_environment, cache_key, group_id=None, **kwargs):
    """
    Fires post processing hooks for a group.

    The event payload is loaded from the event processing store under
    ``cache_key``; when it is missing the task logs and returns. Events
    without a group id only emit ``transaction_processed``. For all other
    events the group is re-bound, inbox/snooze/ownership handling, rule
    callbacks, suspect-commit scheduling, service hooks, resource-change
    hooks, plugins and similarity recording run (these are skipped for
    reprocessed events, apart from the reprocessed inbox entry), attachments
    are updated, and finally the cached payload is deleted.

    :param is_new: whether the group is new.
    :param is_regression: whether the event is a regression.
    :param is_new_group_environment: forwarded to ``RuleProcessor``.
    :param cache_key: key of the payload in the event processing store.
    :param group_id: id of the event's group, if any.
    :param kwargs: only ``primary_hash`` is read from it.
    :return: None
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # Presence of the payload in the processing store is what guards
        # against duplicated work should the forwarding consumers need to
        # rewind history.
        data = event_processing_store.get(cache_key)
        if not data:
            logger.info(
                "post_process.skipped",
                extra={"cache_key": cache_key, "reason": "missing_cache"},
            )
            return

        event = Event(
            project_id=data["project"],
            event_id=data["event_id"],
            group_id=group_id,
            data=data,
        )

        set_current_event_project(event.project_id)

        is_transaction_event = not event.group_id

        from sentry.models import EventDict, Organization, Project

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project.set_cached_field_value(
            "organization",
            Organization.objects.get_from_cache(id=event.project.organization_id),
        )

        # Simplified post processing for transaction events.
        # This should eventually be completely removed and transactions
        # will not go through any post processing.
        if is_transaction_event:
            transaction_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
            )
            event_processing_store.delete_by_key(cache_key)
            return

        is_reprocessed = is_reprocessed_event(event.data)
        sentry_sdk.set_tag("is_reprocessed", is_reprocessed)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind Group since we're reading the Event object
        # from cache, which may contain a stale group and project
        event.group, _ = get_group_with_redirect(event.group_id)
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value(
            "organization", event.project.organization
        )

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        with sentry_sdk.start_span(op="tasks.post_process_group.add_group_to_inbox"):
            try:
                if is_reprocessed and is_new:
                    add_group_to_inbox(event.group, GroupInboxReason.REPROCESSED)
            except Exception:
                logger.exception("Failed to add group to inbox for reprocessed groups")

        if not is_reprocessed:
            # Snoozes are processed before rules as that might create a
            # regression, but not for new groups because a new group cannot
            # have been snoozed yet.
            has_reappeared = not is_new
            try:
                if not is_new:
                    has_reappeared = process_snoozes(event.group)
            except Exception:
                logger.exception("Failed to process snoozes for group")

            try:
                # When has_reappeared is true the .UNIGNORED reason was
                # already added.
                if not has_reappeared and is_new:
                    add_group_to_inbox(event.group, GroupInboxReason.NEW)
                elif not has_reappeared and is_regression:
                    add_group_to_inbox(event.group, GroupInboxReason.REGRESSION)
            except Exception:
                logger.exception("Failed to add group to inbox for non-reprocessed groups")

            with sentry_sdk.start_span(
                op="tasks.post_process_group.handle_owner_assignment"
            ):
                try:
                    handle_owner_assignment(event.project, event.group, event)
                except Exception:
                    logger.exception("Failed to handle owner assignments")

            rule_processor = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            with sentry_sdk.start_span(
                op="tasks.post_process_group.rule_processor_callbacks"
            ):
                # TODO(dcramer): ideally this would fanout, but serializing giant
                # objects back and forth isn't super efficient
                for callback, futures in rule_processor.apply():
                    has_alert = True
                    safe_execute(callback, event, futures, _with_transaction=False)

            try:
                lock = locks.get(f"w-o:{event.group_id}-d-l", duration=10)
                with lock.acquire():
                    org_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(org_commit_key)
                    if org_has_commit is None:
                        org_has_commit = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        ).exists()
                        cache.set(org_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        debounce_key = f"w-o-i:g-{event.group_id}"
                        if not cache.get(debounce_key):
                            from sentry.utils.committers import get_frame_paths

                            cache.set(debounce_key, True, 604800)  # 1 week in seconds
                            event_frames = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=event_frames,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
                        else:
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
            except UnableToAcquireLock:
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = {"event.created"}
                if has_alert:
                    allowed_events.add("event.alert")

                # NOTE(review): the set always contains "event.created", so
                # this guard is always true.
                if allowed_events:
                    for servicehook_id, hook_events in _get_service_hooks(
                        project_id=event.project_id
                    ):
                        if any(e in allowed_events for e in hook_events):
                            process_service_hook.delay(
                                servicehook_id=servicehook_id, event=event
                            )

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error" and _should_send_error_created_hooks(
                event.project
            ):
                process_resource_change_bound.delay(
                    action="created",
                    sender="Error",
                    instance_id=event.event_id,
                    instance=event,
                )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                # NOTE(review): the keyword is spelled ``is_regresion`` here;
                # kept as-is because receivers are not visible from this
                # block -- confirm before renaming.
                plugin_post_process_group(
                    plugin_slug=plugin.slug,
                    event=event,
                    is_new=is_new,
                    is_regresion=is_regression,
                )

            from sentry import similarity

            with sentry_sdk.start_span(op="tasks.post_process_group.similarity"):
                safe_execute(
                    similarity.record, event.project, [event], _with_transaction=False
                )

        # Patch attachments that were ingested on the standalone path.
        with sentry_sdk.start_span(
            op="tasks.post_process_group.update_existing_attachments"
        ):
            try:
                update_existing_attachments(event)
            except Exception:
                logger.exception("Failed to update existing attachments")

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
def query(
    self,
    projects,
    retention_window_start,
    group_queryset,
    environments,
    sort_by,
    limit,
    cursor,
    count_hits,
    paginator_options,
    search_filters,
    date_from,
    date_to,
):
    """
    Search groups, combining the Django queryset with chunked Snuba queries.

    When no end date is given, there is no cursor, the sort is ``date``, no
    environments are requested and all search filters are Postgres-only (or
    ``date``), the result is served straight from ``group_queryset`` via
    ``DateTimePaginator``. Otherwise up to ``max_candidates`` group ids are
    pre-fetched from the queryset and passed to Snuba; if there are more
    candidates than that, Snuba is queried without candidates in growing
    chunks and the queryset is applied afterwards as a post-filter.

    :param projects: projects to search; their ``id`` is sent to Snuba.
    :param retention_window_start: lower bound for the search window, may be
        falsy (a 90 day fallback is used as well).
    :param group_queryset: Django queryset of candidate groups.
    :param environments: environments to restrict to, may be falsy.
    :param sort_by: key into ``self.sort_strategies``.
    :param limit: page size.
    :param cursor: pagination cursor or None.
    :param count_hits: whether hit counting is requested.
    :param paginator_options: extra keyword arguments for the paginators.
    :param search_filters: parsed search filters.
    :param date_from: optional lower date bound.
    :param date_to: optional upper date bound.
    :return: a paginator result; ``self.empty_result`` when the time window
        is empty/outside retention, no candidates exist, or hits are zero.
    """
    now = timezone.now()
    end = None
    end_params = [
        _f for _f in [date_to, get_search_filter(search_filters, "date", "<")] if _f
    ]
    if end_params:
        end = min(end_params)

    if not end:
        end = now + ALLOWED_FUTURE_DELTA

        # This search is for some time window that ends with "now",
        # so if the requested sort is `date` (`last_seen`) and there
        # are no other Snuba-based search predicates, we can simply
        # return the results from Postgres.
        if (
            cursor is None
            and sort_by == "date"
            and not environments
            and
            # This handles tags and date parameters for search filters.
            not [
                sf
                for sf in search_filters
                if sf.key.name not in self.postgres_only_fields.union(["date"])
            ]
        ):
            group_queryset = group_queryset.order_by("-last_seen")
            paginator = DateTimePaginator(group_queryset, "-last_seen", **paginator_options)
            # When its a simple django-only search, we count_hits like normal
            return paginator.get_result(limit, cursor, count_hits=count_hits)

    # TODO: Presumably we only want to search back to the project's max
    # retention date, which may be closer than 90 days in the past, but
    # apparently `retention_window_start` can be None(?), so we need a
    # fallback.
    retention_date = max(
        [_f for _f in [retention_window_start, now - timedelta(days=90)] if _f]
    )
    start_params = [
        date_from,
        retention_date,
        get_search_filter(search_filters, "date", ">"),
    ]
    start = max([_f for _f in start_params if _f])
    end = max([retention_date, end])

    if start == retention_date and end == retention_date:
        # Both `start` and `end` must have been trimmed to `retention_date`,
        # so this entire search was against a time range that is outside of
        # retention. We'll return empty results to maintain backwards compatibility
        # with Django search (for now).
        return self.empty_result

    if start >= end:
        # TODO: This maintains backwards compatibility with Django search, but
        # in the future we should find a way to notify the user that their search
        # is invalid.
        return self.empty_result

    # Here we check if all the django filters reduce the set of groups down
    # to something that we can send down to Snuba in a `group_id IN (...)`
    # clause.
    max_candidates = options.get("snuba.search.max-pre-snuba-candidates")

    with sentry_sdk.start_span(op="snuba_group_query") as span:
        # Fetch one more than the maximum so "too many" can be detected.
        group_ids = list(
            group_queryset.values_list("id", flat=True)[: max_candidates + 1]
        )
        span.set_data("Max Candidates", max_candidates)
        span.set_data("Result Size", len(group_ids))
    metrics.timing("snuba.search.num_candidates", len(group_ids))

    too_many_candidates = False
    if not group_ids:
        # no matches could possibly be found from this point on
        metrics.incr("snuba.search.no_candidates", skip_internal=False)
        return self.empty_result
    elif len(group_ids) > max_candidates:
        # If the pre-filter query didn't include anything to significantly
        # filter down the number of results (from 'first_release', 'query',
        # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
        # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
        # might have surpassed the `max_candidates`. In this case,
        # we *don't* want to pass candidates down to Snuba, and instead we
        # want Snuba to do all the filtering/sorting it can and *then* apply
        # this queryset to the results from Snuba, which we call
        # post-filtering.
        metrics.incr("snuba.search.too_many_candidates", skip_internal=False)
        too_many_candidates = True
        group_ids = []

    sort_field = self.sort_strategies[sort_by]
    chunk_growth = options.get("snuba.search.chunk-growth-rate")
    max_chunk_size = options.get("snuba.search.max-chunk-size")
    chunk_limit = limit
    offset = 0
    num_chunks = 0
    hits = self.calculate_hits(
        group_ids,
        too_many_candidates,
        sort_field,
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        start,
        end,
    )
    if count_hits and hits == 0:
        return self.empty_result

    paginator_results = self.empty_result
    result_groups = []
    result_group_ids = set()

    # `more_results` is read after the loop; bind it up front so that a loop
    # which never executes (e.g. a non-positive time budget) cannot leave it
    # undefined.
    more_results = False

    max_time = options.get("snuba.search.max-total-chunk-time-seconds")
    time_start = time.time()

    # Do smaller searches in chunks until we have enough results
    # to answer the query (or hit the end of possible results). We do
    # this because a common case for search is to return 100 groups
    # sorted by `last_seen`, and we want to avoid returning all of
    # a project's groups and then post-sorting them all in Postgres
    # when typically the first N results will do.
    while (time.time() - time_start) < max_time:
        num_chunks += 1

        # grow the chunk size on each iteration to account for huge projects
        # and weird queries, up to a max size
        chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
        # but if we have group_ids always query for at least that many items
        chunk_limit = max(chunk_limit, len(group_ids))

        # {group_id: group_score, ...}
        snuba_groups, total = self.snuba_search(
            start=start,
            end=end,
            project_ids=[p.id for p in projects],
            environment_ids=environments
            and [environment.id for environment in environments],
            sort_field=sort_field,
            cursor=cursor,
            group_ids=group_ids,
            limit=chunk_limit,
            offset=offset,
            search_filters=search_filters,
        )
        metrics.timing("snuba.search.num_snuba_results", len(snuba_groups))
        count = len(snuba_groups)
        more_results = count >= limit and (offset + limit) < total
        offset += len(snuba_groups)

        if not snuba_groups:
            break

        if group_ids:
            # pre-filtered candidates were passed down to Snuba, so we're
            # finished with filtering and these are the only results. Note
            # that because we set the chunk size to at least the size of
            # the group_ids, we know we got all of them (ie there are
            # no more chunks after the first)
            result_groups = snuba_groups
            if count_hits and hits is None:
                hits = len(snuba_groups)
        else:
            # pre-filtered candidates were *not* passed down to Snuba,
            # so we need to do post-filtering to verify Sentry DB predicates
            filtered_group_ids = group_queryset.filter(
                id__in=[gid for gid, _ in snuba_groups]
            ).values_list("id", flat=True)

            group_to_score = dict(snuba_groups)
            for group_id in filtered_group_ids:
                if group_id in result_group_ids:
                    # because we're doing multiple Snuba queries, which
                    # happen outside of a transaction, there is a small possibility
                    # of groups moving around in the sort scoring underneath us,
                    # so we at least want to protect against duplicates
                    continue

                group_score = group_to_score[group_id]
                result_group_ids.add(group_id)
                result_groups.append((group_id, group_score))

        # break the query loop for one of three reasons:
        # * we started with Postgres candidates and so only do one Snuba query max
        # * the paginator is returning enough results to satisfy the query (>= the limit)
        # * there are no more groups in Snuba to post-filter
        # TODO do we actually have to rebuild this SequencePaginator every time
        # or can we just make it after we've broken out of the loop?
        paginator_results = SequencePaginator(
            [(score, id) for (id, score) in result_groups],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, known_hits=hits)

        if group_ids or len(paginator_results.results) >= limit or not more_results:
            break

    # HACK: We're using the SequencePaginator to mask the complexities of going
    # back and forth between two databases. This causes a problem with pagination
    # because we're 'lying' to the SequencePaginator (it thinks it has the entire
    # result set in memory when it does not). For this reason we need to make some
    # best guesses as to whether the `prev` and `next` cursors have more results.

    if len(paginator_results.results) == limit and more_results:
        # Because we are going back and forth between DBs there is a small
        # chance that we will hand the SequencePaginator exactly `limit`
        # items. In this case the paginator will assume there are no more
        # results, so we need to override the `next` cursor's results.
        paginator_results.next.has_results = True

    if cursor is not None and (not cursor.is_prev or len(paginator_results.results) > 0):
        # If the user passed a cursor, and it isn't already a 0 result `is_prev`
        # cursor, then it's worth allowing them to go back a page to check for
        # more results.
        paginator_results.prev.has_results = True

    metrics.timing("snuba.search.num_chunks", num_chunks)

    # Swap the group ids for Group instances, dropping ids that no longer
    # resolve to a group.
    groups = Group.objects.in_bulk(paginator_results.results)
    paginator_results.results = [
        groups[k] for k in paginator_results.results if k in groups
    ]

    return paginator_results
def serialize(
    self,
    transactions: Sequence[SnubaTransaction],
    errors: Sequence[SnubaError],
    root: Optional[SnubaTransaction],
    warning_extra: Dict[str, str],
    event_id: str,
    detailed: bool = False,
) -> Sequence[FullResponse]:
    """
    For the full event trace, we return the results as a graph instead of a flattened list

    The trace is walked breadth-first starting from ``root`` (when given).
    Whenever the queue runs dry while unvisited transactions remain in the
    parent map, an arbitrary remaining transaction is promoted to the head of
    a new "orphan" subtrace. Orphan subtraces are re-attached later if their
    parent span shows up while walking another event.

    transactions: transactions of the trace; grouped by parent span id via
        ``construct_parent_map``.
    errors: errors of the trace; grouped by span id via ``construct_error_map``.
    root: the root transaction, or None when the trace has no known root.
    warning_extra: ``extra`` payload for the trace-limit warning log.
    event_id: not read by this implementation.
    detailed: forwarded to ``TraceEvent.full_dict``.

    Returns the root trace(s) first, followed by the orphan subtraces, each
    group sorted with ``child_sort_key``.
    """
    parent_map = self.construct_parent_map(transactions)
    error_map = self.construct_error_map(errors)
    # Every TraceEvent created so far, keyed by event id.
    parent_events: Dict[str, TraceEvent] = {}
    # TODO(3.7): Dictionary ordering in py3.6 is an implementation detail, using an OrderedDict because this way
    # we try to guarantee in py3.6 that the first item is the root. We can switch back to a normal dict when we're
    # on python 3.7.
    # Heads of each (sub)trace keyed by the parent span id they hang from;
    # the root is stored under the ``None`` key.
    results_map: Dict[Optional[str], List[TraceEvent]] = OrderedDict()
    to_check: Deque[SnubaTransaction] = deque()
    if root:
        parent_events[root["id"]] = TraceEvent(root, None, 0)
        results_map[None] = [parent_events[root["id"]]]
        to_check.append(root)

    with sentry_sdk.start_span(op="building.trace", description="full trace"):
        iteration = 0
        has_orphans = False
        while parent_map or to_check:
            if len(to_check) == 0:
                # Nothing reachable is left to visit, but unvisited
                # transactions remain: start a new orphan subtrace.
                has_orphans = True
                # Grab any set of events from the parent map
                parent_span_id, current_events = parent_map.popitem()

                current_event, *siblings = current_events
                # If there were any siblings put them back
                if siblings:
                    parent_map[parent_span_id] = siblings

                previous_event = parent_events[current_event["id"]] = TraceEvent(
                    current_event, None, 0
                )

                # not using a defaultdict here as a DefaultOrderedDict isn't worth the effort
                if parent_span_id in results_map:
                    results_map[parent_span_id].append(previous_event)
                else:
                    results_map[parent_span_id] = [previous_event]
            else:
                current_event = to_check.popleft()
                previous_event = parent_events[current_event["id"]]

            # This is faster than doing a call to get_events, since get_event_by_id only makes a call to snuba
            # when non transaction events are included.
            with sentry_sdk.start_span(op="nodestore", description="get_event_by_id"):
                nodestore_event = eventstore.get_event_by_id(
                    current_event["project.id"], current_event["id"]
                )

            previous_event.nodestore_event = nodestore_event

            # NOTE(review): assumes the event was found; a None result from
            # get_event_by_id would fail on ``.data`` here - confirm.
            spans: NodeSpans = nodestore_event.data.get("spans", [])
            # Need to include the transaction as a span as well
            spans.append({"span_id": previous_event.event["trace.span"]})

            for child in spans:
                # Attach (and consume) any errors recorded against this span.
                if child["span_id"] in error_map:
                    previous_event.errors.extend(
                        [
                            self.serialize_error(error)
                            for error in error_map.pop(child["span_id"])
                        ]
                    )
                # We need to connect back to an existing orphan trace
                if has_orphans and child["span_id"] in results_map:
                    orphan_subtraces = results_map.pop(child["span_id"])
                    for orphan_subtrace in orphan_subtraces:
                        orphan_subtrace.parent_event_id = previous_event.event["id"]
                    previous_event.children.extend(orphan_subtraces)
                if child["span_id"] not in parent_map:
                    continue
                # Avoid potential span loops by popping, so we don't traverse the same nodes twice
                child_events = parent_map.pop(child["span_id"])

                for child_event in child_events:
                    parent_events[child_event["id"]] = TraceEvent(
                        child_event,
                        current_event["id"],
                        previous_event.generation + 1
                        if previous_event.generation is not None
                        else None,
                    )
                    # Add this event to its parent's children
                    previous_event.children.append(parent_events[child_event["id"]])

                    to_check.append(child_event)
            # Limit iterations just to be safe
            iteration += 1
            if iteration > MAX_TRACE_SIZE:
                sentry_sdk.set_tag("discover.trace-view.warning", "surpassed-trace-limit")
                logger.warning(
                    "discover.trace-view.surpassed-trace-limit",
                    extra=warning_extra,
                )
                break

    root_traces: List[TraceEvent] = []
    orphans: List[TraceEvent] = []
    # The first entry of results_map is the root's (when a root was given);
    # every later entry is an orphan subtrace.
    for index, result in enumerate(results_map.values()):
        for subtrace in result:
            self.update_children(subtrace)
        if index > 0 or root is None:
            orphans.extend(result)
        elif root:
            root_traces = result
    # We sort orphans and roots separately because we always want the root(s) as the first element(s)
    root_traces.sort(key=child_sort_key)
    orphans.sort(key=child_sort_key)
    return [trace.full_dict(detailed) for trace in root_traces] + [
        orphan.full_dict(detailed) for orphan in orphans
    ]
def get(self, request, organization):
    """
    Return serialized issues related to the requested transaction.

    The ``transaction`` query parameter is required; it is turned into a
    ``transaction:"<name>"`` search term, appended to the search filters built
    from the request, and the issue search is run with a limit of 5.

    Responds with 400 when no lookup key is supplied or the transaction name
    cannot be parsed, and with an empty list when the request resolves to no
    projects.
    """
    try:
        # events-meta is still used by events v1 which doesn't require global views
        params = self.get_snuba_params(request, organization, check_global_views=False)
    except NoProjects:
        return Response([])

    with sentry_sdk.start_span(op="discover.endpoint", description="find_lookup_keys"):
        possible_keys = ["transaction"]
        lookup_keys = {}
        for key in possible_keys:
            lookup_keys[key] = request.query_params.get(key)

        if not any(lookup_keys.values()):
            missing_key_body = {
                "detail": f"Must provide one of {possible_keys} in order to find related events"
            }
            return Response(missing_key_body, status=400)

    with self.handle_query_errors():
        with sentry_sdk.start_span(op="discover.endpoint", description="filter_creation"):
            projects = self.get_projects(request, organization)
            environment = params.get("environment")
            query_kwargs = build_query_params_from_request(
                request, organization, projects, environment
            )
            query_kwargs["limit"] = 5
            try:
                # Need to escape quotes in case some "joker" has a transaction with quotes
                transaction_name = UNESCAPED_QUOTE_RE.sub('\\"', lookup_keys["transaction"])
                parsed_terms = parse_search_query(f'transaction:"{transaction_name}"')
            except ParseError:
                return Response({"detail": "Invalid transaction search"}, status=400)

            # Add the transaction term to whatever filters the request already produced.
            existing_filters = query_kwargs.get("search_filters")
            if existing_filters:
                existing_filters.extend(parsed_terms)
            else:
                query_kwargs["search_filters"] = parsed_terms

        with sentry_sdk.start_span(op="discover.endpoint", description="issue_search"):
            results = search.query(**query_kwargs)

    with sentry_sdk.start_span(
        op="discover.endpoint", description="serialize_results"
    ) as span:
        results = list(results)
        span.set_data("result_length", len(results))
        group_serializer = GroupSerializer(
            environment_func=self._get_environment_func(request, organization.id)
        )
        context = serialize(results, request.user, group_serializer)

    return Response(context)
def serialize(self, parent_map, error_map, root, warning_extra, params, snuba_event=None, event_id=None):
    """
    For the full event trace, we return the results as a graph instead of a flattened list

    Starting at ``root``, walk the trace breadth-first. Each visited event is
    enriched with the ``NODESTORE_KEYS`` values from its nodestore payload,
    picks up the errors recorded against its spans, and gets its child
    transactions (looked up by span id in ``parent_map``) nested under
    ``children``. Returns the serialized root with the tree hanging off it.
    """
    serialized_by_id = {}
    result = serialized_by_id[root["id"]] = self.serialize_event(root, None, 0)

    with sentry_sdk.start_span(op="building.trace", description="full trace"):
        queue = deque([root])
        visited = 0
        while queue:
            current_event = queue.popleft()

            # This is faster than doing a call to get_events, since get_event_by_id only makes a call to snuba
            # when non transaction events are included.
            with sentry_sdk.start_span(op="nodestore", description="get_event_by_id"):
                event = eventstore.get_event_by_id(
                    current_event["project.id"], current_event["id"]
                )

            node = serialized_by_id[current_event["id"]]
            nodestore_values = {}
            for event_key in NODESTORE_KEYS:
                nodestore_values[event_key] = event.data.get(event_key)
            node.update(nodestore_values)

            spans = event.data.get("spans", [])
            # Need to include the transaction as a span as well
            spans.append({"span_id": node["span_id"]})

            for child in spans:
                span_id = child["span_id"]
                if span_id in error_map:
                    node["errors"].extend(error_map.pop(span_id))
                if span_id in parent_map:
                    # Avoid potential span loops by popping, so we don't traverse the same nodes twice
                    for child_event in parent_map.pop(span_id):
                        serialized_child = self.serialize_event(
                            child_event, current_event["id"], node["generation"] + 1
                        )
                        serialized_by_id[child_event["id"]] = serialized_child
                        # Add this event to its parent's children
                        node["children"].append(serialized_child)
                        queue.append(child_event)

            # Limit iterations just to be safe
            visited += 1
            if visited > MAX_TRACE_SIZE:
                logger.warning(
                    "discover.trace-view.surpassed-trace-limit",
                    extra=warning_extra,
                )
                break

    return result
def serialize(
    self,
    transactions: Sequence[SnubaTransaction],
    errors: Sequence[SnubaError],
    root: Optional[SnubaTransaction],
    warning_extra: Dict[str, str],
    event_id: str,
    detailed: bool = False,
) -> Sequence[LightResponse]:
    """
    Because the light endpoint could potentially have gaps between root and event we return a flattened list

    The list holds, in order: the root (only when it is the direct parent of
    the current event), the current event, and the current event's direct
    children.
    """
    snuba_event, nodestore_event = self.get_current_transaction(transactions, errors, event_id)
    parent_map = self.construct_parent_map(transactions)
    error_map = self.construct_error_map(errors)

    flattened: List[TraceEvent] = []
    current_generation: Optional[int] = None
    root_id: Optional[str] = None

    with sentry_sdk.start_span(op="building.trace", description="light trace"):
        # We might not be necessarily connected to the root if we're on an orphan event
        if root is not None:
            if root["id"] != snuba_event["id"]:
                # Get the root event and see if the current event's span is in the root event
                root_event = eventstore.get_event_by_id(root["project.id"], root["id"])
                root_spans: NodeSpans = root_event.data.get("spans", [])
                root_span = find_event(
                    root_spans,
                    lambda item: item is not None
                    and item["span_id"] == snuba_event["trace.parent_span"],
                )
                # We only know to add the root if its the direct parent
                if root_span is not None:
                    # For the light response, the parent will be unknown unless it is a direct descendent of the root
                    root_id = root["id"]
                    flattened.append(TraceEvent(root, None, 0))
                    current_generation = 1
            else:
                # The current event is the root itself.
                current_generation = 0

        current_event = TraceEvent(snuba_event, root_id, current_generation)
        flattened.append(current_event)

        spans: NodeSpans = nodestore_event.data.get("spans", [])
        # Need to include the transaction as a span as well
        spans.append({"span_id": snuba_event["trace.span"]})

        for span in spans:
            span_id = span["span_id"]
            if span_id in error_map:
                serialized_errors = [
                    self.serialize_error(error) for error in error_map.pop(span_id)
                ]
                current_event.errors.extend(serialized_errors)
            if span_id in parent_map:
                for child_event in parent_map.pop(span_id):
                    if current_event.generation is not None:
                        child_generation = current_event.generation + 1
                    else:
                        child_generation = None
                    flattened.append(
                        TraceEvent(child_event, snuba_event["id"], child_generation)
                    )

    return [trace_event.to_dict() for trace_event in flattened]
def wrapper(*args, **kwargs) -> Any:
    # Run the wrapped callable inside a span named after it, recording the
    # filename captured by the enclosing scope as span data.
    traced = sentry_sdk.start_span(description=func.__name__, op=op)
    with traced as active_span:
        active_span.set_data("filename", filename)
        outcome = func(*args, **kwargs)
        return outcome
def get(self, request, organization):
    """
    Return paginated trend results plus timeseries stats for the top events.

    The requested time range is split in half; the trend function (query
    parameter ``trendFunction``, default ``p50()``) and the event count are
    computed for both halves together with their percentage change. Responds
    with 404 when the feature is disabled and with an empty list when the
    request resolves to no projects. Raises ParseError for multi-project
    requests without global views and for unsupported trend functions.
    """
    if not self.has_feature(organization, request):
        return Response(status=404)

    with sentry_sdk.start_span(op="discover.endpoint", description="filter_params") as span:
        span.set_tag("organization", organization)
        try:
            params = self.get_filter_params(request, organization)
        except NoProjects:
            return Response([])
        params = self.quantize_date_params(request, params)

        has_global_views = features.has(
            "organizations:global-views", organization, actor=request.user
        )
        requested_projects = params.get("project_id", [])
        if not has_global_views and len(requested_projects) > 1:
            raise ParseError(detail="You cannot view events from multiple projects.")

        # Midpoint of the requested window; the trend compares the two halves.
        window_seconds = (params["end"] - params["start"]).total_seconds()
        midpoint = params["start"] + timedelta(seconds=window_seconds * 0.5)
        start = datetime.strftime(params["start"], DateArg.date_format)
        middle = datetime.strftime(midpoint, DateArg.date_format)
        end = datetime.strftime(params["end"], DateArg.date_format)

    trend_function = request.GET.get("trendFunction", "p50()")
    function, columns = parse_function(trend_function)
    trend_column = self.trend_columns.get(function)
    if trend_column is None:
        raise ParseError(
            detail=u"{} is not a supported trend function".format(trend_function)
        )

    count_column = self.trend_columns.get("count_range")
    percentage_column = self.trend_columns["percentage"]
    selected_columns = request.GET.getlist("field")[:]
    query = request.GET.get("query")
    orderby = self.get_orderby(request)

    def data_fn(offset, limit):
        # Per-half aggregates and their percentage change, appended to the
        # user-selected fields.
        trend_fields = [
            trend_column["format"].format(*columns, start=start, end=middle, index="1"),
            trend_column["format"].format(*columns, start=middle, end=end, index="2"),
            percentage_column["format"].format(alias=trend_column["alias"]),
            "minus({alias}2,{alias}1)".format(alias=trend_column["alias"]),
            count_column["format"].format(start=start, end=middle, index="1"),
            count_column["format"].format(start=middle, end=end, index="2"),
            percentage_column["format"].format(alias=count_column["alias"]),
        ]
        return discover.query(
            selected_columns=selected_columns + trend_fields,
            query=query,
            params=params,
            orderby=orderby,
            offset=offset,
            limit=limit,
            referrer="api.trends.get-percentage-change",
            auto_fields=True,
            use_aggregate_conditions=True,
        )

    def on_results(events_results):
        def get_event_stats(query_columns, query, params, rollup, reference_event):
            top_count = min(5, len(events_results["data"]))
            return discover.top_events_timeseries(
                query_columns,
                selected_columns,
                query,
                params,
                orderby,
                rollup,
                top_count,
                organization,
                top_events=events_results,
                referrer="api.trends.get-event-stats",
            )

        # Only fetch timeseries when the page actually has events.
        if len(events_results["data"]) > 0:
            stats_results = self.get_event_stats_data(
                request,
                organization,
                get_event_stats,
                top_events=True,
                query_column=trend_function,
            )
        else:
            stats_results = {}

        events = self.handle_results_with_meta(
            request, organization, params["project_id"], events_results
        )
        return {"events": events, "stats": stats_results}

    with self.handle_query_errors():
        return self.paginate(
            request=request,
            paginator=GenericOffsetPaginator(data_fn=data_fn),
            on_results=on_results,
            default_per_page=5,
            max_per_page=5,
        )
def bulk_raw_query(snuba_param_list, referrer=None):
    """
    Run a batch of Snuba queries and return their decoded JSON bodies.

    snuba_param_list (Sequence[any])
        One entry per query. Each entry is passed through
        ``_prepare_query_params``, whose result is unpacked as
        ``(query_params, forward, reverse)``.
    referrer (str|None)
        Sent to Snuba as the ``referer`` header and used to tag spans and
        debug log lines.

    Returns a list with one parsed response body per query; every row of
    ``body["data"]`` has been passed through that query's ``reverse``
    translation. An empty ``snuba_param_list`` returns an empty list without
    contacting Snuba.

    Raises SnubaError (or one of RateLimitExceeded, SchemaValidationError, a
    mapped ClickHouse error) for failed queries and UnexpectedResponseError
    when a 200 response is not valid JSON.
    """
    if not snuba_param_list:
        # Nothing to run; the single-query branch below would otherwise fail
        # when indexing an empty list.
        return []

    headers = {}
    if referrer:
        headers["referer"] = referrer

    # Materialize the prepared params: on Python 3 ``map`` returns an
    # iterator, which cannot be indexed in the single-query branch below.
    query_param_list = list(map(_prepare_query_params, snuba_param_list))

    def snuba_query(params):
        query_params, forward, reverse, thread_hub = params
        try:
            with timer("snuba_query"):
                referrer = headers.get("referer", "<unknown>")
                if SNUBA_INFO:
                    logger.info("{}.body: {}".format(referrer, json.dumps(query_params)))
                    query_params["debug"] = True
                body = json.dumps(query_params)
                # The hub is passed in explicitly because this function may
                # run on a pool thread rather than the calling thread.
                with thread_hub.start_span(
                    op="snuba", description=u"query {}".format(referrer)
                ) as span:
                    span.set_tag("referrer", referrer)
                    for param_key, param_data in six.iteritems(query_params):
                        span.set_data(param_key, param_data)
                    return (
                        _snuba_pool.urlopen("POST", "/query", body=body, headers=headers),
                        forward,
                        reverse,
                    )
        except urllib3.exceptions.HTTPError as err:
            raise SnubaError(err)

    with sentry_sdk.start_span(
        op="start_snuba_query",
        description=u"running {} snuba queries".format(len(snuba_param_list)),
    ) as span:
        span.set_tag("referrer", headers.get("referer", "<unknown>"))
        if len(snuba_param_list) > 1:
            query_results = list(
                _query_thread_pool.map(
                    snuba_query,
                    [params + (Hub(Hub.current),) for params in query_param_list],
                )
            )
        else:
            # No need to submit to the thread pool if we're just performing a
            # single query
            query_results = [snuba_query(query_param_list[0] + (Hub(Hub.current),))]

    results = []
    for response, _, reverse in query_results:
        try:
            body = json.loads(response.data)
            if SNUBA_INFO:
                if "sql" in body:
                    logger.info(
                        "{}.sql: {}".format(headers.get("referer", "<unknown>"), body["sql"])
                    )
                if "error" in body:
                    logger.info(
                        "{}.err: {}".format(headers.get("referer", "<unknown>"), body["error"])
                    )
        except ValueError:
            if response.status != 200:
                logger.error("snuba.query.invalid-json")
                raise SnubaError("Failed to parse snuba error response")
            raise UnexpectedResponseError(
                u"Could not decode JSON response: {}".format(response.data)
            )

        if response.status != 200:
            if body.get("error"):
                error = body["error"]
                if response.status == 429:
                    raise RateLimitExceeded(error["message"])
                elif error["type"] == "schema":
                    raise SchemaValidationError(error["message"])
                elif error["type"] == "clickhouse":
                    raise clickhouse_error_codes_map.get(error["code"], QueryExecutionError)(
                        error["message"]
                    )
                else:
                    raise SnubaError(error["message"])
            else:
                raise SnubaError(u"HTTP {}".format(response.status))

        # Forward and reverse translation maps from model ids to snuba keys, per column
        body["data"] = [reverse(d) for d in body["data"]]
        results.append(body)

    return results
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    """
    Run the processing pipeline for one event and hand it off to be saved.

    Steps, in order: load the payload (from ``event_processing_store`` when
    ``data`` is not passed), run stacktrace processing, optionally run the
    second data-scrubbing round, run every version-2 plugin's event
    preprocessors, re-normalize the payload if anything changed, and finally
    call ``submit_save_event``.

    cache_key: key of the payload in ``event_processing_store``; replaced by
        the key returned from ``store`` when the payload changed.
    start_time: forwarded to ``create_failed_event`` / ``submit_save_event``.
    event_id: overwritten with ``data["event_id"]`` once the payload is loaded.
    process_task: only forwarded to ``_do_preprocess_event`` on retry.
    data: the event payload, or None to load it from the processing store.
    data_has_changed: truthy if an earlier stage already modified the payload.
    from_symbolicate: only used as a metrics tag / span datum here.

    Returns None in all cases. Returns early (without saving) when the
    payload is missing from the cache, when ``create_failed_event`` returns a
    truthy value, or when processing is restarted after ``RetryProcessing``.
    """
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        # The payload is neither passed in nor in the processing store.
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    # The payload's own event id takes precedence over the argument.
    event_id = data["event_id"]

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        # Pre-populate the project's organization from cache.
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    # A None result is treated as "nothing changed".
    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                new_data = safe_execute(scrub_data, project=project, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None and features.has(
                    "organizations:datascrubbers-v2", project.organization, actor=None
                ):
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        # A failing processor is logged and flagged on the
                        # event instead of aborting the pipeline.
                        error_logger.exception("tasks.store.preprocessors.error")
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        # A truthy result replaces the payload.
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        # Persist the modified payload; the save task reads it via the new key.
        cache_key = event_processing_store.store(data)

    submit_save_event(project, cache_key, event_id, start_time, data)
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    functions_acl (any) Forwarded unchanged to the field resolver.

    Raises InvalidSearchQuery when no columns are selected or when an
    aggregate is used in a condition without being selected.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(
        op="discover.discover", description="query.filter_transform"
    ) as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            if isinstance(orderby, (list, tuple)):
                orderby = list(orderby)
            else:
                orderby = [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations
        )

        # Suffix for the error messages below; the same for every clause.
        error_extra = ", and could not be automatically added" if auto_aggregations else ""

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            lhs = having_clause[0]
            if not isinstance(lhs, (list, tuple)):
                is_selected = any(
                    lhs == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                )
                if not is_selected:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}.".format(
                            lhs,
                            error_extra,
                        )
                    )
                continue

            # Functions are of the form [fn, [args]]
            pending = [[lhs]]
            missing_aliases = []
            while pending:
                for arg in pending.pop():
                    if arg[0] in [SNUBA_AND, SNUBA_OR]:
                        pending.extend(arg[1])
                    # Only need to iterate on arg[1] if its a list
                    elif isinstance(arg[1], (list, tuple)):
                        alias = arg[1][0]
                        is_selected = any(
                            alias == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations
                        )
                        if not is_selected:
                            missing_aliases.append(alias)

            if missing_aliases:
                raise InvalidSearchQuery(
                    "Aggregate(s) {} used in a condition but are not in the selected columns{}.".format(
                        ", ".join(missing_aliases),
                        error_extra,
                    )
                )

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="query.transform_results"
    ) as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(
            result,
            resolved_fields["functions"],
            translated_columns,
            snuba_filter,
            selected_columns,
        )
def get_environments(self, request, organization):
    """Resolve the request's environments via the module-level helper, timed in its own span."""
    with sentry_sdk.start_span(op="PERF: Org.get_environments"):
        environments = get_environments(request, organization)
        return environments
def dispatch(self, request, *args, **kwargs):
    """
    Identical to rest framework's dispatch except we add the ability
    to convert arguments (for common URL params).

    Beyond argument conversion this implementation also: wraps each phase in
    a tracing span, rejects requests whose ``Origin`` header is not allowed
    for the authenticated credentials (400), ensures ``request.access`` is
    set, adds CORS headers when an origin was sent, and pads the response
    time up to ``settings.SENTRY_API_RESPONSE_DELAY`` milliseconds when that
    setting is truthy.

    Returns the finalized response, which is also stored on ``self.response``.
    """
    with sentry_sdk.start_span(op="base.dispatch.setup", description=type(self).__name__):
        self.args = args
        self.kwargs = kwargs
        request = self.initialize_request(request, *args, **kwargs)
        self.load_json_body(request)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

    # Tags that will ultimately flow into the metrics backend at the end of
    # the request (happens via middleware/stats.py).
    request._metric_tags = {}

    # start_time is only bound (and only read below) when the delay is enabled.
    if settings.SENTRY_API_RESPONSE_DELAY:
        start_time = time.time()

    origin = request.META.get("HTTP_ORIGIN", "null")
    # A "null" value should be treated as no Origin for us.
    # See RFC6454 for more information on this behavior.
    if origin == "null":
        origin = None

    try:
        with sentry_sdk.start_span(op="base.dispatch.request", description=type(self).__name__):
            # Origin validation only applies to authenticated requests.
            if origin and request.auth:
                allowed_origins = request.auth.get_allowed_origins()
                if not is_valid_origin(origin, allowed=allowed_origins):
                    response = Response("Invalid origin: %s" % (origin, ), status=400)
                    self.response = self.finalize_response(
                        request, response, *args, **kwargs)
                    return self.response

            self.initial(request, *args, **kwargs)

            # Get the appropriate handler method
            if request.method.lower() in self.http_method_names:
                handler = getattr(self, request.method.lower(),
                                  self.http_method_not_allowed)

                # Replace the raw URL arguments with their converted form
                # before the handler runs.
                (args, kwargs) = self.convert_args(request, *args, **kwargs)
                self.args = args
                self.kwargs = kwargs
            else:
                handler = self.http_method_not_allowed

            if getattr(request, "access", None) is None:
                # setup default access
                request.access = access.from_request(request)

        with sentry_sdk.start_span(
            op="base.dispatch.execute",
            description="{}.{}".format(
                type(self).__name__, handler.__name__),
        ):
            response = handler(request, *args, **kwargs)

    except Exception as exc:
        # Any failure above is converted into a response by handle_exception.
        response = self.handle_exception(request, exc)

    if origin:
        self.add_cors_headers(request, response)

    self.response = self.finalize_response(request, response, *args, **kwargs)

    if settings.SENTRY_API_RESPONSE_DELAY:
        duration = time.time() - start_time

        # The setting is in milliseconds; sleep only for the remaining time.
        if duration < (settings.SENTRY_API_RESPONSE_DELAY / 1000.0):
            with sentry_sdk.start_span(
                op="base.dispatch.sleep",
                description=type(self).__name__,
            ) as span:
                span.set_data("SENTRY_API_RESPONSE_DELAY",
                              settings.SENTRY_API_RESPONSE_DELAY)
                time.sleep(settings.SENTRY_API_RESPONSE_DELAY / 1000.0 - duration)

    return self.response
def loads(value: str, **kwargs) -> JSONData:
    """
    Decode a JSON document with the module's default decoder, inside a span.

    Extra keyword arguments are accepted but not used.
    """
    with sentry_sdk.start_span(op="sentry.utils.json.loads"):
        decoded = _default_decoder.decode(value)
        return decoded
def prefetch_basic(self):
    """Run ``_prefetch_basic`` inside a ``stats`` span labelled with this object's cache key."""
    span_description = f"PREFETCH {self.cache_key}"
    with sentry_sdk.start_span(op="stats", description=span_description):
        self._prefetch_basic()
def transform_results(result, translated_columns, snuba_filter, selected_columns=None):
    """
    Transform internal names back to the public schema ones.

    When getting timeseries results via rollup, this function will
    zerofill the output results.

    result (Dict[str, any]) Snuba result with ``meta`` and ``data`` entries;
                    it is modified in place and also returned.
    translated_columns (Dict[str, str]) Maps snuba column names to their
                    public names.
    snuba_filter (any) Provides ``rollup``, ``start``, ``end`` and ``orderby``.
    selected_columns (None|Sequence[str]) Accepted for interface
                    compatibility; not read by this function.

    NaN float values are replaced with 0 whenever any column translation is
    supplied.
    """
    # Translate back column names that were converted to snuba format
    for col in result["meta"]:
        col["name"] = translated_columns.get(col["name"], col["name"])

    def get_row(row):
        transformed = {}
        for key, value in row.items():
            if isinstance(value, float) and math.isnan(value):
                value = 0
            transformed[translated_columns.get(key, key)] = value
        return transformed

    if translated_columns:
        result["data"] = [get_row(row) for row in result["data"]]

    rollup = snuba_filter.rollup
    if rollup and rollup > 0:
        with sentry_sdk.start_span(
            op="discover.discover", description="transform_results.zerofill"
        ) as span:
            span.set_data("result_count", len(result.get("data", [])))
            result["data"] = zerofill(
                result["data"],
                snuba_filter.start,
                snuba_filter.end,
                rollup,
                snuba_filter.orderby,
            )

    for col in result["meta"]:
        if col["name"].startswith("histogram"):
            # The column name here has been translated, we need the original name
            for snuba_name, sentry_name in six.iteritems(translated_columns):
                if sentry_name == col["name"]:
                    with sentry_sdk.start_span(
                        op="discover.discover",
                        description="transform_results.histogram_zerofill",
                    ) as span:
                        span.set_data("histogram_function", snuba_name)
                        result["data"] = zerofill_histogram(
                            result["data"],
                            result["meta"],
                            snuba_filter.orderby,
                            sentry_name,
                            snuba_name,
                        )
                    # Only the first matching original name is used.
                    break

    return result
def _request(
    self,
    method,
    path,
    headers=None,
    data=None,
    params=None,
    auth=None,
    json=True,
    allow_text=None,
    allow_redirects=None,
    timeout=None,
):
    """
    Send one HTTP request to the integration and return the parsed response.

    ``allow_text`` and ``allow_redirects`` fall back to the client's own
    attributes when not given; redirects default to GET-only if the client
    has no preference, and ``timeout`` defaults to 30. ``data`` is sent as a
    JSON body when ``json`` is truthy and as the raw body otherwise.

    Returns ``{}`` for a 204 response, otherwise the result of
    ``BaseApiResponse.from_response``. Raises ApiHostError on connection
    failures, ApiTimeoutError on timeouts and ApiError on HTTP errors.
    """
    if allow_text is None:
        allow_text = self.allow_text

    if allow_redirects is None:
        allow_redirects = self.allow_redirects
        if allow_redirects is None:  # is still None
            allow_redirects = method.upper() == "GET"

    if timeout is None:
        timeout = 30

    full_url = self.build_url(path)
    session = build_session()

    metrics.incr(
        u"%s.http_request" % self.datadog_prefix,
        sample_rate=1.0,
        tags={self.integration_type: self.name},
    )

    span_op = u"{}.http".format(self.integration_type)
    span_transaction = u"{}.http_response.{}".format(self.integration_type, self.name)
    with sentry_sdk.start_span(op=span_op, transaction=span_transaction) as span:
        try:
            send = getattr(session, method.lower())
            resp = send(
                url=full_url,
                headers=headers,
                json=data if json else None,
                data=None if json else data,
                params=params,
                auth=auth,
                verify=self.verify_ssl,
                allow_redirects=allow_redirects,
                timeout=timeout,
            )
            resp.raise_for_status()
        except ConnectionError as e:
            self.track_response_data("connection_error", span, e)
            raise ApiHostError.from_exception(e)
        except Timeout as e:
            self.track_response_data("timeout", span, e)
            raise ApiTimeoutError.from_exception(e)
        except HTTPError as e:
            resp = e.response
            if resp is not None:
                self.track_response_data(resp.status_code, span, e)
                raise ApiError.from_response(resp)
            # The error carries no response to report on.
            self.track_response_data("unknown", span, e)
            self.logger.exception(
                "request.error", extra={self.integration_type: self.name, "url": full_url}
            )
            raise ApiError("Internal Error")

        self.track_response_data(resp.status_code, span, None, resp)

        if resp.status_code == 204:
            return {}

        return BaseApiResponse.from_response(resp, allow_text=allow_text)
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
    top_events=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited
    number of top events.

    Returns a dictionary of SnubaTSResult objects that have been zerofilled
    in case of gaps. Each value of the dictionary should match the result of
    a timeseries query.

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the
                    timeseries query, usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the
                    events query, this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named
                    user_query so it does not conflict with the function query
    params (Dict[str, str]) Filtering parameters with start, end, project_id,
                    environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this
                    query.
    top_events (dict|None) An events result with a "data" list to use as the
                    top events. When None, the events are fetched with `query`.
    """
    if top_events is None:
        with sentry_sdk.start_span(op="discover.discover", description="top_events.fetch_events"):
            top_events = query(
                selected_columns,
                query=user_query,
                params=params,
                orderby=orderby,
                limit=limit,
                referrer=referrer,
                use_aggregate_conditions=True,
            )

    with sentry_sdk.start_span(
        op="discover.discover", description="top_events.filter_transform"
    ) as span:
        span.set_data("query", user_query)
        all_columns = list(set(timeseries_columns + selected_columns))
        snuba_filter, translated_columns = get_timeseries_snuba_filter(
            all_columns, user_query, params, rollup, default_count=False
        )

        for selected in selected_columns:
            # project is handled by filter_keys already
            if selected in ["project", "project.id"]:
                continue

            column = selected
            if column == "issue":
                column = FIELD_ALIASES["issue"]["column_alias"]

            # Note that because orderby shouldn't be an array field its not
            # included in the values
            distinct = set()
            for event in top_events["data"]:
                if column not in event:
                    continue
                candidate = event.get(column)
                if not isinstance(candidate, list):
                    distinct.add(candidate)
            values = list(distinct)

            if not values:
                continue

            if column == "timestamp":
                # timestamp needs special handling, creating a big OR instead
                condition = [["timestamp", "=", value] for value in values]
            elif None in values:
                present = [value for value in values if value is not None]
                condition = [[["isNull", [resolve_discover_column(column)]], "=", 1]]
                if present:
                    condition.append([resolve_discover_column(column), "IN", present])
            else:
                condition = [resolve_discover_column(column), "IN", values]
            snuba_filter.conditions.append(condition)

    with sentry_sdk.start_span(op="discover.discover", description="top_events.snuba_query"):
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            selected_columns=snuba_filter.selected_columns,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby="time",
            groupby=["time"] + snuba_filter.groupby,
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="top_events.transform_results"
    ) as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = transform_results(result, translated_columns, snuba_filter, selected_columns)

        translated_columns["project_id"] = "project"
        translated_groupby = [
            translated_columns.get(name, name) for name in snuba_filter.groupby
        ]

        issues = {}
        if "issue" in selected_columns:
            issue_ids = {event["issue.id"] for event in top_events["data"]}
            issues = Group.issues_mapping(issue_ids, params["project_id"], organization)

        # so the result key is consistent
        translated_groupby.sort()

        # Using the top events add the order to the results
        results = {}
        for position, top_event in enumerate(top_events["data"]):
            top_key = create_result_key(top_event, translated_groupby, issues)
            results[top_key] = {"order": position, "data": []}

        for row in result["data"]:
            row_key = create_result_key(row, translated_groupby, issues)
            if row_key not in results:
                logger.warning(
                    "discover.top-events.timeseries.key-mismatch",
                    extra={"result_key": row_key, "top_event_keys": list(results.keys())},
                )
                continue
            results[row_key]["data"].append(row)

        # Replace every bucket with its zerofilled timeseries result.
        for key, bucket in results.items():
            zerofilled = zerofill(
                bucket["data"], snuba_filter.start, snuba_filter.end, rollup, "time"
            )
            results[key] = SnubaTSResult(
                {"data": zerofilled, "order": bucket["order"]},
                snuba_filter.start,
                snuba_filter.end,
                rollup,
            )

    return results
def get(self, request: Request, organization: Organization) -> Response:
    """
    Return event stats (timeseries) data for an organization.

    Query parameters read from ``request.GET``:

    - ``topEvents``: integer between 1 and ``MAX_TOP_EVENTS``; when given,
      a top-events timeseries is returned instead of a plain one.
    - ``comparisonDelta``: integer number of seconds, passed on as a
      ``timedelta``.
    - ``partial``: ``"1"`` allows partial buckets.
    - ``withoutZerofill``: ``"1"`` disables zerofilling, but only when
      ``has_chart_interpolation`` is true for the organization.
    - ``referrer``: used only if listed in ``ALLOWED_EVENTS_STATS_REFERRERS``,
      otherwise ``"api.organization-event-stats"``.

    Responds with 404 when the feature is unavailable and 400 for invalid
    parameters or a ``ValidationError`` from ``get_event_stats_data``.
    """
    with sentry_sdk.start_span(op="discover.endpoint", description="filter_params") as span:
        span.set_data("organization", organization)
        if not self.has_feature(organization, request):
            # We used to return a "v1" result here, keeping tags to keep an eye on its use
            span.set_data("using_v1_results", True)
            sentry_sdk.set_tag("stats.using_v1", organization.slug)
            return Response(status=404)

        top_events = 0
        if "topEvents" in request.GET:
            try:
                top_events = int(request.GET.get("topEvents", 0))
            except ValueError:
                return Response({"detail": "topEvents must be an integer"}, status=400)
            if top_events > MAX_TOP_EVENTS:
                return Response(
                    {"detail": f"Can only get up to {MAX_TOP_EVENTS} top events"},
                    status=400,
                )
            elif top_events <= 0:
                return Response({"detail": "topEvents needs to be at least 1"}, status=400)

        comparison_delta = None
        if "comparisonDelta" in request.GET:
            try:
                comparison_delta = timedelta(seconds=int(request.GET["comparisonDelta"]))
            except (ValueError, OverflowError):
                # OverflowError: the integer parsed, but is too large for a
                # timedelta. Treat it as a client error rather than crashing.
                return Response({"detail": "comparisonDelta must be an integer"}, status=400)

        # The partial parameter determines whether or not partial buckets are allowed.
        # The last bucket of the time series can potentially be a partial bucket when
        # the start of the bucket does not align with the rollup.
        allow_partial_buckets = request.GET.get("partial") == "1"

        referrer = request.GET.get("referrer")
        referrer = (
            referrer
            if referrer in ALLOWED_EVENTS_STATS_REFERRERS
            else "api.organization-event-stats"
        )

    def get_event_stats(
        query_columns: Sequence[str],
        query: str,
        params: Dict[str, str],
        rollup: int,
        zerofill_results: bool,
        comparison_delta: Optional[timedelta],
    ) -> SnubaTSResult:
        # Callback handed to get_event_stats_data; picks the top-events or the
        # plain timeseries query depending on the topEvents parameter.
        if top_events > 0:
            return discover.top_events_timeseries(
                timeseries_columns=query_columns,
                selected_columns=self.get_field_list(organization, request),
                equations=self.get_equation_list(organization, request),
                user_query=query,
                params=params,
                orderby=self.get_orderby(request),
                rollup=rollup,
                limit=top_events,
                organization=organization,
                referrer=referrer + ".find-topn",
                allow_empty=False,
                zerofill_results=zerofill_results,
                include_other=True,
                use_snql=self.has_discover_snql(organization, request),
            )
        return discover.timeseries_query(
            selected_columns=query_columns,
            query=query,
            params=params,
            rollup=rollup,
            referrer=referrer,
            zerofill_results=zerofill_results,
            comparison_delta=comparison_delta,
            use_snql=self.has_discover_snql(organization, request),
        )

    try:
        return Response(
            self.get_event_stats_data(
                request,
                organization,
                get_event_stats,
                top_events,
                allow_partial_buckets=allow_partial_buckets,
                zerofill_results=not (
                    request.GET.get("withoutZerofill") == "1"
                    and self.has_chart_interpolation(organization, request)
                ),
                comparison_delta=comparison_delta,
            ),
            status=200,
        )
    except ValidationError:
        return Response(
            {"detail": "Comparison period is outside retention window"}, status=400
        )
def handle_message(self, message):
    """
    Parse a Kafka message and hand its payload to the callback registered
    for the subscription's type.

    Nothing is dispatched, and the method returns early after logging, when
    the message cannot be parsed, when the subscription does not exist, or
    when no callback is registered for its type.

    :param message: the Kafka message; its ``value()``, ``offset()`` and
        ``partition()`` are read.
    :return: None
    """

    def kafka_context():
        # Built on demand so the message is only inspected when we log.
        return {
            "offset": message.offset(),
            "partition": message.partition(),
            "value": message.value(),
        }

    with sentry_sdk.push_scope() as scope:
        try:
            contents = self.parse_message_value(message.value())
        except InvalidMessageError:
            # If the message is in an invalid format, just log the error
            # and continue
            logger.exception("Subscription update could not be parsed", extra=kafka_context())
            return

        subscription_id = contents["subscription_id"]
        scope.set_tag("query_subscription_id", subscription_id)

        try:
            subscription = QuerySubscription.objects.get_from_cache(
                subscription_id=contents["subscription_id"]
            )
        except QuerySubscription.DoesNotExist:
            metrics.incr("snuba_query_subscriber.subscription_doesnt_exist")
            logger.error(
                "Received subscription update, but subscription does not exist",
                extra=kafka_context(),
            )
            return

        if subscription.type not in subscriber_registry:
            metrics.incr("snuba_query_subscriber.subscription_type_not_registered")
            logger.error(
                "Received subscription update, but no subscription handler registered",
                extra=kafka_context(),
            )
            return

        info_extra = {
            "timestamp": contents["timestamp"],
            "query_subscription_id": contents["subscription_id"],
            "contents": contents,
        }
        info_extra.update(kafka_context())
        logger.info("query-subscription-consumer.handle_message", extra=info_extra)

        callback = subscriber_registry[subscription.type]
        with sentry_sdk.start_span(
            op="process_message",
            transaction="query_subscription_consumer_process_message",
        ) as span, metrics.timer(
            "snuba_query_subscriber.callback.duration", instance=subscription.type
        ):
            span.set_data("payload", contents)
            callback(contents, subscription)
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[str] = None,
    referrer: Optional[str] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
) -> Dict:
    """
    Query per-tag aggregates for the transactions matching ``filter_query``.

    :param params: filtering parameters passed to ``discover.get_filter``.
    :param tag_data: must provide ``"aggregate"`` and ``"count"``: the
        aggregate (avg) and the count of all transactions for this query.
    :param aggregate_column: public column name to aggregate on.
    :param filter_query: filter query string.
    :param orderby: ordering to apply before the trailing ``tags_key``
        ordering; a single string or a list of strings.
    :param referrer: prefix of the referrer sent with the query.
    :param limit: passed through to the query.
    :param offset: passed through to the query.
    :return: the raw query result with ``"meta"`` replaced by the output of
        ``discover.transform_meta``.
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = discover.get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, _ = discover.resolve_discover_aliases(snuba_filter)
    translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    if transaction_count > 0:
        # log-e growth starting at 50,000. The floor has to be
        # sample_start_count: the log curve never exceeds transaction_count,
        # so flooring at transaction_count would pin the sample rate to 1.
        target_sample = max(
            sample_start_count
            * (math.log(transaction_count) - (math.log(sample_start_count) - 1)),
            sample_start_count,
        )
        dynamic_sample_rate = target_sample / transaction_count
    else:
        # math.log is undefined for counts <= 0; there is nothing to sample.
        target_sample = sample_start_count
        dynamic_sample_rate = 0
    sample_rate = min(max(dynamic_sample_rate, 0), 1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project", "browser", "celery_task_id", "url"],
    ]

    # Accept both a single orderby string and a list of them.
    if orderby is None:
        resolved_orderby = []
    elif isinstance(orderby, str):
        resolved_orderby = [orderby]
    else:
        resolved_orderby = list(orderby)

    with sentry_sdk.start_span(
        op="discover.discover", description="facets.aggregate_tags"
    ) as span:
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)

        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags, ["aggregate", ">", aggregate_comparison]]

        snuba_filter.conditions.append([translated_aggregate_column, "IS NOT NULL", None])
        conditions = snuba_filter.conditions

        tag_selected_columns = [
            [
                "sum",
                ["minus", [translated_aggregate_column, str(transaction_aggregate)]],
                "sumdelta",
            ],
            ["count", [], "count"],
            [
                "divide",
                [
                    # Scale the sampled count back up before taking the ratio.
                    ["divide", [["count", []], frequency_sample_rate]],
                    transaction_count,
                ],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer=f"{referrer}.tag_values",
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=[1, "tags_key"],
            limit=limit,
            offset=offset,
        )
        results["meta"] = discover.transform_meta(results, {})

        return results
def reprocess_event(project_id, event_id, start_time):
    """
    Prepare a stored event for reprocessing and hand it to
    ``preprocess_event_from_reprocessing``.

    The payload is stored in the event processing store and the event's
    attachments are copied into the attachment cache under the resulting
    cache key. Returns early, after logging, when the event cannot be found
    in eventstore.
    """
    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    log_context = {"project_id": project_id, "event_id": event_id}

    # Take unprocessed data from old event and save it as unprocessed data
    # under a new event ID. The second step happens in pre-process. We could
    # save the "original event ID" instead and get away with writing less to
    # nodestore, but doing it this way makes the logic slightly simpler.
    node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error("reprocessing2.event.not_found", extra=log_context)
        return

    if data is None:
        logger.error("reprocessing2.reprocessing_nodestore.not_found", extra=log_context)
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    attachments = models.EventAttachment.objects.filter(
        project_id=project_id, event_id=event_id
    ).select_related("file")

    cached_attachments = []
    for index, attachment in enumerate(attachments):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            cached = _copy_attachment_into_cache(
                attachment_id=index,
                attachment=attachment,
                cache_key=cache_key,
                cache_timeout=CACHE_TIMEOUT,
            )
            cached_attachments.append(cached)

    if cached_attachments:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(
                cache_key, attachments=cached_attachments, timeout=CACHE_TIMEOUT
            )

    preprocess_event_from_reprocessing(
        cache_key=cache_key, start_time=start_time, event_id=event_id
    )
def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, data=None):
    """
    Run symbolication for one event and pass it on to ``_do_process_event``.

    ``data`` is loaded from the event processing store when not supplied;
    if nothing is found the failure is recorded and the function returns.
    On ``RetrySymbolication`` the task is re-queued, unless the hard timeout
    has been exceeded, in which case the event continues unsymbolicated with
    the processing error flags set. Any other exception also sets those
    flags and lets the event continue.
    """
    from sentry.lang.native.processing import get_symbolication_function

    def flag_fatal(payload):
        # Record on the payload that processing failed fatally.
        payload.setdefault("_metrics", {})["flag.processing.error"] = True
        payload.setdefault("_metrics", {})["flag.processing.fatal"] = True

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    # The event ID stored in the payload replaces the one passed in.
    event_id = data["event_id"]

    symbolication_function = get_symbolication_function(data)

    has_changed = False
    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    try:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
            span.set_data("symbolicaton_function", symbolication_function.__name__)

            with metrics.timer("tasks.store.symbolicate_event.symbolication"):
                symbolicated_data = symbolication_function(data)

            span.set_data("symbolicated_data", bool(symbolicated_data))
            if symbolicated_data:
                data = symbolicated_data
                has_changed = True

    except RetrySymbolication as retry:
        log_context = {"project_id": project_id, "event_id": event_id}

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning("symbolicate.slow", extra=log_context)

        if not (
            start_time
            and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT
        ):
            # Requeue the task in the "sleep" queue
            retry_symbolicate_event.apply_async(
                args=(),
                kwargs={
                    "symbolicate_task_name": symbolicate_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=retry.retry_after,
            )
            return

        # Do not drop event but actually continue with rest of pipeline
        # (persisting unsymbolicated event)
        error_logger.exception("symbolicate.failed.infinite_retry", extra=log_context)
        flag_fatal(data)
        has_changed = True

    except Exception:
        error_logger.exception("tasks.store.symbolicate_event.symbolication")
        flag_fatal(data)
        has_changed = True

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    if from_reprocessing:
        process_task = process_event_from_reprocessing
    else:
        process_task = process_event

    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
def get(self, request, organization):
    """
    Return paginated trend results for an organization.

    Query parameters read from ``request.GET``: ``middle`` (date splitting
    the queried range; defaults to its midpoint), ``trendType`` (defaults to
    ``REGRESSION``), ``trendFunction`` (defaults to ``"p50()"``) and
    ``query``.

    Responds with 404 when the feature is unavailable and with an empty list
    when there are no projects. Raises ``ParseError`` for an invalid or
    out-of-range ``middle`` and for an unsupported ``trendType``.
    """
    if not self.has_feature(organization, request):
        return Response(status=404)

    try:
        params = self.get_snuba_params(request, organization)
    except NoProjects:
        return Response([])

    with sentry_sdk.start_span(op="discover.endpoint", description="trend_dates"):
        requested_middle = request.GET.get("middle")
        if not requested_middle:
            # No explicit split point: use the midpoint of the queried range.
            half_seconds = (params["end"] - params["start"]).total_seconds() * 0.5
            middle = params["start"] + timedelta(seconds=half_seconds)
        else:
            try:
                middle = parse_datetime_string(requested_middle)
            except InvalidQuery:
                raise ParseError(detail=f"{requested_middle} is not a valid date format")
            if not (params["start"] < middle < params["end"]):
                raise ParseError(
                    detail="The middle date should be within the duration of the query"
                )
        middle = middle.strftime(DateArg.date_format)

    trend_type = request.GET.get("trendType", REGRESSION)
    if trend_type not in TREND_TYPES:
        raise ParseError(detail=f"{trend_type} is not a supported trend type")

    params["aliases"] = self.get_function_aliases(trend_type)

    trend_function = request.GET.get("trendFunction", "p50()")
    function, columns, _ = parse_function(trend_function)
    # Default to duration
    column = columns[0] if len(columns) != 0 else "transaction.duration"

    trend_columns = self.get_trend_columns(function, column, middle)

    selected_columns = self.get_field_list(organization, request)
    orderby = self.get_orderby(request)
    query = request.GET.get("query")

    def data_fn(offset, limit):
        # Page fetcher used by the paginator.
        return discover.query(
            selected_columns=selected_columns + trend_columns,
            query=query,
            params=params,
            orderby=orderby,
            offset=offset,
            limit=limit,
            referrer="api.trends.get-percentage-change",
            auto_fields=True,
            auto_aggregations=True,
            use_aggregate_conditions=True,
        )

    with self.handle_query_errors():
        result_handler = self.build_result_handler(
            request, organization, params, trend_function, selected_columns, orderby, query
        )
        return self.paginate(
            request=request,
            paginator=GenericOffsetPaginator(data_fn=data_fn),
            on_results=result_handler,
            default_per_page=5,
            max_per_page=5,
        )
def _post_by_project(self, request, full_config_requested):
    """
    Build the project configs for the project ids listed under "projects"
    in the relay request data.

    Every requested id gets an entry keyed by its string form. The entry is
    ``{"disabled": True}`` unless both the project and an organization the
    relay has access to were found. When ``full_config_requested`` is true
    the resulting configs are also written to ``projectconfig_cache``.
    """
    project_ids = set(request.relay_request_data.get("projects") or ())

    with start_span(op="relay_fetch_projects"):
        projects = {}
        if project_ids:
            with metrics.timer("relay_project_configs.fetching_projects.duration"):
                projects = {
                    project.id: project
                    for project in Project.objects.get_many_from_cache(project_ids)
                }

    with start_span(op="relay_fetch_orgs"):
        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.
        org_ids = {project.organization_id for project in projects.values()}

        orgs = {}
        if org_ids:
            with metrics.timer("relay_project_configs.fetching_orgs.duration"):
                fetched_orgs = Organization.objects.get_many_from_cache(org_ids)
                orgs = {
                    org.id: org for org in fetched_orgs if request.relay.has_org_access(org)
                }

        with metrics.timer("relay_project_configs.fetching_org_options.duration"):
            for org_id in orgs:
                OrganizationOption.objects.get_all_values(org_id)

    with start_span(op="relay_fetch_keys"):
        # Group the keys of all requested projects by project id.
        project_keys = {}
        for project_key in ProjectKey.objects.filter(project_id__in=project_ids):
            project_keys.setdefault(project_key.project_id, []).append(project_key)

    metrics.timing("relay_project_configs.projects_requested", len(project_ids))
    metrics.timing("relay_project_configs.projects_fetched", len(projects))
    metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

    configs = {}
    for requested_id in project_ids:
        config_key = str(requested_id)
        configs[config_key] = {"disabled": True}

        project = projects.get(int(requested_id))
        organization = None if project is None else orgs.get(project.organization_id)
        if organization is None:
            continue

        # Prevent organization from being fetched again in quotas.
        project.set_cached_field_value("organization", organization)

        with start_span(op="get_config"):
            with metrics.timer("relay_project_configs.get_config.duration"):
                project_config = config.get_project_config(
                    project,
                    full_config=full_config_requested,
                    project_keys=project_keys.get(project.id) or [],
                )

        configs[config_key] = project_config.to_dict()

    if full_config_requested:
        projectconfig_cache.set_many(configs)

    return Response({"configs": configs}, status=200)