def _create_in_snuba(subscription):
    """Register *subscription* with Snuba and return the remote subscription id.

    Raises SnubaError when Snuba responds with anything other than 202.
    """
    where = resolve_discover_aliases(get_filter(subscription.query))[0].conditions
    # A subscription has at most one environment attached; no row means no filter.
    try:
        env = subscription.environments.all()[:1].get()
    except Environment.DoesNotExist:
        env = None
    if env:
        where.append(["environment", "=", env.name])
    where = apply_dataset_conditions(QueryDatasets(subscription.dataset), where)
    payload = {
        "project_id": subscription.project_id,
        "dataset": subscription.dataset,
        # We only care about conditions here. Filter keys only matter for
        # filtering to project and groups. Projects are handled with an
        # explicit param, and groups can't be queried here.
        "conditions": where,
        "aggregations": [
            query_aggregation_to_snuba[QueryAggregations(subscription.aggregation)]
        ],
        "time_window": subscription.time_window,
        "resolution": subscription.resolution,
    }
    response = _snuba_pool.urlopen(
        "POST", "/%s/subscriptions" % (subscription.dataset,), body=json.dumps(payload)
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
def _create_in_snuba(project, dataset, query, aggregation, time_window, resolution, environments):
    """Create a Snuba subscription for *project* and return its subscription id.

    ``time_window`` and ``resolution`` are timedeltas; Snuba expects whole seconds.
    Raises SnubaError on any response other than 202.
    """
    resolved = resolve_discover_aliases({"conditions": get_filter(query).conditions})[0]
    where = resolved["conditions"]
    if environments:
        where.append(["environment", "IN", [env.name for env in environments]])
    body = {
        "project_id": project.id,
        "dataset": dataset.value,
        # We only care about conditions here. Filter keys only matter for
        # filtering to project and groups. Projects are handled with an
        # explicit param, and groups can't be queried here.
        "conditions": where,
        "aggregations": [query_aggregation_to_snuba[aggregation]],
        "time_window": int(time_window.total_seconds()),
        "resolution": int(resolution.total_seconds()),
    }
    response = _snuba_pool.urlopen(
        "POST", "/%s/subscriptions" % (dataset.value,), body=json.dumps(body)
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
def build_incident_query_params(incident, start=None, end=None, windowed_stats=False):
    """Assemble the raw Snuba query arguments for a single incident.

    Returns a dict of start/end, resolved conditions, and filter keys ready
    to be passed to a raw Snuba query.
    """
    window_start, window_end = calculate_incident_time_range(
        incident, start, end, windowed_stats=windowed_stats
    )
    params = {"start": window_start, "end": window_end}
    # Restrict to the groups/projects attached to the incident, when any exist.
    group_ids = list(
        IncidentGroup.objects.filter(incident=incident).values_list("group_id", flat=True)
    )
    if group_ids:
        params["group_ids"] = group_ids
    project_ids = list(
        IncidentProject.objects.filter(incident=incident).values_list("project_id", flat=True)
    )
    if project_ids:
        params["project_id"] = project_ids
    snuba_filter = get_filter(incident.alert_rule.snuba_query.query, params)
    where = resolve_discover_aliases(snuba_filter)[0].conditions
    if incident.alert_rule:
        where = apply_dataset_conditions(
            QueryDatasets(incident.alert_rule.snuba_query.dataset), where
        )
    return {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": where,
        "filter_keys": snuba_filter.filter_keys,
        "having": [],
    }
def _create_in_snuba(subscription):
    """Create the subscription's query in Snuba; return the remote subscription id.

    Raises SnubaError when Snuba does not acknowledge with a 202.
    """
    query = subscription.snuba_query
    snuba_filter = get_filter(query.query)
    snuba_filter.update_with(
        resolve_field_list([query.aggregate], snuba_filter, auto_fields=False)
    )
    snuba_filter = resolve_discover_aliases(snuba_filter)[0]
    env = query.environment
    if env:
        snuba_filter.conditions.append(["environment", "=", env.name])
    where = apply_dataset_conditions(QueryDatasets(query.dataset), snuba_filter.conditions)
    payload = json.dumps(
        {
            "project_id": subscription.project_id,
            "dataset": query.dataset,
            "conditions": where,
            "aggregations": snuba_filter.aggregations,
            "time_window": query.time_window,
            "resolution": query.resolution,
        }
    )
    response = _snuba_pool.urlopen(
        "POST", "/%s/subscriptions" % (query.dataset,), body=payload
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
def _create_in_snuba(subscription):
    """Register *subscription* with Snuba and return its subscription id.

    Raises SnubaError on any response other than 202.
    """
    resolved = resolve_discover_aliases(
        {"conditions": get_filter(subscription.query).conditions}
    )[0]
    where = resolved["conditions"]
    env_names = [env.name for env in subscription.environments.all()]
    if env_names:
        where.append(["environment", "IN", env_names])
    body = {
        "project_id": subscription.project_id,
        "dataset": subscription.dataset,
        # We only care about conditions here. Filter keys only matter for
        # filtering to project and groups. Projects are handled with an
        # explicit param, and groups can't be queried here.
        "conditions": where,
        "aggregations": [
            query_aggregation_to_snuba[QueryAggregations(subscription.aggregation)]
        ],
        "time_window": subscription.time_window,
        "resolution": subscription.resolution,
    }
    response = _snuba_pool.urlopen(
        "POST", "/%s/subscriptions" % (subscription.dataset,), body=json.dumps(body)
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    """Resolve a discover query string and aggregate into a Snuba filter.

    Adds an environment restriction when one is given, and applies the
    dataset-specific conditions before returning the filter.
    """
    snuba_filter = get_filter(query, params=params)
    resolved_fields = resolve_field_list([aggregate], snuba_filter, auto_fields=False)
    snuba_filter.update_with(resolved_fields)
    snuba_filter, _ = resolve_discover_aliases(snuba_filter)
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    snuba_filter.conditions = apply_dataset_conditions(dataset, snuba_filter.conditions)
    return snuba_filter
def query_tag_data(
    params: Mapping[str, str],
    referrer: str,
    filter_query: Optional[str] = None,
    aggregate_column: Optional[str] = None,
) -> Optional[Dict]:
    """Fetch overall count and aggregate stats for the matching transactions.

    Used to seed the facet queries.

    :return: The single row of count/aggregate/max/min on success, or None
             when the query returned no usable data (callers return early).
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Translate public aliases into the discover dataset's column names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
        translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    with sentry_sdk.start_span(op="discover.discover", description="facets.frequent_tags"):
        # One row: overall count plus avg/max/min of the aggregate column.
        tag_data = discover.query(
            selected_columns=[
                "count()",
                f"avg({aggregate_column}) as aggregate",
                f"max({aggregate_column}) as max",
                f"min({aggregate_column}) as min",
            ],
            conditions=[[translated_aggregate_column, "IS NOT NULL", None]],
            query=filter_query,
            params=params,
            orderby=["-count"],
            referrer=f"{referrer}.all_transactions",
            limit=1,
        )

        rows = tag_data["data"]
        if len(rows) != 1:
            return None
        # Bail out for 0-count transactions or aggregates over columns that
        # don't exist, so we don't issue further queries.
        if rows[0]["count"] == 0 or rows[0]["aggregate"] is None:
            return None
        if not rows[0]:
            return None
        return rows[0]
def bulk_build_incident_query_params(incidents, start=None, end=None, windowed_stats=False):
    """Build Snuba query args for each incident in *incidents*, in order."""
    # Prefetch the attached group/project ids for all incidents in two queries.
    groups_by_incident = defaultdict(list)
    group_rows = IncidentGroup.objects.filter(incident__in=incidents).values_list(
        "incident_id", "group_id"
    )
    for incident_id, group_id in group_rows:
        groups_by_incident[incident_id].append(group_id)

    projects_by_incident = defaultdict(list)
    project_rows = IncidentProject.objects.filter(incident__in=incidents).values_list(
        "incident_id", "project_id"
    )
    for incident_id, project_id in project_rows:
        projects_by_incident[incident_id].append(project_id)

    # Avoid a per-incident alert_rule query below.
    attach_foreignkey(incidents, Incident.alert_rule)

    query_args_list = []
    for incident in incidents:
        window_start, window_end = calculate_incident_time_range(
            incident, start, end, windowed_stats=windowed_stats
        )
        params = {"start": window_start, "end": window_end}
        group_ids = groups_by_incident[incident.id]
        if group_ids:
            params["group_ids"] = group_ids
        project_ids = projects_by_incident[incident.id]
        if project_ids:
            params["project_id"] = project_ids
        snuba_filter = get_filter(incident.alert_rule.snuba_query.query, params)
        where = resolve_discover_aliases(snuba_filter)[0].conditions
        if incident.alert_rule:
            where = apply_dataset_conditions(
                QueryDatasets(incident.alert_rule.snuba_query.dataset), where
            )
        query_args_list.append(
            {
                "start": snuba_filter.start,
                "end": snuba_filter.end,
                "conditions": where,
                "filter_keys": snuba_filter.filter_keys,
                "having": [],
            }
        )
    return query_args_list
def bulk_build_incident_query_params(incidents, start=None, end=None, prewindow=False):
    """Build Snuba query args for each incident, optionally pulling the start
    of each window back by a computed prewindow."""
    groups = defaultdict(list)
    for incident_id, group_id in IncidentGroup.objects.filter(
        incident__in=incidents
    ).values_list("incident_id", "group_id"):
        groups[incident_id].append(group_id)

    projects = defaultdict(list)
    for incident_id, project_id in IncidentProject.objects.filter(
        incident__in=incidents
    ).values_list("incident_id", "project_id"):
        projects[incident_id].append(project_id)

    query_args_list = []
    for incident in incidents:
        window_start = incident.date_started if start is None else start
        window_end = incident.current_end_date if end is None else end
        if prewindow:
            # Widen the window to include a lead-in period before the incident.
            window_start = window_start - calculate_incident_prewindow(window_start, window_end)
        params = {"start": window_start, "end": window_end}
        group_ids = groups[incident.id]
        if group_ids:
            params["group_ids"] = group_ids
        project_ids = projects[incident.id]
        if project_ids:
            params["project_id"] = project_ids
        snuba_filter = get_filter(incident.query, params)
        snuba_args = {
            "start": snuba_filter.start,
            "end": snuba_filter.end,
            "conditions": snuba_filter.conditions,
            "filter_keys": snuba_filter.filter_keys,
            "having": [],
        }
        snuba_args["conditions"] = resolve_discover_aliases(snuba_args)[0]["conditions"]
        query_args_list.append(snuba_args)
    return query_args_list
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[str] = None,
    referrer: Optional[str] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    all_tag_keys: Optional[bool] = None,
    tag_key: Optional[bool] = None,
) -> Dict:
    """Query tag facets ranked by their effect on ``aggregate_column``.

    ``tag_data`` is the row produced by ``query_tag_data`` and supplies the
    overall transaction count/aggregate used for comparison and sampling.

    :return: Raw Snuba results with ``meta`` transformed for the API.
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = discover.get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
        translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    # log-e growth starting at 50,000
    # NOTE(review): max() with transaction_count means target_sample can never
    # be below the full count, so dynamic_sample_rate clamps to 1 whenever
    # sampling is enabled — confirm whether min() was intended here.
    target_sample = max(
        sample_start_count * (math.log(transaction_count) - (math.log(sample_start_count) - 1)),
        transaction_count,
    )

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample / transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0), 1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project", "browser", "celery_task_id", "url"],
    ]

    with sentry_sdk.start_span(op="discover.discover", description="facets.aggregate_tags"):
        # NOTE(review): `span` is still the one bound by the first `with`
        # above (this block has no `as span`) — confirm these data points
        # shouldn't be attached to the aggregate_tags span instead.
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)

        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags]
        # Unless the caller asks for all keys or a specific key, only keep tag
        # values whose aggregate is meaningfully above the overall aggregate.
        if not all_tag_keys and not tag_key:
            having.append(["aggregate", ">", aggregate_comparison])

        resolved_orderby = [] if orderby is None else orderby

        conditions.append([translated_aggregate_column, "IS NOT NULL", None])
        if tag_key:
            conditions.append(["tags_key", "IN", [tag_key]])
        # One row per tag key unless a single key was requested explicitly.
        tag_key_limit = limit if tag_key else 1

        tag_selected_columns = [
            # Sum of deltas vs. the overall aggregate, corrected for sampling.
            [
                "divide",
                [
                    [
                        "sum",
                        [
                            "minus",
                            [
                                translated_aggregate_column,
                                str(transaction_aggregate),
                            ],
                        ],
                    ],
                    frequency_sample_rate,
                ],
                "sumdelta",
            ],
            ["count", [], "count"],
            # Fraction of all transactions carrying this tag value.
            [
                "divide",
                [
                    ["divide", [["count", []], frequency_sample_rate]],
                    transaction_count,
                ],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            # Fix: the f-string already interpolates `referrer`; the trailing
            # `.format(referrer, "tag_values")` call was a no-op and is removed.
            referrer=f"{referrer}.tag_values",
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=[tag_key_limit, "tags_key"],
            limit=limit,
            offset=offset,
        )

        results["meta"] = discover.transform_meta(results, {})

        return results
def query_top_tags(
    params: Mapping[str, str],
    tag_key: str,
    limit: int,
    referrer: str,
    orderby: Optional[List[str]],
    offset: Optional[int] = None,
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
) -> Optional[List[Any]]:
    """
    Fetch counts by tag value, finding the top tag values for a tag key by a limit.
    :return: Returns the rows with the value, the aggregate and the count if the query was successful
             Returns None if query was not successful which causes the endpoint to return early
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
        translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    with sentry_sdk.start_span(op="discover.discover", description="facets.top_tags"):
        if not orderby:
            orderby = ["-count"]

        for i, sort in enumerate(orderby):
            if "frequency" in sort:
                # Replacing frequency as it's the same underlying data dimension, this way we don't have to modify the existing histogram query.
                orderby[i] = sort.replace("frequency", "count")

        # Stable secondary sort so ties don't shuffle between pages.
        if "tags_value" not in orderby:
            orderby = orderby + ["tags_value"]

        # Get the average and count to use to filter the next request to facets
        tag_data = discover.query(
            selected_columns=[
                "count()",
                f"avg({aggregate_column}) as aggregate",
                "array_join(tags.value) as tags_value",
            ],
            query=filter_query,
            params=params,
            orderby=orderby,
            conditions=[
                [translated_aggregate_column, "IS NOT NULL", None],
                ["tags_key", "IN", [tag_key]],
            ],
            functions_acl=["array_join"],
            referrer=f"{referrer}.top_tags",
            limit=limit,
            offset=offset,
        )

        # Return early (None) when nothing matched, or when the top row has a
        # zero count (e.g. aggregates over columns that don't exist), so the
        # endpoint avoids issuing further queries.
        # Fix: a trailing `if not tag_data["data"]` check was unreachable once
        # the list was verified non-empty here; it has been removed.
        if not tag_data["data"]:
            return None
        if tag_data["data"][0]["count"] == 0:
            return None
        return tag_data["data"]