def data_fn(offset, limit):
    """Fetch one page of trend results, via SnQL or the legacy discover path."""
    if not use_snql:
        # Legacy discover path: hand pagination straight through to discover.query.
        return discover.query(
            selected_columns=selected_columns + trend_columns,
            query=query,
            params=params,
            orderby=orderby,
            offset=offset,
            limit=limit,
            referrer="api.trends.get-percentage-change",
            auto_fields=True,
            auto_aggregations=True,
            use_aggregate_conditions=True,
        )

    # SnQL path: paginate by mutating the prepared query builder in place.
    trend_query.offset = Offset(offset)
    trend_query.limit = Limit(limit)
    snql_result = raw_snql_query(
        trend_query.get_snql_query(),
        referrer="api.trends.get-percentage-change.wip-snql",
    )
    return discover.transform_results(
        snql_result, trend_query.function_alias_map, {}, None
    )
def transform_aliases_and_query(**kwargs):
    """
    Convert aliases in selected_columns, groupby, aggregation, conditions,
    orderby and arrayjoin fields to their internal Snuba format and post the
    query to Snuba. Convert back translated aliases before returning snuba
    results.

    :deprecated: This method is deprecated. You should use sentry.snuba.discover instead.
    """
    # Public alias -> internal snuba column renames for arrayjoin.
    arrayjoin_map = {"error": "exception_stacks", "stack": "exception_frames"}
    # Maps internal (snuba) column name -> public name; used to translate
    # result columns back before returning.
    translated_columns = {}
    # Aliases produced by function expressions / aggregations. These must not
    # be re-resolved as raw snuba columns, and conditions on them become HAVING.
    derived_columns = set()

    selected_columns = kwargs.get("selected_columns")
    groupby = kwargs.get("groupby")
    aggregations = kwargs.get("aggregations")
    conditions = kwargs.get("conditions")
    # filter_keys is the only required kwarg — KeyError here if missing.
    filter_keys = kwargs["filter_keys"]
    arrayjoin = kwargs.get("arrayjoin")
    orderby = kwargs.get("orderby")
    having = kwargs.get("having", [])
    dataset = Dataset.Events

    if selected_columns:
        for (idx, col) in enumerate(selected_columns):
            if isinstance(col, list):
                # if list, means there are potentially nested functions and need to
                # iterate and translate potential columns
                parse_columns_in_functions(col)
                selected_columns[idx] = col
                # col[2] is the expression's alias; it maps to itself in results.
                translated_columns[col[2]] = col[2]
                derived_columns.add(col[2])
            else:
                name = get_snuba_column_name(col)
                selected_columns[idx] = name
                translated_columns[name] = col

    if groupby:
        for (idx, col) in enumerate(groupby):
            if col not in derived_columns:
                name = get_snuba_column_name(col)
            else:
                # Already an alias produced above; pass through unchanged.
                name = col

            groupby[idx] = name
            translated_columns[name] = col

    for aggregation in aggregations or []:
        # aggregation is [function, column(s), alias]; translate the column
        # argument(s) but record the alias as derived.
        derived_columns.add(aggregation[2])
        if isinstance(aggregation[1], six.string_types):
            aggregation[1] = get_snuba_column_name(aggregation[1])
        elif isinstance(aggregation[1], (set, tuple, list)):
            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]

    # Re-key filter_keys in place from public names to snuba names.
    for col in filter_keys.keys():
        name = get_snuba_column_name(col)
        filter_keys[name] = filter_keys.pop(col)

    if conditions:
        aliased_conditions = []
        for condition in conditions:
            field = condition[0]
            if not isinstance(field, (list, tuple)) and field in derived_columns:
                # Conditions on aggregate aliases must run post-aggregation.
                having.append(condition)
            else:
                aliased_conditions.append(condition)
        kwargs["conditions"] = aliased_conditions

    if having:
        kwargs["having"] = having

    if orderby:
        # Normalize to a list and translate each field, preserving any leading
        # "-" (descending) prefix.
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        translated_orderby = []

        for field_with_order in orderby:
            field = field_with_order.lstrip("-")
            translated_orderby.append(
                u"{}{}".format(
                    "-" if field_with_order.startswith("-") else "",
                    field if field in derived_columns else get_snuba_column_name(field),
                )
            )

        kwargs["orderby"] = translated_orderby

    kwargs["arrayjoin"] = arrayjoin_map.get(arrayjoin, arrayjoin)
    kwargs["dataset"] = dataset

    result = dataset_query(**kwargs)

    # Translate snuba column names in the result back to public aliases.
    return transform_results(result, translated_columns, kwargs)
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    referrer: str,
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[str] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    all_tag_keys: Optional[bool] = None,
    tag_key: Optional[bool] = None,
) -> Dict:
    """
    Query per-tag aggregate performance (sumdelta, count, frequency,
    comparison, aggregate) for transactions matching ``filter_query``,
    dynamically sampling when the transaction volume is large.

    :param tag_data: precomputed totals for the query; must contain
        ``"aggregate"`` (overall avg) and ``"count"`` (transaction count).
    :param tag_key: when set, restrict to that single tag key and page with
        ``limit``/``offset``; otherwise return the top value per tag key.
    :returns: transformed snuba result dict.
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
        translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    # log-e growth starting at 50,000
    target_sample = max(
        sample_start_count * (math.log(transaction_count) - (math.log(sample_start_count) - 1)),
        transaction_count,
    )

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample / transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0), 1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project", "browser", "celery_task_id", "url"],
    ]

    # Bind the span so the sampling metadata lands on this span, not the
    # already-finished filter_transform span above.
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.aggregate_tags"
    ) as span:
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags]
        if not all_tag_keys and not tag_key:
            having.append(["aggregate", ">", aggregate_comparison])

        # NOTE(review): annotated as Optional[str] but concatenated with a
        # list below — callers appear to pass a list; confirm.
        resolved_orderby = [] if orderby is None else orderby
        conditions.append([translated_aggregate_column, "IS NOT NULL", None])
        if tag_key:
            conditions.append(["tags_key", "IN", [tag_key]])

        # Paging only applies in single-tag-key mode; otherwise take the top
        # value per key via limitby.
        tag_key_limit = limit if tag_key else 1

        tag_selected_columns = [
            [
                "divide",
                [
                    ["sum", [["minus", [translated_aggregate_column, transaction_aggregate]]]],
                    frequency_sample_rate,
                ],
                "sumdelta",
            ],
            ["count", [], "count"],
            [
                "divide",
                [["divide", [["count", []], frequency_sample_rate]], transaction_count],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        limitby = [tag_key_limit, "tags_key"] if not tag_key else None

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key", "tags_value"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            # f-string already interpolates; the previous trailing
            # ``.format(referrer, "tag_values")`` was a no-op and is removed.
            referrer=f"{referrer}.tag_values",
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=limitby,
            limit=limit,
            offset=offset,
        )

        results = discover.transform_results(results, {}, translated_columns, snuba_filter)

        return results
def query_trace_data(
    trace_id: str, params: Mapping[str, str]
) -> Tuple[Sequence[SnubaTransaction], Sequence[SnubaError]]:
    """Fetch the transactions and the errors belonging to a single trace."""
    transaction_query = discover.prepare_discover_query(
        selected_columns=[
            "id",
            "transaction.status",
            "transaction.op",
            "transaction.duration",
            "transaction",
            "timestamp",
            # project gets the slug, and project.id gets added automatically
            "project",
            "trace.span",
            "trace.parent_span",
            'to_other(trace.parent_span, "", 0, 1) AS root',
        ],
        # We want to guarantee at least getting the root, and hopefully events near it with timestamp
        # id is just for consistent results
        orderby=["-root", "timestamp", "id"],
        params=params,
        query=f"event.type:transaction trace:{trace_id}",
    )
    error_query = discover.prepare_discover_query(
        selected_columns=[
            "id",
            "project",
            "timestamp",
            "trace.span",
            "transaction",
            "issue",
            "title",
            "tags[level]",
        ],
        # Don't add timestamp to this orderby as snuba will have to split the time range up and make multiple queries
        orderby=["id"],
        params=params,
        query=f"!event.type:transaction trace:{trace_id}",
        auto_fields=False,
    )
    prepared_queries = (transaction_query, error_query)

    def _to_snuba_params(snuba_filter):
        # Translate a prepared discover filter into raw bulk-query parameters.
        return SnubaQueryParams(
            dataset=Dataset.Discover,
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            limit=MAX_TRACE_SIZE,
        )

    raw_results = bulk_raw_query(
        [_to_snuba_params(prepared.filter) for prepared in prepared_queries],
        referrer="api.trace-view.get-events",
    )

    # Translate snuba columns back to public aliases and keep just the rows.
    transformed = []
    for raw, prepared in zip(raw_results, prepared_queries):
        transformed.append(
            discover.transform_results(
                raw, prepared.fields["functions"], prepared.columns, prepared.filter
            )["data"]
        )

    return (
        cast(Sequence[SnubaTransaction], transformed[0]),
        cast(Sequence[SnubaError], transformed[1]),
    )
def query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    extra_snql_condition=None,
    functions_acl=None,
    use_snql=False,
):
    """
    Run a discover-style query, serving it from the metrics dataset when the
    query is compatible and falling back to discover otherwise.

    Mirrors the interface of ``sentry.snuba.discover.query``. The returned
    result dict carries ``meta["isMetricsData"]`` indicating which backend
    produced it.
    """
    # Equations are not supported by the metrics backend.
    metrics_compatible = not equations

    if metrics_compatible:
        try:
            metrics_query = MetricsQueryBuilder(
                params,
                query=query,
                selected_columns=selected_columns,
                equations=[],
                orderby=orderby,
                # Auto fields will add things like id back in if enabled
                auto_fields=False,
                auto_aggregations=auto_aggregations,
                use_aggregate_conditions=use_aggregate_conditions,
                functions_acl=functions_acl,
                limit=limit,
                offset=offset,
            )
            # Getting the 0th result for now, will need to consolidate multiple query results later
            # NOTE(review): referrer defaults to None, which would raise a
            # TypeError here; callers appear to always pass one — confirm.
            results = metrics_query.run_query(referrer + ".metrics-enhanced")
            results = discover.transform_results(
                results, metrics_query.function_alias_map, {}, None
            )
            results = resolve_tags(results, metrics_query)
            results["meta"]["isMetricsData"] = True
            return results
        # raise Invalid Queries since the same thing will happen with discover
        except InvalidSearchQuery:
            raise
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # Either metrics failed, or this isn't a query we can enhance with metrics
    if not metrics_compatible:
        results = discover.query(
            selected_columns,
            query,
            params,
            equations=equations,
            orderby=orderby,
            offset=offset,
            limit=limit,
            referrer=referrer,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            use_aggregate_conditions=use_aggregate_conditions,
            conditions=conditions,
            extra_snql_condition=extra_snql_condition,
            functions_acl=functions_acl,
            use_snql=use_snql,
        )
        results["meta"]["isMetricsData"] = False
        return results

    # Defensive fallback; both branches above return.
    return {}
def timeseries_query(
    selected_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    referrer: str,
    zerofill_results: bool = True,
    comparison_delta: Optional[timedelta] = None,
    functions_acl: Optional[List[str]] = None,
    use_snql: Optional[bool] = False,
) -> SnubaTSResult:
    """
    High-level API for doing arbitrary user timeseries queries against events.

    this API should match that of sentry.snuba.discover.timeseries_query
    """
    equations, columns = categorize_columns(selected_columns)
    # Comparison deltas and equations cannot be served from metrics.
    metrics_compatible = comparison_delta is None and not equations

    if metrics_compatible:
        try:
            builder = TimeseriesMetricQueryBuilder(
                params,
                rollup,
                query=query,
                selected_columns=columns,
                functions_acl=functions_acl,
            )
            result = builder.run_query(referrer + ".metrics-enhanced")
            result = discover.transform_results(result, builder.function_alias_map, {}, None)
            series = result["data"]
            if zerofill_results:
                series = discover.zerofill(
                    series,
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )
            return SnubaTSResult(
                {"data": series, "isMetricsData": True},
                params["start"],
                params["end"],
                rollup,
            )
        # raise Invalid Queries since the same thing will happen with discover
        except InvalidSearchQuery as error:
            raise error
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # This isn't a query we can enhance with metrics
    if not metrics_compatible:
        return discover.timeseries_query(
            selected_columns,
            query,
            params,
            rollup,
            referrer,
            zerofill_results,
            comparison_delta,
            functions_acl,
            use_snql,
        )
    return SnubaTSResult()