def _resolve_failure_count(
    self,
    _: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    statuses = [indexer.resolve(status) for status in constants.NON_FAILURE_STATUS]
    # Failures are transaction.duration metrics whose transaction.status is not
    # one of the resolvable non-failure statuses.
    return self._resolve_count_if(
        Function(
            "equals",
            [
                Column("metric_id"),
                self.resolve_metric("transaction.duration"),
            ],
        ),
        Function(
            "notIn",
            [
                self.builder.column("transaction.status"),
                list(status for status in statuses if status is not None),
            ],
        ),
        alias,
    )
def _snql_on_session_status_factory(session_status, metric_ids, alias=None):
    # `aggregate` is presumably provided by the enclosing scope (e.g. "sumIf" or "uniqIf").
    return Function(
        aggregate,
        [
            Column("value"),
            Function(
                "and",
                [
                    Function(
                        "equals",
                        [
                            Column(f"tags[{resolve_weak('session.status')}]"),
                            resolve_weak(session_status),
                        ],
                    ),
                    Function("in", [Column("metric_id"), list(metric_ids)]),
                ],
            ),
        ],
        alias,
    )
def test_sub_query(self) -> None:
    inner_query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
    )

    query = (
        Query("discover", inner_query)
        .set_select([Function("avg", [Column("count")], "avg_count")])
        .set_orderby([OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)])
        .set_limit(1000)
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["data"] == [{"avg_count": 1.0}]
def _resolve_user_misery_function(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    # User misery is computed as (miserable_users + alpha) / (unique_users + alpha + beta).
    return Function(
        "divide",
        [
            Function(
                "plus",
                [
                    self.builder.resolve_function("count_miserable(user)"),
                    constants.MISERY_ALPHA,
                ],
            ),
            Function(
                "plus",
                [
                    Function(
                        "nullIf",
                        [self.builder.resolve_function("count_unique(user)"), 0],
                    ),
                    constants.MISERY_ALPHA + constants.MISERY_BETA,
                ],
            ),
        ],
        alias,
    )
def test_counter_sum_aggregation_on_session_status(self):
    for status, func in [
        ("init", init_sessions),
        ("crashed", crashed_sessions),
        ("errored_preaggr", errored_preaggr_sessions),
    ]:
        assert func(self.metric_ids, alias=status) == Function(
            "sumIf",
            [
                Column("value"),
                Function(
                    "and",
                    [
                        Function(
                            "equals",
                            [
                                Column(f"tags[{resolve_weak('session.status')}]"),
                                resolve_weak(status),
                            ],
                        ),
                        Function("in", [Column("metric_id"), list(self.metric_ids)]),
                    ],
                ),
            ],
            status,
        )
def limiting_conditions(self) -> Optional[List[Condition]]:
    if not self.initialized or not self._groups:
        # First query may run without limiting conditions
        # When there are no groups there is nothing to limit
        return None

    group_columns = [col for col in self._groupby if col not in self.skip_columns]

    if not group_columns:
        return []

    # Create conditions from the groups in group by
    group_values = [
        Function("tuple", [row[column.name] for column in group_columns])
        for row in self._groups
    ]

    return [
        # E.g. (release, environment) IN [(1, 2), (3, 4), ...]
        Condition(Function("tuple", group_columns), Op.IN, group_values)
    ] + [
        # These conditions are redundant but might lead to better query performance
        # E.g. [release IN [1, 3]], [environment IN [2, 4]]
        Condition(column, Op.IN, [row[column.name] for row in self._groups])
        for column in group_columns
    ]
def expected_query(match, select, extra_groupby, metric_name):
    function, column, alias = select
    return Query(
        dataset="metrics",
        match=Entity(match),
        select=[
            Function(
                OP_TO_SNUBA_FUNCTION[match][alias],
                [
                    Column("value"),
                    Function("equals", [Column("metric_id"), resolve_weak(metric_name)]),
                ],
                alias=f"{alias}({metric_name})",
            )
        ],
        groupby=[Column("tags[8]"), Column("tags[2]")] + extra_groupby,
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]"), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [resolve_weak(metric_name)]),
        ],
        limit=Limit(MAX_POINTS),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
def test_set_uniq_aggregation_on_session_status(self):
    for status, func in [
        ("init", all_users),
        ("crashed", crashed_users),
    ]:
        assert func(self.metric_ids, alias=status) == Function(
            "uniqIf",
            [
                Column("value"),
                Function(
                    "and",
                    [
                        Function(
                            "equals",
                            [
                                Column(f"tags[{resolve_weak('session.status')}]"),
                                resolve_weak(status),
                            ],
                        ),
                        Function("in", [Column("metric_id"), list(self.metric_ids)]),
                    ],
                ),
            ],
            status,
        )
def percentage(arg1_snql, arg2_snql, metric_ids, alias=None):
    return Function(
        "multiply",
        [
            100,
            Function("minus", [1, Function("divide", [arg1_snql, arg2_snql])]),
        ],
        alias,
    )
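# Usage sketch (not from the original source): composing percentage() with the
# session aggregations used elsewhere in this file to build a crash-free-rate
# style column. The function name and alias below are illustrative assumptions.
def crash_free_rate_example(metric_ids):
    crashed = crashed_sessions(metric_ids, alias="crashed_sessions")
    init = init_sessions(metric_ids, alias="init_sessions")
    # Yields 100 * (1 - crashed / init), aliased for the select clause.
    return percentage(crashed, init, metric_ids, alias="crash_free_percentage")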
def test_percentage_in_snql(self):
    alias = "foo.percentage"
    init_session_snql = init_sessions(self.metric_ids, "init_sessions")
    crashed_session_snql = crashed_sessions(self.metric_ids, "crashed_sessions")

    assert percentage(
        crashed_session_snql, init_session_snql, self.metric_ids, alias=alias
    ) == Function(
        "multiply",
        [
            100,
            Function("minus", [1, Function("divide", [crashed_session_snql, init_session_snql])]),
        ],
        alias,
    )
def get_column_for_status(function_name: str, prefix: str, status: str) -> Function:
    return Function(
        f"{function_name}If",
        [
            Column("value"),
            Function(
                "equals",
                [Column(tag_key_session_status), indexer.resolve(status)],
            ),
        ],
        alias=f"{prefix}_{status}",
    )
def __build_conditional_aggregate_for_metric(self, entity: MetricEntity) -> Function:
    snuba_function = OP_TO_SNUBA_FUNCTION[entity][self.op]
    return Function(
        snuba_function,
        [
            Column("value"),
            Function("equals", [Column("metric_id"), resolve_weak(self.metric_name)]),
        ],
        alias=f"{self.op}({self.metric_name})",
    )
def resolve_team_key_transaction_alias(
    builder: QueryBuilder, resolve_metric_index: bool = False
) -> SelectType:
    org_id = builder.params.get("organization_id")
    project_ids = builder.params.get("project_id")
    team_ids = builder.params.get("team_id")

    if org_id is None or team_ids is None or project_ids is None:
        raise TypeError("Team key transactions parameters cannot be None")

    team_key_transactions = list(
        TeamKeyTransaction.objects.filter(
            organization_id=org_id,
            project_team__in=ProjectTeam.objects.filter(
                project_id__in=project_ids, team_id__in=team_ids
            ),
        )
        .order_by("transaction", "project_team__project_id")
        .values_list("project_team__project_id", "transaction")
        .distinct("transaction", "project_team__project_id")[
            : fields.MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS
        ]
    )

    count = len(team_key_transactions)

    if resolve_metric_index:
        team_key_transactions = [
            (project, indexer.resolve(transaction))
            for project, transaction in team_key_transactions
        ]

    # NOTE: this raw count is not 100% accurate because if it exceeds
    # `MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS`, it will not be reflected
    sentry_sdk.set_tag("team_key_txns.count", count)
    sentry_sdk.set_tag(
        "team_key_txns.count.grouped", format_grouped_length(count, [10, 100, 250, 500])
    )

    if count == 0:
        return Function("toInt8", [0], constants.TEAM_KEY_TRANSACTION_ALIAS)

    return Function(
        "in",
        [
            (builder.column("project_id"), builder.column("transaction")),
            team_key_transactions,
        ],
        constants.TEAM_KEY_TRANSACTION_ALIAS,
    )
def sessions_errored_set(metric_ids, alias=None):
    return Function(
        "uniqIf",
        [
            Column("value"),
            Function(
                "in",
                [
                    Column("metric_id"),
                    list(metric_ids),
                ],
            ),
        ],
        alias,
    )
def expected_query(match, select, extra_groupby):
    function, column, alias = select
    return Query(
        dataset="metrics",
        match=Entity(match),
        select=[Function(function, [Column(column)], alias)],
        groupby=[Column("metric_id"), Column("tags[8]"), Column("tags[2]")] + extra_groupby,
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("metric_id"), Op.IN, [9, 11, 7]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]"), Op.IN, [10]),
        ],
        limit=Limit(MAX_POINTS),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
def _translate_conditions(org_id: int, input_: Any) -> Any:
    if isinstance(input_, Column):
        # The only filterable tag keys are release and environment.
        assert input_.name in ("release", "environment")
        # It greatly simplifies code if we just assume that they exist.
        # Alternative would be:
        #   * if tag key or value does not exist in AND-clause, return no data
        #   * if tag key or value does not exist in OR-clause, remove condition
        return Column(resolve_tag_key(input_.name))

    if isinstance(input_, str):
        # Assuming this is the right-hand side, we need to fetch a tag value.
        # It's OK if the tag value resolves to None, the snuba query will then
        # return no results, as is intended behavior
        return indexer.resolve(input_)

    if isinstance(input_, Function):
        return Function(
            function=input_.function,
            parameters=_translate_conditions(org_id, input_.parameters),
        )

    if isinstance(input_, Condition):
        return Condition(
            lhs=_translate_conditions(org_id, input_.lhs),
            op=input_.op,
            rhs=_translate_conditions(org_id, input_.rhs),
        )

    if isinstance(input_, (int, float)):
        return input_

    assert isinstance(input_, (tuple, list)), input_
    return [_translate_conditions(org_id, item) for item in input_]
def resolve_tags(input_: Any) -> Any:
    """Translate tags in snuba condition

    This assumes that all strings are either tag names or tag values, so do not
    pass Column("metric_id") or Column("project_id") into this function.
    """
    if isinstance(input_, list):
        return [resolve_tags(item) for item in input_]
    if isinstance(input_, Function):
        if input_.function == "ifNull":
            # This was wrapped automatically by QueryBuilder, remove wrapper
            return resolve_tags(input_.parameters[0])
        return Function(
            function=input_.function,
            parameters=input_.parameters
            and [resolve_tags(item) for item in input_.parameters],
        )
    if isinstance(input_, Condition):
        return Condition(lhs=resolve_tags(input_.lhs), op=input_.op, rhs=resolve_tags(input_.rhs))
    if isinstance(input_, BooleanCondition):
        return input_.__class__(conditions=[resolve_tags(item) for item in input_.conditions])
    if isinstance(input_, Column):
        # HACK: Some tags already take the form "tags[...]" in discover, take that into account:
        if input_.subscriptable == "tags":
            name = input_.key
        else:
            name = input_.name
        return Column(name=resolve_tag_key(name))
    if isinstance(input_, str):
        return resolve_weak(input_)
    return input_
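# Usage sketch (not part of the original module): resolve_tags() rewrites a plain
# tag condition into its indexed form. This assumes a configured metrics indexer;
# the tag key and value below are illustrative.
def _resolve_tags_example():
    condition = Condition(Column("release"), Op.EQ, "myapp@1.0.0")
    # Expected shape of the result:
    #   Condition(Column("tags[<id of 'release'>]"), Op.EQ, <id of "myapp@1.0.0">)
    return resolve_tags(condition)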
def test_set_sum_aggregation_for_errored_sessions(self):
    alias = "whatever"
    assert sessions_errored_set(self.metric_ids, alias) == Function(
        "uniqIf",
        [
            Column("value"),
            Function(
                "in",
                [
                    Column("metric_id"),
                    list(self.metric_ids),
                ],
            ),
        ],
        alias,
    )
def test_join_query(self) -> None:
    ev = Entity("events", "ev")
    gm = Entity("groupedmessage", "gm")
    join = Join([Relationship(ev, "grouped", gm)])
    query = (
        Query("discover", join)
        .set_select(
            [
                Column("group_id", ev),
                Column("status", gm),
                Function("avg", [Column("retention_days", ev)], "avg"),
            ]
        )
        .set_groupby([Column("group_id", ev), Column("status", gm)])
        .set_where(
            [
                Condition(Column("project_id", ev), Op.EQ, self.project_id),
                Condition(Column("project_id", gm), Op.EQ, self.project_id),
                Condition(Column("timestamp", ev), Op.GTE, self.base_time),
                Condition(Column("timestamp", ev), Op.LT, self.next_time),
            ]
        )
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200
    assert data["data"] == []
def _to_column(
    query_func: SessionsQueryFunction,
    column_condition: SelectableExpression = 1,
) -> SelectableExpression:
    """
    Converts a query function into an expression that can be directly plugged in
    anywhere columns are used (like the select argument of a Query)
    """
    parameters = (Column("value"), column_condition)

    # distribution columns
    if query_func in _DURATION_PERCENTILES:
        return Function(
            alias="percentiles",
            function="quantilesIf(0.5,0.75,0.9,0.95,0.99)",
            parameters=parameters,
        )
    if query_func == "avg(session.duration)":
        return Function(
            alias="avg",
            function="avgIf",
            parameters=parameters,
        )
    if query_func == "max(session.duration)":
        return Function(
            alias="max",
            function="maxIf",
            parameters=parameters,
        )
    # counters
    if query_func == "sum(session)":
        return Function(
            alias="sum",
            function="sumIf",
            parameters=parameters,
        )
    # sets
    if query_func == "count_unique(user)":
        return Function(
            alias="count_unique",
            function="uniqIf",
            parameters=parameters,
        )
    raise ValueError("Unmapped metrics column", query_func)
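# Usage sketch (an assumption, not from the original source): building a sumIf
# column that only aggregates "init" sessions, with an illustrative tag condition
# resolved through the indexer.
def _init_sessions_column_example():
    status_condition = Function(
        "equals",
        [Column(f"tags[{resolve_weak('session.status')}]"), resolve_weak("init")],
    )
    return _to_column("sum(session)", status_condition)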
def _resolve_web_vital_function(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: str,
) -> SelectType:
    column = args["column"]
    metric_id = args["metric_id"]
    quality = args["quality"].lower()

    if column not in [
        "measurements.lcp",
        "measurements.fcp",
        "measurements.fp",
        "measurements.fid",
        "measurements.cls",
    ]:
        raise InvalidSearchQuery("count_web_vitals only supports measurements")

    measurement_rating = self.builder.resolve_column("measurement_rating")
    quality_id = indexer.resolve(quality)
    if quality_id is None:
        return Function(
            # This matches the type from doing `select toTypeName(count()) ...` from clickhouse
            "toUInt64",
            [0],
            alias,
        )

    return Function(
        "countIf",
        [
            Column("value"),
            Function(
                "and",
                [
                    Function("equals", [measurement_rating, quality_id]),
                    Function("equals", [Column("metric_id"), metric_id]),
                ],
            ),
        ],
        alias,
    )
def _resolve_count_miserable_function(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    metric_true = indexer.resolve(constants.METRIC_TRUE_TAG_VALUE)

    # Nobody is miserable, we can return 0
    if metric_true is None:
        return Function(
            "toUInt64",
            [0],
            alias,
        )

    return Function(
        "uniqIf",
        [
            Column("value"),
            Function(
                "and",
                [
                    Function(
                        "equals",
                        [
                            Column("metric_id"),
                            args["metric_id"],
                        ],
                    ),
                    Function(
                        "equals",
                        [
                            self.builder.column(constants.METRIC_MISERABLE_TAG_KEY),
                            metric_true,
                        ],
                    ),
                ],
            ),
        ],
        alias,
    )
def _resolve_count_if(
    self,
    metric_condition: Function,
    condition: Function,
    alias: Optional[str] = None,
) -> SelectType:
    return Function(
        "countIf",
        [
            Column("value"),
            Function(
                "and",
                [
                    metric_condition,
                    condition,
                ],
            ),
        ],
        alias,
    )
def _resolve_percentile(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: str,
    fixed_percentile: float,
) -> SelectType:
    # quantilesIf returns an array of quantiles; with a single percentile we take
    # the first (1-indexed) element.
    return Function(
        "arrayElement",
        [
            Function(
                f"quantilesIf({fixed_percentile})",
                [
                    Column("value"),
                    Function("equals", [Column("metric_id"), args["metric_id"]]),
                ],
            ),
            1,
        ],
        alias,
    )
def test_invalid_time_conditions(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "count")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.next_time),
                Condition(Column("timestamp"), Op.LT, self.base_time),
            ]
        )
    )

    response = self.post("/events/snql", data=query.snuba())
    resp = json.loads(response.data)
    assert response.status_code == 400, resp
def test_escape_edge_cases(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "times_seen")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("environment"), Op.EQ, "\\' \n \\n \\"),
            ]
        )
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def test_tags_in_groupby(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ]
        )
        .set_groupby([Column("tags[k8s-app]")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
                Condition(Column("type"), Op.NEQ, "transaction"),
            ]
        )
        .set_orderby(
            [
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]
        )
        .set_limit(1000)
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def _resolve_apdex_function(
    self,
    _: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    metric_true = indexer.resolve(constants.METRIC_TRUE_TAG_VALUE)

    # Nothing is satisfied or tolerated, the score must be 0
    if metric_true is None:
        return Function(
            "toUInt64",
            [0],
            alias,
        )

    satisfied = Function(
        "equals", [self.builder.column(constants.METRIC_SATISFIED_TAG_KEY), metric_true]
    )
    tolerable = Function(
        "equals", [self.builder.column(constants.METRIC_TOLERATED_TAG_KEY), metric_true]
    )
    metric_condition = Function(
        "equals", [Column("metric_id"), self.resolve_metric("transaction.duration")]
    )

    # Apdex = (satisfied + tolerable / 2) / total
    return Function(
        "divide",
        [
            Function(
                "plus",
                [
                    self._resolve_count_if(metric_condition, satisfied),
                    Function(
                        "divide",
                        [self._resolve_count_if(metric_condition, tolerable), 2],
                    ),
                ],
            ),
            Function("countIf", [metric_condition]),
        ],
        alias,
    )
def test_arrayjoin(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ]
        )
        .set_groupby([Column("exception_frames.filename")])
        .set_array_join([Column("exception_frames.filename")])
        .set_where(
            [
                Condition(Column("exception_frames.filename"), Op.LIKE, "%.java"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby(
            [
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]
        )
        .set_limit(1000)
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert len(data["data"]) == 6
def test_simple_query(self) -> None:
    query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby([OrderBy(Function("count", [], "count"), Direction.ASC)])
        .set_limit(1000)
        .set_consistent(True)
        .set_debug(True)
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["stats"]["consistent"]
    assert data["data"] == [
        {
            "count": 1,
            "tags[custom_tag]": "custom_value",
            "project_id": self.project_id,
        }
    ]