def test_query(query: Query, exception: Optional[Exception]) -> None:
    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            query.validate()
        return

    query.validate()
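# A hedged sketch of how `test_query` above is typically parametrized; the
# case list is hypothetical, not taken from the source. The error message
# matches the one asserted elsewhere in this suite.
@pytest.mark.parametrize(
    "query, exception",
    [
        pytest.param(
            Query(dataset="discover", match=Entity("events"), select=[Column("event_id")]),
            None,
            id="valid query",
        ),
        pytest.param(
            Query(dataset="discover", match=Entity("events")),
            InvalidQueryError("query must have at least one expression in select"),
            id="missing select",
        ),
    ],
)
def test_query_sketch(query: Query, exception: Optional[Exception]) -> None:
    test_query(query, exception)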
def test_invalid_subquery() -> None:
    with pytest.raises(
        InvalidQueryError,
        match=re.escape(
            "inner query is invalid: query must have at least one expression in select"
        ),
    ):
        Query("discover", Query(dataset="discover", match=Entity("events"))).set_select(
            [Column("event_id"), Column("title")]
        )

    with pytest.raises(
        InvalidQueryError,
        match=re.escape(
            "inner query is invalid: query must have at least one expression in select"
        ),
    ):
        Query(
            "discover",
            Query(
                dataset="discover",
                match=Entity("events"),
                select=[Column("title"), Column("timestamp")],
            ),
        ).set_match(Query(dataset="discover", match=Entity("events"))).set_select(
            [Function("uniq", [Column("new_event")], "uniq_event"), Column("title")]
        )
def test_invalid_query() -> None:
    with pytest.raises(InvalidQueryError, match=re.escape("queries must have a valid dataset")):
        Query(dataset=1, match=Entity("events"))  # type: ignore

    with pytest.raises(InvalidQueryError, match=re.escape("queries must have a valid dataset")):
        Query(dataset="", match=Entity("events"))

    with pytest.raises(InvalidQueryError, match=re.escape("queries must have a valid Entity")):
        Query(dataset="discover", match="events")  # type: ignore

    with pytest.raises(
        InvalidConditionError,
        match=re.escape(
            "invalid condition: LHS of a condition must be a Column, CurriedFunction or Function, not <class 'snuba_sdk.aliased_expression.AliasedExpression'>"
        ),
    ):
        (
            Query("discover", Entity("events"))
            .set_select([AliasedExpression(Column("transaction"), "tn")])
            .set_where(
                [Condition(AliasedExpression(Column("project_id"), "pi"), Op.IN, (1,))]  # type: ignore
            )
        )
def test_sub_query(self) -> None:
    inner_query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
    )

    query = (
        Query("discover", inner_query)
        .set_select([Function("avg", [Column("count")], "avg_count")])
        .set_orderby(
            [OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)]
        )
        .set_limit(1000)
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["data"] == [{"avg_count": 1.0}]
def test_simple(self) -> None:
    query = Query(
        dataset="sessions",
        match=Entity("org_sessions"),
        select=[Column("org_id"), Column("project_id")],
        groupby=[Column("org_id"), Column("project_id")],
        where=[
            Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
            Condition(Column("started"), Op.LT, datetime.utcnow()),
        ],
        granularity=Granularity(3600),
    )

    response = self.app.post("/sessions/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, response.data
    assert len(data["data"]) == 2
    assert data["data"][0]["org_id"] == self.org_id
    assert data["data"][0]["project_id"] == self.project_id
    assert data["data"][1]["org_id"] == self.org_id
    assert data["data"][1]["project_id"] == self.project_id2
def test_invalid_query() -> None:
    with pytest.raises(InvalidQuery, match=re.escape("queries must have a valid dataset")):
        Query(dataset=1, match=Entity("events"))  # type: ignore

    with pytest.raises(InvalidQuery, match=re.escape("queries must have a valid dataset")):
        Query(dataset="", match=Entity("events"))

    with pytest.raises(InvalidQuery, match=re.escape("queries must have a valid Entity")):
        Query(dataset="discover", match="events")  # type: ignore
def test_orderby(self) -> None:
    self.project_id3 = next(self.id_iter)
    self.org_id2 = next(self.id_iter)
    self.generate_session_events(self.org_id2, self.project_id3)

    query = Query(
        dataset="sessions",
        match=Entity("org_sessions"),
        select=[Column("org_id"), Column("project_id")],
        groupby=[Column("org_id"), Column("project_id")],
        where=[
            Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
            Condition(Column("started"), Op.LT, datetime.utcnow()),
        ],
        granularity=Granularity(3600),
        orderby=[OrderBy(Column("org_id"), Direction.ASC)],
    )

    response = self.app.post("/sessions/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, response.data
    assert len(data["data"]) == 3
    assert data["data"][0]["org_id"] == self.org_id
    assert data["data"][0]["project_id"] == self.project_id
    assert data["data"][1]["org_id"] == self.org_id
    assert data["data"][1]["project_id"] == self.project_id2
    assert data["data"][2]["org_id"] == self.org_id2
    assert data["data"][2]["project_id"] == self.project_id3

    query = query.set_orderby([OrderBy(Column("org_id"), Direction.DESC)])
    response = self.app.post("/sessions/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, response.data
    assert len(data["data"]) == 3
    assert data["data"][0]["org_id"] == self.org_id2
    assert data["data"][0]["project_id"] == self.project_id3
    assert data["data"][1]["org_id"] == self.org_id
    assert data["data"][1]["project_id"] == self.project_id
    assert data["data"][2]["org_id"] == self.org_id
    assert data["data"][2]["project_id"] == self.project_id2
def _get_full_hierarchical_hashes(group: Group, hash: str) -> Optional[Sequence[str]]:
    query = (
        Query("events", Entity("events"))
        .set_select([Column("hierarchical_hashes")])
        .set_where(
            _get_group_filters(group)
            + [
                # ClickHouse has(arr, x) returns 1 when the array contains x,
                # i.e. keep only events whose hierarchical_hashes include `hash`.
                Condition(
                    Function("has", [Column("hierarchical_hashes"), hash]),
                    Op.EQ,
                    1,
                ),
            ]
        )
    )

    data = snuba.raw_snql_query(query, referrer="group_split.get_full_hierarchical_hashes")["data"]
    if not data:
        return None

    return data[0]["hierarchical_hashes"]
def test_translate_query(
    query: Query, clauses: Sequence[str], extras: Optional[Sequence[Tuple[str, bool]]]
) -> None:
    joined = " ".join(clauses)
    body: MutableMapping[str, Any] = {"dataset": "discover", "query": joined}
    if extras:
        body.update({k: v for k, v in extras})

    assert query.snuba() == json.dumps(body)
def test_join_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    other_join_entity = Entity("test_b", "tb", None, SCHEMA)
    join2_conditions = [
        Condition(Column("required1", other_join_entity), Op.IN, [1, 2, 3]),
        Condition(Column("required2", other_join_entity), Op.EQ, 1),
        Condition(Column("time", other_join_entity), Op.GTE, BEFORE),
        Condition(Column("time", other_join_entity), Op.LT, AFTER),
        *conditions,
    ]
    query = Query(
        dataset="test",
        match=Join([Relationship(entity, "has", other_join_entity)]),
        select=[Column("test1", entity), Column("required1", other_join_entity)],
        where=join2_conditions,
    )

    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
    else:
        validate_required_columns(query)
def _check_releases_have_health_data(
    organization_id: int,
    project_ids: List[int],
    release_versions: List[str],
    start: datetime,
    end: datetime,
) -> Set[str]:
    """
    Returns a set of all release versions that have health data within a given period of time.
    """
    if not release_versions:
        return set()

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[Column("release")],
        groupby=[Column("release")],
        where=[
            Condition(Column("started"), Op.GTE, start),
            Condition(Column("started"), Op.LT, end),
            Condition(Column("org_id"), Op.EQ, organization_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("release"), Op.IN, release_versions),
        ],
    )

    data = snuba.raw_snql_query(query, referrer="snuba.sessions.check_releases_have_health_data")[
        "data"
    ]
    return {row["release"] for row in data}
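# Hypothetical call site for the helper above: intersect candidate versions
# with the set of releases that actually reported session data. All values
# here are illustrative, not taken from the source.
candidates = ["1.0.0", "1.1.0"]
healthy = _check_releases_have_health_data(
    organization_id=1,
    project_ids=[10, 11],
    release_versions=candidates,
    start=datetime.utcnow() - timedelta(days=1),
    end=datetime.utcnow(),
)
versions_with_data = [v for v in candidates if v in healthy]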
def test_entity_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    query = Query(dataset="test", match=entity, select=[Column("test1"), Column("required1")])
    query = query.set_where(conditions)

    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
    else:
        validate_required_columns(query)
def test_join_query(self) -> None:
    ev = Entity("events", "ev")
    gm = Entity("groupedmessage", "gm")
    join = Join([Relationship(ev, "grouped", gm)])
    query = (
        Query("discover", join)
        .set_select(
            [
                Column("group_id", ev),
                Column("status", gm),
                Function("avg", [Column("retention_days", ev)], "avg"),
            ]
        )
        .set_groupby([Column("group_id", ev), Column("status", gm)])
        .set_where(
            [
                Condition(Column("project_id", ev), Op.EQ, self.project_id),
                Condition(Column("project_id", gm), Op.EQ, self.project_id),
                Condition(Column("timestamp", ev), Op.GTE, self.base_time),
                Condition(Column("timestamp", ev), Op.LT, self.next_time),
            ]
        )
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200
    assert data["data"] == []
def visit(self, query: main.Query) -> QVisited:
    fields = query.get_fields()
    returns = {}
    for field in fields:
        returns[field] = getattr(self, f"_visit_{field}")(getattr(query, field))

    return self._combine(query, returns)
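# Hypothetical illustration of one field visitor the dispatch above resolves
# to; this is a sketch, not the printer's actual method. Each _visit_<field>
# reduces a single clause to its string form, returning "" when unset so that
# _combine (below) drops it.
def _visit_granularity(self, granularity: Optional[Granularity]) -> str:
    if granularity is None:
        return ""
    return f"GRANULARITY {granularity.granularity}"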
def _combine(self, query: main.Query, returns: Mapping[str, str]) -> str:
    clause_order = query.get_fields()
    # These fields are encoded outside of the SQL
    to_skip = (
        "dataset",
        "consistent",
        "turbo",
        "debug",
        "dry_run",
        "legacy",
        "parent_api",
        "team",
        "feature",
    )
    separator = "\n" if (self.pretty and not self.is_inner) else " "
    formatted = separator.join(
        [returns[c] for c in clause_order if c not in to_skip and returns[c]]
    )

    if self.pretty and not self.is_inner:
        prefix = ""
        for skip in to_skip:
            if returns.get(skip):
                prefix += f"-- {skip.upper()}: {returns[skip]}\n"
        formatted = f"{prefix}{formatted}"

    return formatted
def get_levels_overview(group: Group) -> LevelsOverview:
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Column("primary_hash"),
                Function(
                    "max", [Function("length", [Column("hierarchical_hashes")])], "num_levels"
                ),
                _current_level_expr(group),
            ]
        )
        .set_where(_get_group_filters(group))
        .set_groupby([Column("primary_hash")])
    )

    res = snuba.raw_snql_query(query, referrer="api.group_hashes_levels.get_levels_overview")

    if not res["data"]:
        raise NoEvents()

    if len(res["data"]) > 1:
        raise MergedIssues()

    assert len(res["data"]) == 1

    fields = res["data"][0]

    if fields["num_levels"] <= 0:
        raise NotHierarchical()

    # TODO: Cache this if it takes too long. This is called from multiple
    # places, grouping overview and then again in the new-issues endpoint.
    return LevelsOverview(
        current_level=fields["current_level"] - 1,
        only_primary_hash=fields["primary_hash"],
        num_levels=fields["num_levels"],
    )
def _get_hash_for_parent_level(group: Group, id: int, levels_overview: LevelsOverview) -> str:
    # If this is violated, there cannot be a 1:1 mapping between level and hash.
    assert 0 <= id < levels_overview.current_level

    # This cache never needs explicit invalidation because during every level
    # change, the group ID changes.
    #
    # No idea if the query is slow, caching just because I can.
    cache_key = f"group-parent-level-hash:{group.id}:{id}"

    return_hash: str = cache.get(cache_key)

    if return_hash is None:
        query = (
            Query("events", Entity("events"))
            .set_select(
                [Function("arrayElement", [Column("hierarchical_hashes"), id + 1], "hash")]
            )
            .set_where(_get_group_filters(group))
            .set_limit(1)
        )

        return_hash = get_path(snuba.raw_snql_query(query), "data", 0, "hash")  # type: ignore
        cache.set(cache_key, return_hash)

    assert return_hash
    return return_hash
def get_snql_query(self) -> Query:
    return Query(
        dataset=self.dataset.value,
        match=Entity(self.dataset.value),
        select=self.select,
        where=self.where,
        groupby=self.groupby,
        limit=self.limit,
    )
def test_pretty_print_query(
    query: Query, clauses: Sequence[str], extras: Optional[Sequence[Tuple[str, bool]]]
) -> None:
    joined = "\n".join(clauses)
    prefix = "-- DATASET: discover\n"
    if extras:
        for key, value in extras:
            prefix += f"-- {key.upper()}: {value}\n"

    expected = f"{prefix}{joined}"
    assert query.print() == expected
def get_snql_query(self) -> Query:
    return Query(
        dataset=self.dataset.value,
        match=Entity(self.dataset.value),
        select=self.select,
        where=self.where,
        having=self.having,
        groupby=self.groupby,
        orderby=[OrderBy(self.time_column, Direction.ASC)],
        granularity=self.granularity,
        limit=self.limit,
    )
def test_escape_edge_cases(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "times_seen")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("environment"), Op.EQ, "\\' \n \\n \\"),
            ]
        )
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def build_project_usage_outcomes(start__stop, project):
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries more accurately, thus we must
    # capture the entire last day.
    end = stop + timedelta(days=1)

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(
                Column("outcome"),
                Op.IN,
                [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED],
            ),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    data = raw_snql_query(query, referrer="reports.outcomes")["data"]

    return (
        # Accepted errors
        sum(
            row["total"]
            for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.ACCEPTED
        ),
        # Dropped errors
        sum(
            row["total"]
            for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.RATE_LIMITED
        ),
        # Accepted transactions
        sum(
            row["total"]
            for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.ACCEPTED
        ),
        # Dropped transactions
        sum(
            row["total"]
            for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.RATE_LIMITED
        ),
    )
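# Hypothetical call site for the function above, unpacking its 4-tuple of
# accepted/dropped error and transaction counts. The interval bounds and
# `project` are illustrative, not taken from the source.
start__stop = (date(2021, 6, 1), date(2021, 6, 7))
(
    accepted_errors,
    dropped_errors,
    accepted_transactions,
    dropped_transactions,
) = build_project_usage_outcomes(start__stop, project)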
def run_outcomes_query_timeseries(query: QueryDefinition) -> ResultSet:
    snql_query = Query(
        dataset=query.dataset.value,
        match=Entity(query.match),
        select=query.select_params,
        groupby=query.group_by + [Column(TS_COL)],
        where=query.conditions,
        limit=Limit(10000),
        offset=Offset(0),
        granularity=Granularity(query.rollup),
    )
    result_timeseries = raw_snql_query(snql_query, referrer="outcomes.timeseries")
    return _format_rows(result_timeseries["data"], query)
def get_snql_query(self) -> Query:
    self.validate_having_clause()
    return Query(
        dataset=self.dataset.value,
        match=Entity(self.dataset.value),
        select=self.select,
        where=self.where,
        having=self.having,
        groupby=self.groupby,
        orderby=self.orderby,
        limit=self.limit,
        offset=self.offset,
    )
def test_entity_validate_match(query: Query, exception: Optional[Exception]) -> None:
    query = query.set_where(
        [
            Condition(Column("required1"), Op.IN, [1, 2, 3]),
            Condition(Column("required2"), Op.EQ, 1),
            Condition(Column("time"), Op.GTE, BEFORE),
            Condition(Column("time"), Op.LT, AFTER),
        ],
    )

    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_match(query, SEARCHER)
    else:
        validate_match(query, SEARCHER)
def test_basic(self) -> None:
    now = datetime.now()
    self._insert_event_for_time(now)

    query = (
        Query(dataset="events", match=Entity("events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE, now - timedelta(days=1)),
                Condition(Column("timestamp"), Op.LT, now + timedelta(days=1)),
            ]
        )
    )

    result = snuba.raw_snql_query(query)
    assert len(result["data"]) == 1
    assert result["data"][0] == {"count": 1, "project_id": self.project.id}
def _get_project_releases_count(
    organization_id: int,
    project_ids: Sequence[int],
    scope: str,
    stats_period: Optional[str] = None,
    environments: Optional[Sequence[str]] = None,
) -> int:
    """
    Fetches the total count of releases/project combinations
    """
    if stats_period is None:
        stats_period = "24h"

    # Special rule that we support sorting by the last 24h only.
    if scope.endswith("_24h"):
        stats_period = "24h"

    _, stats_start, _ = get_rollup_starts_and_buckets(stats_period)

    where = [
        Condition(Column("started"), Op.GTE, stats_start),
        Condition(Column("started"), Op.LT, datetime.now()),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("org_id"), Op.EQ, organization_id),
    ]
    if environments is not None:
        where.append(Condition(Column("environment"), Op.IN, environments))

    having = []
    # Filter out releases with zero users when sorting by either `users` or `crash_free_users`
    if scope in ["users", "crash_free_users"]:
        having.append(Condition(Column("users"), Op.GT, 0))

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[
            Function("uniqExact", [Column("release"), Column("project_id")], alias="count")
        ],
        where=where,
        having=having,
    )
    data = snuba.raw_snql_query(query, referrer="snuba.sessions.get_project_releases_count")[
        "data"
    ]
    return data[0]["count"] if data else 0
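# Hypothetical call site for the helper above: count release/project
# combinations seen in the last 24 hours, keeping only releases with at
# least one user. All argument values are illustrative.
total = _get_project_releases_count(
    organization_id=1,
    project_ids=[10, 11],
    scope="crash_free_users",
    stats_period="24h",
)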
def get_snql_query(self) -> Query:
    self.validate_having_clause()
    return Query(
        dataset=self.dataset.value,
        match=Entity(self.dataset.value, sample=self.sample_rate),
        select=self.columns,
        array_join=self.array_join,
        where=self.where,
        having=self.having,
        groupby=self.groupby,
        orderby=self.orderby,
        limit=self.limit,
        offset=self.offset,
        limitby=self.limitby,
        turbo=self.turbo,
    )
def test_cache(self):
    """Minimal test to verify if use_cache works"""
    results = snuba.raw_snql_query(
        Query(
            "events",
            Entity("events"),
            select=[Column("event_id")],
            where=[
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE, timezone.now() - timedelta(days=1)),
                Condition(Column("timestamp"), Op.LT, timezone.now()),
            ],
            limit=Limit(1),
        ),
        use_cache=True,
    )
    assert results["data"] == []
def query_p95(interval):
    start, stop = interval
    query = Query(
        dataset=Dataset.Transactions.value,
        match=Entity("transactions"),
        select=[
            Column("transaction_name"),
            Function("quantile(0.95)", [Column("duration")], "p95"),
        ],
        where=[
            Condition(Column("finish_ts"), Op.GTE, start),
            Condition(Column("finish_ts"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("transaction_name"), Op.IN, transaction_names),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("transaction_name")],
    )
    return raw_snql_query(query, referrer="reports.key_transactions.p95")
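# The "quantile(0.95)" above embeds the curried parameter in the function
# name string. snuba-sdk also models parametrized aggregates explicitly via
# CurriedFunction; an assumed-equivalent spelling of the same select
# expression (a hedged sketch, not taken from the source):
p95_expr = CurriedFunction("quantile", [0.95], [Column("duration")], "p95")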