def column_expr(
    self,
    column_name,
    query: Query,
    parsing_context: ParsingContext,
    table_alias: str = "",
):
    detected_dataset = detect_table(
        query, self.__events_columns, self.__transactions_columns
    )

    if detected_dataset == TRANSACTIONS:
        if column_name == "time":
            return self.time_expr("finish_ts", query.get_granularity(), table_alias)
        if column_name == "type":
            return "'transaction'"
        if column_name == "timestamp":
            return "finish_ts"
        if column_name == "username":
            return "user_name"
        if column_name == "email":
            return "user_email"
        if column_name == "transaction":
            return "transaction_name"
        if column_name == "message":
            return "transaction_name"
        if column_name == "title":
            return "transaction_name"
        if column_name == "group_id":
            # TODO: We return 0 here instead of NULL so conditions like group_id
            # in (1, 2, 3) will work, since Clickhouse won't run a query like:
            # SELECT (NULL AS group_id) FROM transactions WHERE group_id IN (1, 2, 3)
            # When we have the query AST, we should solve this by transforming the
            # nonsensical conditions instead.
            return "0"
        if column_name == "geo_country_code":
            column_name = "contexts[geo.country_code]"
        if column_name == "geo_region":
            column_name = "contexts[geo.region]"
        if column_name == "geo_city":
            column_name = "contexts[geo.city]"
        if self.__events_columns.get(column_name):
            return "NULL"
    else:
        if column_name == "time":
            return self.time_expr("timestamp", query.get_granularity(), table_alias)
        if column_name == "release":
            column_name = "tags[sentry:release]"
        if column_name == "dist":
            column_name = "tags[sentry:dist]"
        if column_name == "user":
            column_name = "tags[sentry:user]"
        if self.__transactions_columns.get(column_name):
            return "NULL"

    return get_dataset(detected_dataset).column_expr(
        column_name, query, parsing_context
    )
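A minimal sketch (hypothetical, not from the codebase) of what the transactions branch above boils down to: each Discover column name either renames to a concrete transactions column, falls back to NULL when it only exists on the events dataset, or passes through untouched. The dict below restates the renames from the function purely for illustration; the name DISCOVER_TO_TRANSACTIONS is made up here.

# Hypothetical restatement of the transactions-branch renames above.
DISCOVER_TO_TRANSACTIONS = {
    "timestamp": "finish_ts",
    "username": "user_name",
    "email": "user_email",
    "transaction": "transaction_name",
    "message": "transaction_name",
    "title": "transaction_name",
    "geo_country_code": "contexts[geo.country_code]",
    "geo_region": "contexts[geo.region]",
    "geo_city": "contexts[geo.city]",
}

def resolve_transactions_column(column_name: str) -> str:
    # Renamed columns map through the table; anything else keeps its name
    # (the events-only -> NULL fallback is handled separately above).
    return DISCOVER_TO_TRANSACTIONS.get(column_name, column_name)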
def test_full_query():
    query = Query(
        {
            "selected_columns": ["c1", "c2", "c3"],
            "conditions": [["c1", "=", "a"]],
            "arrayjoin": "tags",
            "having": [["c4", "=", "c"]],
            "groupby": ["project_id"],
            "aggregations": [["count()", "", "count"]],
            "orderby": "event_id",
            "limitby": (100, "environment"),
            "sample": 10,
            "limit": 100,
            "offset": 50,
            "totals": True,
            "granularity": 60,
        },
        TableSource("my_table", ColumnSet([])),
    )

    assert query.get_selected_columns() == ["c1", "c2", "c3"]
    assert query.get_aggregations() == [["count()", "", "count"]]
    assert query.get_groupby() == ["project_id"]
    assert query.get_conditions() == [["c1", "=", "a"]]
    assert query.get_arrayjoin() == "tags"
    assert query.get_having() == [["c4", "=", "c"]]
    assert query.get_orderby() == "event_id"
    assert query.get_limitby() == (100, "environment")
    assert query.get_sample() == 10
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.has_totals() is True
    assert query.get_granularity() == 60
    assert query.get_data_source().format_from() == "my_table"
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_granularity: int,
):
    state.set_config('max_days', 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=datetime.timedelta(days=5),
        timestamp_column='timestamp',
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())

    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

    extension.get_processor().process_query(query, valid_data, request_settings)
    assert query.get_conditions() == expected_conditions
    assert query.get_granularity() == expected_granularity
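For context, a plausible shape for one parametrized case of the test above. The from_date/to_date/granularity key names are assumptions about the extension's schema, not taken from the source:

# Hypothetical fixture; key names are assumed, not confirmed by the source.
raw_data = {
    "from_date": "2019-01-01T00:00:00",
    "to_date": "2019-01-02T00:00:00",
    "granularity": 3600,
}
# process_query would then be expected to append range conditions on the
# timestamp column (the expected_conditions parameter) and set the matching
# granularity on the query.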
def column_expr(
    self, column_name, query: Query, parsing_context: ParsingContext, table_alias: str = ""
):
    if column_name in self.__time_group_columns:
        return self.__time_expr(column_name, query.get_granularity(), table_alias)
    else:
        return super().column_expr(column_name, query, parsing_context, table_alias)
def test_edit_query():
    query = Query(
        {
            "selected_columns": ["c1", "c2", "c3"],
            "conditions": [["c1", "=", "a"]],
            "arrayjoin": "tags",
            "having": [["c4", "=", "c"]],
            "groupby": ["project_id"],
            "aggregations": [["count()", "", "count"]],
            "orderby": "event_id",
            "limitby": (100, "environment"),
            "sample": 10,
            "limit": 100,
            "offset": 50,
            "totals": True,
        },
        TableSource("my_table", ColumnSet([])),
    )

    query.set_selected_columns(["c4"])
    assert query.get_selected_columns() == ["c4"]

    query.set_aggregations([["different_agg()", "", "something"]])
    assert query.get_aggregations() == [["different_agg()", "", "something"]]

    query.add_groupby(["more", "more2"])
    assert query.get_groupby() == ["project_id", "more", "more2"]

    query.add_conditions([["c5", "=", "9"]])
    assert query.get_conditions() == [
        ["c1", "=", "a"],
        ["c5", "=", "9"],
    ]

    query.set_conditions([["c6", "=", "10"]])
    assert query.get_conditions() == [
        ["c6", "=", "10"],
    ]

    query.set_arrayjoin("not_tags")
    assert query.get_arrayjoin() == "not_tags"

    query.set_granularity(7200)
    assert query.get_granularity() == 7200

    query.set_prewhere([["pc6", "=", "10"]])
    assert query.get_prewhere() == [["pc6", "=", "10"]]
def column_expr(
    self,
    column_name,
    query: Query,
    parsing_context: ParsingContext,
    table_alias: str = "",
):
    # We want to permit functions here, so we need to make sure we're not trying
    # to look up lists in the dictionary or it will fail with a type error.
    if isinstance(column_name, str) and column_name in self.__time_group_columns:
        real_column = self.__time_group_columns[column_name]
        return self.time_expr(real_column, query.get_granularity(), table_alias)
    else:
        return super().column_expr(column_name, query, parsing_context, table_alias)
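Both column_expr overloads above delegate the actual time bucketing to a time_expr helper on the dataset. A minimal sketch of what such a helper might emit, assuming the generic ClickHouse intDiv rounding approach; the helper name and alias handling here are hypothetical:

def time_expr_sketch(real_column: str, granularity: int, table_alias: str = "") -> str:
    # Hypothetical sketch: round the timestamp column down to the start of
    # its granularity bucket. A real implementation would likely special-case
    # common granularities (e.g. toStartOfHour, toDate) for readability.
    prefix = f"{table_alias}." if table_alias else ""
    return f"(toDateTime(intDiv(toUInt32({prefix}{real_column}), {granularity}) * {granularity}))"

# time_expr_sketch("timestamp", 60) ->
#   "(toDateTime(intDiv(toUInt32(timestamp), 60) * 60))"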
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_condition: Expression,
    expected_granularity: int,
):
    state.set_config("max_days", 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=timedelta(days=5),
        timestamp_column="timestamp",
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())

    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    request_settings = HTTPRequestSettings()

    extension.get_processor().process_query(query, valid_data, request_settings)
    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_condition
    assert query.get_granularity() == expected_granularity
def __init__(
    self,
    query: Query,
    settings: RequestSettings,
) -> None:
    # Snuba query structure
    # Referencing them here directly since it makes it easier
    # to process this query independently from the Snuba Query
    # and there is no risk in doing so since they are immutable.
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()
    self.__groupby = query.get_groupby_from_ast()
    self.__having = query.get_having_from_ast()
    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    if self.__having:
        assert self.__groupby, "found HAVING clause with no GROUP BY"

    # Clickhouse specific fields. Some are still in the Snuba
    # query and have to be moved.
    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()
    # TODO: Pre where processing will become a step in Clickhouse Query processing
    # instead of being pulled from the Snuba Query
    self.__prewhere = query.get_prewhere_ast()

    self.__settings = settings
    self.__formatted_query: Optional[str] = None
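The assertion above encodes a SQL invariant worth spelling out: HAVING filters aggregated groups, so it is only meaningful alongside GROUP BY. A comment-only illustration (the table and column names are placeholders):

# HAVING applies after aggregation, per group:
#
#   SELECT project_id, count() AS c
#   FROM my_table
#   GROUP BY project_id
#   HAVING c > 10      -- valid: filters per-group counts
#
# With no GROUP BY there are no groups to filter, which is exactly the
# state the assert rejects before the query is formatted.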