def test_format_expressions(pre_format: Query, expected_query: Query) -> None:
    """
    Runs BasicFunctionsProcessor on a copy of ``pre_format`` and verifies
    that the select, group by and condition clauses of the resulting AST
    are equal to the ones of ``expected_query``.
    """
    # The processor is handed a deep copy, not the query received as parameter.
    processed = deepcopy(pre_format)
    processor = BasicFunctionsProcessor()
    processor.process_query(processed, HTTPRequestSettings())

    actual_select = processed.get_selected_columns_from_ast()
    assert actual_select == expected_query.get_selected_columns_from_ast()

    actual_groupby = processed.get_groupby_from_ast()
    assert actual_groupby == expected_query.get_groupby_from_ast()

    actual_condition = processed.get_condition_from_ast()
    assert actual_condition == expected_query.get_condition_from_ast()
def test_project_extension_query_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_conditions: Expression,
):
    """
    Validates ``raw_data`` against the ProjectExtension schema, runs the
    extension processor on a query with no conditions and checks both the
    legacy conditions and the AST condition found on the query afterwards.
    """
    project_extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    schema = project_extension.get_schema()
    validated = validate_jsonschema(raw_data, schema)

    # The query starts with an empty list of conditions.
    data_source = TableSource("my_table", ColumnSet([]))
    query = Query({"conditions": []}, data_source)
    settings = HTTPRequestSettings()

    query_processor = project_extension.get_processor()
    query_processor.process_query(query, validated, settings)

    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_conditions
def test_format_expressions(
    query_body: MutableMapping[str, Any], expected_query: Query
) -> None:
    """
    Parses ``query_body`` against the "events" dataset and compares every
    AST clause of the parsed query with the same clause of ``expected_query``.
    """
    dataset = get_dataset("events")
    parsed = parse_query(query_body, dataset)

    # We cannot just run == on the query objects. Their content differs:
    # one holds only the AST while the other holds the AST + raw body.
    # So each AST clause is compared individually instead.
    clause_getters = (
        "get_selected_columns_from_ast",
        "get_groupby_from_ast",
        "get_condition_from_ast",
        "get_arrayjoin_from_ast",
        "get_having_from_ast",
        "get_orderby_from_ast",
    )
    for getter in clause_getters:
        assert getattr(parsed, getter)() == getattr(expected_query, getter)()
def __get_filter_tags(self, query: Query) -> List[str]:
    """
    Returns the tag names the arrayFilter optimization can be applied on.

    The optimization is applicable when the tags_key column is present in
    the select clause and there are one or more top level conditions on the
    tags_key column. An empty list is returned in every other case,
    including when the "ast_tag_processor_enabled" config is off.
    """
    if not state.get_config("ast_tag_processor_enabled", 0):
        return []

    selected = query.get_selected_columns_from_ast() or []
    selects_tags_key = any(
        isinstance(node, Column) and node.column_name == "tags_key"
        for expression in selected
        for node in expression
    )
    if not selects_tags_key:
        return []

    def tags_in_clause(clause: Optional[Expression]) -> Optional[List[str]]:
        # A missing clause contributes no tags. None is returned when an OR
        # is found while iterating over the clause.
        if not clause:
            return []
        for node in clause:
            if is_binary_condition(node, BooleanFunctions.OR):
                return None
        return self.__extract_top_level_tag_conditions(clause)

    where_tags = tags_in_clause(query.get_condition_from_ast())
    if where_tags is None:
        # We found an OR. Cowardly we give up even though there could
        # be cases where this condition is still optimizable.
        return []

    having_tags = tags_in_clause(query.get_having_from_ast())
    if having_tags is None:
        # Same reasoning as for the condition clause above.
        return []

    return where_tags + having_tags
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_condition: Expression,
    expected_granularity: int,
):
    """
    Validates ``raw_data`` against the TimeSeriesExtension schema, runs the
    extension processor on a query with no conditions and checks the legacy
    conditions, the AST condition and the granularity set on the query.
    """
    state.set_config("max_days", 1)

    timeseries = TimeSeriesExtension(
        default_granularity=60,
        default_window=timedelta(days=5),
        timestamp_column="timestamp",
    )
    schema = timeseries.get_schema()
    validated = validate_jsonschema(raw_data, schema)

    # The query starts with an empty list of conditions.
    data_source = TableSource("my_table", ColumnSet([]))
    query = Query({"conditions": []}, data_source)
    settings = HTTPRequestSettings()

    query_processor = timeseries.get_processor()
    query_processor.process_query(query, validated, settings)

    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_condition
    assert query.get_granularity() == expected_granularity
def __init__(self, query: Query, settings: RequestSettings) -> None:
    """
    Captures the clauses of the Snuba ``query`` and the relevant request
    ``settings`` into private fields of this object.

    Raises an AssertionError when the query has a HAVING clause but no
    GROUP BY clause.
    """
    self.__settings = settings
    # Not populated at construction time.
    self.__formatted_query: Optional[str] = None

    # Structure of the Snuba query.
    # The fields are referenced directly: this makes it easier to process
    # this query independently from the Snuba Query, and it carries no
    # risk since they are immutable.
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()
    self.__groupby = query.get_groupby_from_ast()
    self.__having = query.get_having_from_ast()
    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    # A HAVING clause is only accepted together with a GROUP BY clause.
    assert (
        not self.__having or self.__groupby
    ), "found HAVING clause with no GROUP BY"

    # Fields specific to Clickhouse. Some of them still live in the Snuba
    # query and have to be moved.
    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()

    # TODO: Pre where processing will become a step in Clickhouse Query
    # processing instead of being pulled from the Snuba Query
    self.__prewhere = query.get_prewhere_ast()
class TestProjectExtensionWithGroups(BaseTest):
    """
    Exercises ProjectExtension configured with ProjectWithGroupsProcessor
    on a query for project 2, checking the conditions added to the query
    and whether the query is marked as final.
    """

    def setup_method(self, test_method):
        super().setup_method(test_method)
        self.extension = ProjectExtension(
            processor=ProjectWithGroupsProcessor(project_column="project_id")
        )
        schema = self.extension.get_schema()
        self.valid_data = validate_jsonschema({"project": 2}, schema)
        data_source = TableSource("my_table", ColumnSet([]))
        self.query = Query({"conditions": []}, data_source)

    def _run_extension_processor(self, request_settings):
        # Applies the extension processor to the query built in setup_method.
        processor = self.extension.get_processor()
        processor.process_query(self.query, self.valid_data, request_settings)

    def _assert_project_condition_only(self):
        # The project condition is the only one present, both in the legacy
        # representation and in the AST.
        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])

    def test_with_turbo(self):
        self._run_extension_processor(HTTPRequestSettings(turbo=True))

        self._assert_project_condition_only()

    def test_without_turbo_with_projects_needing_final(self):
        settings = HTTPRequestSettings()
        replacer.set_project_needs_final(2)

        self._run_extension_processor(settings)

        self._assert_project_condition_only()
        assert self.query.get_final()

    def test_without_turbo_without_projects_needing_final(self):
        self._run_extension_processor(HTTPRequestSettings())

        self._assert_project_condition_only()
        assert not self.query.get_final()

    def test_when_there_are_not_many_groups_to_exclude(self):
        excluded_ids = (100, 101, 102)
        settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 5)
        replacer.set_project_exclude_groups(2, list(excluded_ids))

        self._run_extension_processor(settings)

        assert self.query.get_conditions() == [
            ("project_id", "IN", [2]),
            (["assumeNotNull", ["group_id"]], "NOT IN", list(excluded_ids)),
        ]

        # Expected AST: notIn(assumeNotNull(group_id), tuple(...)) AND the
        # project condition.
        group_id = FunctionCall(
            None, "assumeNotNull", (Column(None, "group_id", None),)
        )
        excluded_tuple = FunctionCall(
            None, "tuple", tuple(Literal(None, gid) for gid in excluded_ids)
        )
        groups_excluded = FunctionCall(None, "notIn", (group_id, excluded_tuple))
        assert self.query.get_condition_from_ast() == FunctionCall(
            None,
            BooleanFunctions.AND,
            (groups_excluded, build_in("project_id", [2])),
        )
        assert not self.query.get_final()

    def test_when_there_are_too_many_groups_to_exclude(self):
        settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 2)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self._run_extension_processor(settings)

        self._assert_project_condition_only()
        assert self.query.get_final()
def test_replace_expression():
    """
    Builds a query with the new AST and swaps a function for another one
    through transform_expressions: every f1(...) call becomes tag(f1).
    """

    def tag_f1() -> Expression:
        # What every f1(...) call is expected to be turned into.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    col_c1 = Column(None, "c1", "t1")
    col_c2 = Column(None, "c2", "t1")
    f1_call = FunctionCall("alias", "f1", (col_c1, col_c2))
    f2_call = FunctionCall("alias", "f2", (col_c2,))

    # f2 is only used in the order by clause.
    orderby = OrderBy(OrderByDirection.ASC, f2_call)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[f1_call],
        array_join=None,
        condition=binary_condition(
            None, ConditionFunctions.EQ, f1_call, Literal(None, "1")
        ),
        groupby=[f1_call],
        having=None,
        order_by=[orderby],
    )

    def replace(exp: Expression) -> Expression:
        is_f1_call = isinstance(exp, FunctionCall) and exp.function_name == "f1"
        if not is_f1_call:
            return exp
        return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))

    query.transform_expressions(replace)

    expected_query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[tag_f1()],
        array_join=None,
        condition=binary_condition(
            None, ConditionFunctions.EQ, tag_f1(), Literal(None, "1")
        ),
        groupby=[tag_f1()],
        having=None,
        order_by=[orderby],
    )

    actual_select = query.get_selected_columns_from_ast()
    assert actual_select == expected_query.get_selected_columns_from_ast()
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()

    actual_expressions = list(query.get_all_expressions())
    assert actual_expressions == list(expected_query.get_all_expressions())