def test_edit_query():
    """Check that each Query mutator is reflected by the matching getter."""
    body = {
        "selected_columns": ["c1", "c2", "c3"],
        "conditions": [["c1", "=", "a"]],
        "arrayjoin": "tags",
        "having": [["c4", "=", "c"]],
        "groupby": ["project_id"],
        "aggregations": [["count()", "", "count"]],
        "orderby": "event_id",
        "limitby": (100, "environment"),
        "sample": 10,
        "limit": 100,
        "offset": 50,
        "totals": True,
    }
    source = TableSource("my_table", ColumnSet([]))
    query = Query(body, source)

    # Setters replace whatever the body originally held.
    query.set_selected_columns(["c4"])
    assert query.get_selected_columns() == ["c4"]

    query.set_aggregations([["different_agg()", "", "something"]])
    assert query.get_aggregations() == [["different_agg()", "", "something"]]

    # add_* calls are expected to extend the existing values.
    query.add_groupby(["more", "more2"])
    assert query.get_groupby() == ["project_id", "more", "more2"]

    query.add_conditions([["c5", "=", "9"]])
    expected_after_add = [
        ["c1", "=", "a"],
        ["c5", "=", "9"],
    ]
    assert query.get_conditions() == expected_after_add

    # set_conditions is expected to drop the previously accumulated ones.
    query.set_conditions([["c6", "=", "10"]])
    expected_after_set = [
        ["c6", "=", "10"],
    ]
    assert query.get_conditions() == expected_after_set

    query.set_arrayjoin("not_tags")
    assert query.get_arrayjoin() == "not_tags"

    query.set_granularity(7200)
    assert query.get_granularity() == 7200

    query.set_prewhere([["pc6", "=", "10"]])
    assert query.get_prewhere() == [["pc6", "=", "10"]]
def process_query(self, query: Query, request_settings: RequestSettings,) -> None:
    """
    Move eligible top-level conditions of ``query`` into its prewhere list.

    A condition is eligible when ``util.is_condition`` accepts it (a single
    top-level condition, not OR-nested) and at least one column referenced
    by its left-hand side is among the prewhere candidates exposed by the
    query's data source. Eligible conditions are ranked by the best (lowest)
    position any of their columns holds in that candidate list, and at most
    ``max_prewhere_conditions`` of them are promoted.

    The query is modified in place: promoted conditions are removed from the
    regular conditions and passed to ``set_prewhere``. Nothing is changed
    when the data source offers no candidates or the query has no
    conditions. ``request_settings`` is not used here.
    """
    # The instance-level limit wins; a falsy value falls back to settings.
    max_prewhere_conditions: int = (
        self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
    )

    prewhere_keys = query.get_data_source().get_prewhere_candidates()
    if not prewhere_keys:
        return

    conditions = query.get_conditions()
    if not conditions:
        return

    # Keep only simple top-level conditions that reference a candidate key.
    eligible = []
    for condition in conditions:
        if not util.is_condition(condition):
            continue
        if not any(
            col in prewhere_keys for col in util.columns_in_expr(condition[0])
        ):
            continue
        eligible.append((util.columns_in_expr(condition[0]), condition))

    def best_key_position(columns):
        # Lowest index among the referenced columns that are candidate keys.
        return min(
            prewhere_keys.index(col) for col in columns if col in prewhere_keys
        )

    # Rank by priority; the sort is stable, so ties keep their query order.
    ranked = [(best_key_position(columns), condition) for columns, condition in eligible]
    ranked.sort(key=lambda entry: entry[0])

    prewhere_conditions: Sequence[Condition] = []
    if ranked:
        ordered = [condition for _, condition in ranked]
        prewhere_conditions = ordered[:max_prewhere_conditions]
        # Whatever got promoted must no longer appear in the regular conditions.
        remaining = [
            condition
            for condition in conditions
            if condition not in prewhere_conditions
        ]
        query.set_conditions(remaining)

    query.set_prewhere(prewhere_conditions)
def test_referenced_columns():
    """Check which columns Query reports as referenced for several bodies."""
    dataset = get_dataset("events")
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()

    # a = 1 AND b = 1
    flat_body = {"conditions": [["a", "=", "1"], ["b", "=", "1"]]}
    query = Query(flat_body, source)
    assert query.get_all_referenced_columns() == {"a", "b"}
    assert query.get_columns_referenced_in_conditions() == {"a", "b"}
    assert query.get_columns_referenced_in_having() == set()

    # a = 1 AND (b = 1 OR c = 1)
    or_body = {
        "conditions": [["a", "=", "1"], [["b", "=", "1"], ["c", "=", "1"]]]
    }
    query = Query(or_body, source)
    assert query.get_all_referenced_columns() == {"a", "b", "c"}
    assert query.get_columns_referenced_in_conditions() == {"a", "b", "c"}
    assert query.get_columns_referenced_in_having() == set()

    # a = 1 AND (b = 1 OR foo(c) = 1)
    function_body = {
        "conditions": [
            ["a", "=", "1"],
            [["b", "=", "1"], [["foo", ["c"]], "=", "1"]],
        ]
    }
    query = Query(function_body, source)
    assert query.get_all_referenced_columns() == {"a", "b", "c"}
    assert query.get_columns_referenced_in_conditions() == {"a", "b", "c"}
    assert query.get_columns_referenced_in_having() == set()

    # a = 1 AND (b = 1 OR foo(c, bar(d)) = 1)
    nested_function_body = {
        "conditions": [
            ["a", "=", "1"],
            [["b", "=", "1"], [["foo", ["c", ["bar", ["d"]]]], "=", "1"]],
        ]
    }
    query = Query(nested_function_body, source)
    assert query.get_all_referenced_columns() == {"a", "b", "c", "d"}
    assert query.get_columns_referenced_in_conditions() == {"a", "b", "c", "d"}
    assert query.get_columns_referenced_in_having() == set()

    # Other fields, including expressions in selected columns
    other_fields_body = {
        "arrayjoin": "tags_key",
        "groupby": ["time", "group_id"],
        "orderby": "-time",
        "selected_columns": [
            "group_id",
            "time",
            ["foo", ["c", ["bar", ["d"]]]],  # foo(c, bar(d))
        ],
        "aggregations": [["uniq", "tags_value", "values_seen"]],
    }
    query = Query(other_fields_body, source)
    assert query.get_all_referenced_columns() == {
        "tags_key",
        "tags_value",
        "time",
        "group_id",
        "c",
        "d",
    }
    assert query.get_columns_referenced_in_conditions() == set()
    assert query.get_columns_referenced_in_having() == set()

    # Conditions plus having, with a prewhere condition set after construction
    having_body = {
        "conditions": [["a", "=", "1"]],
        "having": [
            ["b", "=", "1"],
            [["c", "=", "1"], [["foo", ["d", ["bar", ["e"]]]], "=", "1"]],
        ],
    }
    query = Query(having_body, source)
    query.set_prewhere([["pc6", "=", "10"]])
    assert query.get_all_referenced_columns() == {"a", "b", "c", "d", "e", "pc6"}
    assert query.get_columns_referenced_in_having() == {"b", "c", "d", "e"}