def process_functions(exp: Expression) -> Expression:
    """
    Rewrite the `isHandled` / `notHandled` function calls into an
    `arrayExists` expression over the column held in `self.__column`
    (`self` is taken from the enclosing scope).

    - isHandled  -> arrayExists(x -> isNull(x) OR assumeNotNull(x) = 1, col)
    - notHandled -> arrayExists(x -> isNotNull(x) AND assumeNotNull(x) = 0, col)

    Any other expression is returned unchanged.
    """
    if not isinstance(exp, FunctionCall):
        return exp

    # Both rewrites share the same shape; they only differ in the boolean
    # combinator, the null check and the flag value compared against.
    if exp.function_name == "isHandled":
        combinator, null_check, flag = BooleanFunctions.OR, "isNull", 1
    elif exp.function_name == "notHandled":
        combinator, null_check, flag = BooleanFunctions.AND, "isNotNull", 0
    else:
        return exp

    self.validate_parameters(exp)

    flag_matches = binary_condition(
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, flag),
    )
    predicate = binary_condition(
        combinator,
        FunctionCall(None, null_check, (Argument(None, "x"),)),
        flag_matches,
    )
    return FunctionCall(
        exp.alias,
        "arrayExists",
        (
            Lambda(None, ("x",), predicate),
            Column(None, None, self.__column),
        ),
    )
def _process_expressions(self, exp: Expression) -> Expression:
    """
    Wrap a column listed in `self.columns` into
    `arrayMap(x -> replaceAll(toString(x), '-', ''), column)`.

    The alias of the original column is moved onto the `arrayMap` call.
    Every other expression is returned as is.
    """
    if not isinstance(exp, Column) or exp.column_name not in self.columns:
        return exp

    as_string = FunctionCall(None, "toString", (Argument(None, "x"),))
    without_dashes = FunctionCall(
        None,
        "replaceAll",
        (as_string, Literal(None, "-"), Literal(None, "")),
    )
    return FunctionCall(
        exp.alias,
        "arrayMap",
        (
            Lambda(None, ("x",), without_dashes),
            Column(None, None, exp.column_name),
        ),
    )
def unpack_array_condition_builder(
    lhs: Expression,
    func: str,
    literal: Any,
    alias: Optional[str],
) -> Expression:
    """
    Apply the condition function `func` to every element of the array
    expression `lhs`.

    The result looks like:
    arrayExists(x -> assumeNotNull(notLike(x, rhs)), lhs)

    `arrayExists` is used when the operator mapped to `func` is in
    POSITIVE_OPERATORS, `arrayAll` otherwise.
    """
    if FUNCTION_TO_OPERATOR[func] in POSITIVE_OPERATORS:
        function_name = "arrayExists"
    else:
        function_name = "arrayAll"

    element_condition = FunctionCall(
        None,
        func,
        (
            Argument(None, "x"),
            preprocess_condition_function_literal(func, literal),
        ),
    )
    predicate = Lambda(
        None,
        ("x",),
        FunctionCall(None, "assumeNotNull", (element_condition,)),
    )
    return FunctionCall(alias, function_name, (predicate, lhs))
def parse(exp: Expression) -> Expression:
    """
    If `exp` matches ARRAY_JOIN_MATCH, rebuild it as
    `<function_name>(x -> assumeNotNull(<op function>(x, value)), column)`.

    Expressions that do not match are returned unchanged.
    """
    match = ARRAY_JOIN_MATCH.match(exp)
    if not match:
        return exp

    array_function = match.string("function_name")

    array_column = match.expression("column")
    assert isinstance(array_column, Column)

    operator_literal = match.expression("op")
    assert isinstance(operator_literal, Literal)
    operator = str(operator_literal.value)

    rhs = match.expression("value")

    element_condition = FunctionCall(
        None, OPERATOR_TO_FUNCTION[operator], (Argument(None, "x"), rhs),
    )
    predicate = Lambda(
        None,
        ("x",),
        FunctionCall(None, "assumeNotNull", (element_condition,)),
    )
    return FunctionCall(None, array_function, (predicate, array_column))
def unpack_array_condition_builder(lhs: Expression, op: str,
                                   literal: Any) -> Expression:
    """
    Apply the operator `op` to every element of the array expression `lhs`.

    The result looks like:
    arrayExists(x -> assumeNotNull(notLike(x, rhs)), lhs)

    `arrayExists` is used when `op` is in POSITIVE_OPERATORS, `arrayAll`
    otherwise.
    """
    if op in POSITIVE_OPERATORS:
        function_name = "arrayExists"
    else:
        function_name = "arrayAll"

    element_condition = FunctionCall(
        None,
        OPERATOR_TO_FUNCTION[op],
        (Argument(None, "x"), preprocess_literal(op, literal)),
    )
    predicate = Lambda(
        None,
        ("x",),
        FunctionCall(None, "assumeNotNull", (element_condition,)),
    )
    return FunctionCall(None, function_name, (predicate, lhs))
def filter_key_values(key_values: Expression,
                      keys: Sequence[LiteralExpr]) -> Expression:
    """
    Build an `arrayFilter` call that keeps the key value pairs of
    `key_values` whose key is in `keys` (tag keys in this case).
    """
    # Every element is a (key, value) tuple. Tuple indexes start at 1 in
    # Clickhouse, so the key is the element at position 1.
    pair_key = tupleElement(
        None, Argument(None, "pair"), LiteralExpr(None, 1),
    )
    keep_pair = Lambda(None, ("pair",), in_condition(pair_key, keys))
    return FunctionCallExpr(None, "arrayFilter", (keep_pair, key_values))
def visit_lambda(self, exp: Lambda) -> SubExpression:
    """
    Visit the body of the lambda and return the resulting SubExpression
    with its `main_expression` wrapped back into a Lambda that keeps the
    original alias and parameters.
    """
    visited_body = exp.transformation.accept(self)
    rebuilt_lambda = Lambda(
        exp.alias, exp.parameters, visited_body.main_expression,
    )
    return replace(visited_body, main_expression=rebuilt_lambda)
def filter_column(column: Expression,
                  keys: Sequence[LiteralExpr]) -> Expression:
    """
    Build `arrayFilter(x -> x IN keys, column)`.
    """
    is_wanted = Lambda(None, ("x",), in_condition(Argument(None, "x"), keys))
    return FunctionCallExpr(None, "arrayFilter", (is_wanted, column))
def attempt_map(
    self,
    expression: Lambda,
    children_translator: SnubaClickhouseStrictTranslator,
) -> Optional[Lambda]:
    """
    Return a copy of the lambda whose body has been visited by
    `children_translator`; alias and parameters are carried over as is.
    """
    translated_body = expression.transformation.accept(children_translator)
    return Lambda(
        alias=expression.alias,
        parameters=expression.parameters,
        transformation=translated_body,
    )
def test_not_handled_processor() -> None:
    """
    The `notHandled` function must be expanded by the processor into an
    `arrayExists` call over `exception_stacks.mechanism_handled`, both in
    the AST and in the formatted Clickhouse expression.
    """
    columnset = ColumnSet([])

    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result", FunctionCall("result", "notHandled", tuple()),
            ),
        ],
    )

    # x -> isNotNull(x) AND assumeNotNull(x) = 0
    is_zero = binary_condition(
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, 0),
    )
    not_handled = binary_condition(
        BooleanFunctions.AND,
        FunctionCall(None, "isNotNull", (Argument(None, "x"),)),
        is_zero,
    )
    expanded = FunctionCall(
        "result",
        "arrayExists",
        (
            Lambda(None, ("x",), not_handled),
            Column(None, None, "exception_stacks.mechanism_handled"),
        ),
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression("result", expanded),
        ],
    )

    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    formatter = ClickhouseExpressionFormatter()
    ret = unprocessed.get_selected_columns()[1].expression.accept(formatter)
    assert ret == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), exception_stacks.mechanism_handled) AS result)"
    )
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Runs UUIDArrayColumnProcessor on a query selecting `column2` with
    `unprocessed` as condition, then checks the rewritten selected column,
    the rewritten condition (against `expected`) and its formatted form
    (against `formatted_value`).
    """

    def build_query(condition: Expression) -> Query:
        # Both queries are identical except for their condition.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = build_query(unprocessed)
    expected_query = build_query(expected)

    processor = UUIDArrayColumnProcessor({"column1", "column2"})
    processor.process_query(unprocessed_query, HTTPRequestSettings())

    dashes_removed = FunctionCall(
        None,
        "replaceAll",
        (
            FunctionCall(None, "toString", (Argument(None, "x"),)),
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    mapped_column = FunctionCall(
        None,
        "arrayMap",
        (
            Lambda(None, ("x",), dashes_removed),
            Column(None, None, "column2"),
        ),
    )
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression("column2", mapped_column)
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()

    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def filter_keys(column: Expression,
                keys: Sequence[LiteralExpr]) -> Expression:
    """
    Build `arrayFilter(tag -> tag IN keys, column)` to filter a Column
    array based on a sequence of keys.
    """
    is_wanted = Lambda(
        None, ("tag",), in_condition(None, Argument(None, "tag"), keys),
    )
    return FunctionCallExpr(None, "arrayFilter", (is_wanted, column))
def filter_expression(
    columns: Expression,
    single_filtered: Dict[LiteralExpr, Sequence[str]],
    multiple_filtered: Dict[Tuple[LiteralExpr, ...], Sequence[Tuple[str, ...]]],
) -> Expression:
    """
    Build `arrayFilter(arg -> <conditions>, columns)`.

    One IN condition is generated per entry of `single_filtered`
    (`tupleElement(arg, index) IN tuple(values...)`) and one per entry of
    `multiple_filtered` (`tuple(tupleElement(arg, i)...) IN
    tuple(tuple(values...)...)`). The conditions are combined with
    `combine_and_conditions`, single ones first.
    """
    argument_name = "arg"
    argument = Argument(None, argument_name)

    single_conditions = [
        binary_condition(
            ConditionFunctions.IN,
            tupleElement(None, argument, index),
            FunctionCallExpr(
                None,
                "tuple",
                tuple(LiteralExpr(None, value) for value in values),
            ),
        )
        for index, values in single_filtered.items()
    ]

    multiple_conditions = [
        binary_condition(
            ConditionFunctions.IN,
            FunctionCallExpr(
                None,
                "tuple",
                tuple(tupleElement(None, argument, index) for index in indices),
            ),
            FunctionCallExpr(
                None,
                "tuple",
                tuple(
                    FunctionCallExpr(
                        None,
                        "tuple",
                        tuple(LiteralExpr(None, value) for value in combination),
                    )
                    for combination in combinations
                ),
            ),
        )
        for indices, combinations in multiple_filtered.items()
    ]

    conditions: List[Expression] = [*single_conditions, *multiple_conditions]
    keep_element = Lambda(
        None, (argument_name,), combine_and_conditions(conditions),
    )
    return FunctionCallExpr(None, "arrayFilter", (keep_element, columns))
def _transform_array_condition(array_columns: Set[str],
                               exp: Expression) -> Expression:
    """
    Turn a condition whose first parameter is one of `array_columns` into a
    condition applied to the elements of the array:

    arrayExists(x -> assumeNotNull(notLike(x, rhs)), lhs)

    `arrayExists` is used when the operator mapped to the condition function
    is in POSITIVE_OPERATORS, `arrayAll` otherwise. Expressions that are not
    conditions with at least two parameters and such a column as left hand
    side are returned unchanged.
    """
    if not (is_condition(exp) and isinstance(exp, FunctionCall)):
        return exp
    if len(exp.parameters) < 2:
        return exp

    lhs = exp.parameters[0]
    if not isinstance(lhs, Column):
        return exp

    # The lookup name is qualified with the table name when there is one.
    qualifier = lhs.table_name + "." if lhs.table_name is not None else ""
    aliased_name = f"{qualifier}{lhs.column_name}"
    if aliased_name not in array_columns:
        return exp

    if FUNCTION_TO_OPERATOR[exp.function_name] in POSITIVE_OPERATORS:
        function_name = "arrayExists"
    else:
        function_name = "arrayAll"

    element_condition = FunctionCall(
        None, exp.function_name, (Argument(None, "x"), exp.parameters[1]),
    )
    predicate = Lambda(
        None,
        ("x",),
        FunctionCall(None, "assumeNotNull", (element_condition,)),
    )
    return FunctionCall(None, function_name, (predicate, lhs))
def zip_columns(column1: ColumnExpr, column2: ColumnExpr) -> Expression:
    """
    Turns two array columns into an array of pairs by building
    `arrayMap((x, y) -> tuple(x, y), column1, column2)`.
    """
    make_pair = FunctionCallExpr(
        None, "tuple", (Argument(None, "x"), Argument(None, "y")),
    )
    pairing = Lambda(None, ("x", "y"), make_pair)
    return FunctionCallExpr(None, "arrayMap", (pairing, column1, column2))
def _process_expressions(self, exp: Expression) -> Expression:
    """
    Wrap a column listed in `self.columns` into
    `arrayMap(x -> lower(hex(x)), column)`.

    The alias of the original column is moved onto the `arrayMap` call.
    Every other expression is returned as is.
    """
    if not isinstance(exp, Column) or exp.column_name not in self.columns:
        return exp

    as_hex = FunctionCall(None, "hex", (Argument(None, "x"),))
    lowered = FunctionCall(None, "lower", (as_hex,))
    return FunctionCall(
        exp.alias,
        "arrayMap",
        (
            Lambda(None, ("x",), lowered),
            Column(None, None, exp.column_name),
        ),
    )
def zip_columns(*columns: ColumnExpr) -> Expression:
    """
    Build `arrayMap((x, y[, z]) -> tuple(x, y[, z]), *columns)` for two or
    three columns.

    Raises NotImplementedError for any other number of columns.
    """
    column_count = len(columns)
    if not 2 <= column_count <= 3:
        raise NotImplementedError("Can only zip between 2 and 3 columns.")

    # One lambda parameter per column, taken in order from x, y, z.
    arguments = ("x", "y", "z")[:column_count]
    make_tuple = FunctionCallExpr(
        None, "tuple", tuple(Argument(None, name) for name in arguments),
    )
    return FunctionCallExpr(
        None,
        "arrayMap",
        (Lambda(None, arguments, make_tuple), *columns),
    )
def test_hash() -> None:
    """
    Ensures expressions are hashable: one instance of each expression kind
    built here is added to a set and all six must be kept as distinct
    members.
    """
    column1 = Column(None, "c1", "t1")
    column2 = Column(None, "c2", "t1")
    function_1 = FunctionCall(None, "f1", (column1, column2))
    literal = Literal(None, "blablabla")
    function_2 = CurriedFunctionCall(None, function_1, (column1,))
    # The trailing comma matters: `(Argument(None, "x"))` is only a
    # parenthesised Argument, while FunctionCall is given a tuple of
    # parameters everywhere else.
    lm = Lambda(
        None,
        ("x", "y"),
        FunctionCall(None, "test", (Argument(None, "x"),)),
    )

    s = set()
    s.add(column1)
    s.add(column2)
    s.add(function_1)
    s.add(literal)
    s.add(function_2)
    s.add(lm)

    assert len(s) == 6
), ]), build_query(selected_columns=[ # top level tuple alias persists some_tuple(alias="foo"), equals( # alias of the tuple of internal function is removed (it is not useful) tupleElement(None, some_tuple(alias=None), Literal(None, 1)), Literal(None, 300), ), ]), id="simple happy path", ), pytest.param( build_query( selected_columns=[Lambda(None, ("a", ), some_tuple(alias="foo"))]), build_query( selected_columns=[Lambda(None, ("a", ), some_tuple(alias=None))]), id="simple lambda", ), pytest.param( build_query(selected_columns=[ identity( identity( identity( equals( # alias of the tuple of internal function is removed (it is not useful) tupleElement(None, some_tuple( alias="ayyy"), Literal(None, 1)), Literal(None, 300), ))))
), ), ( [["exception_frames.filename", "LIKE", "%foo%"]], FunctionCall( None, "arrayExists", ( Lambda( None, ("x",), FunctionCall( None, "assumeNotNull", ( FunctionCall( None, ConditionFunctions.LIKE, (Argument(None, "x"), Literal(None, "%foo%")), ), ), ), ), Column(None, None, "exception_frames.filename"), ), ), ), # Test scalar condition on array column is expanded as an iterator. ( [["exception_frames.filename", "NOT LIKE", "%foo%"]], FunctionCall( None,
["equals", ["exception_stacks.type", "'c'"]], ], ]), binary_condition( BooleanFunctions.OR, FunctionCall( alias=None, function_name="arrayExists", parameters=( Lambda( alias=None, parameters=("x", ), transformation=FunctionCall( alias=None, function_name="assumeNotNull", parameters=(binary_condition( ConditionFunctions.EQ, Argument(alias=None, name="x"), Literal(alias=None, value="b"), ), ), ), ), Column(alias=None, table_name=None, column_name="exception_stacks.type"), ), ), FunctionCall( alias=None, function_name="arrayExists", parameters=(
def array_join_col(ops=None, groups=None, op_groups=None):
    """
    Build the `arrayJoin` expression aliased `snuba_all_spans` over the
    zipped `spans.op`, `spans.group` and `spans.exclusive_time` columns.

    When any of `ops`, `groups` or `op_groups` is provided (truthy), the
    zipped array is wrapped into an `arrayFilter` whose lambda combines,
    with `combine_and_conditions`, one IN condition per provided argument:
    - ops:       tupleElement(arg, 1) IN tuple(ops...)
    - groups:    tupleElement(arg, 2) IN tuple(groups...)
    - op_groups: tuple(tupleElement(arg, 1), tupleElement(arg, 2))
                 IN tuple(tuple(op, group)...)
    """
    argument_name = "arg"
    argument = Argument(None, argument_name)
    conditions: List[Expression] = []

    if ops:
        allowed_ops = FunctionCall(
            None, "tuple", tuple(Literal(None, op) for op in ops),
        )
        conditions.append(
            binary_condition(
                ConditionFunctions.IN,
                tupleElement(None, argument, Literal(None, 1)),
                allowed_ops,
            )
        )

    if groups:
        allowed_groups = FunctionCall(
            None, "tuple", tuple(Literal(None, group) for group in groups),
        )
        conditions.append(
            binary_condition(
                ConditionFunctions.IN,
                tupleElement(None, argument, Literal(None, 2)),
                allowed_groups,
            )
        )

    if op_groups:
        op_and_group = FunctionCall(
            None,
            "tuple",
            (
                tupleElement(None, argument, Literal(None, 1)),
                tupleElement(None, argument, Literal(None, 2)),
            ),
        )
        allowed_pairs = FunctionCall(
            None,
            "tuple",
            tuple(
                FunctionCall(
                    None, "tuple", (Literal(None, op), Literal(None, group)),
                )
                for op, group in op_groups
            ),
        )
        conditions.append(
            binary_condition(ConditionFunctions.IN, op_and_group, allowed_pairs)
        )

    # arrayMap((x, y, z) -> tuple(x, y, z), spans.op, spans.group,
    # spans.exclusive_time)
    zip_names = ("x", "y", "z")
    zipper = Lambda(
        None,
        ("x", "y", "z"),
        FunctionCall(
            None, "tuple", tuple(Argument(None, name) for name in zip_names),
        ),
    )
    cols = FunctionCall(
        None,
        "arrayMap",
        (
            zipper,
            Column(None, None, "spans.op"),
            Column(None, None, "spans.group"),
            Column(None, None, "spans.exclusive_time"),
        ),
    )

    if conditions:
        keep_element = Lambda(
            None, (argument_name,), combine_and_conditions(conditions),
        )
        cols = FunctionCall(None, "arrayFilter", (keep_element, cols))

    return arrayJoin("snuba_all_spans", cols)
"or", binary_condition( "equals", FunctionCall( None, "arrayExists", ( Lambda( None, ("x",), FunctionCall( None, "assumeNotNull", ( FunctionCall( None, "equals", ( Argument(None, "x"), Literal(None, "RuntimeException"), ), ), ), ), ), Column("_snuba_a", None, "a"), ), ), Literal(None, 1), ), binary_condition( "equals",
), FunctionCall( None, "arraySlice", ( FunctionCall( None, "arrayMap", ( Lambda( None, ("x", ), FunctionCall( None, "replaceAll", ( FunctionCall(None, "toString", (Argument(None, "x"), )), Literal(None, "-"), Literal(None, ""), ), ), ), Column(None, None, "column1"), ), ), Literal(None, 0), Literal(None, 2), ), ), "arraySlice(arrayMap((x -> replaceAll(toString(x), '-', '')), column1), 0, 2)", id=
"exception_stacks.type"), ), ], condition=with_required( FunctionCall( None, "arrayExists", ( Lambda( None, ("x", ), FunctionCall( None, "assumeNotNull", (FunctionCall( None, "like", ( Argument(None, "x"), Literal(None, "Arithmetic%"), ), ), ), ), ), Column( "_snuba_exception_stacks.type", None, "exception_stacks.type", ), ), )), limit=1000,
FunctionCall( "alias", "f", ( Column(None, "table", "col"), Literal(None, 123), FunctionCall(None, "f1", (Column(None, None, "col2"), )), ), ), CurriedFunctionCall( None, FunctionCall(None, "f", (Column(None, None, "col"), Literal(None, 12))), (Column(None, None, "col3"), ), ), Lambda(None, ("a", "b"), FunctionCall(None, "f", (Argument(None, "a"), ))), ] @pytest.mark.parametrize("expression", test_data) def test_default_translation(expression: Expression) -> None: """ Ensures that a translation that relies on the default translation rules produces a deep copy of the original expression. """ translated = expression.accept( SnubaClickhouseMappingTranslator(TranslationMappers())) assert translated == expression
Column(None, "t1", "c2"), ), ), """f1( fnested( t1.c1 ), t1.c2 )""", ), ( Lambda( None, ("a", "b", "c"), FunctionCall( None, "some_func", (Argument(None, "a"), Argument(None, "b"), Argument(None, "c")), ), ), """(a,b,c -> some_func( a, b, c ) )""", ), ]
function_name="f", parameters=(Column(alias="b", table_name=None, column_name="b"), ), ), ), ), id="Curried with parameter to expand", ), pytest.param( Column(alias=None, table_name=None, column_name="a"), { "a": Lambda( alias="a", parameters=tuple(), transformation=FunctionCall( alias="b", function_name="f", parameters=(Column( alias=None, table_name=None, column_name="c"), ), ), ), "c": Column(alias="c", table_name=None, column_name="x"), }, True, Lambda( alias="a", parameters=tuple(), transformation=FunctionCall( alias="b", function_name="f", parameters=(Column(alias="c", table_name=None,
FunctionCall(None, "f0", (Column(None, "param1", "table1"), )), ( FunctionCall(None, "f1", (Column(None, "param2", "table1"), )), Column(None, "param3", "table1"), ), ), "f0(table1.param1)(f1(table1.param2), table1.param3)", ), # Curried function call with hierarchy ( FunctionCall( None, "arrayExists", ( Lambda( None, ("x", "y"), FunctionCall(None, "testFunc", (Argument(None, "x"), Argument(None, "y"))), ), Column(None, "test", None), ), ), "arrayExists((x, y -> testFunc(x, y)), test)", ), # Lambda expression ] @pytest.mark.parametrize("expression, expected", test_expressions) def test_format_expressions(expression: Expression, expected: str) -> None: visitor = ClickhouseExpressionFormatter() assert expression.accept(visitor) == expected
def test_handled_processor() -> None:
    """
    The `isHandled` function must be expanded by the processor into an
    `arrayExists` call over `exception_stacks.mechanism_handled`, both in
    the AST and in the formatted Clickhouse expression.
    """
    columnset = ColumnSet([])

    unprocessed = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result", FunctionCall("result", "isHandled", tuple()),
            ),
        ],
    )

    # x -> isNull(x) OR assumeNotNull(x) = 1
    is_one = binary_condition(
        None,
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, 1),
    )
    handled = binary_condition(
        None,
        BooleanFunctions.OR,
        FunctionCall(None, "isNull", (Argument(None, "x"),)),
        is_one,
    )
    expanded = FunctionCall(
        "result",
        "arrayExists",
        (
            Lambda(None, ("x",), handled),
            Column(None, None, "exception_stacks.mechanism_handled"),
        ),
    )
    expected = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression("result", expanded),
        ],
    )

    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset)
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    formatter = ClickhouseExpressionFormatter()
    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        formatter)
    assert ret == (
        "(arrayExists((x -> (isNull(x) OR equals(assumeNotNull(x), 1))), exception_stacks.mechanism_handled) AS result)"
    )