def test_formatting() -> None:
    """
    Check the ClickHouse rendering of the tags arrayJoin expressions, first
    over the plain zipped key/value columns and then with an arrayFilter
    restricting the tag keys.
    """
    # Case 1: the zipped columns are array-joined without any filter.
    unfiltered_tags_key = tupleElement(
        "tags_key",
        arrayJoin(
            "snuba_all_tags",
            zip_columns(
                Column(None, None, "tags.key"),
                Column(None, None, "tags.value"),
            ),
        ),
        Literal(None, 1),
    )
    # A fresh formatter is used for every expression, as each assertion
    # formats its expression independently of the other.
    unfiltered_sql = unfiltered_tags_key.accept(ClickhouseExpressionFormatter())
    assert unfiltered_sql == (
        "(tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 1) AS tags_key)"
    )

    # Case 2: the zipped columns are filtered on the keys t1 and t2 first.
    wanted_keys = [Literal(None, "t1"), Literal(None, "t2")]
    filtered_tags_key = tupleElement(
        "tags_key",
        arrayJoin(
            "snuba_all_tags",
            filter_key_values(
                zip_columns(
                    Column(None, None, "tags.key"),
                    Column(None, None, "tags.value"),
                ),
                wanted_keys,
            ),
        ),
        Literal(None, 1),
    )
    filtered_sql = filtered_tags_key.accept(ClickhouseExpressionFormatter())
    assert filtered_sql == (
        "(tupleElement((arrayJoin(arrayFilter((pair -> in("
        "tupleElement(pair, 1), tuple('t1', 't2'))), "
        "arrayMap((x, y -> tuple(x, y)), tags.key, tags.value))) AS snuba_all_tags), 1) AS tags_key)"
    )
def filter_expression(
    columns: Expression,
    single_filtered: Dict[LiteralExpr, Sequence[str]],
    multiple_filtered: Dict[Tuple[LiteralExpr, ...], Sequence[Tuple[str, ...]]],
) -> Expression:
    """
    Wrap `columns` in an arrayFilter call whose lambda tests each element
    (bound to the lambda argument `arg`) against the provided filters.

    Every entry of `single_filtered` maps a tuple index to a sequence of
    values and yields the condition
    `tupleElement(arg, index) IN tuple(values...)`.

    Every entry of `multiple_filtered` maps a group of tuple indices to a
    sequence of value tuples and yields the condition
    `tuple(tupleElement(arg, i), ...) IN tuple(tuple(values...), ...)`.

    The conditions (single ones first, then multiple ones) are merged with
    `combine_and_conditions` to form the lambda body.
    """
    lambda_param = "arg"
    element = Argument(None, lambda_param)

    def literal_tuple(values: Sequence[str]) -> Expression:
        # tuple('a', 'b', ...) built out of literal expressions.
        return FunctionCallExpr(
            None, "tuple", tuple(LiteralExpr(None, value) for value in values)
        )

    single_conditions: List[Expression] = [
        binary_condition(
            ConditionFunctions.IN,
            tupleElement(None, element, position),
            literal_tuple(accepted),
        )
        for position, accepted in single_filtered.items()
    ]

    multiple_conditions: List[Expression] = [
        binary_condition(
            ConditionFunctions.IN,
            FunctionCallExpr(
                None,
                "tuple",
                tuple(tupleElement(None, element, position) for position in positions),
            ),
            FunctionCallExpr(
                None,
                "tuple",
                tuple(literal_tuple(combination) for combination in combinations),
            ),
        )
        for positions, combinations in multiple_filtered.items()
    ]

    predicate = combine_and_conditions([*single_conditions, *multiple_conditions])
    return FunctionCallExpr(
        None,
        "arrayFilter",
        (Lambda(None, (lambda_param,), predicate), columns),
    )
def filter_key_values(key_values: Expression, keys: Sequence[LiteralExpr]) -> Expression:
    """
    Build an arrayFilter call over an array of key value pairs that tests
    the key of every pair against a sequence of keys (tag keys in this case).
    """
    # A pair is a tuple of two elements (key and value). Tuple indices in
    # Clickhouse start at 1 rather than 0, so element 1 is the key.
    pair_key = tupleElement(
        None,
        Argument(None, "pair"),
        LiteralExpr(None, 1),
    )
    key_is_wanted = Lambda(None, ("pair",), in_condition(pair_key, keys))
    return FunctionCallExpr(None, "arrayFilter", (key_is_wanted, key_values))
def _filtered_mapping_pairs(
    alias: Optional[str],
    column_name: str,
    pair_alias: str,
    filtered_tags: Sequence[LiteralExpr],
    array_index: LiteralExpr,
) -> Expression:
    """
    Pick element `array_index` out of the array-joined key/value pairs of
    `column_name`, after the pairs have gone through `filter_key_values`
    with `filtered_tags`.
    """
    # Shape of the generated expression:
    # (arrayJoin(arrayFilter(
    #       pair -> tupleElement(pair, 1) IN (tags),
    #       arrayMap((x,y) -> (x,y), tags.key, tags.value)
    #  )) as all_tags).1
    key_value_pairs = zip_columns(
        ColumnExpr(None, None, key_column(column_name)),
        ColumnExpr(None, None, val_column(column_name)),
    )
    kept_pairs = filter_key_values(key_value_pairs, filtered_tags)
    joined_pairs = arrayJoin(pair_alias, kept_pairs)
    return tupleElement(alias, joined_pairs, array_index)
def unfiltered_mapping_tuples(
    alias: Optional[str],
    tuple_alias: str,
    tuple_index: LiteralExpr,
    column_names: Sequence[str],
) -> Expression:
    """
    Pick element `tuple_index` out of the array join (aliased `tuple_alias`)
    of the zipped `column_names` columns, without applying any filter.
    """
    zipped_columns = zip_columns(
        *(ColumnExpr(None, None, name) for name in column_names)
    )
    joined_tuples = arrayJoin(tuple_alias, zipped_columns)
    return tupleElement(alias, joined_tuples, tuple_index)
def _unfiltered_mapping_pairs(
    alias: Optional[str],
    column_name: str,
    pair_alias: str,
    tuple_index: LiteralExpr,
) -> Expression:
    """
    Pick element `tuple_index` out of the array-joined key/value pairs of
    `column_name`, without applying any filter to the pairs.
    """
    # Shape of the generated expression:
    # (arrayJoin(
    #       arrayMap((x,y) -> (x,y), tags.key, tags.value)
    #  ) as all_tags).1
    key_value_pairs = zip_columns(
        ColumnExpr(None, None, key_column(column_name)),
        ColumnExpr(None, None, val_column(column_name)),
    )
    joined_pairs = arrayJoin(pair_alias, key_value_pairs)
    return tupleElement(alias, joined_pairs, tuple_index)
def filtered_mapping_tuples(
    alias: Optional[str],
    tuple_alias: str,
    tuple_index: LiteralExpr,
    column_names: Sequence[str],
    single_filtered: Dict[LiteralExpr, Sequence[str]],
    multiple_filtered: Dict[Tuple[LiteralExpr, ...], Sequence[Tuple[str, ...]]],
) -> Expression:
    """
    Pick element `tuple_index` out of the array join (aliased `tuple_alias`)
    of the zipped `column_names` columns, after the zipped tuples have gone
    through `filter_expression` with `single_filtered` and
    `multiple_filtered`.
    """
    zipped_columns = zip_columns(
        *(ColumnExpr(None, None, name) for name in column_names)
    )
    kept_tuples = filter_expression(
        zipped_columns,
        single_filtered,
        multiple_filtered,
    )
    joined_tuples = arrayJoin(tuple_alias, kept_tuples)
    return tupleElement(alias, joined_tuples, tuple_index)
def array_join_col(ops=None, groups=None, op_groups=None):
    """
    Build the arrayJoin expression (aliased `snuba_all_spans`) over the
    spans.op, spans.group and spans.exclusive_time columns zipped through
    arrayMap.

    When any of `ops`, `groups` or `op_groups` is truthy, the zipped array
    is wrapped in an arrayFilter whose lambda combines one IN condition per
    provided argument:
      - `ops`: element 1 of the tuple IN tuple(ops...)
      - `groups`: element 2 of the tuple IN tuple(groups...)
      - `op_groups`: (element 1, element 2) IN tuple of (op, group) tuples
    """
    lambda_param = "arg"
    span = Argument(None, lambda_param)

    def element(position):
        # tupleElement(arg, position)
        return tupleElement(None, span, Literal(None, position))

    def literal_tuple(values):
        # tuple(v1, v2, ...) built out of literals
        return FunctionCall(
            None, "tuple", tuple(Literal(None, value) for value in values)
        )

    filters: List[Expression] = []
    if ops:
        filters.append(
            binary_condition(ConditionFunctions.IN, element(1), literal_tuple(ops))
        )
    if groups:
        filters.append(
            binary_condition(ConditionFunctions.IN, element(2), literal_tuple(groups))
        )
    if op_groups:
        filters.append(
            binary_condition(
                ConditionFunctions.IN,
                FunctionCall(None, "tuple", (element(1), element(2))),
                FunctionCall(
                    None,
                    "tuple",
                    tuple(literal_tuple((op, group)) for op, group in op_groups),
                ),
            )
        )

    zip_params = ("x", "y", "z")
    zip_lambda = Lambda(
        None,
        zip_params,
        FunctionCall(
            None, "tuple", tuple(Argument(None, param) for param in zip_params)
        ),
    )
    spans = FunctionCall(
        None,
        "arrayMap",
        (
            zip_lambda,
            Column(None, None, "spans.op"),
            Column(None, None, "spans.group"),
            Column(None, None, "spans.exclusive_time"),
        ),
    )

    if filters:
        keep_span = Lambda(None, (lambda_param,), combine_and_conditions(filters))
        spans = FunctionCall(None, "arrayFilter", (keep_span, spans))

    return arrayJoin("snuba_all_spans", spans)
span_processor_tests = [ pytest.param( build_query(), [], None, id="no spans columns in select clause", ), pytest.param( build_query(selected_columns=[ spans_op_col, spans_group_col, spans_exclusive_time_col ]), [ SelectedExpression( "spans_op", tupleElement("spans_op", array_join_col(), Literal(None, 1))), SelectedExpression( "spans_group", tupleElement("spans_group", array_join_col(), Literal(None, 2)), ), SelectedExpression( "spans_exclusive_time", tupleElement("spans_exclusive_time", array_join_col(), Literal(None, 3)), ), ], None, id="simple array join with all op, group, exclusive_time", ), pytest.param(
def some_tuple(alias: str | None): return literals_tuple(alias, [Literal(None, "duration"), Literal(None, 300)]) def identity(expression: Expression) -> Expression: return dsl_identity(expression, None) TEST_QUERIES = [ pytest.param( build_query(selected_columns=[ some_tuple(alias="foo"), equals( tupleElement(None, some_tuple(alias="doo"), Literal(None, 1)), Literal(None, 300), ), ]), build_query(selected_columns=[ # top level tuple alias persists some_tuple(alias="foo"), equals( # alias of the tuple of internal function is removed (it is not useful) tupleElement(None, some_tuple(alias=None), Literal(None, 1)), Literal(None, 300), ), ]), id="simple happy path", ), pytest.param(
"aggregations": [], "groupby": [], "selected_columns": ["tags_key", "tags_value"], "conditions": [["col", "IN", ["t1", "t2"]]], }, ClickhouseQuery( None, selected_columns=[ SelectedExpression( name="tags_key", expression=tupleElement( "_snuba_tags_key", arrayJoin( "snuba_all_tags", zip_columns( Column(None, None, "tags.key"), Column(None, None, "tags.value"), ), ), Literal(None, 1), ), ), SelectedExpression( name="tags_value", expression=tupleElement( "_snuba_tags_value", arrayJoin( "snuba_all_tags", zip_columns( Column(None, None, "tags.key"), Column(None, None, "tags.value"),