def test_formatting() -> None:
    """
    Checks how the tag key/value array join renders through
    ClickhouseExpressionFormatter, first unfiltered and then wrapped
    in an arrayFilter restricted to two tag keys.
    """

    def zipped_tags():
        # Fresh column objects on every call so the two cases share nothing.
        return zip_columns(
            Column(None, None, "tags.key"),
            Column(None, None, "tags.value"),
        )

    # Case 1: plain array join over the zipped tag columns.
    unfiltered = tupleElement(
        "tags_key",
        arrayJoin("snuba_all_tags", zipped_tags()),
        Literal(None, 1),
    )
    # A new formatter per rendering: formatter instances are never reused
    # so no state can leak between the two cases.
    rendered_unfiltered = unfiltered.accept(ClickhouseExpressionFormatter())
    assert rendered_unfiltered == (
        "(tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 1) AS tags_key)"
    )

    # Case 2: same array join, with the pairs filtered down to keys t1/t2.
    wanted_keys = [Literal(None, "t1"), Literal(None, "t2")]
    filtered = tupleElement(
        "tags_key",
        arrayJoin(
            "snuba_all_tags",
            filter_key_values(zipped_tags(), wanted_keys),
        ),
        Literal(None, 1),
    )
    rendered_filtered = filtered.accept(ClickhouseExpressionFormatter())
    assert rendered_filtered == (
        "(tupleElement((arrayJoin(arrayFilter((pair -> in("
        "tupleElement(pair, 1), tuple('t1', 't2'))), "
        "arrayMap((x, y -> tuple(x, y)), tags.key, tags.value))) AS snuba_all_tags), 1) AS tags_key)"
    )
Пример #2
0
def filter_expression(
    columns: Expression,
    single_filtered: Dict[LiteralExpr, Sequence[str]],
    multiple_filtered: Dict[
        Tuple[LiteralExpr, ...], Sequence[Tuple[str, ...]]
    ],
) -> Expression:
    """
    Wraps ``columns`` in an ``arrayFilter`` call driven by a one-argument
    lambda (argument name ``arg``).

    The lambda body is the result of ``combine_and_conditions`` applied to:

    - one IN condition per entry of ``single_filtered``: the tuple element
      of ``arg`` at the key's index must be in the tuple of literal values;
    - one IN condition per entry of ``multiple_filtered``: the tuple made
      of the elements of ``arg`` at the key's indices must be in the tuple
      of literal value-tuples.

    NOTE(review): when both mappings are empty the condition list passed
    to ``combine_and_conditions`` is empty -- confirm that helper accepts
    it or that callers never do this.
    """
    arg_name = "arg"
    arg = Argument(None, arg_name)

    def element_at(position: LiteralExpr) -> Expression:
        # Element of the lambda argument at the given tuple index.
        return tupleElement(None, arg, position)

    def literal_tuple(values: Sequence[str]) -> Expression:
        # tuple(<literal>, <literal>, ...) built from raw values.
        return FunctionCallExpr(
            None,
            "tuple",
            tuple(LiteralExpr(None, value) for value in values),
        )

    # Conditions on a single tuple position come first...
    predicates: List[Expression] = [
        binary_condition(
            ConditionFunctions.IN,
            element_at(position),
            literal_tuple(accepted),
        )
        for position, accepted in single_filtered.items()
    ]

    # ...followed by conditions on combinations of positions.
    for positions, combinations in multiple_filtered.items():
        selected = FunctionCallExpr(
            None,
            "tuple",
            tuple(element_at(position) for position in positions),
        )
        accepted_combinations = FunctionCallExpr(
            None,
            "tuple",
            tuple(literal_tuple(combination) for combination in combinations),
        )
        predicates.append(
            binary_condition(
                ConditionFunctions.IN, selected, accepted_combinations
            )
        )

    keep_row = Lambda(None, (arg_name,), combine_and_conditions(predicates))
    return FunctionCallExpr(None, "arrayFilter", (keep_row, columns))
Пример #3
0
def filter_key_values(key_values: Expression,
                      keys: Sequence[LiteralExpr]) -> Expression:
    """
    Builds an ``arrayFilter`` call over ``key_values`` (an array of
    key/value pairs) that keeps the pairs whose key satisfies
    ``in_condition`` against ``keys`` (tag keys in this case).
    """
    pair_name = "pair"

    # Each pair is a two-element tuple (key, value). Clickhouse tuple
    # indices start at 1, so index 1 selects the key.
    pair_key = tupleElement(
        None,
        Argument(None, pair_name),
        LiteralExpr(None, 1),
    )

    keep_pair = Lambda(None, (pair_name,), in_condition(pair_key, keys))
    return FunctionCallExpr(None, "arrayFilter", (keep_pair, key_values))
Пример #4
0
def _filtered_mapping_pairs(
    alias: Optional[str],
    column_name: str,
    pair_alias: str,
    filtered_tags: Sequence[LiteralExpr],
    array_index: LiteralExpr,
) -> Expression:
    """
    Builds the filtered variant of the key/value array join. The shape of
    the resulting expression is roughly:

        (arrayJoin(arrayFilter(
              pair -> tupleElement(pair, 1) IN (tags),
              arrayMap((x,y) -> (x,y), tags.key, tags.value)
         )) as all_tags).1

    ``pair_alias`` names the arrayJoin, ``alias`` names the outer
    tupleElement and ``array_index`` selects the element of each pair.
    """
    keys = ColumnExpr(None, None, key_column(column_name))
    values = ColumnExpr(None, None, val_column(column_name))

    # Zip keys with values, then drop the pairs not in filtered_tags.
    retained_pairs = filter_key_values(
        zip_columns(keys, values),
        filtered_tags,
    )

    joined = arrayJoin(pair_alias, retained_pairs)
    return tupleElement(alias, joined, array_index)
Пример #5
0
def unfiltered_mapping_tuples(
    alias: Optional[str],
    tuple_alias: str,
    tuple_index: LiteralExpr,
    column_names: Sequence[str],
) -> Expression:
    """
    Zips the columns named in ``column_names`` together, array-joins the
    result under ``tuple_alias`` and returns the ``tuple_index`` element of
    the joined tuple, aliased as ``alias``. No filtering is applied.
    """
    zipped = zip_columns(
        *(ColumnExpr(None, None, name) for name in column_names)
    )
    joined = arrayJoin(tuple_alias, zipped)
    return tupleElement(alias, joined, tuple_index)
Пример #6
0
def _unfiltered_mapping_pairs(alias: Optional[str], column_name: str,
                              pair_alias: str,
                              tuple_index: LiteralExpr) -> Expression:
    """
    Builds the unfiltered variant of the key/value array join. The shape
    of the resulting expression is roughly:

        (arrayJoin(
            arrayMap((x,y) -> (x,y), tags.key, tags.value)
         ) as all_tags).1

    ``pair_alias`` names the arrayJoin, ``alias`` names the outer
    tupleElement and ``tuple_index`` selects the element of each pair.
    """
    keys = ColumnExpr(None, None, key_column(column_name))
    values = ColumnExpr(None, None, val_column(column_name))

    joined = arrayJoin(pair_alias, zip_columns(keys, values))
    return tupleElement(alias, joined, tuple_index)
Пример #7
0
def filtered_mapping_tuples(
    alias: Optional[str],
    tuple_alias: str,
    tuple_index: LiteralExpr,
    column_names: Sequence[str],
    single_filtered: Dict[LiteralExpr, Sequence[str]],
    multiple_filtered: Dict[
        Tuple[LiteralExpr, ...], Sequence[Tuple[str, ...]]
    ],
) -> Expression:
    """
    Zips the columns named in ``column_names`` together, passes the result
    through ``filter_expression`` with the two filter mappings, array-joins
    it under ``tuple_alias`` and returns the ``tuple_index`` element of the
    joined tuple, aliased as ``alias``.
    """
    zipped = zip_columns(
        *(ColumnExpr(None, None, name) for name in column_names)
    )
    retained = filter_expression(zipped, single_filtered, multiple_filtered)
    joined = arrayJoin(tuple_alias, retained)
    return tupleElement(alias, joined, tuple_index)
Пример #8
0
def array_join_col(ops=None, groups=None, op_groups=None):
    """
    Builds the ``snuba_all_spans`` arrayJoin expression over the zipped
    ``spans.op``, ``spans.group`` and ``spans.exclusive_time`` columns.

    Each truthy argument adds one IN condition on the zipped tuples:

    - ``ops``: element 1 must be one of the given values;
    - ``groups``: element 2 must be one of the given values;
    - ``op_groups``: the (element 1, element 2) tuple must be one of the
      given two-item pairs.

    When at least one condition exists, the zipped array is wrapped in an
    ``arrayFilter`` whose lambda body is ``combine_and_conditions`` of
    those conditions; otherwise the zipped array is joined as is.
    """
    arg_name = "arg"
    arg = Argument(None, arg_name)

    def field(position):
        # Element of the lambda argument at the given 1-based position.
        return tupleElement(None, arg, Literal(None, position))

    def literals(*values):
        # tuple(<literal>, <literal>, ...) built from raw values.
        return FunctionCall(
            None, "tuple", tuple(Literal(None, value) for value in values)
        )

    conditions: List[Expression] = []

    if ops:
        conditions.append(
            binary_condition(ConditionFunctions.IN, field(1), literals(*ops))
        )

    if groups:
        conditions.append(
            binary_condition(
                ConditionFunctions.IN, field(2), literals(*groups)
            )
        )

    if op_groups:
        selected_pair = FunctionCall(None, "tuple", (field(1), field(2)))
        accepted_pairs = FunctionCall(
            None,
            "tuple",
            tuple(literals(op, group) for op, group in op_groups),
        )
        conditions.append(
            binary_condition(
                ConditionFunctions.IN, selected_pair, accepted_pairs
            )
        )

    # arrayMap((x, y, z) -> tuple(x, y, z), spans.op, spans.group, ...)
    zip_args = ("x", "y", "z")
    zip_lambda = Lambda(
        None,
        zip_args,
        FunctionCall(
            None, "tuple", tuple(Argument(None, name) for name in zip_args)
        ),
    )
    spans = FunctionCall(
        None,
        "arrayMap",
        (
            zip_lambda,
            Column(None, None, "spans.op"),
            Column(None, None, "spans.group"),
            Column(None, None, "spans.exclusive_time"),
        ),
    )

    if not conditions:
        return arrayJoin("snuba_all_spans", spans)

    keep_span = Lambda(None, (arg_name,), combine_and_conditions(conditions))
    filtered_spans = FunctionCall(None, "arrayFilter", (keep_span, spans))
    return arrayJoin("snuba_all_spans", filtered_spans)
Пример #9
0
span_processor_tests = [
    pytest.param(
        build_query(),
        [],
        None,
        id="no spans columns in select clause",
    ),
    pytest.param(
        build_query(selected_columns=[
            spans_op_col, spans_group_col, spans_exclusive_time_col
        ]),
        [
            SelectedExpression(
                "spans_op",
                tupleElement("spans_op", array_join_col(), Literal(None, 1))),
            SelectedExpression(
                "spans_group",
                tupleElement("spans_group", array_join_col(), Literal(None,
                                                                      2)),
            ),
            SelectedExpression(
                "spans_exclusive_time",
                tupleElement("spans_exclusive_time", array_join_col(),
                             Literal(None, 3)),
            ),
        ],
        None,
        id="simple array join with all op, group, exclusive_time",
    ),
    pytest.param(
Пример #10
0
def some_tuple(alias: str | None):
    """
    Returns ``literals_tuple`` of the two literals ``"duration"`` and
    ``300``, with ``alias`` passed through as its first argument.
    """
    members = [
        Literal(None, "duration"),
        Literal(None, 300),
    ]
    return literals_tuple(alias, members)


def identity(expression: Expression) -> Expression:
    """
    Wraps ``expression`` with ``dsl_identity``, always passing ``None`` as
    the second argument.
    """
    # NOTE(review): the second argument is presumably the alias -- confirm
    # against dsl_identity's signature.
    second_argument = None
    return dsl_identity(expression, second_argument)


TEST_QUERIES = [
    pytest.param(
        build_query(selected_columns=[
            some_tuple(alias="foo"),
            equals(
                tupleElement(None, some_tuple(alias="doo"), Literal(None, 1)),
                Literal(None, 300),
            ),
        ]),
        build_query(selected_columns=[
            # top level tuple alias persists
            some_tuple(alias="foo"),
            equals(
                # alias of the tuple of internal function is removed (it is not useful)
                tupleElement(None, some_tuple(alias=None), Literal(None, 1)),
                Literal(None, 300),
            ),
        ]),
        id="simple happy path",
    ),
    pytest.param(
Пример #11
0
     "aggregations": [],
     "groupby": [],
     "selected_columns": ["tags_key", "tags_value"],
     "conditions": [["col", "IN", ["t1", "t2"]]],
 },
 ClickhouseQuery(
     None,
     selected_columns=[
         SelectedExpression(
             name="tags_key",
             expression=tupleElement(
                 "_snuba_tags_key",
                 arrayJoin(
                     "snuba_all_tags",
                     zip_columns(
                         Column(None, None, "tags.key"),
                         Column(None, None, "tags.value"),
                     ),
                 ),
                 Literal(None, 1),
             ),
         ),
         SelectedExpression(
             name="tags_value",
             expression=tupleElement(
                 "_snuba_tags_value",
                 arrayJoin(
                     "snuba_all_tags",
                     zip_columns(
                         Column(None, None, "tags.key"),
                         Column(None, None, "tags.value"),