示例#1
0
 def visit_join_clause(
         self, node: JoinClause[Entity]) -> Mapping[str, SubqueryDraft]:
     """
     Visit both branches of a join and make sure every column used in a
     join key is selected by the subquery draft of the table it belongs to.

     Returns the drafts of both branches merged into a single mapping keyed
     by table alias.
     """
     left_drafts = node.left_node.accept(self)
     right_drafts = node.right_node.accept(self)
     # If both branches produce the same key, the right-hand entry wins.
     drafts = {**left_drafts, **right_drafts}

     for join_key in node.keys:
         for side in (join_key.left, join_key.right):
             # Naming the selected column inside the subquery is not very
             # useful because that name is not used anywhere: the external
             # query references subquery columns by their aliases, as this
             # is what Clickhouse will do.
             drafts[side.table_alias].add_select_expression(
                 SelectedExpression(
                     aliasify_column(side.column),
                     Column(
                         aliasify_column(side.column),
                         None,
                         side.column,
                     ),
                 ))
     return drafts
示例#2
0
def test_query_data_source() -> None:
    """
    Checks the column set exposed by a Query when it is used as a data source.
    """

    def plain_column(name: str) -> Column:
        # A new instance on every call, aliased with its own column name.
        return Column(alias=name, table_name=None, column_name=name)

    selected = [
        SelectedExpression("col1", plain_column("col1")),
        SelectedExpression(
            "some_func",
            FunctionCall("some_func", "f", (plain_column("col1"), )),
        ),
        # This expression has no name; it is expected to show up in the
        # column set as "_invalid_alias_2".
        SelectedExpression(None, plain_column("col2")),
    ]
    query = Query(Table("my_table", ColumnSet([])), selected_columns=selected)

    expected_columns = ColumnSet([
        ("col1", Any()),
        ("some_func", Any()),
        ("_invalid_alias_2", Any()),
    ])
    assert query.get_columns() == expected_columns
示例#3
0
 def process_functions(exp: Expression) -> Expression:
     """
     Rewrite `isHandled` / `notHandled` calls into an `arrayExists` call over
     the configured column; any other expression is returned untouched.

     isHandled  -> arrayExists(x -> isNull(x) OR assumeNotNull(x) = 1, col)
     notHandled -> arrayExists(x -> isNotNull(x) AND assumeNotNull(x) = 0, col)
     """
     if not isinstance(exp, FunctionCall):
         return exp

     # Pick the three pieces that differ between the two rewrites.
     if exp.function_name == "isHandled":
         combinator, null_check, flag = BooleanFunctions.OR, "isNull", 1
     elif exp.function_name == "notHandled":
         combinator, null_check, flag = BooleanFunctions.AND, "isNotNull", 0
     else:
         return exp

     # Parameters are validated before anything is built.
     self.validate_parameters(exp)

     flag_matches = binary_condition(
         ConditionFunctions.EQ,
         FunctionCall(None, "assumeNotNull", (Argument(None, "x"), )),
         Literal(None, flag),
     )
     predicate = Lambda(
         None,
         ("x", ),
         binary_condition(
             combinator,
             FunctionCall(None, null_check, (Argument(None, "x"), )),
             flag_matches,
         ),
     )
     # The alias of the original call is carried over to the replacement.
     return FunctionCall(
         exp.alias,
         "arrayExists",
         (predicate, Column(None, None, self.__column)),
     )
示例#4
0
def test_not_handled_processor() -> None:
    """
    Runs HandledFunctionsProcessor over a query selecting `notHandled()` and
    checks both the rewritten expression and its Clickhouse formatting.
    """
    columnset = ColumnSet([])
    handled_column = "exception_stacks.mechanism_handled"

    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression("result", FunctionCall("result", "notHandled", ())),
        ],
    )

    # Expected rewrite, assembled from the inside out.
    is_not_null = FunctionCall(None, "isNotNull", (Argument(None, "x"),))
    equals_zero = binary_condition(
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, 0),
    )
    predicate = Lambda(
        None,
        ("x",),
        binary_condition(BooleanFunctions.AND, is_not_null, equals_zero),
    )
    rewritten = FunctionCall(
        "result",
        "arrayExists",
        (predicate, Column(None, None, handled_column)),
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression("result", rewritten),
        ],
    )

    processor = handled_functions.HandledFunctionsProcessor(
        handled_column, columnset
    )
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    formatter = ClickhouseExpressionFormatter()
    ret = unprocessed.get_selected_columns()[1].expression.accept(formatter)
    assert ret == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), exception_stacks.mechanism_handled) AS result)"
    )
示例#5
0
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Applies UUIDArrayColumnProcessor to a query whose condition is
    `unprocessed`, then checks the rewritten selected column, the rewritten
    condition and the condition's Clickhouse formatting.
    """

    def transactions_query(condition: Expression) -> Query:
        # Each query gets its own expression objects.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = transactions_query(unprocessed)
    expected_query = transactions_query(expected)

    processor = UUIDArrayColumnProcessor({"column1", "column2"})
    processor.process_query(unprocessed_query, HTTPRequestSettings())

    # The selected column is expected to become
    # arrayMap(x -> replaceAll(toString(x), '-', ''), column2).
    strip_dashes = FunctionCall(
        None,
        "replaceAll",
        (
            FunctionCall(None, "toString", (Argument(None, "x"), )),
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    mapped_column = FunctionCall(
        None,
        "arrayMap",
        (
            Lambda(None, ("x", ), strip_dashes),
            Column(None, None, "column2"),
        ),
    )
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression("column2", mapped_column)
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    assert condition.accept(ClickhouseExpressionFormatter()) == formatted_value
示例#6
0
def test_failure_rate_format_expressions() -> None:
    """
    Runs the failure rate processor over a query selecting `failure_rate()`
    and checks the rewritten expression (a `notIn` over a tuple of status
    codes) together with its Clickhouse formatting.
    """

    def events_query(perf_expression):
        # Each call creates a new query with its own expression objects.
        return Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(name=None,
                                   expression=Column(None, None, "column2")),
                SelectedExpression("perf", perf_expression),
            ],
        )

    unprocessed = events_query(FunctionCall("perf", "failure_rate", ()))

    excluded_statuses = FunctionCall(
        None,
        "tuple",
        tuple(Literal(alias=None, value=code) for code in (0, 1, 2)),
    )
    status_not_excluded = binary_condition(
        None,
        ConditionFunctions.NOT_IN,
        Column(None, None, "transaction_status"),
        excluded_statuses,
    )
    expected = events_query(
        divide(
            FunctionCall(None, "countIf", (status_not_excluded, )),
            count(),
            "perf",
        ))

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings())

    expected_columns = expected.get_selected_columns_from_ast()
    assert expected_columns == unprocessed.get_selected_columns_from_ast()

    perf_column = unprocessed.get_selected_columns_from_ast()[1]
    ret = perf_column.expression.accept(ClickhouseExpressionFormatter())
    assert ret == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )
示例#7
0
def test_failure_rate_format_expressions() -> None:
    """
    Runs the failure rate processor over a query selecting `failure_rate()`
    and checks the rewritten expression (an AND chain of `notEquals`
    conditions) together with its Clickhouse formatting.
    """

    def events_query(perf_expression):
        # Each call creates a new query with its own expression objects.
        return Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    name=None, expression=Column(None, None, "column2")
                ),
                SelectedExpression("perf", perf_expression),
            ],
        )

    unprocessed = events_query(FunctionCall("perf", "failure_rate", ()))

    # One "status != code" condition per code, AND-ed together.
    status_differs = []
    for code in [0, 1, 2]:
        status_differs.append(
            binary_condition(
                None,
                ConditionFunctions.NEQ,
                Column(None, None, "transaction_status"),
                Literal(None, code),
            )
        )
    failure_count = FunctionCall(
        None, "countIf", (combine_and_conditions(status_differs),)
    )
    expected = events_query(divide(failure_count, count(), "perf"))

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings()
    )

    expected_columns = expected.get_selected_columns_from_ast()
    assert expected_columns == unprocessed.get_selected_columns_from_ast()

    perf_column = unprocessed.get_selected_columns_from_ast()[1]
    ret = perf_column.expression.accept(ClickhouseExpressionFormatter())
    assert ret == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
示例#8
0
    def add_conditions(
        self,
        timestamp: datetime,
        offset: Optional[int],
        query: Union[CompositeQuery[Entity], Query],
    ) -> None:
        """
        Restrict ``query`` in place to this subscription's project and time
        window.

        The extra conditions are AND-ed together and, when the query already
        has a condition, AND-ed onto it. The result is stored with
        ``query.set_ast_condition``.

        :param timestamp: Exclusive upper bound of the time window. The
            inclusive lower bound is ``timestamp - self.time_window``.
        :param offset: When not None, adds ``ifnull(offset, 0) <= offset``.
        :param query: The query to modify.
        :raises InvalidSubscriptionError: If the from clause is not a single
            entity, or the entity has no required time column.
        """
        # TODO: Support composite queries with multiple entities.
        from_clause = query.get_from_clause()
        if not isinstance(from_clause, Entity):
            raise InvalidSubscriptionError("Only simple queries are supported")
        entity = get_entity(from_clause.key)
        required_timestamp_column = entity.required_time_column
        if required_timestamp_column is None:
            raise InvalidSubscriptionError(
                "Entity must have a timestamp column for subscriptions")

        conditions_to_add: List[Expression] = [
            binary_condition(
                ConditionFunctions.EQ,
                Column(None, None, "project_id"),
                Literal(None, self.project_id),
            ),
            # Half-open window: [timestamp - time_window, timestamp)
            binary_condition(
                ConditionFunctions.GTE,
                Column(None, None, required_timestamp_column),
                Literal(None, (timestamp - self.time_window)),
            ),
            binary_condition(
                ConditionFunctions.LT,
                Column(None, None, required_timestamp_column),
                Literal(None, timestamp),
            ),
        ]

        if offset is not None:
            conditions_to_add.append(
                binary_condition(
                    ConditionFunctions.LTE,
                    FunctionCall(
                        None,
                        "ifnull",
                        (Column(None, None, "offset"), Literal(None, 0)),
                    ),
                    Literal(None, offset),
                ))

        new_condition = combine_and_conditions(conditions_to_add)
        condition = query.get_condition()
        # Compare against None explicitly: whether a condition is present
        # must not depend on the truthiness of the expression object.
        if condition is not None:
            new_condition = binary_condition(BooleanFunctions.AND, condition,
                                             new_condition)

        query.set_ast_condition(new_condition)
示例#9
0
def test_first_level_conditions() -> None:
    """
    Checks get_first_level_and_conditions / get_first_level_or_conditions on
    nested AND/OR conditions, including conditions wrapped in `equals(..., 1)`.
    """

    def equals_test(table: str, column: str):
        # <table>.<column> = 'test'
        return binary_condition(
            ConditionFunctions.EQ,
            Column(None, table, column),
            Literal(None, "test"),
        )

    c1 = equals_test("table1", "column1")
    c2 = equals_test("table2", "column2")
    c3 = equals_test("table3", "column3")

    # (c1 AND c2) AND c3 is flattened into its three operands.
    nested_and = binary_condition(
        BooleanFunctions.AND,
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    )
    assert get_first_level_and_conditions(nested_and) == [c1, c2, c3]

    # equals(and(c1, c2), 1) AND c3 is flattened in the same way.
    wrapped_and = FunctionCall(
        None,
        "equals",
        (FunctionCall(None, "and", (c1, c2)), Literal(None, 1)),
    )
    cond = binary_condition(BooleanFunctions.AND, wrapped_and, c3)
    assert get_first_level_and_conditions(cond) == [c1, c2, c3]

    # (c1 AND c2) OR c3: the AND operand is kept as a single OR operand.
    cond = binary_condition(
        BooleanFunctions.OR,
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    )
    expected_or_operands = [
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    ]
    assert get_first_level_or_conditions(cond) == expected_or_operands

    # (c1 OR (c2 AND c3)) = 1: the comparison with 1 is looked through.
    inner_or = binary_condition(
        BooleanFunctions.OR,
        c1,
        binary_condition(BooleanFunctions.AND, c2, c3),
    )
    cond = binary_condition(ConditionFunctions.EQ, inner_or, Literal(None, 1))
    expected_or_operands = [
        c1,
        binary_condition(BooleanFunctions.AND, c2, c3),
    ]
    assert get_first_level_or_conditions(cond) == expected_or_operands
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """
    Runs the TimeSeriesProcessor instances of the transactions entity over a
    query selecting the `time` column, and checks the rewritten selected
    columns and condition, their Clickhouse formatting, and the granularity
    extracted from the processed query.
    """

    def duration_expression() -> SelectedExpression:
        # Present in both queries; created anew for each of them.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"))

    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            duration_expression(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        groupby=[Column("my_time", None, "time")],
        granularity=granularity,
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            duration_expression(),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    # Only the time series processors of the entity are applied.
    all_processors = TransactionsEntity().get_query_processors()
    for processor in all_processors:
        if not isinstance(processor, TimeSeriesProcessor):
            continue
        processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()
    assert expected.get_condition() == unprocessed.get_condition()

    time_column = unprocessed.get_selected_columns()[1]
    ret = time_column.expression.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_column
    if condition:
        query_condition = unprocessed.get_condition()
        assert query_condition is not None
        ret = query_condition.accept(ClickhouseExpressionFormatter())
        assert formatted_condition == ret

    extracted = extract_granularity_from_query(unprocessed, "finish_ts")
    assert extracted == granularity
示例#11
0
def test_aliased_cols() -> None:
    """
    Test iteration when columns have aliases. This is the expression
    f2(t1.c2, f1(t1.c1, t1.c2 as a2)) as af1
    """
    # NOTE(review): Column takes (alias, table_name, column_name), so these
    # calls put "c1"/"c2" in the table slot and "t1" in the column slot,
    # the reverse of the expression above. The asserted iteration order does
    # not depend on it.
    plain_inner = Column(None, "c1", "t1")
    aliased_inner = Column("a2", "c2", "t1")
    inner_call = FunctionCall(None, "f1", (plain_inner, aliased_inner))
    outer_column = Column(None, "c2", "t1")
    outer_call = FunctionCall("af1", "f2", (outer_column, inner_call))

    # Parameters are visited before the function that contains them.
    visited = list(outer_call)
    assert visited == [
        outer_column,
        plain_inner,
        aliased_inner,
        inner_call,
        outer_call,
    ]
示例#12
0
def build_time_range(query_from: datetime, query_to: datetime) -> Expression:
    """
    Build the condition `timestamp >= query_from AND timestamp < query_to`.
    """
    not_before = FunctionCall(
        None,
        "greaterOrEquals",
        (Column(None, None, "timestamp"), Literal(None, query_from)),
    )
    strictly_before = FunctionCall(
        None,
        "less",
        (Column(None, None, "timestamp"), Literal(None, query_to)),
    )
    return build_and(not_before, strictly_before)
示例#13
0
def test_iterate_over_query() -> None:
    """
    Creates a query with the new AST and iterates over all its expressions.
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2, ))

    condition = binary_condition(
        ConditionFunctions.EQ, column1, Literal(None, "1"))
    prewhere = binary_condition(
        ConditionFunctions.EQ, column2, Literal(None, "2"))
    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        prewhere=prewhere,
        having=None,
        order_by=[orderby],
    )

    # Expected order, clause by clause; inside each clause the children of
    # an expression come before the expression itself.
    from_selected_columns = [column1, column2, function_1]
    from_condition = [column1, Literal(None, "1"), condition]
    from_groupby = [column1, column2, function_1]
    from_order_by = [column2, function_2]
    from_prewhere = [column2, Literal(None, "2"), prewhere]
    expected_expressions = (
        from_selected_columns
        + from_condition
        + from_groupby
        + from_order_by
        + from_prewhere
    )

    assert list(query.get_all_expressions()) == expected_expressions
示例#14
0
def test_column_function_translation() -> None:
    """
    Checks that ColumnToFunction maps the `ip_address` column to a
    `coalesce` call that keeps the alias of the original column.
    """
    mapper = ColumnToFunction(
        None,
        "ip_address",
        "coalesce",
        (Column(None, None, "ip_address_v4"), Column(None, None, "ip_address_v6")),
    )
    translated = mapper.attempt_map(
        Column("ip_address", None, "ip_address"),
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    expected = FunctionCall(
        "ip_address",
        "coalesce",
        (Column(None, None, "ip_address_v4"), Column(None, None, "ip_address_v6")),
    )
    assert translated == expected
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """
    Runs the TimeSeriesProcessor instances of the transactions entity over a
    query selecting the `time` column, and checks the rewritten selected
    columns and condition together with their Clickhouse formatting.
    """

    def duration_expression() -> SelectedExpression:
        # Present in both queries; created anew for each of them.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"))

    unprocessed = Query(
        {},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_expression(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        granularity=granularity,
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_expression(),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    # Only the time series processors of the entity are applied.
    all_processors = TransactionsEntity().get_query_processors()
    for processor in all_processors:
        if not isinstance(processor, TimeSeriesProcessor):
            continue
        processor.process_query(unprocessed, HTTPRequestSettings())

    expected_columns = expected.get_selected_columns_from_ast()
    assert expected_columns == unprocessed.get_selected_columns_from_ast()
    expected_condition = expected.get_condition_from_ast()
    assert expected_condition == unprocessed.get_condition_from_ast()

    time_column = unprocessed.get_selected_columns_from_ast()[1]
    ret = time_column.expression.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_column
    if condition:
        ret = unprocessed.get_condition_from_ast().accept(
            ClickhouseExpressionFormatter())
        assert formatted_condition == ret
示例#16
0
def test_expressions_from_basic_condition() -> None:
    """
    Iterates over the expressions in a basic condition
    f(t1.c1) = t1.c2
    """

    lhs_column = Column(None, "t1", "c1")
    lhs_call = FunctionCall(None, "f", (lhs_column,))
    rhs_column = Column(None, "t1", "c2")
    condition = binary_condition(ConditionFunctions.EQ, lhs_call, rhs_column)

    # Children are visited before their parent, left operand first.
    visited = list(condition)
    assert visited == [lhs_column, lhs_call, rhs_column, condition]
示例#17
0
def test_classify_and_replace() -> None:
    """
    Checks that a condition on `ev.project_id` is classified against the
    entity mapped to alias `ev`, and that `_replace_col` moves the column
    reference from alias `ev` to alias `gr`.
    """
    condition = binary_condition(
        ConditionFunctions.EQ,
        Column(None, "ev", "project_id"),
        Literal(None, 1),
    )

    classification = _classify_single_column_condition(
        condition, {"ev": EntityKey.EVENTS})
    assert classification == (
        QualifiedCol(EntityKey.EVENTS, "project_id"),
        "ev",
    )

    retarget = partial(_replace_col, "ev", "project_id", "gr", "project_id")
    expected = binary_condition(
        ConditionFunctions.EQ,
        Column(None, "gr", "project_id"),
        Literal(None, 1),
    )
    assert condition.transform(retarget) == expected
def build_time_condition(time_columns: str, from_date: datetime,
                         to_date: datetime) -> Expression:
    """
    Build `time_columns >= from_date AND time_columns < to_date`.

    Both column references are aliased `_snuba_<time_columns>`.
    """
    column_alias = f"_snuba_{time_columns}"
    lower_bound = binary_condition(
        ConditionFunctions.GTE,
        Column(column_alias, None, time_columns),
        Literal(None, from_date),
    )
    upper_bound = binary_condition(
        ConditionFunctions.LT,
        Column(column_alias, None, time_columns),
        Literal(None, to_date),
    )
    return binary_condition(BooleanFunctions.AND, lower_bound, upper_bound)
示例#19
0
def test_format_clickhouse_specific_query() -> None:
    """
    Adds a few of the Clickhouse specific fields to the query.
    """

    body = {
        "sample": 0.1,
        "totals": True,
        "limitby": (10, "environment"),
    }
    where = binary_condition(
        None,
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, "blabla"),
    )
    having = binary_condition(
        None,
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, 123),
    )
    query = Query(
        body,
        TableSource("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=where,
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2"),
        ],
        having=having,
        order_by=[
            OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))
        ],
        array_join=Column(None, None, "column1"),
    )

    # These fields are set through setters after construction.
    query.set_final(True)
    query.set_offset(50)
    query.set_limit(100)

    clickhouse_query = AstSqlQuery(query, HTTPRequestSettings())

    assert clickhouse_query.sql_data() == {
        "from": "FROM my_table FINAL SAMPLE 0.1",
        "group": "GROUP BY (column1, table1.column2) WITH TOTALS",
        "having": "HAVING eq(column1, 123)",
        "array_join": "ARRAY JOIN column1",
        "limit": "LIMIT 100 OFFSET 50",
        "limitby": "LIMIT 10 BY environment",
        "order": "ORDER BY column1 ASC",
        "select": "SELECT column1, table1.column2",
        "where": "WHERE eq(column1, 'blabla')",
    }
示例#20
0
 def __replace_with_hash(self, condition: Expression) -> Expression:
     """
     Replace a condition matched by the optimizable pattern on this
     processor's `<column_name>.key` column with
     `has(<hash_map_name>, cityHash64('<key>=<value>'))`.

     Conditions that do not match, or that match on another column, are
     returned unchanged.
     """
     match = self.__optimizable_pattern.match(condition)
     if match is None:
         return condition
     if match.string(KEY_COL_MAPPING_PARAM) != f"{self.__column_name}.key":
         return condition

     rhs = match.expression("right_hand_side")
     assert isinstance(rhs, LiteralExpr)
     # The key goes through ESCAPE_TRANSLATION before being joined with
     # the value.
     key = match.string(KEY_MAPPING_PARAM).translate(ESCAPE_TRANSLATION)

     hash_map_column = Column(
         alias=None,
         table_name=match.optional_string(TABLE_MAPPING_PARAM),
         column_name=self.__hash_map_name,
     )
     hashed_pair = FunctionExpr(
         alias=None,
         function_name="cityHash64",
         parameters=(LiteralExpr(None, f"{key}={rhs.value}"),),
     )
     # The replacement keeps the alias of the original condition.
     return FunctionExpr(
         alias=condition.alias,
         function_name="has",
         parameters=(hash_map_column, hashed_pair),
     )
示例#21
0
def function_column(col_name: str, function_name: str) -> ColumnToFunction:
    """
    Build a ColumnToFunction for `col_name` whose only function parameter is
    that same column, unaliased and without a table name.
    """
    parameters = (Column(None, None, col_name), )
    return ColumnToFunction(None, col_name, function_name, parameters)
示例#22
0
 def cast_column_to_nullable(exp: Expression) -> Expression:
     """
     Wrap a column listed in `self.mismatched_null_columns` in a `cast` to
     its nullable type; any other expression is returned unchanged.
     """
     if not isinstance(exp, Column):
         return exp
     if exp.column_name not in self.mismatched_null_columns:
         return exp

     # Depending on the order of the storages, this dictionary holds either
     # the nullable or the non-nullable version of the column. Whichever one
     # it is, the mismatch on the merge table means the column has to be
     # cast as nullable anyway.
     mismatched_column = self.mismatched_null_columns[exp.column_name]
     col_is_nullable = _col_is_nullable(mismatched_column)
     col_type = mismatched_column.type.for_schema()
     if col_is_nullable:
         cast_str = col_type
     else:
         cast_str = f"Nullable({col_type})"

     # The alias moves up to the cast function; the inner column has none.
     bare_column = Column(
         None,
         table_name=exp.table_name,
         column_name=exp.column_name,
     )
     return FunctionCall(
         exp.alias,
         "cast",
         (bare_column, Literal(None, cast_str)),
     )
示例#23
0
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    With three excluded groups and `max_group_ids_exclude` set to 5, the
    enforcer is expected to add a `notIn` condition on the group id and
    leave FINAL off.
    """
    state.set_config("max_group_ids_exclude", 5)
    excluded_groups = [100, 101, 102]
    set_project_exclude_groups(
        2,
        excluded_groups,
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    enforcer = PostReplacementConsistencyEnforcer("project_id",
                                                  ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    group_id = FunctionCall(None, "assumeNotNull",
                            (Column(None, None, "group_id"), ))
    excluded_tuple = FunctionCall(
        None,
        "tuple",
        (
            Literal(None, 100),
            Literal(None, 101),
            Literal(None, 102),
        ),
    )
    expected_condition = build_and(
        FunctionCall(None, "notIn", (group_id, excluded_tuple)),
        build_in("project_id", [2]),
    )
    assert query.get_condition() == expected_condition
    assert not query.get_from_clause().final
示例#24
0
def test_not_in_condition() -> None:
    """
    Checks that a `NOT IN` condition is recognised by is_not_in_condition
    and that the matching pattern exposes its `lhs` and `tuple` parts.
    """

    def tag_values():
        # A new tuple expression on every call.
        return literals_tuple(None, [Literal(None, "t1"), Literal(None, "t2")])

    not_in_condition = binary_condition(
        ConditionFunctions.NOT_IN,
        Column(None, None, "tags_key"),
        tag_values(),
    )
    assert is_not_in_condition(not_in_condition)

    pattern = is_not_in_condition_pattern(ColumnPattern(None, String("tags_key")))
    match = pattern.match(not_in_condition)
    assert match is not None
    assert match.expression("tuple") == tag_values()
    assert match.expression("lhs") == Column(None, None, "tags_key")
示例#25
0
def test_functions(
    default_validators: Mapping[str, FunctionCallValidator],
    entity_validators: Mapping[str, FunctionCallValidator],
    exception: Optional[Type[InvalidExpressionException]],
) -> None:
    """
    Validate the call `f(col)` with the given default and entity-level
    validators patched in.

    :param default_validators: Temporarily replaces
        ``functions.default_validators``.
    :param entity_validators: Returned by the events entity's patched
        ``get_function_call_validators``.
    :param exception: Exception type validation is expected to raise, or
        None when validation is expected to succeed.
    """
    fn_cached = functions.default_validators
    functions.default_validators = default_validators
    # Both patches are undone in `finally` blocks so that a failing
    # assertion cannot leak the patched state into later tests.
    try:
        entity_return = MagicMock()
        entity_return.return_value = entity_validators
        events_entity = get_entity(EntityKey.EVENTS)
        cached = events_entity.get_function_call_validators
        setattr(events_entity, "get_function_call_validators", entity_return)
        try:
            data_source = QueryEntity(EntityKey.EVENTS, ColumnSet([]))

            expression = FunctionCall(
                None,
                "f",
                (Column(alias=None, table_name=None, column_name="col"), ),
            )
            if exception is None:
                FunctionCallsValidator().validate(expression, data_source)
            else:
                with pytest.raises(exception):
                    FunctionCallsValidator().validate(expression, data_source)
        finally:
            # TODO: This should use fixture to do this
            setattr(events_entity, "get_function_call_validators", cached)
    finally:
        functions.default_validators = fn_cached
示例#26
0
        def process_condition(exp: Expression) -> Expression:
            """
            Replace a condition matched by CONDITION_PATTERN on `tags.key`
            with a `has(tags.key, <key>)` check, wrapped in `not` when the
            original function is an equality.

            The "empty-string-tag-condition" experiment value on the query
            decides whether the replacement or the original expression is
            returned. When no value is recorded yet, one is chosen (always
            "true" under settings.TESTING, otherwise at random) and stored
            on the query.
            """
            experiment = "empty-string-tag-condition"

            result = CONDITION_PATTERN.match(exp)
            if result is None:
                return exp
            if result.optional_string(KEY_COL_MAPPING_PARAM) != "tags.key":
                return exp

            rhs = result.optional_string(KEY_MAPPING_PARAM)
            table_name = result.optional_string(TABLE_MAPPING_PARAM)
            replacement = FunctionCall(
                exp.alias,
                "has",
                (Column(None, table_name, "tags.key"), Literal(None, rhs)),
            )

            assert isinstance(exp, FunctionCall)
            if exp.function_name == ConditionFunctions.EQ:
                replacement = FunctionCall(exp.alias, "not", (replacement,))

            # A value that is already recorded on the query is reused.
            prev_value = query.get_experiment_value(experiment)
            if prev_value is not None:
                if prev_value == "true":
                    return replacement
                return exp

            if settings.TESTING or random.random() < 0.5:
                query.add_experiment(experiment, "true")
                return replacement

            query.add_experiment(experiment, "false")
            return exp
示例#27
0
def test_mapping_complex_expression() -> None:
    """
    Transforms a nested expression tree and checks the iteration order of
    the result:
    f0(t1.c1, fB(f())) -> f0(t1.c1, fB(f(f() as a)))
    """

    aliased_leaf = FunctionCall("a", "f", ())
    substitute = FunctionCall(None, "f", (aliased_leaf, ))

    def swap_f_calls(node: Expression) -> Expression:
        is_f_call = isinstance(node, FunctionCall) and node.function_name == "f"
        return substitute if is_f_call else node

    column = Column(None, "c1", "t1")
    inner_call = FunctionCall(None, "f", ())
    tree = FunctionCall(
        None, "f0", (column, FunctionCall(None, "fB", (inner_call, ))))

    # Only the outer `f` call is replaced: when the callback returns a new
    # expression, it is expected to have taken care of its own children, so
    # the `f` nested inside the substitute is left as it is.
    transformed = tree.transform(swap_f_calls)

    wrapped_substitute = FunctionCall(None, "fB", (substitute, ))
    expected = [
        column,
        aliased_leaf,
        substitute,
        wrapped_substitute,
        FunctionCall(None, "f0", (column, wrapped_substitute)),
    ]

    assert list(transformed) == expected
示例#28
0
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    With three excluded groups and "max_group_ids_exclude" set to 5, the
    enforcer is expected to add a notIn condition on group_id and leave
    the FROM clause non-final.
    """
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer(
        "project_id", ReplacerState.EVENTS
    )
    enforcer.process_query(query, HTTPRequestSettings())

    actual_condition = query.get_condition_from_ast()

    # Expected AST: notIn(assumeNotNull(group_id), tuple(100, 101, 102))
    # AND-ed with the IN condition on project_id.
    group_id = FunctionCall(
        None, "assumeNotNull", (Column(None, None, "group_id"),)
    )
    excluded_groups = FunctionCall(
        None,
        "tuple",
        tuple(Literal(None, group) for group in (100, 101, 102)),
    )
    not_in_excluded = FunctionCall(None, "notIn", (group_id, excluded_groups))
    expected_condition = FunctionCall(
        None,
        BooleanFunctions.AND,
        (not_in_excluded, build_in("project_id", [2])),
    )

    assert actual_condition == expected_condition
    assert not query.get_from_clause().final
示例#29
0
 def do_post_processing(
     self,
     project_ids: Sequence[int],
     query: Query,
     request_settings: RequestSettings,
 ) -> None:
     """
     Apply the replacement flags of the given projects to the query.

     Nothing is done when the request settings report turbo mode.
     Otherwise the flags returned by get_projects_query_flags decide
     whether the query is marked FINAL or gets a NOT IN condition on
     group_id excluding the returned group ids.
     """
     if request_settings.get_turbo():
         return

     final, exclude_group_ids = get_projects_query_flags(
         project_ids, self.__replacer_state_name)

     # Either FINAL is already required or there is nothing to exclude:
     # simply propagate the flag to the query.
     if final or not exclude_group_ids:
         query.set_final(final)
         return

     # If the number of groups to exclude exceeds our limit, the query
     # should just use final instead of the exclusion set.
     max_group_ids_exclude = get_config(
         "max_group_ids_exclude",
         settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE)
     if len(exclude_group_ids) > max_group_ids_exclude:
         query.set_final(True)
         return

     # The exclusion is added twice: through add_conditions and through
     # add_condition_to_ast.
     query.add_conditions([(["assumeNotNull", ["group_id"]],
                            "NOT IN", exclude_group_ids)])
     group_id_expression = FunctionCall(
         None, "assumeNotNull", (Column(None, "group_id", None), ))
     excluded_literals = [Literal(None, p) for p in exclude_group_ids]
     query.add_condition_to_ast(
         not_in_condition(None, group_id_expression, excluded_literals))
示例#30
0
def test_processing_functions() -> None:
    in_condition = binary_condition(
        None,
        ConditionFunctions.IN,
        Column(None, "tag_keys", None),
        literals_tuple(
            None,
            [Literal(None, "t1"), Literal(None, "t2")]),
    )
    assert is_in_condition(in_condition)

    eq_condition = binary_condition(None, ConditionFunctions.EQ,
                                    Column(None, "test", None),
                                    Literal(None, "1"))
    assert is_binary_condition(eq_condition, ConditionFunctions.EQ)
    assert not is_binary_condition(eq_condition, ConditionFunctions.NEQ)