示例#1
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Rewrite WHERE conditions that match CONDITION_PATTERN on the
        ``tags.key`` column into ``has(tags.key, <key>)`` calls (wrapped in
        ``not`` when the original function is ConditionFunctions.EQ).

        The rewrite is gated by the "empty-string-tag-condition" experiment:
        a value already recorded on the query takes precedence; otherwise the
        arm is "true" when settings.TESTING is set or a coin flip succeeds,
        and "false" in the remaining case.
        """
        experiment_name = "empty-string-tag-condition"

        def process_condition(exp: Expression) -> Expression:
            matched = CONDITION_PATTERN.match(exp)
            if matched is None:
                return exp
            if matched.optional_string(KEY_COL_MAPPING_PARAM) != "tags.key":
                return exp

            tag_key = matched.optional_string(KEY_MAPPING_PARAM)
            table = matched.optional_string(TABLE_MAPPING_PARAM)
            has_key = FunctionCall(
                exp.alias,
                "has",
                (Column(None, table, "tags.key"), Literal(None, tag_key)),
            )

            assert isinstance(exp, FunctionCall)
            rewritten = (
                FunctionCall(exp.alias, "not", (has_key,))
                if exp.function_name == ConditionFunctions.EQ
                else has_key
            )

            recorded_arm = query.get_experiment_value(experiment_name)
            if recorded_arm is not None:
                # An arm was already chosen for this query: stick to it.
                return rewritten if recorded_arm == "true" else exp

            # No arm yet: pick one and record it on the query.
            if settings.TESTING or random.random() < 0.5:
                query.add_experiment(experiment_name, "true")
                return rewritten

            query.add_experiment(experiment_name, "false")
            return exp

        where_clause = query.get_condition()
        if where_clause is None:
            return
        query.set_ast_condition(where_clause.transform(process_condition))
示例#2
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Apply ``__replace_with_hash`` to the WHERE and HAVING clauses.

        Nothing happens when the killswitch config is falsy, when either
        clause is classified NOT_OPTIMIZABLE, or when neither clause is
        classified OPTIMIZABLE. A missing clause counts as IRRELEVANT.
        """
        if not get_config(self.__killswitch, 1):
            return

        # Classify WHERE first, then HAVING, bailing out on the first veto.
        classified = []
        for fetch_clause in (query.get_condition, query.get_having):
            clause = fetch_clause()
            clause_class = ConditionClass.IRRELEVANT
            if clause is not None:
                clause_class = self.__classify_combined_conditions(clause)
                if clause_class == ConditionClass.NOT_OPTIMIZABLE:
                    return
            classified.append((clause, clause_class))

        if not any(
            clause_class == ConditionClass.OPTIMIZABLE
            for _, clause_class in classified
        ):
            return

        metrics.increment("optimizable_query")

        (where_clause, _), (having_clause, _) = classified
        if where_clause is not None:
            query.set_ast_condition(where_clause.transform(self.__replace_with_hash))
        if having_clause is not None:
            query.set_ast_having(having_clause.transform(self.__replace_with_hash))
示例#3
0
文件: split.py 项目: fpacifici/snuba
def _replace_ast_condition(
    query: Query, field: str, operator: str, new_operand: Expression
) -> None:
    """
    Swap the second operand of matching first-level AND conditions in the
    query WHERE clause.

    A condition matches when it is the function mapped from ``operator``
    (via OPERATOR_TO_FUNCTION) applied to the column named ``field`` and any
    other expression. Matching conditions keep their column and receive
    ``new_operand``; all other conditions are passed through unchanged.
    Does nothing when the query has no condition.
    """
    where_clause = query.get_condition_from_ast()
    if where_clause is None:
        return

    def swap_operand(candidate: Expression) -> Expression:
        found = FunctionCall(
            String(OPERATOR_TO_FUNCTION[operator]),
            (Param("column", Column(None, String(field))), AnyExpression()),
        ).match(candidate)
        if found is None:
            return candidate
        return replace(
            candidate, parameters=(found.expression("column"), new_operand)
        )

    rewritten = [
        swap_operand(part) for part in get_first_level_and_conditions(where_clause)
    ]
    query.set_ast_condition(combine_and_conditions(rewritten))
示例#4
0
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        """
        Reduce and classify the WHERE and HAVING clauses, then apply
        ``__replace_with_hash`` to them when the query is optimizable.

        The reduced clauses are always written back to the query, even when
        the hash rewrite is subsequently skipped. The rewrite is skipped when
        the killswitch config is falsy, when a clause is NOT_OPTIMIZABLE, or
        when neither clause is OPTIMIZABLE.
        """
        if not get_config(self.__killswitch, 1):
            return

        reduce_and_classify = self.__get_reduced_and_classified_query_clause

        where_clause, where_class = reduce_and_classify(query.get_condition(), query)
        query.set_ast_condition(where_clause)
        if where_class == ConditionClass.NOT_OPTIMIZABLE:
            return

        having_clause, having_class = reduce_and_classify(query.get_having(), query)
        query.set_ast_having(having_clause)
        if having_class == ConditionClass.NOT_OPTIMIZABLE:
            return

        if (
            where_class != ConditionClass.OPTIMIZABLE
            and having_class != ConditionClass.OPTIMIZABLE
        ):
            return

        metrics.increment("optimizable_query")
        query.add_experiment("tags_hashmap_applied", 1)

        if where_clause is not None:
            query.set_ast_condition(where_clause.transform(self.__replace_with_hash))
        if having_clause is not None:
            query.set_ast_having(having_clause.transform(self.__replace_with_hash))
示例#5
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Rewrite WHERE conditions that match CONDITION_PATTERN on the
        ``tags.key`` column into ``has(tags.key, <key>)`` calls, wrapped in
        ``not`` when the original function is ConditionFunctions.EQ.
        Conditions that do not match are left as they are.
        """

        def process_condition(exp: Expression) -> Expression:
            matched = CONDITION_PATTERN.match(exp)
            if matched is None:
                return exp
            if matched.optional_string(KEY_COL_MAPPING_PARAM) != "tags.key":
                return exp

            tag_key = matched.optional_string(KEY_MAPPING_PARAM)
            table = matched.optional_string(TABLE_MAPPING_PARAM)
            has_key = FunctionCall(
                exp.alias,
                "has",
                (Column(None, table, "tags.key"), Literal(None, tag_key)),
            )

            assert isinstance(exp, FunctionCall)
            if exp.function_name != ConditionFunctions.EQ:
                return has_key
            return FunctionCall(exp.alias, "not", (has_key, ))

        where_clause = query.get_condition()
        if where_clause is None:
            return
        query.set_ast_condition(where_clause.transform(process_condition))
示例#6
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Move the highest-priority first-level AND conditions of the WHERE
        clause into PREWHERE.

        A condition is eligible when it is a FunctionCall whose function is in
        ALLOWED_OPERATORS and that references at least one prewhere candidate
        column. Priority is the lowest position of any of its columns in the
        candidate list; at most ``max_prewhere_conditions`` are moved.
        """
        limit: int = (self.__max_prewhere_conditions
                      or settings.MAX_PREWHERE_CONDITIONS)
        candidate_keys = self.__prewhere_candidates

        # HACK: for FINAL queries, keep conditions on columns listed in
        # omit_if_final out of PREWHERE. ClickHouse has a bug affecting
        # queries with FINAL and PREWHERE on Low Cardinality and Nullable
        # columns.
        # https://github.com/ClickHouse/ClickHouse/issues/16171
        if query.get_from_clause().final and self.__omit_if_final:
            candidate_keys = [
                key for key in candidate_keys
                if key not in self.__omit_if_final
            ]

        if not candidate_keys:
            return

        where_clause = query.get_condition_from_ast()
        if where_clause is None:
            return

        # Collect (priority, condition) pairs for every eligible condition.
        ranked = []
        for cond in get_first_level_and_conditions(where_clause):
            if not isinstance(cond, FunctionCall):
                continue
            if cond.function_name not in ALLOWED_OPERATORS:
                continue
            if not any(col.column_name in candidate_keys
                       for col in get_columns_in_expression(cond)):
                continue
            priority = min(
                candidate_keys.index(col.column_name)
                for col in get_columns_in_expression(cond)
                if col.column_name in candidate_keys)
            ranked.append((priority, cond))

        if not ranked:
            return

        # Stable sort: ties keep their order of appearance in the WHERE clause.
        ranked.sort(key=lambda pair: pair[0])
        prewhere_conditions = [cond for _, cond in ranked[:limit]]

        remaining = [
            cond for cond in get_first_level_and_conditions(where_clause)
            if cond not in prewhere_conditions
        ]

        if remaining:
            query.set_ast_condition(combine_and_conditions(remaining))
        else:
            query.set_ast_condition(None)

        if prewhere_conditions:
            query.set_prewhere_ast_condition(
                combine_and_conditions(prewhere_conditions))
        else:
            query.set_prewhere_ast_condition(None)
示例#7
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Move the highest-priority first-level AND conditions of the WHERE
        clause into PREWHERE, using the candidate columns exposed by the
        query's from clause.

        A condition is eligible when it is a FunctionCall whose function is in
        ALLOWED_OPERATORS and that references at least one candidate column.
        Priority is the lowest position of any of its columns in the
        candidate list; at most ``max_prewhere_conditions`` are moved.
        """
        limit: int = (
            self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
        )
        candidate_keys = query.get_from_clause().prewhere_candidates
        if not candidate_keys:
            return

        where_clause = query.get_condition_from_ast()
        if where_clause is None:
            return

        def references_candidate(cond: Expression) -> bool:
            return any(
                col.column_name in candidate_keys
                for col in get_columns_in_expression(cond)
            )

        def best_position(cond: Expression) -> int:
            return min(
                candidate_keys.index(col.column_name)
                for col in get_columns_in_expression(cond)
                if col.column_name in candidate_keys
            )

        eligible = [
            cond
            for cond in get_first_level_and_conditions(where_clause)
            if isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS
            and references_candidate(cond)
        ]
        if not eligible:
            return

        # Lowest position first; the sort is stable so ties keep WHERE order.
        by_priority = sorted(eligible, key=best_position)
        prewhere_conditions = by_priority[:limit]

        remaining = [
            cond
            for cond in get_first_level_and_conditions(where_clause)
            if cond not in prewhere_conditions
        ]

        new_where = combine_and_conditions(remaining) if remaining else None
        query.set_ast_condition(new_where)

        new_prewhere = (
            combine_and_conditions(prewhere_conditions)
            if prewhere_conditions
            else None
        )
        query.set_prewhere_ast_condition(new_prewhere)
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Run ``self.process_condition`` over the WHERE clause and then the
        PREWHERE clause (each only when present), and emit a
        "query_processed" metric tagged with ``self.formatted`` when that
        attribute is truthy afterwards.
        """
        clause_accessors = (
            (query.get_condition_from_ast, query.set_ast_condition),
            (query.get_prewhere_ast, query.set_prewhere_ast_condition),
        )
        for read_clause, write_clause in clause_accessors:
            clause = read_clause()
            if clause:
                write_clause(clause.transform(self.process_condition))

        formatted = self.formatted
        if formatted:
            metrics.increment("query_processed", tags={"type": formatted})
示例#9
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Apply ``_process_expressions`` to the query expressions outside the
        condition, then rewrite the condition.

        The condition is first transformed with
        ``__process_optimizable_condition``; when that leaves it unchanged it
        is transformed with ``_process_expressions`` instead.
        """
        query.transform_expressions(
            self._process_expressions, skip_transform_condition=True
        )

        where_clause = query.get_condition()
        if where_clause is None:
            return

        rewritten = where_clause.transform(self.__process_optimizable_condition)
        if rewritten == where_clause:
            # The optimized path changed nothing: use the generic one.
            rewritten = where_clause.transform(self._process_expressions)

        query.set_ast_condition(rewritten)
示例#10
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Apply ``_process_expressions`` to the query expressions outside the
        condition, then rewrite the condition.

        A condition containing an unoptimizable part is transformed with
        ``_process_expressions`` directly. Otherwise it is transformed with
        ``__process_optimizable_condition``, falling back to
        ``_process_expressions`` when that leaves it unchanged.
        """
        query.transform_expressions(self._process_expressions,
                                    skip_transform_condition=True)

        where_clause = query.get_condition()
        if where_clause is None:
            return

        if self.__contains_unoptimizable_condition(where_clause):
            query.set_ast_condition(
                where_clause.transform(self._process_expressions))
            return

        rewritten = where_clause.transform(
            self.__process_optimizable_condition)
        if where_clause == rewritten:
            # The optimized path changed nothing: use the generic one.
            rewritten = rewritten.transform(self._process_expressions)

        query.set_ast_condition(rewritten)
示例#11
0
    def _update_conditions(self, query: Query,
                           prewhere_conditions: Sequence[Expression]) -> None:
        """
        Split the WHERE clause between WHERE and PREWHERE.

        Every first-level AND condition that appears in
        ``prewhere_conditions`` is dropped from the WHERE clause, and
        ``prewhere_conditions`` becomes the PREWHERE clause. Either clause is
        set to None when nothing is left for it.
        """
        where_clause = query.get_condition_from_ast()
        # Not expected to be None at this point; the assert narrows it for mypy.
        assert where_clause is not None

        remaining = []
        for cond in get_first_level_and_conditions(where_clause):
            if cond not in prewhere_conditions:
                remaining.append(cond)

        if remaining:
            query.set_ast_condition(combine_and_conditions(remaining))
        else:
            query.set_ast_condition(None)

        if prewhere_conditions:
            query.set_prewhere_ast_condition(
                combine_and_conditions(prewhere_conditions))
        else:
            query.set_prewhere_ast_condition(None)
示例#12
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Move the highest-priority first-level AND conditions of the WHERE
        clause into PREWHERE.

        Candidate columns used inside a ``uniq`` call or a function whose name
        ends in ``If`` are excluded, as are columns in ``omit_if_final`` for
        FINAL queries. A condition is eligible when it is a FunctionCall whose
        function is in ALLOWED_OPERATORS and that references at least one
        remaining candidate column. Priority is the lowest position of any of
        its columns in the candidate list; at most
        ``max_prewhere_conditions`` are moved.
        """
        limit: int = (self.__max_prewhere_conditions
                      or settings.MAX_PREWHERE_CONDITIONS)
        candidate_keys = self.__prewhere_candidates

        # Columns appearing in uniq or -If aggregations must stay out of
        # PREWHERE: a query like `countIf(col=x) .. PREWHERE col=x` can make
        # the ClickHouse server crash.
        excluded_cols: Set[str] = {
            col.column_name
            for exp in query.get_all_expressions()
            if isinstance(exp, FunctionCall)
            and (exp.function_name == "uniq"
                 or exp.function_name.endswith("If"))
            for col in get_columns_in_expression(exp)
        }

        # Report every candidate column that gets excluded for this reason.
        for col_name in excluded_cols:
            if col_name not in candidate_keys:
                continue
            metrics.increment(
                "uniq_col_in_prewhere_candidate",
                tags={
                    "column": col_name,
                    "referrer": query_settings.referrer
                },
            )

        candidate_keys = [
            key for key in candidate_keys if key not in excluded_cols
        ]

        # When the query is FINAL we cannot move just any candidate condition
        # to PREWHERE. FINAL is applied after PREWHERE, so a moved condition
        # could drop rows that would otherwise be merged under FINAL.
        # Example: rewriting the group_id on an unmerge. If the group_id is
        # in the PREWHERE, FINAL will fail at merging the rows.
        # HACK: for FINAL queries, keep conditions on columns listed in
        # omit_if_final out of PREWHERE. ClickHouse has a bug affecting
        # queries with FINAL and PREWHERE on Low Cardinality and Nullable
        # columns.
        # https://github.com/ClickHouse/ClickHouse/issues/16171
        if query.get_from_clause().final and self.__omit_if_final:
            candidate_keys = [
                key for key in candidate_keys
                if key not in self.__omit_if_final
            ]

        if not candidate_keys:
            return

        where_clause = query.get_condition()
        if where_clause is None:
            return

        # Collect (priority, condition) pairs for every eligible condition.
        ranked = []
        for cond in get_first_level_and_conditions(where_clause):
            if not isinstance(cond, FunctionCall):
                continue
            if cond.function_name not in ALLOWED_OPERATORS:
                continue
            if not any(col.column_name in candidate_keys
                       for col in get_columns_in_expression(cond)):
                continue
            priority = min(
                candidate_keys.index(col.column_name)
                for col in get_columns_in_expression(cond)
                if col.column_name in candidate_keys)
            ranked.append((priority, cond))

        if not ranked:
            return

        # Stable sort: ties keep their order of appearance in the WHERE clause.
        ranked.sort(key=lambda pair: pair[0])
        prewhere_conditions = [cond for _, cond in ranked[:limit]]

        remaining = [
            cond for cond in get_first_level_and_conditions(where_clause)
            if cond not in prewhere_conditions
        ]

        if remaining:
            query.set_ast_condition(combine_and_conditions(remaining))
        else:
            query.set_ast_condition(None)

        if prewhere_conditions:
            query.set_prewhere_ast_condition(
                combine_and_conditions(prewhere_conditions))
        else:
            query.set_prewhere_ast_condition(None)
示例#13
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Move the highest-priority first-level AND conditions of the WHERE
        clause into PREWHERE.

        For FINAL queries, columns in ``omit_if_final`` are removed from the
        candidate list first. A condition is eligible when it is a
        FunctionCall whose function is in ALLOWED_OPERATORS and that
        references at least one candidate column. Priority is the lowest
        position of any of its columns in the candidate list; at most
        ``max_prewhere_conditions`` are moved.
        """
        limit: int = (self.__max_prewhere_conditions
                      or settings.MAX_PREWHERE_CONDITIONS)
        candidate_keys = self.__prewhere_candidates

        # When the query is FINAL we cannot move just any candidate condition
        # to PREWHERE. FINAL is applied after PREWHERE, so a moved condition
        # could drop rows that would otherwise be merged under FINAL.
        # Example: rewriting the group_id on an unmerge. If the group_id is
        # in the PREWHERE, FINAL will fail at merging the rows.
        # HACK: for FINAL queries, keep conditions on columns listed in
        # omit_if_final out of PREWHERE. ClickHouse has a bug affecting
        # queries with FINAL and PREWHERE on Low Cardinality and Nullable
        # columns.
        # https://github.com/ClickHouse/ClickHouse/issues/16171
        if query.get_from_clause().final and self.__omit_if_final:
            candidate_keys = [
                key for key in candidate_keys
                if key not in self.__omit_if_final
            ]

        if not candidate_keys:
            return

        where_clause = query.get_condition()
        if where_clause is None:
            return

        def references_candidate(cond: Expression) -> bool:
            return any(col.column_name in candidate_keys
                       for col in get_columns_in_expression(cond))

        def best_position(cond: Expression) -> int:
            return min(
                candidate_keys.index(col.column_name)
                for col in get_columns_in_expression(cond)
                if col.column_name in candidate_keys)

        eligible = [
            cond for cond in get_first_level_and_conditions(where_clause)
            if isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS
            and references_candidate(cond)
        ]
        if not eligible:
            return

        # Lowest position first; the sort is stable so ties keep WHERE order.
        by_priority = sorted(eligible, key=best_position)
        prewhere_conditions = by_priority[:limit]

        remaining = [
            cond for cond in get_first_level_and_conditions(where_clause)
            if cond not in prewhere_conditions
        ]

        new_where = combine_and_conditions(remaining) if remaining else None
        query.set_ast_condition(new_where)

        new_prewhere = (combine_and_conditions(prewhere_conditions)
                        if prewhere_conditions else None)
        query.set_prewhere_ast_condition(new_prewhere)