Exemplo n.º 1
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        max_prewhere_conditions: int = (self.__max_prewhere_conditions
                                        or settings.MAX_PREWHERE_CONDITIONS)
        prewhere_keys = self.__prewhere_candidates

        # HACK: If query has final, do not move any condition on a column in the
        # omit_if_final list to prewhere.
        # There is a bug in ClickHouse affecting queries with FINAL and PREWHERE
        # with Low Cardinality and Nullable columns.
        # https://github.com/ClickHouse/ClickHouse/issues/16171
        if query.get_from_clause().final and self.__omit_if_final:
            prewhere_keys = [
                key for key in prewhere_keys if key not in self.__omit_if_final
            ]

        if not prewhere_keys:
            return

        ast_condition = query.get_condition_from_ast()
        if ast_condition is None:
            return

        prewhere_candidates = [
            (get_columns_in_expression(cond), cond)
            for cond in get_first_level_and_conditions(ast_condition)
            if isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS and any(
                col.column_name in prewhere_keys
                for col in get_columns_in_expression(cond))
        ]
        if not prewhere_candidates:
            return

        # Use the condition that has the highest priority (based on the
        # position of its columns in the prewhere keys list)
        sorted_candidates = sorted(
            [(
                min(
                    prewhere_keys.index(col.column_name)
                    for col in cols if col.column_name in prewhere_keys),
                cond,
            ) for cols, cond in prewhere_candidates],
            key=lambda priority_and_col: priority_and_col[0],
        )
        prewhere_conditions = [cond for _, cond in sorted_candidates
                               ][:max_prewhere_conditions]

        new_conditions = [
            cond for cond in get_first_level_and_conditions(ast_condition)
            if cond not in prewhere_conditions
        ]

        query.set_ast_condition(
            combine_and_conditions(new_conditions) if new_conditions else None)
        query.set_prewhere_ast_condition(
            combine_and_conditions(prewhere_conditions
                                   ) if prewhere_conditions else None)
Exemplo n.º 2
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        max_prewhere_conditions: int = (
            self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
        )
        prewhere_keys = query.get_from_clause().prewhere_candidates
        if not prewhere_keys:
            return

        ast_condition = query.get_condition_from_ast()
        if ast_condition is None:
            return

        prewhere_candidates = [
            (get_columns_in_expression(cond), cond)
            for cond in get_first_level_and_conditions(ast_condition)
            if isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS
            and any(
                col.column_name in prewhere_keys
                for col in get_columns_in_expression(cond)
            )
        ]
        if not prewhere_candidates:
            return

        # Use the condition that has the highest priority (based on the
        # position of its columns in the prewhere keys list)
        sorted_candidates = sorted(
            [
                (
                    min(
                        prewhere_keys.index(col.column_name)
                        for col in cols
                        if col.column_name in prewhere_keys
                    ),
                    cond,
                )
                for cols, cond in prewhere_candidates
            ],
            key=lambda priority_and_col: priority_and_col[0],
        )
        prewhere_conditions = [cond for _, cond in sorted_candidates][
            :max_prewhere_conditions
        ]

        new_conditions = [
            cond
            for cond in get_first_level_and_conditions(ast_condition)
            if cond not in prewhere_conditions
        ]

        query.set_ast_condition(
            combine_and_conditions(new_conditions) if new_conditions else None
        )
        query.set_prewhere_ast_condition(
            combine_and_conditions(prewhere_conditions) if prewhere_conditions else None
        )
Exemplo n.º 3
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        condition = query.get_condition_from_ast()
        if condition:
            query.set_ast_condition(condition.transform(self.process_condition))

        prewhere = query.get_prewhere_ast()
        if prewhere:
            query.set_prewhere_ast_condition(prewhere.transform(self.process_condition))

        if self.formatted:
            metrics.increment("query_processed", tags={"type": self.formatted})
Exemplo n.º 4
0
    def _update_conditions(self, query: Query,
                           prewhere_conditions: Sequence[Expression]) -> None:
        ast_condition = query.get_condition_from_ast()
        # This should never be None at this point, but for mypy this can be None.
        assert ast_condition is not None

        new_conditions = [
            cond for cond in get_first_level_and_conditions(ast_condition)
            if cond not in prewhere_conditions
        ]

        query.set_ast_condition(
            combine_and_conditions(new_conditions) if new_conditions else None)
        query.set_prewhere_ast_condition(
            combine_and_conditions(prewhere_conditions
                                   ) if prewhere_conditions else None)
Exemplo n.º 5
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        max_prewhere_conditions: int = (self.__max_prewhere_conditions
                                        or settings.MAX_PREWHERE_CONDITIONS)
        prewhere_keys = self.__prewhere_candidates

        # We remove the candidates that appear in a uniq or -If aggregations
        # because a query like `countIf(col=x) .. PREWHERE col=x` can make
        # the Clickhouse server crash.
        uniq_cols: Set[str] = set()
        expressions = query.get_all_expressions()
        for exp in expressions:
            if isinstance(exp,
                          FunctionCall) and (exp.function_name == "uniq" or
                                             exp.function_name.endswith("If")):
                columns = get_columns_in_expression(exp)
                for c in columns:
                    uniq_cols.add(c.column_name)

        for col in uniq_cols:
            if col in prewhere_keys:
                metrics.increment(
                    "uniq_col_in_prewhere_candidate",
                    tags={
                        "column": col,
                        "referrer": query_settings.referrer
                    },
                )

        prewhere_keys = [key for key in prewhere_keys if key not in uniq_cols]

        # In case the query is final we cannot simply add any candidate
        # condition to the prewhere.
        # Final is applied after prewhere, so there are cases where moving
        # conditions to the prewhere could exclude from the result sets
        # rows that would be merged under the `final` condition.
        # Example, rewriting the group_id on an unmerge. If the group_id
        # is in the prewhere, final wil fail at merging the rows.
        # HACK: If query has final, do not move any condition on a column in the
        # omit_if_final list to prewhere.
        # There is a bug in ClickHouse affecting queries with FINAL and PREWHERE
        # with Low Cardinality and Nullable columns.
        # https://github.com/ClickHouse/ClickHouse/issues/16171
        if query.get_from_clause().final and self.__omit_if_final:
            prewhere_keys = [
                key for key in prewhere_keys if key not in self.__omit_if_final
            ]

        if not prewhere_keys:
            return

        ast_condition = query.get_condition()
        if ast_condition is None:
            return

        prewhere_candidates = [
            (get_columns_in_expression(cond), cond)
            for cond in get_first_level_and_conditions(ast_condition)
            if isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS and any(
                col.column_name in prewhere_keys
                for col in get_columns_in_expression(cond))
        ]
        if not prewhere_candidates:
            return

        # Use the condition that has the highest priority (based on the
        # position of its columns in the prewhere keys list)
        sorted_candidates = sorted(
            [(
                min(
                    prewhere_keys.index(col.column_name)
                    for col in cols if col.column_name in prewhere_keys),
                cond,
            ) for cols, cond in prewhere_candidates],
            key=lambda priority_and_col: priority_and_col[0],
        )
        prewhere_conditions = [cond for _, cond in sorted_candidates
                               ][:max_prewhere_conditions]

        new_conditions = [
            cond for cond in get_first_level_and_conditions(ast_condition)
            if cond not in prewhere_conditions
        ]

        query.set_ast_condition(
            combine_and_conditions(new_conditions) if new_conditions else None)
        query.set_prewhere_ast_condition(
            combine_and_conditions(prewhere_conditions
                                   ) if prewhere_conditions else None)
Exemplo n.º 6
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        max_prewhere_conditions: int = (self.__max_prewhere_conditions
                                        or settings.MAX_PREWHERE_CONDITIONS)
        prewhere_keys = self.__prewhere_candidates

        # In case the query is final we cannot simply add any candidate
        # condition to the prewhere.
        # Final is applied after prewhere, so there are cases where moving
        # conditions to the prewhere could exclude from the result sets
        # rows that would be merged under the `final` condition.
        # Example, rewriting the group_id on an unmerge. If the group_id
        # is in the prewhere, final wil fail at merging the rows.
        # HACK: If query has final, do not move any condition on a column in the
        # omit_if_final list to prewhere.
        # There is a bug in ClickHouse affecting queries with FINAL and PREWHERE
        # with Low Cardinality and Nullable columns.
        # https://github.com/ClickHouse/ClickHouse/issues/16171
        if query.get_from_clause().final and self.__omit_if_final:
            prewhere_keys = [
                key for key in prewhere_keys if key not in self.__omit_if_final
            ]

        if not prewhere_keys:
            return

        ast_condition = query.get_condition()
        if ast_condition is None:
            return

        prewhere_candidates = [
            (get_columns_in_expression(cond), cond)
            for cond in get_first_level_and_conditions(ast_condition)
            if isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS and any(
                col.column_name in prewhere_keys
                for col in get_columns_in_expression(cond))
        ]
        if not prewhere_candidates:
            return

        # Use the condition that has the highest priority (based on the
        # position of its columns in the prewhere keys list)
        sorted_candidates = sorted(
            [(
                min(
                    prewhere_keys.index(col.column_name)
                    for col in cols if col.column_name in prewhere_keys),
                cond,
            ) for cols, cond in prewhere_candidates],
            key=lambda priority_and_col: priority_and_col[0],
        )
        prewhere_conditions = [cond for _, cond in sorted_candidates
                               ][:max_prewhere_conditions]

        new_conditions = [
            cond for cond in get_first_level_and_conditions(ast_condition)
            if cond not in prewhere_conditions
        ]

        query.set_ast_condition(
            combine_and_conditions(new_conditions) if new_conditions else None)
        query.set_prewhere_ast_condition(
            combine_and_conditions(prewhere_conditions
                                   ) if prewhere_conditions else None)