def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Rewrite conditions matched by CONDITION_PATTERN on the ``tags.key``
    column into a ``has(tags.key, <key>)`` call, gated by the
    "empty-string-tag-condition" experiment.

    When the matched function is ``ConditionFunctions.EQ`` the ``has`` call
    is wrapped in ``not``. An experiment value already recorded on the query
    is honoured before a new one is drawn.
    """
    experiment_name = "empty-string-tag-condition"

    def process_condition(exp: Expression) -> Expression:
        match = CONDITION_PATTERN.match(exp)
        if match is None:
            return exp
        if match.optional_string(KEY_COL_MAPPING_PARAM) != "tags.key":
            return exp

        tag_key = match.optional_string(KEY_MAPPING_PARAM)
        table = match.optional_string(TABLE_MAPPING_PARAM)
        has_call = FunctionCall(
            exp.alias,
            "has",
            (Column(None, table, "tags.key"), Literal(None, tag_key)),
        )

        # Narrows the type for the function_name access below.
        assert isinstance(exp, FunctionCall)
        if exp.function_name == ConditionFunctions.EQ:
            rewritten = FunctionCall(exp.alias, "not", (has_call,))
        else:
            rewritten = has_call

        recorded = query.get_experiment_value(experiment_name)
        if recorded is not None:
            return rewritten if recorded == "true" else exp

        # No decision recorded yet: always rewrite under TESTING,
        # otherwise rewrite roughly half of the time.
        enabled = settings.TESTING or random.random() < 0.5
        query.add_experiment(experiment_name, "true" if enabled else "false")
        return rewritten if enabled else exp

    where_clause = query.get_condition()
    if where_clause is None:
        return
    query.set_ast_condition(where_clause.transform(process_condition))
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Apply ``__replace_with_hash`` to the WHERE and HAVING clauses.

    Nothing is changed when the config read through
    ``get_config(self.__killswitch, 1)`` is falsy, when either clause is
    classified as NOT_OPTIMIZABLE, or when neither clause is classified as
    OPTIMIZABLE.
    """
    if not get_config(self.__killswitch, 1):
        return

    # A missing clause is IRRELEVANT: it neither blocks nor triggers the
    # rewrite.
    where_clause = query.get_condition()
    where_class = (
        self.__classify_combined_conditions(where_clause)
        if where_clause is not None
        else ConditionClass.IRRELEVANT
    )
    if where_class == ConditionClass.NOT_OPTIMIZABLE:
        return

    having_clause = query.get_having()
    having_class = (
        self.__classify_combined_conditions(having_clause)
        if having_clause is not None
        else ConditionClass.IRRELEVANT
    )
    if having_class == ConditionClass.NOT_OPTIMIZABLE:
        return

    if (
        where_class != ConditionClass.OPTIMIZABLE
        and having_class != ConditionClass.OPTIMIZABLE
    ):
        return

    metrics.increment("optimizable_query")

    if where_clause is not None:
        query.set_ast_condition(where_clause.transform(self.__replace_with_hash))
    if having_clause is not None:
        query.set_ast_having(having_clause.transform(self.__replace_with_hash))
def _replace_ast_condition(
    query: Query, field: str, operator: str, new_operand: Expression
) -> None:
    """
    Replace the second operand of matching conditions in the top level AND
    of the query WHERE clause.

    A condition matches when it is a call to the function mapped from
    ``operator`` (through OPERATOR_TO_FUNCTION) whose first parameter is the
    column ``field``. Non-matching conditions are kept as they are. Nothing
    happens when the query has no condition.
    """

    def swap_operand(candidate: Expression) -> Expression:
        pattern = FunctionCall(
            String(OPERATOR_TO_FUNCTION[operator]),
            (Param("column", Column(None, String(field))), AnyExpression()),
        )
        found = pattern.match(candidate)
        if found is None:
            return candidate
        # Keep the matched column, swap only the right-hand side.
        return replace(
            candidate, parameters=(found.expression("column"), new_operand)
        )

    where_clause = query.get_condition_from_ast()
    if where_clause is None:
        return

    rewritten = list(map(swap_operand, get_first_level_and_conditions(where_clause)))
    query.set_ast_condition(combine_and_conditions(rewritten))
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Reduce and classify the WHERE and HAVING clauses, then apply
    ``__replace_with_hash`` to them when the rewrite is applicable.

    The reduced clause returned by
    ``__get_reduced_and_classified_query_clause`` is written back to the
    query before its class is inspected, so it is stored even when the
    processor bails out afterwards. The hash rewrite itself runs only when
    neither clause is NOT_OPTIMIZABLE and at least one is OPTIMIZABLE.
    """
    if not get_config(self.__killswitch, 1):
        return

    where_clause, where_class = self.__get_reduced_and_classified_query_clause(
        query.get_condition(), query
    )
    query.set_ast_condition(where_clause)
    if where_class == ConditionClass.NOT_OPTIMIZABLE:
        return

    having_clause, having_class = self.__get_reduced_and_classified_query_clause(
        query.get_having(), query
    )
    query.set_ast_having(having_clause)
    if having_class == ConditionClass.NOT_OPTIMIZABLE:
        return

    if (
        where_class != ConditionClass.OPTIMIZABLE
        and having_class != ConditionClass.OPTIMIZABLE
    ):
        return

    metrics.increment("optimizable_query")
    query.add_experiment("tags_hashmap_applied", 1)

    if where_clause is not None:
        query.set_ast_condition(where_clause.transform(self.__replace_with_hash))
    if having_clause is not None:
        query.set_ast_having(having_clause.transform(self.__replace_with_hash))
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Rewrite conditions matched by CONDITION_PATTERN on the ``tags.key``
    column into a ``has(tags.key, <key>)`` call.

    When the matched function is ``ConditionFunctions.EQ`` the ``has`` call
    is wrapped in ``not``. Any other expression is left untouched.
    """

    def process_condition(exp: Expression) -> Expression:
        match = CONDITION_PATTERN.match(exp)
        if match is None:
            return exp
        if match.optional_string(KEY_COL_MAPPING_PARAM) != "tags.key":
            return exp

        tag_key = match.optional_string(KEY_MAPPING_PARAM)
        table = match.optional_string(TABLE_MAPPING_PARAM)
        has_call = FunctionCall(
            exp.alias,
            "has",
            (Column(None, table, "tags.key"), Literal(None, tag_key)),
        )

        # Narrows the type for the function_name access below.
        assert isinstance(exp, FunctionCall)
        if exp.function_name != ConditionFunctions.EQ:
            return has_call
        return FunctionCall(exp.alias, "not", (has_call,))

    where_clause = query.get_condition()
    if where_clause is None:
        return
    query.set_ast_condition(where_clause.transform(process_condition))
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Move top-level AND conditions of the WHERE clause into PREWHERE.

    A condition is eligible when it is a FunctionCall whose function is in
    ALLOWED_OPERATORS and that references at least one prewhere candidate
    column. Eligible conditions are ranked by the best (lowest) position of
    their columns in the candidate list, and at most
    ``max_prewhere_conditions`` of them are moved.
    """
    limit: int = self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
    candidate_keys = self.__prewhere_candidates

    # HACK: on a FINAL query, keep conditions on the omit_if_final columns
    # out of PREWHERE. ClickHouse has a bug affecting FINAL + PREWHERE
    # queries on Low Cardinality and Nullable columns:
    # https://github.com/ClickHouse/ClickHouse/issues/16171
    if query.get_from_clause().final and self.__omit_if_final:
        candidate_keys = [
            key for key in candidate_keys if key not in self.__omit_if_final
        ]

    if not candidate_keys:
        return

    where_clause = query.get_condition_from_ast()
    if where_clause is None:
        return

    # Pair every eligible condition with the best rank of its columns.
    ranked = []
    for cond in get_first_level_and_conditions(where_clause):
        if not isinstance(cond, FunctionCall):
            continue
        if cond.function_name not in ALLOWED_OPERATORS:
            continue
        ranks = [
            candidate_keys.index(col.column_name)
            for col in get_columns_in_expression(cond)
            if col.column_name in candidate_keys
        ]
        if ranks:
            ranked.append((min(ranks), cond))

    if not ranked:
        return

    # Stable sort: conditions with equal rank keep their WHERE order.
    ranked.sort(key=lambda rank_and_cond: rank_and_cond[0])
    prewhere_conditions = [cond for _, cond in ranked[:limit]]

    remaining = [
        cond
        for cond in get_first_level_and_conditions(where_clause)
        if cond not in prewhere_conditions
    ]

    new_where = combine_and_conditions(remaining) if remaining else None
    new_prewhere = (
        combine_and_conditions(prewhere_conditions) if prewhere_conditions else None
    )
    query.set_ast_condition(new_where)
    query.set_prewhere_ast_condition(new_prewhere)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Move top-level AND conditions of the WHERE clause into PREWHERE, using
    the prewhere candidates exposed by the query's from clause.

    A condition is eligible when it is a FunctionCall whose function is in
    ALLOWED_OPERATORS and that references at least one candidate column.
    Eligible conditions are ordered by the best (lowest) position of their
    columns in the candidate list, and at most ``max_prewhere_conditions``
    of them are moved.
    """
    limit: int = self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS

    candidate_keys = query.get_from_clause().prewhere_candidates
    if not candidate_keys:
        return

    where_clause = query.get_condition_from_ast()
    if where_clause is None:
        return

    def is_eligible(cond: Expression) -> bool:
        return (
            isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS
            and any(
                col.column_name in candidate_keys
                for col in get_columns_in_expression(cond)
            )
        )

    eligible = [
        (get_columns_in_expression(cond), cond)
        for cond in get_first_level_and_conditions(where_clause)
        if is_eligible(cond)
    ]
    if not eligible:
        return

    def best_rank(columns_and_cond) -> int:
        # Lowest index in the candidate list among the condition's columns.
        columns, _ = columns_and_cond
        return min(
            candidate_keys.index(col.column_name)
            for col in columns
            if col.column_name in candidate_keys
        )

    # sorted() is stable: conditions with equal rank keep their WHERE order.
    ordered = sorted(eligible, key=best_rank)
    prewhere_conditions = [cond for _, cond in ordered[:limit]]

    remaining = [
        cond
        for cond in get_first_level_and_conditions(where_clause)
        if cond not in prewhere_conditions
    ]

    if remaining:
        query.set_ast_condition(combine_and_conditions(remaining))
    else:
        query.set_ast_condition(None)

    if prewhere_conditions:
        query.set_prewhere_ast_condition(combine_and_conditions(prewhere_conditions))
    else:
        query.set_prewhere_ast_condition(None)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Run ``self.process_condition`` over the WHERE and PREWHERE clauses and
    emit a "query_processed" metric when ``self.formatted`` is set.
    """
    where_clause = query.get_condition_from_ast()
    if where_clause:
        rewritten_where = where_clause.transform(self.process_condition)
        query.set_ast_condition(rewritten_where)

    prewhere_clause = query.get_prewhere_ast()
    if prewhere_clause:
        rewritten_prewhere = prewhere_clause.transform(self.process_condition)
        query.set_prewhere_ast_condition(rewritten_prewhere)

    # NOTE(review): self.formatted is presumably populated by
    # process_condition during the transforms above; confirm in the class.
    if not self.formatted:
        return
    metrics.increment("query_processed", tags={"type": self.formatted})
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Apply ``_process_expressions`` to the query expressions outside the
    condition, then rewrite the condition.

    The condition first goes through ``__process_optimizable_condition``;
    if that leaves it unchanged, the original condition is transformed with
    ``_process_expressions`` instead.
    """
    query.transform_expressions(
        self._process_expressions, skip_transform_condition=True
    )

    where_clause = query.get_condition()
    if where_clause is None:
        return

    rewritten = where_clause.transform(self.__process_optimizable_condition)
    if rewritten == where_clause:
        # The optimized path changed nothing: fall back to the generic one.
        rewritten = where_clause.transform(self._process_expressions)
    query.set_ast_condition(rewritten)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Apply ``_process_expressions`` to the query expressions outside the
    condition, then rewrite the condition.

    A condition flagged by ``__contains_unoptimizable_condition`` goes
    through ``_process_expressions`` directly. Otherwise it goes through
    ``__process_optimizable_condition`` and, if that leaves it unchanged,
    through ``_process_expressions`` as a fallback.
    """
    query.transform_expressions(
        self._process_expressions, skip_transform_condition=True
    )

    where_clause = query.get_condition()
    if where_clause is None:
        return

    if self.__contains_unoptimizable_condition(where_clause):
        query.set_ast_condition(where_clause.transform(self._process_expressions))
        return

    rewritten = where_clause.transform(self.__process_optimizable_condition)
    if where_clause == rewritten:
        # The optimized path changed nothing: fall back to the generic one.
        rewritten = rewritten.transform(self._process_expressions)
    query.set_ast_condition(rewritten)
def _update_conditions(
    self, query: Query, prewhere_conditions: Sequence[Expression]
) -> None:
    """
    Split the query condition between WHERE and PREWHERE.

    Top-level AND conditions found in ``prewhere_conditions`` are removed
    from the WHERE clause, and ``prewhere_conditions`` becomes the PREWHERE
    clause. A side left without conditions is set to None.
    """
    where_clause = query.get_condition_from_ast()
    # Callers are expected to guarantee a condition exists; the assert
    # narrows the Optional for mypy.
    assert where_clause is not None

    remaining = [
        cond
        for cond in get_first_level_and_conditions(where_clause)
        if cond not in prewhere_conditions
    ]

    new_where = combine_and_conditions(remaining) if remaining else None
    new_prewhere = (
        combine_and_conditions(prewhere_conditions) if prewhere_conditions else None
    )
    query.set_ast_condition(new_where)
    query.set_prewhere_ast_condition(new_prewhere)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Move top-level AND conditions of the WHERE clause into PREWHERE.

    Candidate columns that appear inside a ``uniq`` call or a function whose
    name ends in ``If`` are excluded, as are the omit_if_final columns on a
    FINAL query. Of the remaining eligible conditions (FunctionCalls in
    ALLOWED_OPERATORS referencing a candidate column), at most
    ``max_prewhere_conditions`` are moved, ranked by the best (lowest)
    position of their columns in the candidate list.
    """
    limit: int = self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
    candidate_keys = self.__prewhere_candidates

    # Columns used in uniq or -If aggregations must stay out of PREWHERE:
    # a query like `countIf(col=x) .. PREWHERE col=x` can make the
    # ClickHouse server crash.
    excluded: Set[str] = set()
    for exp in query.get_all_expressions():
        if not isinstance(exp, FunctionCall):
            continue
        if exp.function_name == "uniq" or exp.function_name.endswith("If"):
            excluded.update(
                col.column_name for col in get_columns_in_expression(exp)
            )

    # Record every candidate that gets dropped for this reason.
    for name in excluded:
        if name in candidate_keys:
            metrics.increment(
                "uniq_col_in_prewhere_candidate",
                tags={"column": name, "referrer": query_settings.referrer},
            )

    candidate_keys = [key for key in candidate_keys if key not in excluded]

    # On a FINAL query not every candidate condition can go to PREWHERE.
    # FINAL is applied after PREWHERE, so a prewhere condition can exclude
    # rows that FINAL would otherwise have merged. Example: rewriting the
    # group_id on an unmerge; with group_id in the prewhere, FINAL will
    # fail at merging the rows.
    # HACK: on a FINAL query, keep conditions on the omit_if_final columns
    # out of PREWHERE. ClickHouse has a bug affecting FINAL + PREWHERE
    # queries on Low Cardinality and Nullable columns:
    # https://github.com/ClickHouse/ClickHouse/issues/16171
    if query.get_from_clause().final and self.__omit_if_final:
        candidate_keys = [
            key for key in candidate_keys if key not in self.__omit_if_final
        ]

    if not candidate_keys:
        return

    where_clause = query.get_condition()
    if where_clause is None:
        return

    # Pair every eligible condition with the best rank of its columns.
    ranked = []
    for cond in get_first_level_and_conditions(where_clause):
        if not isinstance(cond, FunctionCall):
            continue
        if cond.function_name not in ALLOWED_OPERATORS:
            continue
        ranks = [
            candidate_keys.index(col.column_name)
            for col in get_columns_in_expression(cond)
            if col.column_name in candidate_keys
        ]
        if ranks:
            ranked.append((min(ranks), cond))

    if not ranked:
        return

    # Stable sort: conditions with equal rank keep their WHERE order.
    ranked.sort(key=lambda rank_and_cond: rank_and_cond[0])
    prewhere_conditions = [cond for _, cond in ranked[:limit]]

    remaining = [
        cond
        for cond in get_first_level_and_conditions(where_clause)
        if cond not in prewhere_conditions
    ]

    new_where = combine_and_conditions(remaining) if remaining else None
    new_prewhere = (
        combine_and_conditions(prewhere_conditions) if prewhere_conditions else None
    )
    query.set_ast_condition(new_where)
    query.set_prewhere_ast_condition(new_prewhere)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Move top-level AND conditions of the WHERE clause into PREWHERE.

    On a FINAL query the omit_if_final columns are removed from the
    candidate list first. A condition is eligible when it is a FunctionCall
    whose function is in ALLOWED_OPERATORS and that references at least one
    candidate column. Eligible conditions are ordered by the best (lowest)
    position of their columns in the candidate list, and at most
    ``max_prewhere_conditions`` of them are moved.
    """
    limit: int = self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
    candidate_keys = self.__prewhere_candidates

    # On a FINAL query not every candidate condition can go to PREWHERE.
    # FINAL is applied after PREWHERE, so a prewhere condition can exclude
    # rows that FINAL would otherwise have merged. Example: rewriting the
    # group_id on an unmerge; with group_id in the prewhere, FINAL will
    # fail at merging the rows.
    # HACK: on a FINAL query, keep conditions on the omit_if_final columns
    # out of PREWHERE. ClickHouse has a bug affecting FINAL + PREWHERE
    # queries on Low Cardinality and Nullable columns:
    # https://github.com/ClickHouse/ClickHouse/issues/16171
    if query.get_from_clause().final and self.__omit_if_final:
        candidate_keys = [
            key for key in candidate_keys if key not in self.__omit_if_final
        ]

    if not candidate_keys:
        return

    where_clause = query.get_condition()
    if where_clause is None:
        return

    def is_eligible(cond: Expression) -> bool:
        return (
            isinstance(cond, FunctionCall)
            and cond.function_name in ALLOWED_OPERATORS
            and any(
                col.column_name in candidate_keys
                for col in get_columns_in_expression(cond)
            )
        )

    eligible = [
        (get_columns_in_expression(cond), cond)
        for cond in get_first_level_and_conditions(where_clause)
        if is_eligible(cond)
    ]
    if not eligible:
        return

    def best_rank(columns_and_cond) -> int:
        # Lowest index in the candidate list among the condition's columns.
        columns, _ = columns_and_cond
        return min(
            candidate_keys.index(col.column_name)
            for col in columns
            if col.column_name in candidate_keys
        )

    # sorted() is stable: conditions with equal rank keep their WHERE order.
    ordered = sorted(eligible, key=best_rank)
    prewhere_conditions = [cond for _, cond in ordered[:limit]]

    remaining = [
        cond
        for cond in get_first_level_and_conditions(where_clause)
        if cond not in prewhere_conditions
    ]

    if remaining:
        query.set_ast_condition(combine_and_conditions(remaining))
    else:
        query.set_ast_condition(None)

    if prewhere_conditions:
        query.set_prewhere_ast_condition(combine_and_conditions(prewhere_conditions))
    else:
        query.set_prewhere_ast_condition(None)