def __init__(self, uuid_columns: Set[str]) -> None:
    """
    Pre-build the matchers used to recognise conditions on UUID columns.

    :param uuid_columns: Names of the columns to be treated as UUID columns.
    """
    self.__unique_uuid_columns = uuid_columns
    self.__uuid_column_match = Or([String(u_col) for u_col in uuid_columns])

    membership_ops = (ConditionFunctions.IN, ConditionFunctions.NOT_IN)

    # IN / NOT IN: the pattern returned by `formatted_uuid_pattern()` on the
    # left, and a `tuple(...)` call (captured as "params") on the right.
    in_operator_match = Or(tuple(String(op) for op in membership_ops))
    uuid_operand = self.formatted_uuid_pattern()
    tuple_operand = Param("params", FunctionCallMatch(String("tuple"), None))
    self.uuid_in_condition = FunctionCallMatch(
        in_operator_match, (uuid_operand, tuple_operand)
    )

    # Every other operator listed in FUNCTION_TO_OPERATOR: each of the two
    # operands may be either an (optional) string literal or the UUID
    # pattern. The "_0" / "_1" suffixes keep the two sides apart.
    comparison_operator_match = Or(
        [String(op) for op in FUNCTION_TO_OPERATOR if op not in membership_ops]
    )
    first_operand = Or(
        (
            Param("literal_0", LiteralMatch(AnyOptionalString())),
            self.formatted_uuid_pattern("_0"),
        )
    )
    second_operand = Or(
        (
            Param("literal_1", LiteralMatch(AnyOptionalString())),
            self.formatted_uuid_pattern("_1"),
        )
    )
    self.uuid_condition = FunctionCallMatch(
        comparison_operator_match, (first_operand, second_operand)
    )

    # Starts unset; nothing in this constructor assigns a string to it.
    self.formatted: Optional[str] = None
def extract_granularity_from_query(query: Query, column: str) -> Optional[int]:
    """
    Extract the `granularity` from the `groupby` clause of the query.

    The patterns below are essentially the reverse of
    `TimeSeriesProcessor.__group_time_function`.

    :param query: The query whose group-by expressions are inspected.
    :param column: Name of the time column the grouping must be applied to.
    :return: The granularity found, or None when no group-by expression
        matches either pattern.
    """
    groupby = query.get_groupby()
    time_column = ColumnMatch(None, String(column))

    # Pattern 1: a named rounding function applied to the column. The function
    # name is captured so it can be looked up in GRANULARITY_MAPPING.
    rounding_functions = (
        "toStartOfHour",
        "toStartOfMinute",
        "toStartOfDay",
        "toDate",
    )
    named_function_match = FunctionCallMatch(
        Param("time_fn", Or([String(name) for name in rounding_functions])),
        (time_column,),
        with_optionals=True,
    )

    # Pattern 2:
    #   toDateTime(multiply(intDiv(toUInt32(column), <int>), <int>), <str>)
    # Both integer literals are captured under the name "granularity".
    granularity_literal = LiteralMatch(Param("granularity", Any(int)))
    bucket_index = FunctionCallMatch(
        String("intDiv"),
        (
            FunctionCallMatch(String("toUInt32"), (time_column,)),
            granularity_literal,
        ),
    )
    bucket_start = FunctionCallMatch(
        String("multiply"), (bucket_index, granularity_literal)
    )
    arithmetic_match = FunctionCallMatch(
        String("toDateTime"), (bucket_start, LiteralMatch(Any(str)))
    )

    # Walk every node yielded by iterating each group-by expression, in order,
    # and stop at the first one that matches either pattern.
    for node in (expr for top_expr in groupby for expr in top_expr):
        named_result = named_function_match.match(node)
        if named_result is not None:
            return GRANULARITY_MAPPING[named_result.string("time_fn")]

        arithmetic_result = arithmetic_match.match(node)
        if arithmetic_result is not None:
            return arithmetic_result.integer("granularity")

    return None
class DefaultIfNullCurriedFunctionMapper(CurriedFunctionCallMapper):
    """
    If a curried function is being called on a column that doesn't exist, or is
    being called on NULL, change the entire function to be NULL.
    """

    # Matches `identity(<literal>)`.
    # NOTE(review): `LiteralMatch()` is given no value constraint here, so this
    # presumably matches identity() of any literal, not only NULL - confirm
    # that is intended.
    function_match = FunctionCallMatch(StringMatch("identity"), (LiteralMatch(),))

    def attempt_map(
        self,
        expression: CurriedFunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[Union[CurriedFunctionCall, FunctionCall]]:
        """
        Collapse the curried call to `identity(NULL)` when any translated
        parameter matches `function_match`.

        :param expression: The curried function call to inspect.
        :param children_translator: Translator applied to the internal function
            and to every parameter before matching.
        :return: `identity(Literal(None, None), expression.alias)` if a
            parameter matched, otherwise None.
        """
        translated_internal = expression.internal_function.accept(children_translator)
        assert isinstance(translated_internal, FunctionCall)  # mypy

        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # All impossible columns that have been converted to NULL will be the
        # identity function, so a single identity parameter is enough to
        # collapse the entire expression.
        has_null_param = any(
            self.function_match.match(param) is not None
            for param in translated_params
        )
        if has_null_param:
            return identity(Literal(None, None), expression.alias)

        return None
class DefaultIfNullFunctionMapper(FunctionCallMapper):
    """
    If a function is being called on a column that doesn't exist, or is being
    called on NULL, change the entire function to be NULL.
    """

    # Matches `identity(<literal>)` where the literal value's type is NoneType.
    function_match = FunctionCallMatch(
        StringMatch("identity"), (LiteralMatch(value=Any(type(None))),)
    )

    def attempt_map(
        self,
        expression: FunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        """
        Collapse the function call to `identity(NULL)` when any translated
        parameter matches `function_match`.

        :param expression: The function call to inspect.
        :param children_translator: Translator applied to every parameter
            before matching.
        :return: `identity(Literal(None, None), expression.alias)` if a
            parameter matched; None for logical functions and when nothing
            matched.
        """
        # HACK: Quick fix to avoid this function dropping important conditions
        # from the query
        if expression.function_name in {"and", "or", "xor"}:
            return None

        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # All impossible columns will have been converted to the identity
        # function, so a single identity parameter is enough to collapse the
        # entire expression.
        has_null_param = any(
            self.function_match.match(param) is not None
            for param in translated_params
        )
        if has_null_param:
            return identity(Literal(None, None), expression.alias)

        return None
class DefaultIfNullCurriedFunctionMapper(CurriedFunctionCallMapper):
    """
    If a curried function is being called on a column that doesn't exist, or is
    being called on NULL, change the entire function to be NULL.
    """

    # Matches `ifNull(<literal>, <literal>)`.
    function_match = FunctionCallMatch(
        StringMatch("ifNull"), (LiteralMatch(), LiteralMatch())
    )

    def attempt_map(
        self,
        expression: CurriedFunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[CurriedFunctionCall]:
        """
        Replace the curried call with an `<name>OrNull` variant fed with NULL
        parameters when every translated parameter is NULL.

        :param expression: The curried function call to inspect.
        :param children_translator: Translator applied to the internal function
            and to every parameter before inspection.
        :return: The replacement curried call, or None when there are no
            parameters or at least one parameter is not NULL.
        """
        translated_internal = expression.internal_function.accept(children_translator)
        assert isinstance(translated_internal, FunctionCall)  # mypy

        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )
        if not translated_params:
            return None

        # A parameter counts as NULL when it is either a wrapped function that
        # has been converted to ifNull(NULL, NULL), or a literal whose value is
        # None.
        every_param_is_null = all(
            self.function_match.match(param) is not None
            or (isinstance(param, Literal) and param.value is None)
            for param in translated_params
        )
        if not every_param_is_null:
            return None

        # Currently curried function mappers require returning other curried
        # functions. So return this to keep the mapper happy.
        return CurriedFunctionCall(
            alias=expression.alias,
            internal_function=FunctionCall(
                None,
                f"{translated_internal.function_name}OrNull",
                translated_internal.parameters,
            ),
            parameters=tuple(Literal(None, None) for _ in translated_params),
        )
def __init__(self, columns: Set[str]):
    """
    Pre-build the condition matchers for the given columns.

    :param columns: Names of the columns whose conditions should be matched.
    """
    self.columns = columns

    # Operand patterns shared by the matchers below.
    column_operand = Param(
        "col", ColumnMatch(None, Or([String(col) for col in columns]))
    )
    string_operand = Param("literal", LiteralMatch(AnyMatch(str)))

    membership_functions = (ConditionFunctions.IN, ConditionFunctions.NOT_IN)

    # Any operator from FUNCTION_TO_OPERATOR except IN / NOT IN.
    comparison_operator = Param(
        "operator",
        Or(
            [
                String(fn)
                for fn in FUNCTION_TO_OPERATOR
                if fn not in membership_functions
            ]
        ),
    )
    membership_operator = Param(
        "operator", Or(tuple(String(fn) for fn in membership_functions))
    )

    # Binary comparisons are accepted with the column on either side; `has`
    # is only accepted as has(column, literal).
    self.__condition_matcher = Or(
        [
            FunctionCallMatch(comparison_operator, (string_operand, column_operand)),
            FunctionCallMatch(comparison_operator, (column_operand, string_operand)),
            FunctionCallMatch(
                Param("operator", String("has")), (column_operand, string_operand)
            ),
        ]
    )

    # IN / NOT IN against a `tuple(...)` call whose parameters are all
    # literals; the tuple call is captured as "tuple".
    literal_tuple = Param(
        "tuple",
        FunctionCallMatch(String("tuple"), all_parameters=LiteralMatch()),
    )
    self.__in_condition_matcher = FunctionCallMatch(
        membership_operator, (column_operand, literal_tuple)
    )
def build_match(
    col: str, ops: Sequence[str], param_type: Any
) -> Or[Expression]:
    """
    Build a matcher for conditions on `col` against literals of `param_type`.

    :param col: Name of the column the condition must reference.
    :param ops: Function names accepted for the plain `op(col, literal)` form.
    :param param_type: Type that every literal operand must have.
    :return: A matcher accepting either `op(col, literal)` or
        `IN(col, array(...) | tuple(...))` with all-literal parameters.
    """
    # `alias_match` is not defined in this function; it is read from the
    # surrounding scope.
    target_column = ColumnMatch(alias_match, StringMatch(col))
    typed_literal = LiteralMatch(AnyMatch(param_type))

    simple_condition = FunctionCallMatch(
        Or([StringMatch(op) for op in ops]),
        (target_column, typed_literal),
    )

    # The IN condition has to be checked separately since each parameter
    # has to be checked individually.
    literal_collection = FunctionCallMatch(
        Or([StringMatch("array"), StringMatch("tuple")]),
        all_parameters=typed_literal,
    )
    in_condition = FunctionCallMatch(
        StringMatch(ConditionFunctions.IN),
        (target_column, literal_collection),
    )

    return Or([simple_condition, in_condition])
class DefaultIfNullFunctionMapper(FunctionCallMapper):
    """
    If a function is being called on a column that doesn't exist, or is being
    called on NULL, change the entire function to be NULL.
    """

    # Matches `ifNull(<literal>, <literal>)`.
    function_match = FunctionCallMatch(
        StringMatch("ifNull"), (LiteralMatch(), LiteralMatch())
    )

    def attempt_map(
        self,
        expression: FunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        """
        Replace the call with `ifNull(NULL, NULL)` when every translated
        parameter is NULL.

        :param expression: The function call to inspect.
        :param children_translator: Translator applied to every parameter
            before inspection.
        :return: `ifNull(NULL, NULL)` carrying the original alias, or None when
            there are no parameters or at least one parameter is not NULL.
        """
        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )
        if not translated_params:
            return None

        # A parameter counts as NULL when it is either a wrapped function that
        # has been converted to ifNull(NULL, NULL), or a literal whose value is
        # None.
        every_param_is_null = all(
            self.function_match.match(param) is not None
            or (isinstance(param, Literal) and param.value is None)
            for param in translated_params
        )
        if not every_param_is_null:
            return None

        # Currently function mappers require returning other functions. So
        # return this to keep the mapper happy.
        null_literals = (Literal(None, None), Literal(None, None))
        return FunctionCall(expression.alias, "ifNull", null_literals)
TRANSACTIONS_COLUMNS) track_bad_query(query, selected_entity, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS) return selected_entity metrics = MetricsWrapper(environment.metrics, "api.discover") logger = logging.getLogger(__name__) EVENT_CONDITION = FunctionCallMatch( Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])), ( Or([ColumnMatch(None, StringMatch("type")), LiteralMatch(None)]), Param("event_type", Or([ColumnMatch(), LiteralMatch()])), ), ) TRANSACTION_FUNCTIONS = FunctionCallMatch( Or([StringMatch("apdex"), StringMatch("failure_rate")]), None) EVENT_FUNCTIONS = FunctionCallMatch( Or([StringMatch("isHandled"), StringMatch("notHandled")]), None) def match_query_to_entity( query: Query,
def __init__(
    self,
    time_group_columns: Mapping[str, str],
    time_parse_columns: Sequence[str],
) -> None:
    """
    Store the column replacements and pre-build the time condition matcher.

    :param time_group_columns: Column names that should be mapped to different
        columns.
    :param time_parse_columns: Columns that, if used in a condition, should be
        compared with datetimes.
    """
    # Column names that should be mapped to different columns.
    self.__time_replace_columns = time_group_columns

    # The columns in time_parse_columns might overlap with the columns that get
    # replaced, so the matcher also has to search for transformed columns.
    time_column = ColumnMatch(
        None,
        Param("column_name", Or([String(tc) for tc in time_parse_columns])),
    )

    # Transformed form 1: a rounding function applied to the column.
    rounded_column = FunctionCallMatch(
        Or(
            [
                String(name)
                for name in (
                    "toStartOfHour",
                    "toStartOfMinute",
                    "toStartOfDay",
                    "toDate",
                )
            ]
        ),
        (time_column,),
        with_optionals=True,
    )

    # Transformed form 2:
    #   toDateTime(multiply(intDiv(toUInt32(column), <int>), <int>), <str>)
    bucket_index = FunctionCallMatch(
        String("intDiv"),
        (
            FunctionCallMatch(String("toUInt32"), (time_column,)),
            LiteralMatch(Any(int)),
        ),
    )
    bucketed_column = FunctionCallMatch(
        String("toDateTime"),
        (
            FunctionCallMatch(
                String("multiply"), (bucket_index, LiteralMatch(Any(int)))
            ),
            LiteralMatch(Any(str)),
        ),
    )

    comparison_functions = (
        ConditionFunctions.GT,
        ConditionFunctions.GTE,
        ConditionFunctions.LT,
        ConditionFunctions.LTE,
        ConditionFunctions.EQ,
        ConditionFunctions.NEQ,
    )

    # <comparison>(<column or transformed column>, <string literal>); the
    # string literal is captured as "literal".
    self.condition_match = FunctionCallMatch(
        Or([String(fn) for fn in comparison_functions]),
        (
            Or([time_column, rounded_column, bucketed_column]),
            Param("literal", LiteralMatch(Any(str))),
        ),
    )
EVENTS = EntityKey.EVENTS TRANSACTIONS = EntityKey.TRANSACTIONS EVENTS_AND_TRANSACTIONS = "events_and_transactions" metrics = MetricsWrapper(environment.metrics, "api.discover") logger = logging.getLogger(__name__) EVENT_CONDITION = FunctionCallMatch( None, Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])), ( Or( [ ColumnMatch(None, None, StringMatch("type")), LiteralMatch(StringMatch("type"), None), ] ), Param("event_type", Or([ColumnMatch(), LiteralMatch()])), ), ) def match_query_to_table( query: Query, events_only_columns: ColumnSet, transactions_only_columns: ColumnSet ) -> Union[EntityKey, str]: # First check for a top level condition on the event type condition = query.get_condition_from_ast() event_types = set() if condition: top_level_condition = get_first_level_and_conditions(condition)
def __init__(self, columns: Set[str], optimize_ordering: bool = False):
    """
    Pre-build the condition matchers for the given columns.

    :param columns: Names of the columns whose conditions should be matched.
    :param optimize_ordering: When true, the ordering operators (GT, GTE, LT,
        LTE) are put in the regular "operator" set; otherwise they are put in
        the unoptimizable set.
    """
    self.columns = columns
    self.optimize_ordering = optimize_ordering

    ordering_operators = (
        ConditionFunctions.GT,
        ConditionFunctions.GTE,
        ConditionFunctions.LT,
        ConditionFunctions.LTE,
    )
    base_optimizable = (
        ConditionFunctions.EQ,
        ConditionFunctions.NEQ,
        ConditionFunctions.IS_NULL,
        ConditionFunctions.IS_NOT_NULL,
    )
    base_unoptimizable = (
        ConditionFunctions.LIKE,
        ConditionFunctions.NOT_LIKE,
    )

    # The ordering operators land in exactly one of the two sets.
    if self.optimize_ordering:
        optimizable_functions = base_optimizable + ordering_operators
        unoptimizable_functions = base_unoptimizable
    else:
        optimizable_functions = base_optimizable
        unoptimizable_functions = base_unoptimizable + ordering_operators

    optimizable_operator = Param(
        "operator", Or([String(fn) for fn in optimizable_functions])
    )
    unoptimizable_operator = Param(
        "operator", Or([String(fn) for fn in unoptimizable_functions])
    )
    membership_operator = Param(
        "operator",
        Or((String(ConditionFunctions.IN), String(ConditionFunctions.NOT_IN))),
    )

    # Operand patterns shared by the matchers below.
    column_operand = Param(
        "col", ColumnMatch(None, Or([String(col) for col in columns]))
    )
    string_operand = Param("literal", LiteralMatch(AnyMatch(str)))

    # Binary conditions are accepted with the column on either side; `has`
    # is only accepted as has(column, literal).
    self.__condition_matcher = Or(
        [
            FunctionCallMatch(optimizable_operator, (string_operand, column_operand)),
            FunctionCallMatch(optimizable_operator, (column_operand, string_operand)),
            FunctionCallMatch(
                Param("operator", String("has")), (column_operand, string_operand)
            ),
        ]
    )

    # IN / NOT IN against a `tuple(...)` call whose parameters are all
    # literals; the tuple call is captured as "tuple".
    literal_tuple = Param(
        "tuple",
        FunctionCallMatch(String("tuple"), all_parameters=LiteralMatch()),
    )
    self.__in_condition_matcher = FunctionCallMatch(
        membership_operator, (column_operand, literal_tuple)
    )

    self.__unoptimizable_condition_matcher = Or(
        [
            FunctionCallMatch(unoptimizable_operator, (string_operand, column_operand)),
            FunctionCallMatch(unoptimizable_operator, (column_operand, string_operand)),
        ]
    )
if not isinstance(exp, Column): return exp parts = exp.column_name.split(".", 1) if len(parts) != 2 or parts[0] not in aliases: raise ParsingException( f"column {exp.column_name} must be qualified in a join query" ) return Column(exp.alias, parts[0], parts[1]) query.transform_expressions(transform) DATETIME_MATCH = FunctionCallMatch( StringMatch("toDateTime"), (Param("date_string", LiteralMatch(AnyMatch(str))),) ) def _parse_datetime_literals( query: Union[CompositeQuery[QueryEntity], LogicalQuery] ) -> None: def parse(exp: Expression) -> Expression: result = DATETIME_MATCH.match(exp) if result is not None: date_string = result.expression("date_string") assert isinstance(date_string, Literal) # mypy assert isinstance(date_string.value, str) # mypy return Literal(exp.alias, parse_datetime(date_string.value)) return exp
if not isinstance(exp, Column): return exp parts = exp.column_name.split(".", 1) if len(parts) != 2 or parts[0] not in aliases: raise ParsingException( f"column {exp.column_name} must be qualified in a join query") return Column(exp.alias, parts[0], parts[1]) query.transform_expressions(transform) DATETIME_MATCH = FunctionCallMatch( StringMatch("toDateTime"), (Param("date_string", LiteralMatch(AnyMatch(str))), )) def _parse_datetime_literals( query: Union[CompositeQuery[QueryEntity], LogicalQuery]) -> None: def parse(exp: Expression) -> Expression: result = DATETIME_MATCH.match(exp) if result is not None: date_string = result.expression("date_string") assert isinstance(date_string, Literal) # mypy assert isinstance(date_string.value, str) # mypy return Literal(exp.alias, parse_datetime(date_string.value)) return exp query.transform_expressions(parse)