def get_project_ids_in_condition( condition: Expression) -> Optional[Set[int]]: """ Extract project ids from an expression. Returns None if no project if condition is found. It returns an empty set of conflicting project_id conditions are found. """ match = FunctionCall( None, String(ConditionFunctions.EQ), ( Column(column_name=String(project_column)), Literal(value=Param("project_id", Any(int))), ), ).match(condition) if match is not None: return {match.integer("project_id")} match = is_in_condition_pattern( Column(column_name=String(project_column))).match(condition) if match is not None: projects = match.expression("tuple") assert isinstance(projects, FunctionCallExpr) return { l.value for l in projects.parameters if isinstance(l, LiteralExpr) and isinstance(l.value, int) } match = FunctionCall( None, Param( "operator", Or([String(BooleanFunctions.AND), String(BooleanFunctions.OR)]), ), (Param("lhs", AnyExpression()), Param("rhs", AnyExpression())), ).match(condition) if match is not None: lhs_projects = get_project_ids_in_condition( match.expression("lhs")) rhs_projects = get_project_ids_in_condition( match.expression("rhs")) if lhs_projects is None: return rhs_projects elif rhs_projects is None: return lhs_projects else: return (lhs_projects & rhs_projects if match.string("operator") == BooleanFunctions.AND else lhs_projects | rhs_projects) return None
def replace_condition(expression: Expression) -> Expression: match = FunctionCall( String(OPERATOR_TO_FUNCTION[operator]), (Param("column", Column(None, String(field))), AnyExpression()), ).match(expression) return (expression if match is None else replace( expression, parameters=(match.expression("column"), new_operand)))
def _get_mapping_keys_in_condition( condition: Expression, column_name: str ) -> Optional[Set[str]]: """ Finds the top level conditions that include filter based on the arrayJoin. This is meant to be used to find the keys the query is filtering the arrayJoin on. We can only apply the arrayFilter optimization to arrayJoin conditions that are not in OR with other columns. To simplify the problem, we only consider those conditions that are included in the first level of the query: [['tagskey' '=' 'a'],['col' '=' 'b'],['col2' '=' 'c']] works [[['tagskey' '=' 'a'], ['col2' '=' 'b']], ['tagskey' '=' 'c']] does not If we encounter an OR condition we return None, which means we cannot safely apply the optimization. Empty set means we did not find any suitable arrayJoin for optimization in this condition but that does not disqualify the whole query in the way the OR condition does. """ keys_found = set() conditions = get_first_level_and_conditions(condition) for c in conditions: if is_binary_condition(c, BooleanFunctions.OR): return None match = FunctionCall( None, String(ConditionFunctions.EQ), (array_join_pattern(column_name), Literal(None, Param("key", Any(str)))), ).match(c) if match is not None: keys_found.add(match.string("key")) match = is_in_condition_pattern(array_join_pattern(column_name)).match(c) if match is not None: function = match.expression("tuple") assert isinstance(function, FunctionCallExpr) keys_found |= { lit.value for lit in function.parameters if isinstance(lit, LiteralExpr) and isinstance(lit.value, str) } return keys_found
def extractor(condition: Expression) -> Set[Tuple[str, ...]]: match = FunctionCall( String(ConditionFunctions.EQ), (key_pattern, Param("tuple", FunctionCall(String("tuple"), None))), ).match(condition) if match is None: return set() function = match.expression("tuple") if ( not isinstance(function, FunctionCallExpr) or function.function_name != "tuple" ): return set() parameters = tuple( param.value for param in function.parameters if isinstance(param, LiteralExpr) and isinstance(param.value, str) ) return {parameters}