def test_nested_parenthesis_boolean(): actual = WhereClauseParser.parse( to_tokens("(col = 1 and col2=4) or (col = 2 and (col =6 or col=9))")) expected = WhereClause( BooleanCondition( "or", Parenthesis( BooleanCondition( "and", Condition(Column("col"), "=", Integer(1)), Condition(Column("col2"), "=", Integer(4)), )), Parenthesis( BooleanCondition( "and", Condition(Column("col"), "=", Integer(2)), Parenthesis( BooleanCondition( "or", Condition(Column("col"), "=", Integer(6)), Condition(Column("col"), "=", Integer(9)), )), )), )) assert actual == expected
def test_limit_parentheses(): actual = SQLStatementParser.parse(to_tokens("SELECT 1 LIMIT (((3)))")) expected = SelectStatement( expressions=[Integer(1)], limit_clause=LimitClause( limit_all=False, expression=Parenthesis(Parenthesis(Parenthesis(Integer(3)))), ), semi_colon=False, ) assert actual == expected
def test_consecutive_parenthesis(): actual = ExpressionParser.parse(to_tokens("((col+1) = 3 AND col2=4)")) expected = Parenthesis( BooleanCondition( "and", Condition(Parenthesis(Addition(Column("col"), Integer(1))), "=", Integer(3)), Condition( Column("col2"), "=", Integer(4), ), )) assert actual == expected
def test_parenthesis_boolean_where_clause(): actual = WhereClauseParser.parse( to_tokens("(col = 3 and field = 5) or (f2 or f3)")) expected = WhereClause( BooleanCondition( "or", Parenthesis( BooleanCondition( "and", Condition(Column("col"), "=", Integer(3)), Condition(Column("field"), "=", Integer(5)), )), Parenthesis(BooleanCondition("or", Column("f2"), Column("f3"))), )) assert actual == expected
def test_subquery(): actual = SQLStatementParser.parse( to_tokens( "SELECT col" " from (select count(*) col" " from table group by x) WHERE col > 10 ORDER BY col DESC;" ) ) expected = SelectStatement( expressions=[Column("col")], from_statement=Parenthesis( SelectStatement( expressions=[ Alias(CountFunctionCall(Column("*")), "col", with_as=False) ], from_statement=Table(Column("table")), group_by_clause=GroupByClause(Column("x")), semi_colon=False, ) ), where_clause=WhereClause(Condition(Column("col"), ">", Integer(10))), order_by_clause=OrderByClause(OrderByItem(Column("col"), has_desc=True)), semi_colon=True, ) assert actual == expected
def test_from_subquery(): actual = FromStatementParser.parse( to_tokens("(select field from table_stmt)")) expected = Parenthesis( SelectStatement( expressions=[Column("field")], from_statement=Table("table_stmt"), semi_colon=False, )) assert actual == expected
def test_multiple_args_boolean_condition(): actual = WhereClauseParser.parse( to_tokens("(col = 1 and col2=4 and col3=4)")) expected = WhereClause( Parenthesis( BooleanCondition( "and", Condition(Column("col"), "=", Integer(1)), BooleanCondition( "and", Condition(Column("col2"), "=", Integer(4)), Condition(Column("col3"), "=", Integer(4)), ), ), )) assert actual == expected
def test_chained_columns_with_arithmetic_operator(): actual, _ = ExpressionParser.parse( to_tokens("IF((a.field + b.field) = 200, 'true', 'false') fa")) expected = Alias( FunctionCall( "IF", *[ Condition( Parenthesis( ArithmaticOperator( "+", ChainedColumns(Column("a"), Column("field")), ChainedColumns(Column("b"), Column("field")), )), "=", Integer(200), ), String("true", quotes="'"), String("false", quotes="'"), ]), with_as=False, alias="fa", ) assert actual == expected
def parse( tokens, is_right_hand=False, can_be_type=False, can_alias=True, until_one_of=None, first_token=None, is_chained_columns=False, ) -> Tuple[Expression, Any]: until_one_of = until_one_of or [] main_token = first_token or next(tokens) next_token = None if main_token in String.QUOTES: expression = StringParser.parse(tokens, main_token) elif main_token.isdigit(): expression = Integer(main_token) elif main_token.replace(".", "").isdigit(): expression = Float(main_token) elif lower(main_token) in Boolean.BOOLEAN_VALUES: expression = Boolean(main_token) elif lower(main_token) in Null.VALUES: expression = Null() elif lower(main_token) == Negation.PREDICATE: rest_expression, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of, ) expression = Negation(rest_expression) elif main_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Parenthesis(*arguments) elif main_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) assert next_token == "]", next_token arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Array(*arguments) next_token = next(tokens, None) elif lower(main_token) == "case": argument_tokens, next_token = get_tokens_until_one_of( tokens, ["end"]) assert lower(next_token) == "end" next_token = next(tokens, None) expression = CaseParser.parse(iter(argument_tokens)) elif lower(main_token) == "select": argument_tokens, next_token = get_tokens_until_one_of(tokens, []) next_token = next(tokens, None) expression = SelectStatementParser.parse(iter(argument_tokens)) else: expression = None if next_token is None: next_token = next(tokens, None) # Expressions that need the next_token to be read if expression is None: if next_token is not None and next_token == "(": if lower(main_token) == "cast": column_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["as"]) column, _ = ExpressionParser.parse( iter(column_tokens), is_right_hand=True, until_one_of=until_one_of, ) assert lower(next_token) == "as", next_token next_token = next(tokens) cast_type = Type(next_token) expression = CastFunctionCall(column, cast_type) next_token = next(tokens) assert lower(next_token) == ")", next_token elif lower(main_token) == "array_agg": next_token = next(tokens) if lower(next_token) == "distinct": distinct = True first_token = None else: distinct = False first_token = next_token column_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=[ ")", "ignore", "respects", "order", "limit" ], first_token=first_token, ) column, _ = ExpressionParser.parse( iter(column_tokens), until_one_of=until_one_of) ignore_nulls = respect_nulls = False if lower(next_token) == "ignore": next_token = next(tokens) assert lower(next_token) == "nulls" ignore_nulls = True next_token = next(tokens) elif lower(next_token) == "respect": next_token = next(tokens) assert lower(next_token) == "nulls" respect_nulls = True next_token = next(tokens) if lower(next_token) == "order": next_token = next(tokens) assert lower(next_token) == "by" expression_tokens, next_token = get_tokens_until_one_of( tokens, ["limit", ")"]) order_bys = OrderByParser.parse( iter(expression_tokens)) else: order_bys = None limit = None if lower(next_token) == "limit": next_token = next(tokens) limit = int(next_token) next_token = next(tokens) assert lower(next_token) == ")", next_token expression = ArrayAggFunctionCall( column=column, distinct=distinct, ignore_nulls=ignore_nulls, respect_nulls=respect_nulls, order_bys=order_bys, limit=limit, ) elif lower(main_token) == "count": next_token = next(tokens) if lower(next_token) == "distinct": distinct = True first_token = None else: distinct = False first_token = next_token argument_tokens = get_tokens_until_closing_parenthesis( tokens, first_token=first_token) arguments = ExpressionListParser.parse( iter(argument_tokens)) expression = CountFunctionCall(*arguments, distinct=distinct) else: argument_tokens = get_tokens_until_closing_parenthesis( tokens) arguments_can_be_type = can_be_type or any( lower(t) == "timestamp_trunc" for t in argument_tokens) arguments = ExpressionListParser.parse( iter(argument_tokens), can_be_type=arguments_can_be_type) expression = FunctionCall(main_token, *arguments) next_token = next(tokens, None) if next_token and lower(next_token) == "filter": next_next_token = next(tokens) assert next_next_token == "(", next_next_token argument_tokens = get_tokens_until_closing_parenthesis( tokens) assert lower( argument_tokens[0]) == "where", argument_tokens filter_condition, next_token = ExpressionParser.parse( iter(argument_tokens[1:]), can_alias=False, ) expression = FilteredFunctionCall(expression, filter_condition) next_token = next(tokens, None) elif (next_token is not None and lower(main_token) in DatePartExtraction.PARTS and lower(next_token) == "from"): rest_expression, next_token = ExpressionParser.parse( tokens, until_one_of=until_one_of) expression = DatePartExtraction(main_token, rest_expression) elif lower(main_token) in Type.VALUES and can_be_type: expression = Type(main_token) elif next_token is not None and next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Index( Column(main_token), arguments) # left item will not always be a column next_token = next(tokens, None) elif next_token is not None and main_token == "-" and next_token.isdigit( ): expression = Integer(-int(next_token)) next_token = next(tokens, None) elif (next_token is not None and main_token == "-" and next_token.replace(".", "").isdigit()): expression = Float(-float(next_token)) next_token = next(tokens, None) elif (lower(main_token) in String.PREFIXES and next_token is not None and lower(next_token) in String.QUOTES): expression = StringParser.parse(tokens, start_quote=next_token, prefix=main_token) else: expression = Column(main_token) if lower(next_token) == "over": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = iter( get_tokens_until_closing_parenthesis(tokens)) argument_next_token = next(argument_tokens, None) if lower(argument_next_token) == "partition": argument_next_token = next(argument_tokens, None) if not argument_next_token or lower( argument_next_token) != "by": raise ParsingError("Missing BY after PARTITION") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["order", "rows", "range"]) partition_by = ExpressionListParser.parse( iter(expression_tokens)) else: partition_by = None if lower(argument_next_token) == "order": argument_next_token = next(argument_tokens, None) if not argument_next_token or lower( argument_next_token) != "by": raise ParsingError("Missing BY after ORDER") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["rows", "range"]) order_by = OrderByParser.parse(iter(expression_tokens)) else: order_by = None if lower(argument_next_token) in ("rows", "range"): rows_range = argument_next_token expression_tokens, _ = get_tokens_until_one_of( argument_tokens, []) frame_clause: Optional[WindowFrameClause] = WindowFrameClause( rows_range, " ".join(expression_tokens)) else: frame_clause = None expression = AnalyticsClause( expression, partition_by=partition_by, order_by=order_by, frame_clause=frame_clause, ) next_token = next(tokens, None) while next_token == ".": right_hand, next_token = ExpressionParser.parse( tokens, until_one_of=until_one_of, is_chained_columns=True) expression = ChainedColumns(expression, right_hand) if next_token and next_token in ("+", "-", "*", "/") and not is_chained_columns: left_hand = expression symbol = next_token right_hand, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of, ) expression = ArithmaticOperator(symbol, left_hand, right_hand) while next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Index(expression, arguments) next_token = next(tokens, None) if is_right_hand or is_chained_columns: return expression, next_token if lower(next_token) in Condition.PREDICATES: first_token = None symbol = next_token if lower(next_token) == "is": next_next_token = next(tokens) if lower(next_next_token) == "not": symbol = "is not" else: first_token = next_next_token elif lower(next_token) == "not": next_next_token = next(tokens) if lower(next_next_token) == "in": symbol = "not in" else: first_token = next_next_token right_hand, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of, first_token=first_token, ) expression = Condition(expression, symbol, right_hand) elif lower(next_token) == "between": symbol = next_token right_hand_left, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of) if lower(next_token) != "and": raise ParsingError("expected AND") right_hand_right, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of) right_hand = BooleanCondition( "and", right_hand_left, right_hand_right, ) expression = Condition(expression, symbol, right_hand) elif next_token in BitwiseOperation.OPERATORS: operator = next_token right_hand, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of) expression = BitwiseOperation(expression, operator, right_hand) if lower(next_token) in BooleanCondition.PREDICATES: left_hand = expression symbol = next_token right_hand, next_token = ExpressionParser.parse( tokens, until_one_of=until_one_of) right_alias = None if isinstance(right_hand, Alias): right_alias = right_hand right_hand = right_hand.expression expression = BooleanCondition(symbol, left_hand, right_hand) if right_alias is not None: right_alias.expression = expression expression = right_alias if lower(next_token) == "except": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = ExceptClause(expression, arguments) next_token = next(tokens, None) if (next_token is not None and next_token != ")" and not (next_token in String.QUOTES and isinstance(expression, String)) and next_token != ";" and lower(next_token) not in until_one_of and can_alias): if lower(next_token) == "as": with_as = True alias, _ = ExpressionParser.parse(tokens, is_right_hand=True, until_one_of=until_one_of) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) return Alias(expression, alias, with_as), next(tokens, None) return expression, next_token
def parse(tokens): next_token = next(tokens) if next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) try: argument = SQLStatementParser.parse(iter(argument_tokens)) except ParsingError: argument = FromStatementParser.parse(iter(argument_tokens)) expression = Parenthesis(argument) next_token = next(tokens, None) elif next_token in String.QUOTES: table_name = StringParser.parse(tokens, next_token) next_token = next(tokens, None) while next_token == ".": right_hand, next_token = ExpressionParser.parse( tokens, is_chained_columns=True, ) table_name = ChainedColumns(table_name, right_hand) expression = Table(table_name) elif next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) table, _ = ExpressionParser.parse(iter(argument_tokens)) expression = Table(table, in_square_brackets=True) assert next_token == "]", next_token next_token = next(tokens, None) else: if lower(next_token) == "unnest": expression, next_token = UnnestParser.parse(tokens) else: argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=Join.VALUES + CombinedQueries.SET_OPERATORS, first_token=next_token, ) expression = Table( ExpressionParser.parse(iter(argument_tokens))[0]) if (next_token is not None and lower(next_token) not in Join.VALUES and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while lower(next_token) in CombinedQueries.SET_OPERATORS: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token) set_operator = SetOperatorTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token, ) if lower(expression_tokens[0]) == "select": right_expr = SelectStatementParser.parse( iter(expression_tokens[1:])) else: right_expr = FromStatementParser.parse(iter(expression_tokens)) expression = CombinedQueries(set_operator, left_expr, right_expr) if (next_token is not None and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while lower(next_token) in Join.VALUES: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, Join.VALUES, first_token=next_token) join_type = JoinTypeParser.parse(iter(expression_tokens)) if join_type in ("CROSS JOIN", ","): expression_tokens, next_token = [next_token ] + list(tokens), None else: expression_tokens, next_token = get_tokens_until_one_of( tokens, ("on", "using"), first_token=next_token) right_expr = FromStatementParser.parse(iter(expression_tokens)) on = None using = None on_or_using = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, Join.VALUES, ) if lower(on_or_using) == "on": expression, _ = ExpressionParser.parse(iter(expression_tokens)) on = OnClause(expression) elif lower(on_or_using) == "using": expressions, _ = ExpressionParser.parse( iter(expression_tokens)) using = UsingClause(expressions) expression = Join(join_type, left_expr, right_expr, on=on, using=using) if next_token is not None and lower(next_token) not in Join.VALUES: if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) return expression
def parse(tokens, is_right_hand=False): main_token = next(tokens) next_token = None if main_token in String.QUOTES: expression = StringParser.parse(tokens, main_token) elif main_token.isdigit(): expression = Integer(main_token) elif main_token in Boolean.BOOLEAN_VALUES: expression = Boolean(main_token) elif main_token in Null.VALUES: expression = Null() elif main_token in Type.VALUES: expression = Type(main_token) elif main_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Parenthesis(*arguments) elif main_token == "case": argument_tokens, next_token = get_tokens_until_one_of( tokens, ["end"]) assert next_token == "end" next_token = next(tokens, None) expression = CaseParser.parse(iter(argument_tokens)) elif main_token == "select": argument_tokens, next_token = get_tokens_until_one_of(tokens, []) next_token = next(tokens, None) expression = SelectStatementParser.parse(iter(argument_tokens)) else: expression = None if next_token is None: next_token = next(tokens, None) # Expressions that need the next_token to be read if expression is None: if next_token is not None and next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = FunctionCall(main_token, *arguments) next_token = next(tokens, None) elif next_token is not None and next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Index( Column(main_token), arguments) # left item will not always be a column next_token = next(tokens, None) elif next_token is not None and main_token == "-" and next_token.isdigit( ): expression = Integer(-int(next_token)) next_token = next(tokens, None) elif (main_token in String.PREFIXES and next_token is not None and next_token in String.QUOTES): expression = StringParser.parse(tokens, start_quote=next_token, prefix=main_token) else: expression = Column(main_token) if next_token == "over": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = iter( get_tokens_until_closing_parenthesis(tokens)) argument_next_token = next(argument_tokens, None) if argument_next_token == "partition": argument_next_token = next(argument_tokens, None) if not argument_next_token or argument_next_token != "by": raise ParsingError("Missing BY after PARTITION") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["order", "rows", "range"]) partition_by = ExpressionListParser.parse( iter(expression_tokens)) else: partition_by = None if argument_next_token == "order": argument_next_token = next(argument_tokens, None) if not argument_next_token or argument_next_token != "by": raise ParsingError("Missing BY after ORDER") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["rows", "range"]) order_by = OrderByParser.parse(iter(expression_tokens)) else: order_by = None if argument_next_token in ("rows", "range"): rows_range = argument_next_token expression_tokens, _ = get_tokens_until_one_of( argument_tokens, []) frame_clause = WindowFrameClause(rows_range, " ".join(expression_tokens)) else: frame_clause = None expression = AnalyticsClause( expression, partition_by=partition_by, order_by=order_by, frame_clause=frame_clause, ) next_token = next(tokens, None) if next_token and next_token in ("+", "-", "*", "/"): left_hand = expression symbol = next_token right_hand, next_token = ExpressionParser.parse(tokens, is_right_hand=True) expression = ArithmaticOperator(symbol, left_hand, right_hand) if is_right_hand: return expression, next_token if next_token in Condition.PREDICATES: symbol = next_token if next_token == "is": next_next_token = next(tokens) if next_next_token == "not": symbol = "is not" else: tokens, _ = get_tokens_until_one_of( tokens, [], first_token=next_next_token) tokens = iter(tokens) right_hand, next_token = ExpressionParser.parse(tokens, is_right_hand=True) expression = Condition(expression, symbol, right_hand) elif next_token == "between": symbol = next_token right_hand_left, next_token = ExpressionParser.parse( tokens, is_right_hand=True) if next_token != "and": raise ParsingError("expected AND") right_hand_right, next_token = ExpressionParser.parse( tokens, is_right_hand=True) right_hand = BooleanCondition( "and", right_hand_left, right_hand_right, ) expression = Condition(expression, symbol, right_hand) if next_token in BooleanCondition.PREDICATES: left_hand = expression symbol = next_token right_hand = ExpressionParser.parse(tokens) expression = BooleanCondition(symbol, left_hand, right_hand) next_token = next(tokens, None) if next_token == "except": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = ExceptClause(expression, arguments) next_token = next(tokens, None) if (next_token is not None and next_token != ")" and next_token != "'" and next_token != '"' and next_token != "`" and next_token != ";"): if next_token == "as": with_as = True alias, _ = ExpressionParser.parse(tokens, is_right_hand=True) else: with_as = False alias = next_token return Alias(expression, alias, with_as) return expression
def parse(tokens): next_token = next(tokens) if next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) try: argument = SQLStatementParser.parse(iter(argument_tokens)) except ParsingError: argument = FromStatementParser.parse(iter(argument_tokens)) expression = Parenthesis(argument) elif next_token == "'" or next_token == '"' or next_token == "`": expression = Table(StringParser.parse(tokens, next_token)) else: if next_token == "unnest": next_next_token = next(tokens) assert next_next_token == "(" argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = FunctionCall(next_token, *arguments) else: expression = Table(next_token) next_token = next(tokens, None) if (next_token is not None and next_token not in Join.VALUES and next_token not in CombinedQueries.SET_OPERATORS): if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while next_token in CombinedQueries.SET_OPERATORS: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token) set_operator = SetOperatorTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token, ) if expression_tokens[0] == "select": right_expr = SelectStatementParser.parse( iter(expression_tokens[1:])) else: right_expr = FromStatementParser.parse(iter(expression_tokens)) expression = CombinedQueries(set_operator, left_expr, right_expr) if (next_token is not None and next_token not in CombinedQueries.SET_OPERATORS): if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while next_token in Join.VALUES: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, Join.VALUES, first_token=next_token) join_type = JoinTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, ("on", "using"), first_token=next_token) right_expr = FromStatementParser.parse(iter(expression_tokens)) on = None using = None on_or_using = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, Join.VALUES, ) if on_or_using == "on": expression = ExpressionParser.parse(iter(expression_tokens)) on = OnClause(expression) elif on_or_using == "using": expressions = ExpressionParser.parse(iter(expression_tokens)) using = UsingClause(expressions) else: raise ParsingError("Missing ON or USING for join") expression = Join(join_type, left_expr, right_expr, on=on, using=using) if next_token is not None and next_token not in Join.VALUES: if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) return expression
def test_nested_joins(): sql = """ SELECT COALESCE(sq_1.col, sq_2.col) f0_ FROM (SELECT ANY_VALUE(col) col, LAST_VALUE(ANY_VALUE(col2)) OVER (PARTITION BY ANY_VALUE(col) ORDER BY SUM(clicks) ASC, SUM(metric) ASC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) last, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200410' AND '20200510') JOIN (SELECT * EXCEPT (hash) FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200401' AND '20200501') WHERE rn = 1) USING (hash) GROUP BY hash) sq_1 FULL OUTER JOIN (SELECT ANY_VALUE(col) col, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200310' AND '20200410') JOIN (SELECT * EXCEPT (hash), FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200301' AND '20200401') WHERE rn = 1) USING (hash) GROUP BY hash) sq_2 ON sq_1.hash = sq_2.hash WHERE sq_1.last = 1 GROUP BY f0_ """ # noqa actual = SQLStatementParser.parse(to_tokens(sql)) expected = SelectStatement( expressions=[ Alias( FunctionCall("coalesce", Column("sq_1.col"), Column("sq_2.col")), "f0_", with_as=False, ) ], from_statement=Join( "FULL OUTER JOIN", left_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Alias( AnalyticsClause( FunctionCall( "last_value", FunctionCall("any_value", Column("col2")), ), partition_by=[ FunctionCall("any_value", Column("col")) ], order_by=OrderByClause( OrderByItem( FunctionCall( "sum", Column("clicks")), has_asc=True, ), OrderByItem( FunctionCall( "sum", Column("metric")), has_asc=True, ), ), frame_clause=WindowFrameClause( "range", "between unbounded preceding and unbounded following", # noqa ), ), "last", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200410", quotes="'"), String("20200510", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200401", quotes="'"), String("20200501", quotes="'"), ), )), semi_colon=False, )), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_1", with_as=False, ), right_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200310", quotes="'"), String("20200410", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200301", quotes="'"), String("20200401", quotes="'"), ), )), semi_colon=False, ), ), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_2", with_as=False, ), on=OnClause( Condition(Column("sq_1.hash"), "=", Column("sq_2.hash"))), using=None, ), where_clause=WhereClause( Condition(Column("sq_1.last"), "=", Integer(1))), group_by_clause=GroupByClause(Column("f0_")), semi_colon=False, ) assert actual == expected
def test_parenthesis(): actual = ExpressionParser.parse(to_tokens("(field)")) expected = Parenthesis(Column("field")) assert actual == expected
def test_multiple_parentheses(): actual = ExpressionParser.parse(to_tokens("((2))")) expected = Parenthesis(Parenthesis(Integer(2))) assert actual == expected
def test_parenthesis_conditional(): actual = ExpressionParser.parse(to_tokens("(field+3) = 4")) expected = Condition(Parenthesis(Addition(Column("field"), Integer(3))), "=", Integer(4)) assert actual == expected
def test_parenthesis_expression_where_clause(): actual = WhereClauseParser.parse(to_tokens("(col + 1) = col2")) expected = WhereClause( Condition(Parenthesis(Addition(Column("col"), Integer(1))), "=", Column("col2"))) assert actual == expected
def test_parenthesis_multiple_elements(): actual = ExpressionParser.parse(to_tokens("(field,other_field,3,'test')")) expected = Parenthesis(Column("field"), Column("other_field"), Integer(3), String("test", quotes="'")) assert actual == expected