def test_subquery(): actual = SQLStatementParser.parse( to_tokens( "SELECT col" " from (select count(*) col" " from table group by x) WHERE col > 10 ORDER BY col DESC;" ) ) expected = SelectStatement( expressions=[Column("col")], from_statement=Parenthesis( SelectStatement( expressions=[ Alias(CountFunctionCall(Column("*")), "col", with_as=False) ], from_statement=Table(Column("table")), group_by_clause=GroupByClause(Column("x")), semi_colon=False, ) ), where_clause=WhereClause(Condition(Column("col"), ">", Integer(10))), order_by_clause=OrderByClause(OrderByItem(Column("col"), has_desc=True)), semi_colon=True, ) assert actual == expected
def test_unnest_alias_with_offset_as_alias(): sql = "unnest(table) t with offset as o" actual = FromStatementParser.parse(to_tokens(sql)) expected = Unnest( Alias(FunctionCall("unnest", Column("table")), with_as=False, alias="t"), with_offset=True, with_offset_as=True, offset_alias="o", ) assert actual == expected
def test_multiple_indices_access(): actual, _ = ExpressionParser.parse(to_tokens("array[index(0), 'foo'] alias")) expected = Alias( Index( Column("array"), [FunctionCall("index", Integer(0)), String("foo", quotes="'")], ), with_as=False, alias="alias", ) assert actual == expected
def test_boolean_condition(): actual, _ = ExpressionParser.parse(to_tokens("f1 IS NOT NULL AND f2 > 0 fnew")) expected = Alias( BooleanCondition( "AND", Condition(Column("f1"), "is not", Null()), Condition(Column("f2"), ">", Integer(0)), ), with_as=False, alias="fnew", ) assert actual == expected
def parse(tokens): next_token = next(tokens) assert next_token == "(" argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = FunctionCall("unnest", *arguments) next_token = next(tokens, None) if (next_token is not None and lower(next_token) not in Join.VALUES and lower(next_token) not in CombinedQueries.SET_OPERATORS and lower(next_token) != "with"): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) with_offset = False with_offset_as = False offset_alias = None if lower(next_token) == "with": next_token = next(tokens) assert lower(next_token) == "offset", next_token with_offset = True next_token = next(tokens, None) if (next_token is not None and lower(next_token) not in Join.VALUES and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_offset_as = True offset_alias = next(tokens) else: offset_alias = next_token next_token = next(tokens, None) return ( Unnest( expression, with_offset=with_offset, with_offset_as=with_offset_as, offset_alias=offset_alias, ), next_token, )
def test_nested_date_functions(): actual = ExpressionParser.parse( to_tokens("DATE(TIMESTAMP_TRUNC(CAST(a.date AS TIMESTAMP), MONTH))")) expected = FunctionCall( "date", FunctionCall( "timestamp_trunc", FunctionCall( "cast", Alias(Column("a.date"), alias=Type("timestamp"), with_as=True)), Type("month"), ), ) assert actual == expected
def test_chained_columns_with_arithmetic_operator(): actual, _ = ExpressionParser.parse( to_tokens("IF((a.field + b.field) = 200, 'true', 'false') fa")) expected = Alias( FunctionCall( "IF", *[ Condition( Parenthesis( ArithmaticOperator( "+", ChainedColumns(Column("a"), Column("field")), ChainedColumns(Column("b"), Column("field")), )), "=", Integer(200), ), String("true", quotes="'"), String("false", quotes="'"), ]), with_as=False, alias="fa", ) assert actual == expected
def parse(tokens): next_token = next(tokens) if next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) try: argument = SQLStatementParser.parse(iter(argument_tokens)) except ParsingError: argument = FromStatementParser.parse(iter(argument_tokens)) expression = Parenthesis(argument) next_token = next(tokens, None) elif next_token in String.QUOTES: table_name = StringParser.parse(tokens, next_token) next_token = next(tokens, None) while next_token == ".": right_hand, next_token = ExpressionParser.parse( tokens, is_chained_columns=True, ) table_name = ChainedColumns(table_name, right_hand) expression = Table(table_name) elif next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) table, _ = ExpressionParser.parse(iter(argument_tokens)) expression = Table(table, in_square_brackets=True) assert next_token == "]", next_token next_token = next(tokens, None) else: if lower(next_token) == "unnest": expression, next_token = UnnestParser.parse(tokens) else: argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=Join.VALUES + CombinedQueries.SET_OPERATORS, first_token=next_token, ) expression = Table( ExpressionParser.parse(iter(argument_tokens))[0]) if (next_token is not None and lower(next_token) not in Join.VALUES and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while lower(next_token) in CombinedQueries.SET_OPERATORS: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token) set_operator = SetOperatorTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token, ) if lower(expression_tokens[0]) == "select": right_expr = SelectStatementParser.parse( iter(expression_tokens[1:])) else: right_expr = FromStatementParser.parse(iter(expression_tokens)) expression = CombinedQueries(set_operator, left_expr, right_expr) if (next_token is not None and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while lower(next_token) in Join.VALUES: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, Join.VALUES, first_token=next_token) join_type = JoinTypeParser.parse(iter(expression_tokens)) if join_type in ("CROSS JOIN", ","): expression_tokens, next_token = [next_token ] + list(tokens), None else: expression_tokens, next_token = get_tokens_until_one_of( tokens, ("on", "using"), first_token=next_token) right_expr = FromStatementParser.parse(iter(expression_tokens)) on = None using = None on_or_using = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, Join.VALUES, ) if lower(on_or_using) == "on": expression, _ = ExpressionParser.parse(iter(expression_tokens)) on = OnClause(expression) elif lower(on_or_using) == "using": expressions, _ = ExpressionParser.parse( iter(expression_tokens)) using = UsingClause(expressions) expression = Join(join_type, left_expr, right_expr, on=on, using=using) if next_token is not None and lower(next_token) not in Join.VALUES: if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) return expression
def parse(tokens, is_right_hand=False): main_token = next(tokens) next_token = None if main_token in String.QUOTES: expression = StringParser.parse(tokens, main_token) elif main_token.isdigit(): expression = Integer(main_token) elif main_token in Boolean.BOOLEAN_VALUES: expression = Boolean(main_token) elif main_token in Null.VALUES: expression = Null() elif main_token in Type.VALUES: expression = Type(main_token) elif main_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Parenthesis(*arguments) elif main_token == "case": argument_tokens, next_token = get_tokens_until_one_of( tokens, ["end"]) assert next_token == "end" next_token = next(tokens, None) expression = CaseParser.parse(iter(argument_tokens)) elif main_token == "select": argument_tokens, next_token = get_tokens_until_one_of(tokens, []) next_token = next(tokens, None) expression = SelectStatementParser.parse(iter(argument_tokens)) else: expression = None if next_token is None: next_token = next(tokens, None) # Expressions that need the next_token to be read if expression is None: if next_token is not None and next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = FunctionCall(main_token, *arguments) next_token = next(tokens, None) elif next_token is not None and next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Index( Column(main_token), arguments) # left item will not always be a column next_token = next(tokens, None) elif next_token is not None and main_token == "-" and next_token.isdigit( ): expression = Integer(-int(next_token)) next_token = next(tokens, None) elif (main_token in String.PREFIXES and next_token is not None and next_token in String.QUOTES): expression = StringParser.parse(tokens, start_quote=next_token, prefix=main_token) else: expression = Column(main_token) if next_token == "over": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = iter( get_tokens_until_closing_parenthesis(tokens)) argument_next_token = next(argument_tokens, None) if argument_next_token == "partition": argument_next_token = next(argument_tokens, None) if not argument_next_token or argument_next_token != "by": raise ParsingError("Missing BY after PARTITION") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["order", "rows", "range"]) partition_by = ExpressionListParser.parse( iter(expression_tokens)) else: partition_by = None if argument_next_token == "order": argument_next_token = next(argument_tokens, None) if not argument_next_token or argument_next_token != "by": raise ParsingError("Missing BY after ORDER") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["rows", "range"]) order_by = OrderByParser.parse(iter(expression_tokens)) else: order_by = None if argument_next_token in ("rows", "range"): rows_range = argument_next_token expression_tokens, _ = get_tokens_until_one_of( argument_tokens, []) frame_clause = WindowFrameClause(rows_range, " ".join(expression_tokens)) else: frame_clause = None expression = AnalyticsClause( expression, partition_by=partition_by, order_by=order_by, frame_clause=frame_clause, ) next_token = next(tokens, None) if next_token and next_token in ("+", "-", "*", "/"): left_hand = expression symbol = next_token right_hand, next_token = ExpressionParser.parse(tokens, is_right_hand=True) expression = ArithmaticOperator(symbol, left_hand, right_hand) if is_right_hand: return expression, next_token if next_token in Condition.PREDICATES: symbol = next_token if next_token == "is": next_next_token = next(tokens) if next_next_token == "not": symbol = "is not" else: tokens, _ = get_tokens_until_one_of( tokens, [], first_token=next_next_token) tokens = iter(tokens) right_hand, next_token = ExpressionParser.parse(tokens, is_right_hand=True) expression = Condition(expression, symbol, right_hand) elif next_token == "between": symbol = next_token right_hand_left, next_token = ExpressionParser.parse( tokens, is_right_hand=True) if next_token != "and": raise ParsingError("expected AND") right_hand_right, next_token = ExpressionParser.parse( tokens, is_right_hand=True) right_hand = BooleanCondition( "and", right_hand_left, right_hand_right, ) expression = Condition(expression, symbol, right_hand) if next_token in BooleanCondition.PREDICATES: left_hand = expression symbol = next_token right_hand = ExpressionParser.parse(tokens) expression = BooleanCondition(symbol, left_hand, right_hand) next_token = next(tokens, None) if next_token == "except": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = ExceptClause(expression, arguments) next_token = next(tokens, None) if (next_token is not None and next_token != ")" and next_token != "'" and next_token != '"' and next_token != "`" and next_token != ";"): if next_token == "as": with_as = True alias, _ = ExpressionParser.parse(tokens, is_right_hand=True) else: with_as = False alias = next_token return Alias(expression, alias, with_as) return expression
def parse(tokens): next_token = next(tokens) if next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) try: argument = SQLStatementParser.parse(iter(argument_tokens)) except ParsingError: argument = FromStatementParser.parse(iter(argument_tokens)) expression = Parenthesis(argument) elif next_token == "'" or next_token == '"' or next_token == "`": expression = Table(StringParser.parse(tokens, next_token)) else: if next_token == "unnest": next_next_token = next(tokens) assert next_next_token == "(" argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = FunctionCall(next_token, *arguments) else: expression = Table(next_token) next_token = next(tokens, None) if (next_token is not None and next_token not in Join.VALUES and next_token not in CombinedQueries.SET_OPERATORS): if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while next_token in CombinedQueries.SET_OPERATORS: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token) set_operator = SetOperatorTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token, ) if expression_tokens[0] == "select": right_expr = SelectStatementParser.parse( iter(expression_tokens[1:])) else: right_expr = FromStatementParser.parse(iter(expression_tokens)) expression = CombinedQueries(set_operator, left_expr, right_expr) if (next_token is not None and next_token not in CombinedQueries.SET_OPERATORS): if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while next_token in Join.VALUES: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, Join.VALUES, first_token=next_token) join_type = JoinTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, ("on", "using"), first_token=next_token) right_expr = FromStatementParser.parse(iter(expression_tokens)) on = None using = None on_or_using = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, Join.VALUES, ) if on_or_using == "on": expression = ExpressionParser.parse(iter(expression_tokens)) on = OnClause(expression) elif on_or_using == "using": expressions = ExpressionParser.parse(iter(expression_tokens)) using = UsingClause(expressions) else: raise ParsingError("Missing ON or USING for join") expression = Join(join_type, left_expr, right_expr, on=on, using=using) if next_token is not None and next_token not in Join.VALUES: if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) return expression
def test_aliased_column(): actual = ExpressionParser.parse(to_tokens("col AS column_name")) expected = Alias(Column("col"), alias=Column("column_name"), with_as=True) assert actual == expected
def test_nested_joins(): sql = """ SELECT COALESCE(sq_1.col, sq_2.col) f0_ FROM (SELECT ANY_VALUE(col) col, LAST_VALUE(ANY_VALUE(col2)) OVER (PARTITION BY ANY_VALUE(col) ORDER BY SUM(clicks) ASC, SUM(metric) ASC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) last, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200410' AND '20200510') JOIN (SELECT * EXCEPT (hash) FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200401' AND '20200501') WHERE rn = 1) USING (hash) GROUP BY hash) sq_1 FULL OUTER JOIN (SELECT ANY_VALUE(col) col, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200310' AND '20200410') JOIN (SELECT * EXCEPT (hash), FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200301' AND '20200401') WHERE rn = 1) USING (hash) GROUP BY hash) sq_2 ON sq_1.hash = sq_2.hash WHERE sq_1.last = 1 GROUP BY f0_ """ # noqa actual = SQLStatementParser.parse(to_tokens(sql)) expected = SelectStatement( expressions=[ Alias( FunctionCall("coalesce", Column("sq_1.col"), Column("sq_2.col")), "f0_", with_as=False, ) ], from_statement=Join( "FULL OUTER JOIN", left_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Alias( AnalyticsClause( FunctionCall( "last_value", FunctionCall("any_value", Column("col2")), ), partition_by=[ FunctionCall("any_value", Column("col")) ], order_by=OrderByClause( OrderByItem( FunctionCall( "sum", Column("clicks")), has_asc=True, ), OrderByItem( FunctionCall( "sum", Column("metric")), has_asc=True, ), ), frame_clause=WindowFrameClause( "range", "between unbounded preceding and unbounded following", # noqa ), ), "last", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200410", quotes="'"), String("20200510", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200401", quotes="'"), String("20200501", quotes="'"), ), )), semi_colon=False, )), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_1", with_as=False, ), right_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200310", quotes="'"), String("20200410", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200301", quotes="'"), String("20200401", quotes="'"), ), )), semi_colon=False, ), ), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_2", with_as=False, ), on=OnClause( Condition(Column("sq_1.hash"), "=", Column("sq_2.hash"))), using=None, ), where_clause=WhereClause( Condition(Column("sq_1.last"), "=", Integer(1))), group_by_clause=GroupByClause(Column("f0_")), semi_colon=False, ) assert actual == expected
def test_aliased_string_without_as(): actual = ExpressionParser.parse(to_tokens("'col' column_name")) expected = Alias(String("col", quotes="'"), alias="column_name", with_as=False) assert actual == expected
def test_empty_list_with_alias(): actual, _ = ExpressionParser.parse(to_tokens("[] x")) expected = Alias(Array(), with_as=False, alias="x") assert actual == expected
def test_aliased_table(): actual = FromStatementParser.parse(to_tokens("table as sq_1")) expected = Table( Alias(Column("table"), with_as=True, alias=Column("sq_1")), ) assert actual == expected
def parse( tokens, is_right_hand=False, can_be_type=False, can_alias=True, until_one_of=None, first_token=None, is_chained_columns=False, ) -> Tuple[Expression, Any]: until_one_of = until_one_of or [] main_token = first_token or next(tokens) next_token = None if main_token in String.QUOTES: expression = StringParser.parse(tokens, main_token) elif main_token.isdigit(): expression = Integer(main_token) elif main_token.replace(".", "").isdigit(): expression = Float(main_token) elif lower(main_token) in Boolean.BOOLEAN_VALUES: expression = Boolean(main_token) elif lower(main_token) in Null.VALUES: expression = Null() elif lower(main_token) == Negation.PREDICATE: rest_expression, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of, ) expression = Negation(rest_expression) elif main_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Parenthesis(*arguments) elif main_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) assert next_token == "]", next_token arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Array(*arguments) next_token = next(tokens, None) elif lower(main_token) == "case": argument_tokens, next_token = get_tokens_until_one_of( tokens, ["end"]) assert lower(next_token) == "end" next_token = next(tokens, None) expression = CaseParser.parse(iter(argument_tokens)) elif lower(main_token) == "select": argument_tokens, next_token = get_tokens_until_one_of(tokens, []) next_token = next(tokens, None) expression = SelectStatementParser.parse(iter(argument_tokens)) else: expression = None if next_token is None: next_token = next(tokens, None) # Expressions that need the next_token to be read if expression is None: if next_token is not None and next_token == "(": if lower(main_token) == "cast": column_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["as"]) column, _ = ExpressionParser.parse( iter(column_tokens), is_right_hand=True, until_one_of=until_one_of, ) assert lower(next_token) == "as", next_token next_token = next(tokens) cast_type = Type(next_token) expression = CastFunctionCall(column, cast_type) next_token = next(tokens) assert lower(next_token) == ")", next_token elif lower(main_token) == "array_agg": next_token = next(tokens) if lower(next_token) == "distinct": distinct = True first_token = None else: distinct = False first_token = next_token column_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=[ ")", "ignore", "respects", "order", "limit" ], first_token=first_token, ) column, _ = ExpressionParser.parse( iter(column_tokens), until_one_of=until_one_of) ignore_nulls = respect_nulls = False if lower(next_token) == "ignore": next_token = next(tokens) assert lower(next_token) == "nulls" ignore_nulls = True next_token = next(tokens) elif lower(next_token) == "respect": next_token = next(tokens) assert lower(next_token) == "nulls" respect_nulls = True next_token = next(tokens) if lower(next_token) == "order": next_token = next(tokens) assert lower(next_token) == "by" expression_tokens, next_token = get_tokens_until_one_of( tokens, ["limit", ")"]) order_bys = OrderByParser.parse( iter(expression_tokens)) else: order_bys = None limit = None if lower(next_token) == "limit": next_token = next(tokens) limit = int(next_token) next_token = next(tokens) assert lower(next_token) == ")", next_token expression = ArrayAggFunctionCall( column=column, distinct=distinct, ignore_nulls=ignore_nulls, respect_nulls=respect_nulls, order_bys=order_bys, limit=limit, ) elif lower(main_token) == "count": next_token = next(tokens) if lower(next_token) == "distinct": distinct = True first_token = None else: distinct = False first_token = next_token argument_tokens = get_tokens_until_closing_parenthesis( tokens, first_token=first_token) arguments = ExpressionListParser.parse( iter(argument_tokens)) expression = CountFunctionCall(*arguments, distinct=distinct) else: argument_tokens = get_tokens_until_closing_parenthesis( tokens) arguments_can_be_type = can_be_type or any( lower(t) == "timestamp_trunc" for t in argument_tokens) arguments = ExpressionListParser.parse( iter(argument_tokens), can_be_type=arguments_can_be_type) expression = FunctionCall(main_token, *arguments) next_token = next(tokens, None) if next_token and lower(next_token) == "filter": next_next_token = next(tokens) assert next_next_token == "(", next_next_token argument_tokens = get_tokens_until_closing_parenthesis( tokens) assert lower( argument_tokens[0]) == "where", argument_tokens filter_condition, next_token = ExpressionParser.parse( iter(argument_tokens[1:]), can_alias=False, ) expression = FilteredFunctionCall(expression, filter_condition) next_token = next(tokens, None) elif (next_token is not None and lower(main_token) in DatePartExtraction.PARTS and lower(next_token) == "from"): rest_expression, next_token = ExpressionParser.parse( tokens, until_one_of=until_one_of) expression = DatePartExtraction(main_token, rest_expression) elif lower(main_token) in Type.VALUES and can_be_type: expression = Type(main_token) elif next_token is not None and next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Index( Column(main_token), arguments) # left item will not always be a column next_token = next(tokens, None) elif next_token is not None and main_token == "-" and next_token.isdigit( ): expression = Integer(-int(next_token)) next_token = next(tokens, None) elif (next_token is not None and main_token == "-" and next_token.replace(".", "").isdigit()): expression = Float(-float(next_token)) next_token = next(tokens, None) elif (lower(main_token) in String.PREFIXES and next_token is not None and lower(next_token) in String.QUOTES): expression = StringParser.parse(tokens, start_quote=next_token, prefix=main_token) else: expression = Column(main_token) if lower(next_token) == "over": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = iter( get_tokens_until_closing_parenthesis(tokens)) argument_next_token = next(argument_tokens, None) if lower(argument_next_token) == "partition": argument_next_token = next(argument_tokens, None) if not argument_next_token or lower( argument_next_token) != "by": raise ParsingError("Missing BY after PARTITION") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["order", "rows", "range"]) partition_by = ExpressionListParser.parse( iter(expression_tokens)) else: partition_by = None if lower(argument_next_token) == "order": argument_next_token = next(argument_tokens, None) if not argument_next_token or lower( argument_next_token) != "by": raise ParsingError("Missing BY after ORDER") expression_tokens, argument_next_token = get_tokens_until_one_of( argument_tokens, ["rows", "range"]) order_by = OrderByParser.parse(iter(expression_tokens)) else: order_by = None if lower(argument_next_token) in ("rows", "range"): rows_range = argument_next_token expression_tokens, _ = get_tokens_until_one_of( argument_tokens, []) frame_clause: Optional[WindowFrameClause] = WindowFrameClause( rows_range, " ".join(expression_tokens)) else: frame_clause = None expression = AnalyticsClause( expression, partition_by=partition_by, order_by=order_by, frame_clause=frame_clause, ) next_token = next(tokens, None) while next_token == ".": right_hand, next_token = ExpressionParser.parse( tokens, until_one_of=until_one_of, is_chained_columns=True) expression = ChainedColumns(expression, right_hand) if next_token and next_token in ("+", "-", "*", "/") and not is_chained_columns: left_hand = expression symbol = next_token right_hand, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of, ) expression = ArithmaticOperator(symbol, left_hand, right_hand) while next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = Index(expression, arguments) next_token = next(tokens, None) if is_right_hand or is_chained_columns: return expression, next_token if lower(next_token) in Condition.PREDICATES: first_token = None symbol = next_token if lower(next_token) == "is": next_next_token = next(tokens) if lower(next_next_token) == "not": symbol = "is not" else: first_token = next_next_token elif lower(next_token) == "not": next_next_token = next(tokens) if lower(next_next_token) == "in": symbol = "not in" else: first_token = next_next_token right_hand, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of, first_token=first_token, ) expression = Condition(expression, symbol, right_hand) elif lower(next_token) == "between": symbol = next_token right_hand_left, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of) if lower(next_token) != "and": raise ParsingError("expected AND") right_hand_right, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of) right_hand = BooleanCondition( "and", right_hand_left, right_hand_right, ) expression = Condition(expression, symbol, right_hand) elif next_token in BitwiseOperation.OPERATORS: operator = next_token right_hand, next_token = ExpressionParser.parse( tokens, is_right_hand=True, until_one_of=until_one_of) expression = BitwiseOperation(expression, operator, right_hand) if lower(next_token) in BooleanCondition.PREDICATES: left_hand = expression symbol = next_token right_hand, next_token = ExpressionParser.parse( tokens, until_one_of=until_one_of) right_alias = None if isinstance(right_hand, Alias): right_alias = right_hand right_hand = right_hand.expression expression = BooleanCondition(symbol, left_hand, right_hand) if right_alias is not None: right_alias.expression = expression expression = right_alias if lower(next_token) == "except": opening_parenthesis = next(tokens, None) if opening_parenthesis != "(": raise ParsingError("expected '('") argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = ExceptClause(expression, arguments) next_token = next(tokens, None) if (next_token is not None and next_token != ")" and not (next_token in String.QUOTES and isinstance(expression, String)) and next_token != ";" and lower(next_token) not in until_one_of and can_alias): if lower(next_token) == "as": with_as = True alias, _ = ExpressionParser.parse(tokens, is_right_hand=True, until_one_of=until_one_of) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) return Alias(expression, alias, with_as), next(tokens, None) return expression, next_token
def test_index_access_alias(): actual = ExpressionParser.parse(to_tokens("array[0] alias")) expected = Alias(Index(Column("array"), [Integer(0)]), with_as=False, alias="alias") assert actual == expected