def visit_order_columns( self, node: Node, visited_children: Tuple[Expression, Node, Any, Any, Any]) -> OrderBy: column, order, _, _, _ = visited_children direction = (OrderByDirection.ASC if order.text == "ASC" else OrderByDirection.DESC) return OrderBy(direction, column)
def test_format_clickhouse_specific_query() -> None: """ Adds a few of the Clickhosue specific fields to the query. """ query = ClickhouseQuery( TableSource("my_table", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column(None, None, "column1")), SelectedExpression("column2", Column(None, "table1", "column2")), ], condition=binary_condition( None, "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, "blabla"), ), groupby=[ Column(None, None, "column1"), Column(None, "table1", "column2") ], having=binary_condition( None, "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123), ), order_by=[ OrderBy(OrderByDirection.ASC, Column(None, None, "column1")) ], array_join=Column(None, None, "column1"), sample=0.1, totals=True, limitby=(10, "environment"), ) query.set_final(True) query.set_offset(50) query.set_limit(100) request_settings = HTTPRequestSettings() clickhouse_query = AstSqlQuery(query, request_settings) expected = { "from": "FROM my_table FINAL SAMPLE 0.1", "group": "GROUP BY (column1, table1.column2) WITH TOTALS", "having": "HAVING eq(column1, 123)", "array_join": "ARRAY JOIN column1", "limit": "LIMIT 100 OFFSET 50", "limitby": "LIMIT 10 BY environment", "order": "ORDER BY column1 ASC", "select": "SELECT column1, table1.column2", "where": "WHERE eq(column1, 'blabla')", } assert clickhouse_query.sql_data() == expected
def test_format_clickhouse_specific_query() -> None: """ Adds a few of the Clickhosue specific fields to the query. """ query = ClickhouseQuery( Table("my_table", ColumnSet([]), final=True, sampling_rate=0.1), selected_columns=[ SelectedExpression("column1", Column(None, None, "column1")), SelectedExpression("column2", Column(None, "table1", "column2")), ], condition=binary_condition( "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, "blabla"), ), groupby=[ Column(None, None, "column1"), Column(None, "table1", "column2") ], having=binary_condition( "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123), ), order_by=[ OrderBy(OrderByDirection.ASC, Column(None, None, "column1")) ], array_join=Column(None, None, "column1"), totals=True, limitby=LimitBy(10, Column(None, None, "environment")), ) query.set_offset(50) query.set_limit(100) request_settings = HTTPRequestSettings() clickhouse_query = format_query(query, request_settings) expected = [ "SELECT column1, table1.column2", ["FROM", "my_table FINAL SAMPLE 0.1"], "ARRAY JOIN column1", "WHERE eq(column1, 'blabla')", "GROUP BY column1, table1.column2 WITH TOTALS", "HAVING eq(column1, 123)", "ORDER BY column1 ASC", "LIMIT 10 BY environment", "LIMIT 100 OFFSET 50", ] assert clickhouse_query.structured() == expected
def test_iterate_over_query() -> None: """ Creates a query with the new AST and iterate over all expressions. """ column1 = Column(None, "t1", "c1") column2 = Column(None, "t1", "c2") function_1 = FunctionCall("alias", "f1", (column1, column2)) function_2 = FunctionCall("alias", "f2", (column2, )) condition = binary_condition(ConditionFunctions.EQ, column1, Literal(None, "1")) prewhere = binary_condition(ConditionFunctions.EQ, column2, Literal(None, "2")) orderby = OrderBy(OrderByDirection.ASC, function_2) query = Query( Table("my_table", ColumnSet([])), selected_columns=[SelectedExpression("alias", function_1)], array_join=None, condition=condition, groupby=[function_1], prewhere=prewhere, having=None, order_by=[orderby], ) expected_expressions = [ # selected columns column1, column2, function_1, # condition column1, Literal(None, "1"), condition, # groupby column1, column2, function_1, # order by column2, function_2, # prewhere column2, Literal(None, "2"), prewhere, ] assert list(query.get_all_expressions()) == expected_expressions
def visit_order_list( self, node: Node, visited_children: Tuple[OrderBy, Expression, Node]) -> Sequence[OrderBy]: left_order_list, right_order, order = visited_children ret: List[OrderBy] = [] # in the case of one OrderBy # left_order_list will be an empty node if not isinstance(left_order_list, Node): if not isinstance(left_order_list, (list, tuple)): ret.append(left_order_list) else: for p in left_order_list: ret.append(p) direction = (OrderByDirection.ASC if order.text == "ASC" else OrderByDirection.DESC) ret.append(OrderBy(direction, right_order)) return ret
lhs=Column("al", None, "column3"), rhs=Literal(None, "blabla"), ), groupby=[ Column(None, None, "column1"), Column(None, "table1", "column2"), Column("al", None, "column3"), Column(None, None, "column4"), ], having=binary_condition( "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123), ), order_by=[ OrderBy(OrderByDirection.ASC, Column(None, None, "column1")), OrderBy(OrderByDirection.DESC, Column(None, "table1", "column2")), ], ), [ "SELECT column1, table1.column2, (column3 AS al)", ["FROM", "my_table"], "WHERE eq(al, 'blabla')", "GROUP BY column1, table1.column2, al, column4", "HAVING eq(column1, 123)", "ORDER BY column1 ASC, table1.column2 DESC", ], ("SELECT column1, table1.column2, (column3 AS al) " "FROM my_table " "WHERE eq(al, 'blabla') "
) LOGICAL_QUERY = LogicalQuery( from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5), selected_columns=[ SelectedExpression("c1", Column("_snuba_c1", "t", "c")), SelectedExpression( "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))), ], array_join=Column(None, None, "col"), condition=binary_condition("equals", Column(None, None, "c4"), Literal(None, "asd")), groupby=[Column(None, "t", "c4")], having=binary_condition("equals", Column(None, None, "c6"), Literal(None, "asd2")), order_by=[OrderBy(OrderByDirection.ASC, Column(None, "t", "c"))], limitby=LimitBy(100, Column(None, None, "c8")), limit=150, ) SIMPLE_FORMATTED = { "FROM": { "ENTITY": EntityKey.EVENTS, "SAMPLE": "0.5" }, "SELECT": [["c1", "(t.c AS _snuba_c1)"], ["f1", ["_snuba_f1", "f", ["t.c2"]]]], "ARRAYJOIN": "col", "WHERE": ["equals", ["c4", "asd"]], "GROUPBY": ["t.c4"], "HAVING": ["equals", ["c6", "asd2"]],
def test_replace_expression() -> None: """ Create a query with the new AST and replaces a function with a different function replaces f1(...) with tag(f1) """ column1 = Column(None, "t1", "c1") column2 = Column(None, "t1", "c2") function_1 = FunctionCall("alias", "f1", (column1, column2)) function_2 = FunctionCall("alias", "f2", (column2,)) condition = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "1")) prewhere = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "2")) orderby = OrderBy(OrderByDirection.ASC, function_2) query = Query( Table("my_table", ColumnSet([])), selected_columns=[SelectedExpression("alias", function_1)], array_join=None, condition=condition, groupby=[function_1], having=None, prewhere=prewhere, order_by=[orderby], ) def replace(exp: Expression) -> Expression: if isinstance(exp, FunctionCall) and exp.function_name == "f1": return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),)) return exp query.transform_expressions(replace) expected_query = Query( Table("my_table", ColumnSet([])), selected_columns=[ SelectedExpression( "alias", FunctionCall("alias", "tag", (Literal(None, "f1"),)) ) ], array_join=None, condition=binary_condition( ConditionFunctions.EQ, FunctionCall("alias", "tag", (Literal(None, "f1"),)), Literal(None, "1"), ), groupby=[FunctionCall("alias", "tag", (Literal(None, "f1"),))], prewhere=binary_condition( ConditionFunctions.EQ, FunctionCall("alias", "tag", (Literal(None, "f1"),)), Literal(None, "2"), ), having=None, order_by=[orderby], ) assert query.get_selected_columns() == expected_query.get_selected_columns() assert query.get_condition() == expected_query.get_condition() assert query.get_groupby() == expected_query.get_groupby() assert query.get_having() == expected_query.get_having() assert query.get_orderby() == expected_query.get_orderby() assert list(query.get_all_expressions()) == list( expected_query.get_all_expressions() )
pytest.param( LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ SelectedExpression("count", FunctionCall("_snuba_count", "count", ())), ], condition=binary_condition( "equals", Column("_snuba_project_id", None, "project_id"), Literal(None, 1), ), order_by=[ OrderBy( OrderByDirection.ASC, Column("_snuba_timestamp", None, "timestamp") ) ], ), id="no orderby clauses", ), pytest.param( LogicalQuery( QueryEntity( EntityKey.METRICS_COUNTERS, get_entity(EntityKey.METRICS_COUNTERS).get_data_model(), ), selected_columns=[SelectedExpression("value", Column(None, None, "value"))], condition=binary_condition( BooleanFunctions.AND, binary_condition(
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query: def build_selected_expressions( raw_expressions: Sequence[Any], ) -> List[SelectedExpression]: output = [] for raw_expression in raw_expressions: exp = parse_expression( tuplify(raw_expression), entity.get_data_model(), set() ) output.append( SelectedExpression( # An expression in the query can be a string or a # complex list with an alias. In the second case # we trust the parser to find the alias. name=raw_expression if isinstance(raw_expression, str) else exp.alias, expression=exp, ) ) return output aggregations = [] for aggregation in body.get("aggregations", []): if not isinstance(aggregation, Sequence): raise ParsingException( ( f"Invalid aggregation structure {aggregation}. " "It must be a sequence containing expression, column and alias." ) ) aggregation_function = aggregation[0] column_expr = aggregation[1] column_expr = column_expr if column_expr else [] alias = aggregation[2] alias = alias if alias else None aggregations.append( SelectedExpression( name=alias, expression=parse_aggregation( aggregation_function, column_expr, alias, entity.get_data_model(), set(), ), ) ) groupby_clause = build_selected_expressions(to_list(body.get("groupby", []))) select_clause = ( groupby_clause + aggregations + build_selected_expressions(body.get("selected_columns", [])) ) array_join_cols = set() arrayjoin = body.get("arrayjoin") # TODO: Properly detect all array join columns in all clauses of the query. # This is missing an arrayJoin in condition with an alias that is then # used in the select. if arrayjoin: array_join_cols.add(arrayjoin) array_join_expr: Optional[Expression] = parse_expression( body["arrayjoin"], entity.get_data_model(), {arrayjoin} ) else: array_join_expr = None for select_expr in select_clause: if isinstance(select_expr.expression, FunctionCall): if select_expr.expression.function_name == "arrayJoin": parameters = select_expr.expression.parameters if len(parameters) != 1: raise ParsingException( "arrayJoin(...) only accepts a single parameter." ) if isinstance(parameters[0], Column): array_join_cols.add(parameters[0].column_name) else: # We only accepts columns or functions that do not # reference columns. We could not say whether we are # actually arrayjoining on the values of the column # if it is nested in an arbitrary function. But # functions of literals are fine. for e in parameters[0]: if isinstance(e, Column): raise ParsingException( "arrayJoin(...) cannot contain columns nested in functions." ) where_expr = parse_conditions_to_expr( body.get("conditions", []), entity, array_join_cols ) having_expr = parse_conditions_to_expr( body.get("having", []), entity, array_join_cols ) orderby_exprs = [] for orderby in to_list(body.get("orderby", [])): if isinstance(orderby, str): match = NEGATE_RE.match(orderby) if match is None: raise ParsingException( ( f"Invalid Order By clause {orderby}. If the Order By is a string, " "it must respect the format `[-]column`" ) ) direction, col = match.groups() orderby = col elif is_function(orderby): match = NEGATE_RE.match(orderby[0]) if match is None: raise ParsingException( ( f"Invalid Order By clause {orderby}. If the Order By is an expression, " "the function name must respect the format `[-]func_name`" ) ) direction, col = match.groups() orderby = [col] + orderby[1:] else: raise ParsingException( ( f"Invalid Order By clause {orderby}. The Clause was neither " "a string nor a function call." ) ) orderby_parsed = parse_expression( tuplify(orderby), entity.get_data_model(), set() ) orderby_exprs.append( OrderBy( OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC, orderby_parsed, ) ) return Query( body, None, selected_columns=select_clause, array_join=array_join_expr, condition=where_expr, groupby=[g.expression for g in groupby_clause], having=having_expr, order_by=orderby_exprs, limitby=body.get("limitby"), sample=body.get("sample"), limit=body.get("limit", None), offset=body.get("offset", 0), totals=body.get("totals", False), granularity=body.get("granularity"), )
func(column3) DESC """.format(conditions=snql_conditions_with_default()), Query( QueryEntity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()), selected_columns=[ SelectedExpression("column1", Column("_snuba_column1", None, "column1")), SelectedExpression("column2", Column("_snuba_column2", None, "column2")), ], condition=with_required(), groupby=None, having=None, order_by=[ OrderBy(OrderByDirection.ASC, Column("_snuba_column1", None, "column1")), OrderBy(OrderByDirection.DESC, Column("_snuba_column2", None, "column2")), OrderBy( OrderByDirection.DESC, FunctionCall( None, "func", (Column("_snuba_column3", None, "column3"), )), ), ], limit=1000, ), id="Order by with functions", ), pytest.param( """
"orderby": ["column1", "-column2", ["-func", ["column3"]]], }, Query( {}, TableSource("events", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column("column1", None, "column1")), SelectedExpression("column2", Column("column2", None, "column2")), ], condition=None, groupby=None, having=None, order_by=[ OrderBy(OrderByDirection.ASC, Column("column1", None, "column1")), OrderBy(OrderByDirection.DESC, Column("column2", None, "column2")), OrderBy( OrderByDirection.DESC, FunctionCall(None, "func", (Column("column3", None, "column3"), )), ), ], ), id="Order by with functions", ), pytest.param( { "selected_columns": [], "groupby": "column1",
), SelectedExpression("c", Column("_snuba_c", None, "c")), SelectedExpression("d", Column("_snuba_d", None, "d")), SelectedExpression( "2+7", FunctionCall(None, "plus", (Literal(None, 2), Literal(None, 7))), ), ], condition=binary_condition( "less", Column("_snuba_a", None, "a"), Literal(None, 3) ), groupby=[ Column("_snuba_d", None, "d"), FunctionCall(None, "plus", (Literal(None, 2), Literal(None, 7))), ], order_by=[OrderBy(OrderByDirection.DESC, Column("_snuba_f", None, "f"))], ), id="Simple complete query with example of parenthesized arithmetic expression in SELECT", ), pytest.param( "MATCH (events) SELECT (2*(4-5)+3), foo(c) AS thing2, c BY d, 2+7 WHERE a<3 ORDER BY f DESC", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ SelectedExpression( "(2*(4-5)+3)", FunctionCall( None, "plus",
), ], condition=binary_condition(None, "less", Column(None, None, "a"), Literal(None, 3)), groupby=[ Column(None, None, "d"), FunctionCall( None, "plus", ( Literal(None, 2), Literal(None, 7), ), ), ], order_by=[OrderBy(OrderByDirection.DESC, Column(None, None, "f"))], ), id= "Simple complete query with example of parenthesized arithmetic expression in COLLECT", ), pytest.param( "MATCH (blah) WHERE time_seen<3 AND last_seen=2 AND c=2 AND d=3 COLLECT a", Query( {}, None, selected_columns=[ SelectedExpression( name="a", expression=Column(alias=None, table_name=None, column_name="a"),