def test_format_clickhouse_specific_query() -> None: """ Adds a few of the Clickhosue specific fields to the query. """ query = Query( { "sample": 0.1, "totals": True, "limitby": (10, "environment") }, TableSource("my_table", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column(None, None, "column1")), SelectedExpression("column2", Column(None, "table1", "column2")), ], condition=binary_condition( None, "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, "blabla"), ), groupby=[ Column(None, None, "column1"), Column(None, "table1", "column2") ], having=binary_condition( None, "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123), ), order_by=[ OrderBy(OrderByDirection.ASC, Column(None, None, "column1")) ], array_join=Column(None, None, "column1"), ) query.set_final(True) query.set_offset(50) query.set_limit(100) request_settings = HTTPRequestSettings() clickhouse_query = AstSqlQuery(query, request_settings) expected = { "from": "FROM my_table FINAL SAMPLE 0.1", "group": "GROUP BY (column1, table1.column2) WITH TOTALS", "having": "HAVING eq(column1, 123)", "array_join": "ARRAY JOIN column1", "limit": "LIMIT 100 OFFSET 50", "limitby": "LIMIT 10 BY environment", "order": "ORDER BY column1 ASC", "select": "SELECT column1, table1.column2", "where": "WHERE eq(column1, 'blabla')", } assert clickhouse_query.sql_data() == expected
def visit_order_columns( self, node: Node, visited_children: Tuple[Expression, Node, Any, Any, Any] ) -> OrderBy: column, order, _, _, _ = visited_children direction = ( OrderByDirection.ASC if order.text == "ASC" else OrderByDirection.DESC ) return OrderBy(direction, column)
def test_iterate_over_query(): """ Creates a query with the new AST and iterate over all expressions. """ column1 = Column(None, "t1", "c1") column2 = Column(None, "t1", "c2") function_1 = FunctionCall("alias", "f1", (column1, column2)) function_2 = FunctionCall("alias", "f2", (column2, )) condition = binary_condition(None, ConditionFunctions.EQ, column1, Literal(None, "1")) orderby = OrderBy(OrderByDirection.ASC, function_2) query = Query( {}, TableSource("my_table", ColumnSet([])), selected_columns=[SelectedExpression("alias", function_1)], array_join=None, condition=condition, groupby=[function_1], having=None, order_by=[orderby], ) expected_expressions = [ # selected columns column1, column2, function_1, # condition column1, Literal(None, "1"), condition, # groupby column1, column2, function_1, # order by column2, function_2, ] assert list(query.get_all_expressions()) == expected_expressions
def visit_order_list( self, node: Node, visited_children: Tuple[OrderBy, Expression, Node] ) -> Sequence[OrderBy]: left_order_list, right_order, order = visited_children ret: List[OrderBy] = [] # in the case of one OrderBy # left_order_list will be an empty node if not isinstance(left_order_list, Node): if not isinstance(left_order_list, (list, tuple)): ret.append(left_order_list) else: for p in left_order_list: ret.append(p) direction = ( OrderByDirection.ASC if order.text == "ASC" else OrderByDirection.DESC ) ret.append(OrderBy(direction, right_order)) return ret
), ], condition=binary_condition(None, "less", Column(None, None, "a"), Literal(None, 3)), groupby=[ Column(None, None, "d"), FunctionCall( None, "plus", ( Literal(None, 2), Literal(None, 7), ), ), ], order_by=[OrderBy(OrderByDirection.DESC, Column(None, None, "f"))], ), id= "Simple complete query with example of parenthesized arithmetic expression in COLLECT", ), pytest.param( "MATCH (blah) WHERE time_seen<3 AND last_seen=2 AND c=2 AND d=3 COLLECT a", Query( {}, None, selected_columns=[ SelectedExpression( name="a", expression=Column(alias=None, table_name=None, column_name="a"),
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query: def build_selected_expressions( raw_expressions: Sequence[Any], ) -> List[SelectedExpression]: output = [] for raw_expression in raw_expressions: exp = parse_expression(tuplify(raw_expression), entity.get_data_model(), set()) output.append( SelectedExpression( # An expression in the query can be a string or a # complex list with an alias. In the second case # we trust the parser to find the alias. name=raw_expression if isinstance(raw_expression, str) else exp.alias, expression=exp, )) return output aggregations = [] for aggregation in body.get("aggregations", []): if not isinstance(aggregation, Sequence): raise ParsingException(( f"Invalid aggregation structure {aggregation}. " "It must be a sequence containing expression, column and alias." )) aggregation_function = aggregation[0] column_expr = aggregation[1] column_expr = column_expr if column_expr else [] alias = aggregation[2] alias = alias if alias else None aggregations.append( SelectedExpression( name=alias, expression=parse_aggregation( aggregation_function, column_expr, alias, entity.get_data_model(), set(), ), )) groupby_clause = build_selected_expressions( to_list(body.get("groupby", []))) select_clause = ( groupby_clause + aggregations + build_selected_expressions(body.get("selected_columns", []))) array_join_cols = set() arrayjoin = body.get("arrayjoin") # TODO: Properly detect all array join columns in all clauses of the query. # This is missing an arrayJoin in condition with an alias that is then # used in the select. if arrayjoin: array_join_cols.add(arrayjoin) array_join_expr: Optional[Expression] = parse_expression( body["arrayjoin"], entity.get_data_model(), {arrayjoin}) else: array_join_expr = None for select_expr in select_clause: if isinstance(select_expr.expression, FunctionCall): if select_expr.expression.function_name == "arrayJoin": parameters = select_expr.expression.parameters if len(parameters) != 1: raise ParsingException( "arrayJoin(...) only accepts a single parameter.") if isinstance(parameters[0], Column): array_join_cols.add(parameters[0].column_name) else: # We only accepts columns or functions that do not # reference columns. We could not say whether we are # actually arrayjoining on the values of the column # if it is nested in an arbitrary function. But # functions of literals are fine. for e in parameters[0]: if isinstance(e, Column): raise ParsingException( "arrayJoin(...) cannot contain columns nested in functions." ) where_expr = parse_conditions_to_expr(body.get("conditions", []), entity, array_join_cols) having_expr = parse_conditions_to_expr(body.get("having", []), entity, array_join_cols) orderby_exprs = [] for orderby in to_list(body.get("orderby", [])): if isinstance(orderby, str): match = NEGATE_RE.match(orderby) if match is None: raise ParsingException(( f"Invalid Order By clause {orderby}. If the Order By is a string, " "it must respect the format `[-]column`")) direction, col = match.groups() orderby = col elif is_function(orderby): match = NEGATE_RE.match(orderby[0]) if match is None: raise ParsingException(( f"Invalid Order By clause {orderby}. If the Order By is an expression, " "the function name must respect the format `[-]func_name`" )) direction, col = match.groups() orderby = [col] + orderby[1:] else: raise ParsingException( (f"Invalid Order By clause {orderby}. The Clause was neither " "a string nor a function call.")) orderby_parsed = parse_expression(tuplify(orderby), entity.get_data_model(), set()) orderby_exprs.append( OrderBy( OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC, orderby_parsed, )) return Query( body, None, selected_columns=select_clause, array_join=array_join_expr, condition=where_expr, groupby=[g.expression for g in groupby_clause], having=having_expr, order_by=orderby_exprs, )
rhs=Literal(None, "blabla"), ), groupby=[ Column(None, None, "column1"), Column(None, "table1", "column2"), Column("al", None, "column3"), Column(None, None, "column4"), ], having=binary_condition( None, "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123), ), order_by=[ OrderBy(OrderByDirection.ASC, Column(None, None, "column1")), OrderBy(OrderByDirection.DESC, Column(None, "table1", "column2")), ], ), { "from": "FROM my_table", "group": "GROUP BY (column1, table1.column2, al, column4)", "having": "HAVING eq(column1, 123)", "order": "ORDER BY column1 ASC, table1.column2 DESC", "select": "SELECT column1, table1.column2, (column3 AS al)", "where": "WHERE eq(al, 'blabla')", }, id="Query_with_aliases", ), pytest.param(
"orderby": ["column1", "-column2", ["-func", ["column3"]]], }, Query( {}, TableSource("events", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column("column1", None, "column1")), SelectedExpression("column2", Column("column2", None, "column2")), ], condition=None, groupby=None, having=None, order_by=[ OrderBy(OrderByDirection.ASC, Column("column1", None, "column1")), OrderBy(OrderByDirection.DESC, Column("column2", None, "column2")), OrderBy( OrderByDirection.DESC, FunctionCall(None, "func", (Column("column3", None, "column3"), )), ), ], ), id="Order by with functions", ), pytest.param( { "selected_columns": [], "groupby": "column1",
def test_replace_expression(): """ Create a query with the new AST and replaces a function with a different function replaces f1(...) with tag(f1) """ column1 = Column(None, "t1", "c1") column2 = Column(None, "t1", "c2") function_1 = FunctionCall("alias", "f1", (column1, column2)) function_2 = FunctionCall("alias", "f2", (column2, )) condition = binary_condition(None, ConditionFunctions.EQ, function_1, Literal(None, "1")) orderby = OrderBy(OrderByDirection.ASC, function_2) query = Query( {}, TableSource("my_table", ColumnSet([])), selected_columns=[SelectedExpression("alias", function_1)], array_join=None, condition=condition, groupby=[function_1], having=None, order_by=[orderby], ) def replace(exp: Expression) -> Expression: if isinstance(exp, FunctionCall) and exp.function_name == "f1": return FunctionCall(exp.alias, "tag", (Literal(None, "f1"), )) return exp query.transform_expressions(replace) expected_query = Query( {}, TableSource("my_table", ColumnSet([])), selected_columns=[ SelectedExpression( "alias", FunctionCall("alias", "tag", (Literal(None, "f1"), ))) ], array_join=None, condition=binary_condition( None, ConditionFunctions.EQ, FunctionCall("alias", "tag", (Literal(None, "f1"), )), Literal(None, "1"), ), groupby=[FunctionCall("alias", "tag", (Literal(None, "f1"), ))], having=None, order_by=[orderby], ) assert (query.get_selected_columns_from_ast() == expected_query.get_selected_columns_from_ast()) assert query.get_condition_from_ast( ) == expected_query.get_condition_from_ast() assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast( ) assert query.get_having_from_ast() == expected_query.get_having_from_ast() assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast( ) assert list(query.get_all_expressions()) == list( expected_query.get_all_expressions())