def test_find_projects(
    query_body: MutableMapping[str, Any], expected_projects: Optional[Set[int]]
) -> None:
    events = get_dataset("events")
    if expected_projects is None:
        with pytest.raises(ParsingException):
            snql_query = json_to_snql(query_body, "events")
            query, _ = parse_snql_query(str(snql_query), events)
            identity_translate(query)
    else:
        snql_query = json_to_snql(query_body, "events")
        query, _ = parse_snql_query(str(snql_query), events)
        query = identity_translate(query)
        project_ids_ast = get_object_ids_in_query_ast(query, "project_id")
        assert project_ids_ast == expected_projects

def test_get_time_range() -> None:
    """
    Test finding the time range of a query.
    """
    body = {
        "selected_columns": ["event_id"],
        "conditions": [
            # Cannot test complex conditions based on explicit calls to
            # the `and` and `or` functions, because they would not be
            # parsed as datetime by the old parser.
            ("timestamp", ">=", "2019-09-18T10:00:00"),
            ("timestamp", ">=", "2000-09-18T10:00:00"),
            ("timestamp", "<", "2019-09-19T12:00:00"),
            [("timestamp", "<", "2019-09-18T12:00:00"), ("project_id", "IN", [1])],
            ("project_id", "IN", [1]),
        ],
    }
    events = get_dataset("events")
    query = parse_query(body, events)
    processors = events.get_default_entity().get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(query, HTTPRequestSettings())

    from_date_ast, to_date_ast = get_time_range(identity_translate(query), "timestamp")
    assert (
        from_date_ast is not None
        and isinstance(from_date_ast, datetime)
        and from_date_ast.isoformat() == "2019-09-18T10:00:00"
    )
    assert (
        to_date_ast is not None
        and isinstance(to_date_ast, datetime)
        and to_date_ast.isoformat() == "2019-09-19T12:00:00"
    )

def test_find_projects(
    query_body: MutableMapping[str, Any], expected_projects: Set[int]
) -> None:
    events = get_dataset("events")
    query = identity_translate(parse_query(query_body, events))
    project_ids_ast = get_project_ids_in_query_ast(query, "project_id")
    assert project_ids_ast == expected_projects

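# A sketch of what the tests above assert: project IDs are recovered from the
# query's top-level conditions, where "=" contributes a single ID and "IN"
# contributes each of its literals. The helper below is a hypothetical
# illustration over plain (column, op, literal) triples, not Snuba's
# get_project_ids_in_query_ast / get_object_ids_in_query_ast.
from typing import Sequence, Set, Tuple, Union


def sketch_project_ids(
    and_conditions: Sequence[Tuple[str, str, Union[int, Sequence[int]]]],
    column: str = "project_id",
) -> Set[int]:
    ids: Set[int] = set()
    for col, op, literal in and_conditions:
        if col != column:
            continue
        if op == "=" and isinstance(literal, int):
            ids.add(literal)
        elif op == "IN" and not isinstance(literal, int):
            ids.update(literal)
    return ids


assert sketch_project_ids([("project_id", "IN", [1, 2]), ("group_id", "=", 3)]) == {1, 2}
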
def test_get_time_range() -> None:
    """
    Test finding the time range of a query.
    """
    body = """
        MATCH (events)
        SELECT event_id
        WHERE timestamp >= toDateTime('2019-09-18T10:00:00')
            AND timestamp >= toDateTime('2000-09-18T10:00:00')
            AND timestamp < toDateTime('2019-09-19T12:00:00')
            AND (timestamp < toDateTime('2019-09-18T12:00:00') OR project_id IN tuple(1))
            AND project_id IN tuple(1)
        """
    events = get_dataset("events")
    query, _ = parse_snql_query(body, events)
    processors = events.get_default_entity().get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(query, HTTPQuerySettings())

    from_date_ast, to_date_ast = get_time_range(identity_translate(query), "timestamp")
    assert (
        from_date_ast is not None
        and isinstance(from_date_ast, datetime)
        and from_date_ast.isoformat() == "2019-09-18T10:00:00"
    )
    assert (
        to_date_ast is not None
        and isinstance(to_date_ast, datetime)
        and to_date_ast.isoformat() == "2019-09-19T12:00:00"
    )

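# The assertions above pin down the semantics under test: only top-level AND
# conditions constrain the range, the tightest lower bound wins (2019, not
# 2000), and the condition nested inside OR is ignored because it does not
# bound the query unconditionally. A minimal sketch of those semantics using a
# hypothetical helper over (column, op, datetime) triples rather than Snuba's
# AST:
from datetime import datetime
from typing import Optional, Sequence, Tuple


def sketch_time_range(
    and_conditions: Sequence[Tuple[str, str, datetime]], column: str
) -> Tuple[Optional[datetime], Optional[datetime]]:
    lowers = [lit for col, op, lit in and_conditions if col == column and op == ">="]
    uppers = [lit for col, op, lit in and_conditions if col == column and op == "<"]
    # Most restrictive bounds: the largest lower bound and the smallest upper bound.
    return (max(lowers, default=None), min(uppers, default=None))


assert sketch_time_range(
    [
        ("timestamp", ">=", datetime(2019, 9, 18, 10)),
        ("timestamp", ">=", datetime(2000, 9, 18, 10)),
        ("timestamp", "<", datetime(2019, 9, 19, 12)),
    ],
    "timestamp",
) == (datetime(2019, 9, 18, 10), datetime(2019, 9, 19, 12))
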
def test_time_split_ast() -> None:
    """
    Test that the time split transforms the query properly both on the old
    representation and on the AST representation.
    """
    found_timestamps = []

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
    ) -> QueryResult:
        from_date_ast, to_date_ast = get_time_range(query, "timestamp")
        assert from_date_ast is not None and isinstance(from_date_ast, datetime)
        assert to_date_ast is not None and isinstance(to_date_ast, datetime)
        found_timestamps.append((from_date_ast.isoformat(), to_date_ast.isoformat()))
        return QueryResult({"data": []}, {})

    body = {
        "selected_columns": [
            "event_id",
            "level",
            "logger",
            "server_name",
            "transaction",
            "timestamp",
            "project_id",
        ],
        "conditions": [
            ("timestamp", ">=", "2019-09-18T10:00:00"),
            ("timestamp", "<", "2019-09-19T12:00:00"),
            ("project_id", "IN", [1]),
        ],
        "limit": 10,
        "orderby": ["-timestamp"],
    }
    query = parse_query(body, get_dataset("events"))
    entity = get_entity(query.get_from_clause().key)
    settings = HTTPRequestSettings()
    for p in entity.get_query_processors():
        p.process_query(query, settings)

    clickhouse_query = identity_translate(query)
    splitter = TimeSplitQueryStrategy("timestamp")
    splitter.execute(clickhouse_query, settings, do_query)

    assert found_timestamps == [
        ("2019-09-19T11:00:00", "2019-09-19T12:00:00"),
        ("2019-09-19T01:00:00", "2019-09-19T11:00:00"),
        ("2019-09-18T10:00:00", "2019-09-19T01:00:00"),
    ]

def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
) -> None:
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")
    query = identity_translate(parse_query(query_body, events))
    query.set_from_clause(Table("my_table", ColumnSet([]), prewhere_candidates=keys))

    request_settings = HTTPRequestSettings()
    processor = PrewhereProcessor()
    processor.process_query(query, request_settings)

    assert query.get_condition_from_ast() == new_ast_condition
    assert query.get_prewhere_ast() == new_prewhere_ast_condition

def test_time_split_ast() -> None:
    """
    Test that the time split transforms the query properly both on the old
    representation and on the AST representation.
    """
    found_timestamps = []

    def do_query(
        query: ClickhouseQuery,
        query_settings: QuerySettings,
    ) -> QueryResult:
        from_date_ast, to_date_ast = get_time_range(query, "timestamp")
        assert from_date_ast is not None and isinstance(from_date_ast, datetime)
        assert to_date_ast is not None and isinstance(to_date_ast, datetime)
        found_timestamps.append((from_date_ast.isoformat(), to_date_ast.isoformat()))
        return QueryResult({"data": []}, {})

    body = """
        MATCH (events)
        SELECT event_id, level, logger, server_name, transaction, timestamp, project_id
        WHERE timestamp >= toDateTime('2019-09-18T10:00:00')
            AND timestamp < toDateTime('2019-09-19T12:00:00')
            AND project_id IN tuple(1)
        ORDER BY timestamp DESC
        LIMIT 10
        """
    query, _ = parse_snql_query(body, get_dataset("events"))
    entity = get_entity(query.get_from_clause().key)
    settings = HTTPQuerySettings()
    for p in entity.get_query_processors():
        p.process_query(query, settings)

    clickhouse_query = identity_translate(query)
    splitter = TimeSplitQueryStrategy("timestamp")
    splitter.execute(clickhouse_query, settings, do_query)

    assert found_timestamps == [
        ("2019-09-19T11:00:00", "2019-09-19T12:00:00"),
        ("2019-09-19T01:00:00", "2019-09-19T11:00:00"),
        ("2019-09-18T10:00:00", "2019-09-19T01:00:00"),
    ]

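# The expected splits above follow a pattern worth calling out: the strategy
# walks backwards from the end of the time range, issuing a one-hour window
# first and growing each subsequent window until the start of the range is
# reached. The sketch below reproduces that windowing pattern as inferred from
# the assertions above; the helper name and the tenfold growth factor are
# assumptions, not Snuba's TimeSplitQueryStrategy implementation.
from datetime import datetime, timedelta
from typing import List, Tuple


def sketch_time_windows(
    start: datetime, end: datetime, first_window: timedelta = timedelta(hours=1)
) -> List[Tuple[str, str]]:
    windows: List[Tuple[str, str]] = []
    window = first_window
    upper = end
    while upper > start:
        lower = max(start, upper - window)  # clip the window to the range start
        windows.append((lower.isoformat(), upper.isoformat()))
        upper = lower
        window *= 10  # assumed growth factor, consistent with the splits above
    return windows


assert sketch_time_windows(datetime(2019, 9, 18, 10), datetime(2019, 9, 19, 12)) == [
    ("2019-09-19T11:00:00", "2019-09-19T12:00:00"),
    ("2019-09-19T01:00:00", "2019-09-19T11:00:00"),
    ("2019-09-18T10:00:00", "2019-09-19T01:00:00"),
]
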
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    omit_if_final_keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
    final: bool,
) -> None:
    # Cap how many conditions may be promoted into PREWHERE.
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")

    # HACK until we migrate these tests to SnQL
    query_body["selected_columns"] = ["project_id"]
    query_body["conditions"] += [
        ["timestamp", ">=", "2021-01-01T00:00:00"],
        ["timestamp", "<", "2021-01-02T00:00:00"],
        ["project_id", "=", 1],
    ]
    snql_query = json_to_snql(query_body, "events")
    query, _ = parse_snql_query(str(snql_query), events)
    query = identity_translate(query)
    query.set_from_clause(Table("my_table", all_columns, final=final))

    query_settings = HTTPQuerySettings()
    processor = PrewhereProcessor(keys, omit_if_final=omit_if_final_keys)
    processor.process_query(query, query_settings)

    # HACK until we migrate these tests to SnQL
    def verify_expressions(top_level: Expression, expected: Expression) -> bool:
        actual_conds = get_first_level_and_conditions(top_level)
        expected_conds = get_first_level_and_conditions(expected)
        for cond in expected_conds:
            if cond not in actual_conds:
                return False
        return True

    if new_ast_condition:
        condition = query.get_condition()
        assert condition is not None
        assert verify_expressions(condition, new_ast_condition)

    if new_prewhere_ast_condition:
        prewhere = query.get_prewhere_ast()
        assert prewhere is not None
        assert verify_expressions(prewhere, new_prewhere_ast_condition)
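
# The tests above exercise the PREWHERE optimization: conditions on ranked
# candidate columns are moved out of WHERE into ClickHouse's PREWHERE clause,
# capped by settings.MAX_PREWHERE_CONDITIONS. A minimal sketch of that
# selection over plain (column, op, literal) triples; the helper name and the
# ranking-by-candidate-order are assumptions, not Snuba's PrewhereProcessor.
from typing import List, Sequence, Tuple


def sketch_split_prewhere(
    conditions: List[Tuple[str, str, object]],
    candidates: Sequence[str],  # columns eligible for PREWHERE, best first
    max_conditions: int = 2,
) -> Tuple[List[Tuple[str, str, object]], List[Tuple[str, str, object]]]:
    rank = {col: i for i, col in enumerate(candidates)}
    eligible = sorted((c for c in conditions if c[0] in rank), key=lambda c: rank[c[0]])
    prewhere = eligible[:max_conditions]  # cap, as MAX_PREWHERE_CONDITIONS does above
    where = [c for c in conditions if c not in prewhere]
    return where, prewhere


assert sketch_split_prewhere(
    [("message", "LIKE", "%error%"), ("project_id", "=", 1)],
    candidates=["project_id"],
) == ([("message", "LIKE", "%error%")], [("project_id", "=", 1)])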