def test_merge_term() -> None:
    """Check that a query made of a single merge term passes ``assert_round_trip``."""
    # the sub query that gets merged in under the name "foo123"
    merged_query = Query.by(AllTerm()).traverse_in(until=Navigation.Max).filter("foo")
    merges = [MergeQuery("foo123", merged_query)]
    # the same kind term surrounds the merge block on both sides
    merge_term = MergeTerm(Query.mk_term("bla"), merges, Query.mk_term("bla"))
    assert_round_trip(term_parser, Query.by(merge_term))
def test_combine() -> None:
    """Exercise ``Query.combine`` with filters, navigations, limits and rejected combinations."""

    def base() -> Query:
        # every rendering scenario starts with the same filter followed by an outbound traversal
        return Query.by(P("test") == True).traverse_out()  # noqa: E712

    # plain filters are and-ed together behind the navigation
    only_filters = base().combine(Query.by("foo")).combine(Query.by("bla"))
    assert str(only_filters) == 'test == true -default-> (is("foo") and is("bla"))'

    # combined queries that end in a navigation are chained one after the other
    with_navigations = base().combine(Query.by("foo").traverse_out()).combine(Query.by("bla").traverse_out())
    assert str(with_navigations) == 'test == true -default-> is("foo") -default-> is("bla") -default->'

    # the combined filters are and-ed onto the last filter of the chain
    onto_last_filter = (
        base().filter("boo").traverse_out().filter("bar").combine(Query.by("foo")).combine(Query.by("bla"))
    )
    assert (
        str(onto_last_filter)
        == 'test == true -default-> is("boo") -default-> ((is("bar") and is("foo")) and is("bla"))'
    )

    both_limited = Query.by("a").with_limit(10).combine(Query.by("b").with_limit(2))
    assert both_limited.current_part.limit == Limit(0, 2)  # minimum is taken

    rejected = [
        # can not combine 2 aggregations
        ("aggregate(sum(1)): is(a)", "aggregate(sum(1)): is(a)"),
        # can not combine 2 with statements
        ("is(foo) with(empty, -default->)", "is(bla) with(empty, -default->)"),
    ]
    for left, right in rejected:
        with pytest.raises(AttributeError):
            parse_query(left).combine(parse_query(right))
def test_merge_query_creation() -> None:
    """Check ``Part.merge_queries_for`` and ``Query.rewrite_for_ancestors_descendants``."""
    to_ancestors = Navigation(1, Navigation.Max, direction=Direction.inbound)
    foo_query = Query([Part(IsTerm(["foo"])), Part(AllTerm(), navigation=to_ancestors)])
    merge_foo = [MergeQuery("ancestors.foo", foo_query)]
    foo_path = ["ancestors.foo.reported.bla"]

    # merge_foo is created automatically
    created = Part(AllTerm()).merge_queries_for(foo_path)
    assert created == merge_foo

    # merge_foo is already included and not added
    already_merged = Part(MergeTerm(AllTerm(), merge_foo))
    assert already_merged.merge_queries_for(foo_path) == merge_foo

    invalid_paths = [
        # neither ancestors/descendants
        "unknown.foo.reported.bla",
        # no path is given
        "ancestors.foo",
    ]
    for invalid in invalid_paths:
        with pytest.raises(Exception):
            Part(AllTerm()).merge_queries_for([invalid])

    # rewrite for ancestors/descendants also work with additional properties
    props = ["ancestors.kind.reported.prop", "test", "a"]
    rewritten = Query.by("test").rewrite_for_ancestors_descendants(props)
    assert str(rewritten) == 'is("test") {ancestors.kind: all <-default[1:]- is("kind")}'

    # same list content as above, built again so each call receives its own list
    props_again = ["ancestors.kind.reported.prop", "test", "a"]
    with_cloud = Query.by("test").merge_with("ancestors.cloud", NavigateUntilRoot, IsTerm(["cloud"]))
    rewritten_with_cloud = with_cloud.rewrite_for_ancestors_descendants(props_again)
    assert (
        str(rewritten_with_cloud)
        == 'is("test") {ancestors.kind: all <-default[1:]- is("kind"), ancestors.cloud: all <-default[1:]- is("cloud")}'
    )
def test_sort_order_for_synthetic_prop(foo_model: Model, graph_db: GraphDB) -> None:
    """Check the SORT clause that ``to_query`` renders for different sort properties."""
    # (property passed to add_sort, sort expression expected in the rendered query)
    expectations = [
        ("reported.age", "m0.reported.ctime desc"),
        ("some.age", "m0.some.age asc"),
        ("reported.ctime", "m0.reported.ctime asc"),
        ("metadata.expired", "m0.metadata.expired asc"),
    ]
    for sort_prop, expected_sort in expectations:
        sorted_query = Query.by("foo").add_sort(sort_prop)
        query_str, _ = to_query(graph_db, QueryModel(sorted_query, foo_model))
        assert f"SORT {expected_sort}" in query_str, f"Expected {expected_sort} in {query_str}"
async def test_jq_command(cli: CLI) -> None:
    """Check ``JqCommand.rewrite_props`` and the ``jq`` command when run through the CLI."""
    ctx = CLIContext(env={"section": "reported"}, query=Query.by("test"))
    # (jq expression, expression expected after rewriting for the "reported" section)
    rewrites = [
        # .test -> .reported.test
        (".a,.b", ".reported.a,.reported.b"),
        # absolute paths are rewritten correctly
        ("./reported", ".reported"),
        # object construction is supported
        ("{a:.a, b:.b}", "{a:.reported.a, b:.reported.b}"),
        # no replacement after pipe
        ("map(.color) | {a:.a, b:.b}", "map(.reported.color) | {a:.a, b:.b}"),
    ]
    for expression, expected in rewrites:
        assert JqCommand.rewrite_props(expression, ctx) == expected

    nested = await cli.execute_cli_command('json {"a":{"b":1}} | jq ".a.b"', stream.list)
    first_stream = nested[0]
    assert len(first_stream) == 1
    assert first_stream[0] == 1

    # allow absolute paths as json path
    absolute = await cli.execute_cli_command(
        'json {"id":"123", "reported":{"b":1}} | jq "./reported"', stream.list
    )
    assert absolute == [[{"b": 1}]]

    # jq .kind is rewritten as .reported.kind
    kinds = await cli.execute_cli_command("search is(foo) limit 2 | jq .kind", stream.list)
    assert kinds[0] == ["foo", "foo"]
def test_query() -> None:
    """Build a query with merge, navigation, with-clause, aggregation, sort and limit.

    The rendered string is compared literally and the query is then passed to
    ``assert_round_trip`` together with ``query_parser``.
    """
    # first part: kind and property filters, with "cloud" merged in via an inbound navigation
    query = Query.by("ec2", P("cpu") > 4, (P("mem") < 23) | (P("mem") < 59))
    inbound = Navigation(1, Navigation.Max, direction=Direction.inbound)
    query = query.merge_with("cloud", inbound, Query.mk_term("cloud"))
    # second part
    query = query.traverse_out().filter(P("some.int.value") < 1, P("some.other") == 23)
    # third part, including a function term and a with clause
    in_subnet_term = P.function("in_subnet").on("ip", "1.2.3.4/96")
    query = query.traverse_out().filter(P("active") == 12, in_subnet_term)
    query = query.filter_with(WithClause(WithClauseFilter("==", 0), Navigation()))
    # aggregation, sort and limit
    group_vars = [AggregateVariable(AggregateVariableName("foo"))]
    group_functions = [AggregateFunction("sum", "cpu")]
    query = query.group_by(group_vars, group_functions)
    query = query.add_sort(Sort("test", "asc")).with_limit(10)

    expected = (
        'aggregate(foo: sum(cpu)):((is("ec2") and cpu > 4) and (mem < 23 or mem < 59)) '
        '{cloud: all <-default[1:]- is("cloud")} -default-> '
        "(some.int.value < 1 and some.other == 23) -default-> "
        '(active == 12 and in_subnet(ip, "1.2.3.4/96")) '
        "with(empty, -default->) sort test asc limit 10"
    )
    assert str(query) == expected
    assert_round_trip(query_parser, query)
async def load_graph(db: GraphDB, model: Model, base_id: str = "sub_root") -> MultiDiGraph:
    """Search the graph reachable from the ``foo`` node with the given identifier.

    :param db: the graph database to search in.
    :param model: the model used to interpret the query.
    :param base_id: the ``identifier`` value of the ``foo`` node to start from.
    :return: the graph returned by ``db.search_graph``.
    """
    start = Query.by("foo", P("identifier") == base_id)
    # outbound traversal from depth 0 up to Navigation.Max
    everything_below = start.traverse_out(0, Navigation.Max)
    query_model = QueryModel(everything_below.on_section("reported"), model)
    return await db.search_graph(query_model)
async def test_query_not(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Check that negating the ``foo`` kind term returns 102 entries from the filled graph."""
    # select everything that is not foo --> should be blas
    not_foo = Query.mk_term("foo").not_term()
    query_model = QueryModel(Query.by(not_foo).on_section("reported"), foo_model)
    async with await filled_graph_db.search_list(query_model) as cursor:
        blas = [from_js(element["reported"], Bla) async for element in cursor]
        assert len(blas) == 102
def test_simplify() -> None:
    """Check how combinations with ``AllTerm`` are rendered."""
    # some_criteria | all => all
    either = IsTerm(["test"]) | AllTerm()
    assert str(either) == "all"

    # some_criteria & all => some_criteria
    both = IsTerm(["test"]) & AllTerm()
    assert str(both) == 'is("test")'

    # also works in nested setup
    nested = AllTerm() & ((P("test") == True) & (IsTerm(["test"]) | AllTerm()))  # noqa: E712
    assert str(Query.by(nested)) == "test == true"
def test_ip_range() -> None:
    """Check the expression and bind variable produced by ``in_subnet`` for a /24 network."""
    bind_vars: Json = {}
    subnet_term = FunctionTerm("in_subnet", "foo.bla", ["192.168.1.0/24"])
    query_model = QueryModel(Query.by(IsTerm(["foo"])).on_section("reported"), Model.empty())

    rendered = in_subnet("crs", bind_vars, subnet_term, query_model)

    # 4294967040 == 0xFFFFFF00, the /24 netmask
    assert rendered == "BIT_AND(IPV4_TO_NUMBER(crs.foo.bla), 4294967040) == @0"
    # 3232235776 is 192.168.1.0 written as a 32 bit number
    assert bind_vars["0"] == 3232235776
def test_sort_order() -> None:
    """Check ``sort_parser`` results, including the ``asc`` default when no order is given."""
    # (sort expression, expected parse result)
    cases = [
        ("sort foo", [Sort("foo", "asc")]),
        ("sort foo asc", [Sort("foo", "asc")]),
        ("sort foo asc, bla desc, bar", [Sort("foo", "asc"), Sort("bla", "desc"), Sort("bar", "asc")]),
    ]
    for expression, expected in cases:
        assert sort_parser.parse(expression) == expected

    twice_sorted = Query.by("test").add_sort("test").add_sort("goo")
    assert_round_trip(query_parser, twice_sorted)
def test_has_key() -> None:
    """Check the expression and bind variable produced by ``has_key`` for a key list and a single key."""
    model = QueryModel(Query.by("foo"), Model.empty())
    # (function argument, expected expression, expected value bound to "fn0")
    cases = [
        (["a", "b", "c"], "@fn0 ALL IN ATTRIBUTES(crs.foo.bla, true)", ["a", "b", "c"]),
        ("a", "HAS(crs.foo.bla, @fn0)", "a"),
    ]
    for key_arg, expected_expression, expected_binding in cases:
        # every call gets its own bind variables
        bind_vars: Json = {}
        rendered = has_key("crs", bind_vars, FunctionTerm("has_key", "foo.bla", [key_arg]), model)
        assert rendered == expected_expression
        assert bind_vars["fn0"] == expected_binding
def test_simple_query() -> None:
    """Check the string rendering of a three part query connected by outbound traversals."""
    query = Query.by("ec2", P("cpu") > 4, (P("mem") < 23) | (P("mem") < 59))
    query = query.traverse_out().filter(P("some.int.value") < 1, P("some.other") == 23)
    in_subnet_term = P.function("in_subnet").on("ip", "1.2.3.4/32")
    query = query.traverse_out().filter(P("active") == 12, in_subnet_term)

    expected = (
        '((is("ec2") and cpu > 4) and (mem < 23 or mem < 59)) -default-> '
        "(some.int.value < 1 and some.other == 23) -default-> "
        '(active == 12 and in_subnet(ip, "1.2.3.4/32"))'
    )
    assert str(query) == expected
async def test_query_graph(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Search graphs in the filled database and check node/edge counts, tagging and edge types.

    :param filled_graph_db: fixture providing the pre-filled graph database.
    :param foo_model: fixture providing the model used to interpret the queries.
    """
    graph = await load_graph(filled_graph_db, foo_model)
    assert len(graph.edges) == 110
    assert len(graph.nodes.values()) == 111

    # filter data and tag result, and then traverse to the end of the graph in both directions
    around_me = Query.by("foo", P("identifier") == "9").tag("red").traverse_inout(start=0)
    graph = await filled_graph_db.search_graph(QueryModel(around_me.on_section("reported"), foo_model))
    assert len(set(graph.nodes)) == 12
    assert GraphAccess.root_id(graph) == "sub_root"
    assert list(graph.successors("sub_root"))[0] == "9"
    assert set(graph.successors("9")) == {f"9_{x}" for x in range(10)}
    # every edge touches the start node and is of the default edge type
    for from_node, to_node, data in graph.edges.data(True):
        assert from_node == "9" or to_node == "9"
        assert data == {"edge_type": "default"}
    for node_id, node in graph.nodes.data(True):
        if node_id == "9":
            assert node["metadata"]["query_tag"] == "red"
        else:
            # The tag is stored under "query_tag" (see the branch above). The former check for
            # the key "tag" could not detect a tag leaking onto the traversed nodes.
            assert "query_tag" not in node["metadata"]

    async def assert_result(query: str, nodes: int, edges: int) -> None:
        # parse the query string, search the graph and compare the node and edge counts
        result_graph = await filled_graph_db.search_graph(QueryModel(parse_query(query), foo_model))
        assert len(result_graph.nodes) == nodes
        assert len(result_graph.edges) == edges

    await assert_result("is(foo) and reported.identifier==9 <-delete[0:]default->", 11, 20)
    await assert_result("is(foo) and reported.identifier==9 <-default[0:]delete->", 4, 3)
    await assert_result("is(foo) and reported.identifier==9 <-default[0:]->", 14, 13)
    await assert_result("is(foo) and reported.identifier==9 <-delete[0:]->", 11, 10)
    await assert_result("is(foo) and reported.identifier==9 -default[0:]->", 11, 10)
    await assert_result("is(foo) and reported.identifier==9 <-delete[0:]-", 11, 10)
    await assert_result("is(foo) and reported.identifier==9 <-default[0:]-", 4, 3)
    await assert_result("is(foo) and reported.identifier==9 -delete[0:]->", 1, 0)
async def test_query_list(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Check the number of entries ``search_list`` returns for a traversal and for a kind list."""
    # all bla nodes with f == 23 directly below the foo node with identifier "9"
    start = Query.by("foo", P("identifier") == "9")
    blas_below = start.traverse_out().filter("bla", P("f") == 23)
    blas_model = QueryModel(blas_below.on_section("reported"), foo_model)
    async with await filled_graph_db.search_list(blas_model) as cursor:
        blas = [from_js(element["reported"], Bla) async for element in cursor]
        assert len(blas) == 10

    foos_or_blas = parse_query("is([foo, bla])")
    all_model = QueryModel(foos_or_blas.on_section("reported"), foo_model)
    async with await filled_graph_db.search_list(all_model) as cursor:
        everything = [element async for element in cursor]
        assert len(everything) == 111  # 113 minus 1 graph_root, minus one cloud
def test_query() -> None:
    """Build a query with preamble, navigation, with-clause, aggregation, sort and limit.

    The rendered string is compared literally and the query is then passed to
    ``assert_round_trip`` together with ``query_parser``.
    """
    # NOTE(review): an earlier function in this file is also named test_query; this later
    # definition replaces it in the module namespace, so only one of the two can be collected.
    query = Query.by(
        "ec2",
        P("cpu") > 4,
        (P("mem") < 23) | (P("mem") < 59),
        preamble={"merge_with_ancestors": "cloud"},
    )
    query = query.traverse_out().filter(P("some.int.value") < 1, P("some.other") == 23)
    in_subnet_term = P.function("in_subnet").on("ip", "1.2.3.4/96")
    query = query.traverse_out().filter(P("active") == 12, in_subnet_term)
    query = query.filter_with(WithClause(WithClauseFilter("==", 0), Navigation()))
    group_vars = [AggregateVariable(AggregateVariableName("foo"))]
    group_functions = [AggregateFunction("sum", "cpu")]
    query = query.group_by(group_vars, group_functions)
    query = query.add_sort("test", "asc").with_limit(10)

    expected = (
        'aggregate(foo: sum(cpu))(merge_with_ancestors="cloud"):'
        '((is("ec2") and cpu > 4) and (mem < 23 or mem < 59)) -default-> '
        "(some.int.value < 1 and some.other == 23) -default-> "
        '(active == 12 and in_subnet(ip, "1.2.3.4/96")) '
        "with(empty, -default->) sort test asc limit 10"
    )
    assert str(query) == expected
    assert_round_trip(query_parser, query)
async def create_query(
    self, commands: List[ExecutableCommand], ctx: CLIContext
) -> Tuple[Query, Dict[str, Any], List[ExecutableCommand]]:
    """
    Takes a list of query part commands and combines them into a single executable query command.
    This process can also introduce new commands that should run after the query is finished.
    Therefore, a list of executable commands is returned.

    :param commands: the incoming executable commands, which actions are all instances of SearchCLIPart.
    :param ctx: the context to execute within.
    :return: a tuple of
             - the final query, interpreted on the section defined in ``ctx.env`` (default: ``PathRoot``),
             - the options parsed from all search parts, merged into one dict,
             - the commands to execute: ``execute_search`` followed by any additional commands.
    :raises AttributeError: if a command is none of the part types handled here.
    :raises AssertionError: if a count command is used on a query that already has an aggregate.
    """

    # Pass parsed options to execute query
    # Multiple query commands are possible - so the dict is combined with every parsed query.
    parsed_options: Dict[str, Any] = {}

    async def parse_query(query_arg: str) -> Query:
        # Splits the known options from the query string, records the options
        # and parses the remaining string into a query.
        nonlocal parsed_options
        parsed, query_part = ExecuteSearchCommand.parse_known(query_arg)
        # on duplicate keys the options of the later search part win
        parsed_options = {**parsed_options, **parsed}
        # section expansion is disabled here: it will happen on the final query after all parts have been combined
        return await self.dependencies.template_expander.parse_query(
            "".join(query_part), None, omit_section_expansion=True, **ctx.env
        )

    # start with a query that matches everything: every command refines it further
    query: Query = Query.by(AllTerm())
    additional_commands: List[ExecutableCommand] = []
    # We need to remember the first head/tail, since tail will reverse the sort order
    first_head_tail_in_a_row: Optional[CLICommand] = None
    # Set to False once a tail command starts a row of head/tail commands (the sort order is reversed then).
    head_tail_keep_order = True
    for command in commands:
        part = command.command
        arg = command.arg if command.arg else ""
        if isinstance(part, SearchPart):
            query = query.combine(await parse_query(arg))
        elif isinstance(part, PredecessorsPart):
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_in(origin, 1, edge)
        elif isinstance(part, SuccessorsPart):
            # NOTE(review): all navigation parts parse their args via PredecessorsPart.parse_args
            # - presumably the argument syntax is shared; confirm.
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_out(origin, 1, edge)
        elif isinstance(part, AncestorsPart):
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_in(origin, Navigation.Max, edge)
        elif isinstance(part, DescendantsPart):
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_out(origin, Navigation.Max, edge)
        elif isinstance(part, AggregatePart):
            group_vars, group_function_vars = aggregate_parameter_parser.parse(arg)
            query = replace(query, aggregate=Aggregate(group_vars, group_function_vars))
        elif isinstance(part, CountCommand):
            # count command followed by a query: make it an aggregation
            # since the output of aggregation is not exactly the same as count
            # we also add the aggregate_to_count command after the query
            assert query.aggregate is None, "Can not combine aggregate and count!"
            # group by the given argument (if any) and sum up 1 per element under the name "count"
            group_by = [AggregateVariable(AggregateVariableName(arg), "name")] if arg else []
            aggregate = Aggregate(group_by, [AggregateFunction("sum", 1, [], "count")])
            # If the query should be explained, we want the output as is
            if "explain" not in parsed_options:
                additional_commands.append(self.command("aggregate_to_count", None, ctx))
            query = replace(query, aggregate=aggregate)
            query = query.add_sort(f"{PathRoot}count")
        elif isinstance(part, HeadCommand):
            size = HeadCommand.parse_size(arg)
            # presumably parts[0] is the part the limit applies to - TODO confirm the order of query.parts
            limit = query.parts[0].limit or Limit(0, size)
            if first_head_tail_in_a_row and head_tail_keep_order:
                # a head/tail directly precedes this head, order not reversed: narrow the window from its start
                query = query.with_limit(Limit(limit.offset, min(limit.length, size)))
            elif first_head_tail_in_a_row and not head_tail_keep_order:
                # order is reversed: the head elements are at the end of the existing window
                length = min(limit.length, size)
                query = query.with_limit(Limit(limit.offset + limit.length - length, length))
            else:
                # first head in a row: a plain limit is sufficient
                query = query.with_limit(size)
        elif isinstance(part, TailCommand):
            size = HeadCommand.parse_size(arg)
            limit = query.parts[0].limit or Limit(0, size)
            if first_head_tail_in_a_row and head_tail_keep_order:
                # order not reversed: the tail elements are at the end of the existing window
                query = query.with_limit(Limit(limit.offset + max(0, limit.length - size), min(limit.length, size)))
            elif first_head_tail_in_a_row and not head_tail_keep_order:
                # order is already reversed: the tail elements are at the start of the existing window
                query = query.with_limit(Limit(limit.offset, min(limit.length, size)))
            else:
                # first tail in a row: from now on the sort order is reversed
                head_tail_keep_order = False
                query = query.with_limit(size)
                p = query.current_part
                # the limit might have created a new part - make sure there is a sort order
                p = p if p.sort else replace(p, sort=DefaultSort)
                # reverse the sort order -> limit -> reverse the result
                # NOTE(review): this assigns into the parts list of the query in place
                query.parts[0] = replace(p, sort=[s.reversed() for s in p.sort], reverse_result=True)
        else:
            raise AttributeError(f"Do not understand: {part} of type: {class_fqn(part)}")

        # Remember the first head tail in a row of head tails
        if isinstance(part, (HeadCommand, TailCommand)):
            if not first_head_tail_in_a_row:
                first_head_tail_in_a_row = part
        else:
            # any other command ends the row: reset the head/tail tracking
            first_head_tail_in_a_row = None
            head_tail_keep_order = True

        # Define default sort order, if not already defined
        # A sort order is required to always return the result in a deterministic way to the user.
        # Deterministic order is required for head/tail to work
        parts = [pt if pt.sort else replace(pt, sort=DefaultSort) for pt in query.parts]
        query = replace(query, parts=parts)

    # If the last part is a navigation, we need to add sort which will ingest a new part.
    with_sort = query.set_sort(DefaultSort) if query.current_part.navigation else query
    # When all parts are combined, interpret the result on defined section.
    final_query = with_sort.on_section(ctx.env.get("section", PathRoot))
    # the execute_search command receives the options followed by the rendered query as its argument
    options = ExecuteSearchCommand.argument_string(parsed_options)
    query_string = str(final_query)
    execute_search = self.command("execute_search", options + query_string, ctx)
    return final_query, parsed_options, [execute_search, *additional_commands]
def test_marshal_query() -> None:
    """Check that parsing the rendered query and rendering it again yields the same string."""
    original = Query.by("ec2", P("foo") > 23, P("test") >= "bummer", P("das") < "set")
    rendered = str(original)
    reparsed = parse_query(rendered)
    assert rendered == str(reparsed)
def test_limit() -> None:
    """Check ``limit_parser`` with and without an offset, plus the round trip of a limited query."""
    # (limit expression, expected limit): the offset is 0 when only one number is given
    cases = [
        ("limit 23", Limit(0, 23)),
        ("limit 3, 23", Limit(3, 23)),
    ]
    for expression, expected in cases:
        assert limit_parser.parse(expression) == expected

    limited = Query.by("test").with_limit(23)
    assert_round_trip(query_parser, limited)
def simple_reference() -> None:
    """Show the available ways to build a query; every query is created and then discarded."""
    kind = "ec2"
    pattern = "^some.regex[a-d]+$"
    numbers = [1, 2, 5]

    # only kind
    Query.by(kind)

    # equality
    Query.by(P.of_kind(kind) & (P("simple") == "hallo"))
    Query.by(P.of_kind(kind) & (P("simple") != "hallo"))

    # regex
    Query.by(P.of_kind(kind) & P("simple").matches(pattern))
    Query.by(P.of_kind(kind) & P("simple").not_matches(pattern))

    # comparator
    Query.by(P.of_kind(kind) & (P("num") > 23))
    Query.by(P.of_kind(kind) & (P("num") >= 23))
    Query.by(P.of_kind(kind) & (P("num") == 23))
    Query.by(P.of_kind(kind) & (P("num") <= 23))
    Query.by(P.of_kind(kind) & (P("num") < 23))

    # in set
    Query.by(P.of_kind(kind) & P("num").is_in(numbers))
    Query.by(P.of_kind(kind) & P("num").is_not_in([1, 2, 5]))

    # array: all above operators are available
    Query.by(P.of_kind(kind) & (P.array("some.array").for_all() > 12.23))
    Query.by(P.of_kind(kind) & (P.array("some.array").for_any().is_in([1, 2, 3])))
    Query.by(P.of_kind(kind) & (P.array("some.array").for_none() == 5))

    # call a function
    Query.by(P.function("in_subnet").on("ip", "1.2.3.4/16"))

    # refine with multiple predicates (all predicates have to match)
    kind_and_a = P.of_kind(kind) & P("a").ge(1)
    Query.by(kind_and_a, P("b") == 2, P("c").matches("aaa"))