def render_analytic(analytic, engine_type, analytics_only=False, config=None):
    """Render a full script for an EQL analytic.

    :param dict|EqlAnalytic analytic: The analytic object in AST or dictionary form
    :param str engine_type: The target file extension
    :param bool analytics_only: Render the converted analytics without including the EQL core
    :param dict config: An optional engine configuration
    """
    if not isinstance(analytic, EqlAnalytic):
        analytic = parse_analytic(analytic)
    return render_analytics([analytic], engine_type, config=config, analytics_only=analytics_only)
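# A minimal usage sketch for render_analytic. The analytic fields and the
# '.py' engine_type below are illustrative assumptions, not values defined in
# this module; the supported extensions depend on the registered transpilers.
#
#     analytic = {
#         'metadata': {'id': 'example-analytic', 'name': 'Example analytic'},
#         'query': 'process where process_name == "net.exe"',
#     }
#     script = render_analytic(analytic, '.py', analytics_only=False)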
def get_analytic(cls, query_text):
    """Get a cached EQL analytic."""
    if query_text not in cls._query_cache:
        analytic_info = {
            'metadata': {
                'id': 'query-{:d}'.format(len(cls._query_cache)),
                'name': query_text,
                'analytic_version': '1.0.0'
            },
            'query': query_text
        }
        cls._query_cache[query_text] = parse_analytic(analytic_info)
    return cls._query_cache[query_text]
def get_analytic(cls, query_text):
    """Get a cached EQL analytic."""
    with cls.schema:
        if query_text not in cls.query_cache:
            analytic_info = {
                'metadata': {
                    'id': 'query-{:d}'.format(len(cls.query_cache)),
                    'name': query_text
                },
                'query': query_text
            }
            cls.query_cache[query_text] = parse_analytic(analytic_info)
        return cls.query_cache[query_text]
def test_engine_schema(self):
    """Test loading the engine with a custom schema."""
    queries = [
        'movie where name == "*Breakfast*" and IN_80s(release)',
        'person where name == "John Hughes"',
    ]
    analytic_dicts = [{'query': q} for q in queries]
    definitions = """
    macro IN_80s(date) date == "*/*/1980"
    """
    config = {
        'schema': {'event_types': {'movie': 1, 'person': 2}},
        'definitions': parse_definitions(definitions),
        'analytics': analytic_dicts
    }

    pp = PreProcessor()
    pp.add_definitions(config['definitions'])

    with use_schema(config['schema']):
        expected = [parse_analytic(d, preprocessor=pp) for d in analytic_dicts]

    engine = BaseEngine(config)
    with use_schema(engine.schema):
        engine.add_analytics([parse_analytic(d) for d in analytic_dicts])

    self.assertListEqual(engine.analytics, expected, "Analytics were not loaded and expanded properly.")
def test_map_reduce_analytics(self):
    """Test map reduce functionality of python engines."""
    input_events = defaultdict(list)
    host_results = []

    for host in "abcdefghijklmnop":
        events = []
        for event_number in range(10):
            data = {'number': event_number, 'a': host + '-a-' + str(event_number), 'b': -event_number}
            events.append(Event.from_data(data))
        input_events[host] = events

    query_text = 'generic where true | sort a | head 5 | sort b'
    analytic = parse_analytic({'query': query_text, 'metadata': {'id': 'test-analytic'}})
    host_engine = get_engine(analytic)

    # Map across multiple 'hosts'
    for hostname, host_events in input_events.items():
        for result in host_engine(host_events):  # type: AnalyticOutput
            for event in result.events:
                event.data['hostname'] = hostname
            host_results.append(result)

    # Reduce across multiple 'hosts'
    reducer = get_reducer(analytic)
    reduced_results = reducer(host_results)

    expected_a = ['a-a-{}'.format(value) for value in range(10)][:5][::-1]
    actual_a = [event.data['a'] for result in reduced_results for event in result.events]
    self.validate_results(actual_a, expected_a, query_text)
def get_reducer(query, config=None):
    """Get a reducer to aggregate results from distributed EQL queries.

    :param str|dict|EqlAnalytic|PipedQuery query: The query text or parsed query
    :param dict config: The configuration for PythonEngine
    """
    if isinstance(query, dict):
        query = parse_analytic(query)
    elif is_string(query):
        query = parse_query(query, implied_base=True, implied_any=True)

    def reducer(inputs):
        results = []
        engine = PythonEngine(config)
        engine.add_reducer(query)
        engine.add_output_hook(results.append)
        engine.reduce_events(inputs, finalize=True)
        return results

    return reducer
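# A minimal usage sketch for get_reducer, assuming `analytic` is a parsed
# EqlAnalytic and `host_results` is a list of per-host AnalyticOutput objects
# produced by engines built with get_engine() for the same analytic (see
# test_map_reduce_analytics above for a full map/reduce round trip).
#
#     reduce_fn = get_reducer(analytic)
#     combined = reduce_fn(host_results)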
def get_engine(query, config=None):
    """Get a callable that runs an EQL query or analytic over a list of events and returns the results.

    :param str|dict|EqlAnalytic|PipedQuery query: The query text or parsed query
    :param dict config: The configuration for PythonEngine
    """
    if isinstance(query, dict):
        query = parse_analytic(query)
    elif is_string(query):
        query = parse_query(query, implied_base=True, implied_any=True)

    def run_engine(inputs):
        results = []
        engine = PythonEngine(config)
        if isinstance(query, PipedQuery):
            engine.add_query(query)
        else:
            engine.add_analytic(query)
        engine.add_output_hook(results.append)
        engine.stream_events(inputs, finalize=True)
        return results

    return run_engine
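# A minimal usage sketch for get_engine. The query and event fields are
# illustrative assumptions; Event.from_data is the same constructor used in
# test_map_reduce_analytics above, which feeds plain dicts to 'generic' queries.
#
#     events = [Event.from_data({'pid': 4}), Event.from_data({'pid': 8})]
#     run = get_engine('generic where pid == 4')
#     results = run(events)  # outputs containing only the pid == 4 event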
def test_engine_load(self):
    """Check that various queries can be converted and loaded into the python engine."""
    engine = PythonEngine()
    engine.add_custom_function('myFn', lambda x, y, z: 100)

    queries = [
        'process where process_name == "net.exe" and command_line == "* user*.exe"',
        'process where command_line == "~!@#$%^&*();\'[]{}\\\\|<>?,./:\\"-= \' "',
        'process where \n\n\npid ==\t 4',
        'process where process_name in ("net.exe", "cmd.exe", "at.exe")',
        'process where command_line == "*.exe *admin*" or command_line == "* a b*"',
        'process where pid in (1,2,3,4,5,6,7,8) and abc == 100 and def == 200 and ghi == 300 and jkl == x',
        'process where ppid != pid',
        'image_load where not x != y',
        'image_load where not x == y',
        'image_load where not not not not x < y',
        'image_load where not x <= y',
        'image_load where not x >= y',
        'image_load where not x > y',
        'process where pid == 4 or pid == 5 or pid == 6 or pid == 7 or pid == 8',
        'network where pid == 0 or pid == 4 or (ppid == 0 or ppid = 4) or (abc == defgh) and process_name == "*" ',
        'network where pid = 4',
        'join \t\t\t[process where process_name == "*"] [ file where file_path == "*"\n]',
        'join by pid [process where name == "*"] [file where path == "*"] until [process where opcode == 2]',
        'sequence [process where name == "*"] [file where path == "*"] until [process where opcode == 2]',
        'sequence by pid [process where name == "*"] [file where path == "*"] until [process where opcode == 2]',
        'join [process where process_name == "*"] by process_path [file where file_path == "*"] by image_path',
        'sequence [process where process_name == "*"] by process_path [file where file_path == "*"] by image_path',
        'sequence by pid [process where process_name == "*"] [file where file_path == "*"]',
        'sequence by pid with maxspan=2s [process where process_name == "*" ] [file where file_path == "*"]',
        'sequence by pid with maxspan=2sec [process where process_name == "*" ] [file where file_path == "*"]',
        'sequence by pid with maxspan=2seconds [process where process_name == "*" ] [file where file_path == "*"]',
        'sequence with maxspan=2.5m [process where x == x] by pid [file where file_path == "*"] by ppid',
        'sequence by pid with maxspan=2.0h [process where process_name == "*"] [file where file_path == "*"]',
        'sequence by pid with maxspan=2.0h [process where process_name == "*"] [file where file_path == "*"]',
        'sequence by pid with maxspan=1.0075d [process where process_name == "*"] [file where file_path == "*"]',
        'process where descendant of [process where process_name == "lsass.exe"] and process_name == "cmd.exe"',
        'dns where pid == 100 | head 100 | tail 50 | unique pid',
        'network where pid == 100 | unique command_line | count',
        'security where user_domain == "endgame" | count user_name | tail 5',
        'process where 1==1 | count user_name, unique_pid, myFn(field2,a,bc)',
        'process where 1==1 | unique user_name, myFn(field2,a,bc), field2',
        'process where true',
        'any where topField.subField[100].subsubField == 0',
        'process where true | filter true',
        'process where 1==1 | filter abc == def',
        'process where 1==1 | filter abc == def and 1 != 2',
        'process where 1==1 | count process_name | filter percent > 0.5',
        'process where a > 100000000000000000000000000000000',
    ]

    for query in queries:
        # Make sure every query can be converted without raising any exceptions
        parsed_query = parse_query(query)
        engine.add_query(parsed_query)

        # Also try to load it as an analytic
        parsed_analytic = parse_analytic({'metadata': {'id': uuid.uuid4()}, 'query': query})
        engine.add_analytic(parsed_analytic)
def test_output_types(self):
    """Test that output types are correctly returned from eql.utils.get_output_types."""
    query_ast = parse_query("process where true")
    self.assertEqual(get_output_types(query_ast), ["process"])

    query_ast = parse_analytic({"query": "process where descendant of [file where true]"})
    self.assertEqual(get_output_types(query_ast), ["process"])

    query_ast = parse_query("file where true | unique pid | head 1")
    self.assertEqual(get_output_types(query_ast), ["file"])

    query_ast = parse_query("file where true | unique_count file_path")
    self.assertEqual(get_output_types(query_ast), ["file"])

    query_ast = parse_query("any where true | unique_count file_path")
    self.assertEqual(get_output_types(query_ast), ["any"])

    query_ast = parse_query("file where true | count")
    self.assertEqual(get_output_types(query_ast), ["generic"])

    query_ast = parse_query("file where true | count process_name")
    self.assertEqual(get_output_types(query_ast), ["generic"])

    query_ast = parse_query("""
    sequence
        [registry where true]
        [file where true]
        [process where true]
        [process where true]
        [process where true]
        [network where true]
    """)
    self.assertEqual(get_output_types(query_ast),
                     ["registry", "file", "process", "process", "process", "network"])

    query_ast = parse_query("""
    sequence
        [registry where true]
        [file where true]
        [process where true]
        [process where true]
        [process where true]
        [network where true]
    | count event_type
    | head 5
    """)
    self.assertEqual(get_output_types(query_ast), ["generic"])

    query_ast = parse_query("""
    sequence
        [registry where true]
        [file where true]
        [process where true]
        [process where true]
        [process where true]
        [network where true]
    | unique events[2].event_type
    | sort events[1].file_size
    | head 5
    | filter events[4].process_name == 'test.exe'
    """)
    self.assertEqual(get_output_types(query_ast),
                     ["registry", "file", "process", "process", "process", "network"])