def test_event_load(self):
    """Test that events can be loaded from valid data buffers or full json."""
    event_type = 'process'
    event_time = 131509374020000000
    event_data = {
        "command_line": "\\??\\C:\\Windows\\system32\\conhost.exe",
        "event_type_full": "process_event",
        "parent_process_name": "csrss.exe",
        "parent_process_path": "C:\\Windows\\System32\\csrss.exe",
        "pid": 3080,
        "ppid": 372,
        "process_name": "conhost.exe",
        "process_path": "C:\\Windows\\System32\\conhost.exe",
        "serial_event_id": 49,
        "timestamp": 131509374020000000,
        "user_domain": "vagrant",
        "user_name": "vagrant",
    }

    expected_event = Event(event_type, event_time, event_data)
    from_full_event = Event.from_data({
        'event_timestamp': event_time,
        'event_type': 4,
        'data_buffer': event_data
    })
    from_buffer = Event.from_data(event_data)

    self.assertEqual(from_full_event, expected_event, "Full event didn't load properly")
    self.assertEqual(from_buffer, expected_event, "Event buffer didn't load properly")
def test_nested_data(self):
    """Test that highly structured data is also searchable."""
    event_1 = {
        'top': [
            {'middle': {'abc': 0}},
            {'middle2': ['def', 'ghi']}
        ]
    }
    event_2 = {
        'top': [
            {'middle': {'abc': 123}},
            {'middle2': ['tuv', 'wxyz']}
        ]
    }
    events = [
        Event(EVENT_TYPE_GENERIC, 1, event_1),
        Event(EVENT_TYPE_GENERIC, 2, event_2)
    ]

    query = parse_query('generic where top[0].middle.abc == 123')
    results = self.get_output(queries=[query], events=events, config={'flatten': True})

    self.assertEqual(len(results), 1, "Missing or extra results")
    self.assertEqual(results[0].data, event_2, "Failed to match on correct event")
def test_post_processor(self):
    """Test that post-processing of analytic results works."""
    data = [Event.from_data({'num': i}) for i in range(100)]
    query = '| head 10'
    processor = get_post_processor(query, {'flatten': True})
    results = processor(data)
    self.validate_results(results, data[:10], query)
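# A related sketch, not part of the original tests (assumption: pipe strings
# passed to get_post_processor compose left to right, as pipes do in full
# queries), showing several pipes chained in a single post-processor:
#
#     processor = get_post_processor('| sort num | head 10', {'flatten': True})
#     top_ten = processor(data)  # the ten smallest 'num' values, in order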
def count_tuple_callback(events):  # type: (list[Event]) -> None
    if events is PIPE_EOF:
        # This may seem a little tricky, but we need to effectively learn the type(s) to perform comparison
        # Python 3 doesn't allow you to use a key function that returns various types
        converter = get_type_converter(count_table)
        converted_count_table = {converter(k): v for k, v in count_table.items()}
        total = sum(tbl['count'] for tbl in count_table.values())

        for key, details in sorted(converted_count_table.items(), key=lambda kv: (kv[1]['count'], kv[0])):
            hosts = details.pop('hosts')
            if len(hosts):
                details['hosts'] = list(sorted(hosts))
                details['total_hosts'] = len(hosts)
            details['key'] = key
            details['percent'] = float(details['count']) / total
            next_pipe([Event(EVENT_TYPE_GENERIC, 0, details)])

        next_pipe(PIPE_EOF)
    else:
        key = get_key(events)
        count_table[key]['count'] += 1
        if host_key in events[0].data:
            count_table[key]['hosts'].add(events[0].data[host_key])
def count_tuple_callback(events):  # type: (list[Event]) -> None
    if events is PIPE_EOF:
        # Convert keys to a single comparable type before sorting, since
        # Python 3 rejects comparisons between mismatched key types
        converter = get_type_converter(results)
        converted_results = {converter(k): v for k, v in results.items()}
        total = sum(result['count'] for result in converted_results.values())

        for key, result in sorted(converted_results.items(), key=lambda kr: (kr[1]['count'], kr[0])):
            hosts = result.pop('hosts')  # type: set
            if len(hosts) > 0:
                result['hosts'] = list(sorted(hosts))
                result['total_hosts'] = len(hosts)
            result['key'] = key
            result['percent'] = float(result['count']) / total
            next_pipe([Event(EVENT_TYPE_GENERIC, 0, result)])

        next_pipe(PIPE_EOF)
    else:
        # Merge a single mapped count event into the running results table
        piece = events[0].data
        key = events[0].data['key']
        key = tuple(key) if len(node.arguments) > 1 else key
        results[key]['count'] += piece['count']

        # Mapped pieces carry either the originating host value or an
        # already-aggregated list of hosts
        if host_key in piece:
            results[key]['hosts'].add(piece[host_key])
        elif 'hosts' in piece:
            results[key]['hosts'].update(piece['hosts'])
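# A minimal standalone sketch of the reduce-side merge above (an assumption,
# not original source: count state is a mapping of key tuple -> {'count': int,
# 'hosts': set}, as the callback implies; merge_mapped_counts and the
# 'hostname' field are hypothetical names for illustration only):
from collections import defaultdict

def merge_mapped_counts(pieces):
    """Merge mapped {'key', 'count', 'hostname'} dicts into one count table."""
    table = defaultdict(lambda: {'count': 0, 'hosts': set()})
    for piece in pieces:
        entry = table[tuple(piece['key'])]
        entry['count'] += piece['count']
        if 'hostname' in piece:
            entry['hosts'].add(piece['hostname'])
    return table

# merge_mapped_counts([{'key': ['a', 'b'], 'count': 2, 'hostname': 'host1'},
#                      {'key': ['a', 'b'], 'count': 3, 'hostname': 'host2'}])
# -> {('a', 'b'): {'count': 5, 'hosts': {'host1', 'host2'}}}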
def stream_events(self, events, finalize=True):
    """Stream :class:`~Event` objects through the engine."""
    for event in events:
        if not isinstance(event, Event):
            event = Event.from_data(event)
        self.stream_event(event)

    if finalize:
        self.finalize()
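# Usage sketch (the engine construction is assumed to happen elsewhere; the
# sample events are hypothetical): raw dicts and Event objects can be mixed
# freely, since stream_events coerces dicts via Event.from_data on the fly.
#
#     engine.stream_events([
#         {'event_type': 'process', 'pid': 3080},    # coerced to an Event
#         Event(EVENT_TYPE_GENERIC, 0, {'num': 1}),  # passed through unchanged
#     ])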
@classmethod
def get_events(cls):
    """Get output events from test_data.json."""
    if cls._events is None:
        with open(cls.EVENTS_FILE, "r") as f:
            data = json.load(f)
        cls._events = [Event.from_data(d) for d in data]
    return cls._events
def count_total_callback(events):  # type: (list[Event]) -> None
    if events is PIPE_EOF:
        if len(hosts):
            summary['total_hosts'] = len(hosts)
            summary['hosts'] = list(sorted(hosts))
        next_pipe([Event(EVENT_TYPE_GENERIC, 0, summary)])
        next_pipe(PIPE_EOF)
    else:
        summary['count'] += 1
        if host_key in events[0].data:
            hosts.add(events[0].data[host_key])
def count_total_aggregates(events):  # type: (list[Event]) -> None
    if events is PIPE_EOF:
        hosts = result.pop('hosts')  # type: set
        if len(hosts) > 0:
            result['hosts'] = list(sorted(hosts))
            result['total_hosts'] = len(hosts)
        next_pipe([Event(EVENT_TYPE_GENERIC, 0, result)])
        next_pipe(PIPE_EOF)
    else:
        piece = events[0].data
        result['count'] += piece['count']
        if host_key in piece:
            result['hosts'].add(piece[host_key])
        elif 'hosts' in piece:
            result['hosts'].update(piece['hosts'])
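# Note on the pairing above: count_total_callback runs during the map phase
# and tallies raw events one at a time, while count_total_aggregates runs
# during the reduce phase and merges the per-host summaries the map phase
# emitted. The same map/reduce split applies to the keyed count_tuple_callback
# variants earlier in this section.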
def test_map_reduce_analytics(self):
    """Test map reduce functionality of python engines."""
    input_events = defaultdict(list)
    host_results = []

    for i, host in enumerate("abcdefghijklmnop"):
        events = []
        for event_number in range(10):
            data = {
                'number': event_number,
                'a': host + '-a-' + str(event_number),
                'b': -event_number
            }
            events.append(Event.from_data(data))
        input_events[host] = events

    query_text = 'generic where true | sort a | head 5 | sort b'
    analytic = parse_analytic({'query': query_text, 'metadata': {'id': 'test-analytic'}})
    host_engine = get_engine(analytic)

    # Map across multiple 'hosts'
    for hostname, host_events in input_events.items():
        for result in host_engine(host_events):  # type: AnalyticOutput
            for event in result.events:
                event.data['hostname'] = hostname
            host_results.append(result)

    # Reduce across multiple 'hosts'
    reducer = get_reducer(analytic)
    reduced_results = reducer(host_results)

    expected_a = ['a-a-{}'.format(value) for value in range(10)][:5][::-1]
    actual_a = [event.data['a'] for result in reduced_results for event in result.events]
    self.validate_results(actual_a, expected_a, query_text)
def test_aggregate_multiple_key_counts(self):
    """Test that counts are aggregated correctly with multiple keys."""
    input_results = [
        ('host1', ['key1', 'key2', 'key3'], 2),
        ('host2', ['key1', 'key2', 'key3'], 4),
        ('host3', ['key1', 'key2', 'key3'], 2),
        ('host4', ['key1', 'key2', 'key5'], 7),
        ('host5', ['key1', 'key2', 'key5'], 9),
        ('host2', ['key2', 'key3', 'key4'], 5),
        ('host1', ['key4', 'key2', 'key5'], 3),
    ]
    random.shuffle(input_results)

    input_counts = [
        Event.from_data({'hostname': h, 'key': k, 'count': c})
        for h, k, c in input_results
    ]

    # Reduced output is ordered by ascending count, then by key
    expected_counts = [
        (('key4', 'key2', 'key5'), ['host1'], 3),
        (('key2', 'key3', 'key4'), ['host2'], 5),
        (('key1', 'key2', 'key3'), ['host1', 'host2', 'host3'], 2 + 4 + 2),
        (('key1', 'key2', 'key5'), ['host4', 'host5'], 7 + 9),
    ]

    reducer = get_reducer('| count a b c', config={'flatten': True})
    reduced_counts = reducer(input_counts)

    self.assertEqual(len(expected_counts), len(reduced_counts))
    for (key, hosts, count), event in zip(expected_counts, reduced_counts):
        data = event.data  # type: dict
        self.assertEqual(key, data['key'])
        self.assertEqual(hosts, data['hosts'])
        self.assertEqual(len(hosts), data['total_hosts'])
        self.assertEqual(count, data['count'])
def reduce_events(self, inputs, analytic_id=None, finalize=True):
    """Run mapped results through the reducers registered with :meth:`~add_reducer` and :meth:`~add_post_processor`.

    :param AnalyticOutput|Event|dict inputs: Mapped results to reduce
    :param str analytic_id: Optional analytic id to add to generated AnalyticOutput results
    :param bool finalize: Send the finalize signal when input is exhausted.
    """
    for data in inputs:
        if isinstance(data, AnalyticOutput):
            analytic_id = data.analytic_id or analytic_id
            events = data.events
        elif isinstance(data, Event):
            events = [data]
        elif isinstance(data, dict):
            events = [Event.from_data(data)]
        else:
            raise ValueError("Unable to reduce {}".format(data))

        for reducer in self._reducer_hooks[analytic_id]:
            reducer(events)

    if finalize:
        self.finalize()
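# Usage sketch (hypothetical values; the three accepted input forms come from
# the isinstance checks above, and the AnalyticOutput constructor arguments
# shown are an assumption): mapped results may be fed to reduce_events as
# AnalyticOutput objects, bare Events, or raw dicts.
#
#     engine.reduce_events([
#         AnalyticOutput('my-analytic', [event]),  # carries its own analytic_id
#         event,                                   # a bare Event
#         {'key': 'a', 'count': 3},                # coerced via Event.from_data
#     ], analytic_id='my-analytic')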
def test_relationship_pid_collision(self):
    """Confirm that the field used for tracking lineage can be dynamically set."""
    config = {'flatten': True, 'pid_key': 'unique_pid', 'ppid_key': 'unique_ppid'}
    events = [Event.from_data(d) for d in [
        {"event_type": "process", "pid": 1001, "ppid": 1000,
         "unique_pid": "host1-1001", "unique_ppid": "host1-1000",
         "process_name": "explorer.exe", "subtype": "create"},
        {"event_type": "process", "pid": 1002, "ppid": 1001,
         "unique_pid": "host1-1002", "unique_ppid": "host1-1001",
         "process_name": "powershell.exe", "subtype": "create"},
        {"event_type": "process", "pid": 1003, "ppid": 1002,
         "unique_pid": "host1-1003", "unique_ppid": "host1-1002",
         "process_name": "whoami.exe", "subtype": "create"},
        {"event_type": "process", "pid": 1001, "ppid": 1000,
         "unique_pid": "host2-1001", "unique_ppid": "host2-1000",
         "process_name": "explorer.exe", "subtype": "create"},
        {"event_type": "process", "pid": 1002, "ppid": 1001,
         "unique_pid": "host2-1002", "unique_ppid": "host2-1001",
         "process_name": "cmd.exe", "subtype": "create"},
        {"event_type": "process", "pid": 1003, "ppid": 1002,
         "unique_pid": "host2-1003", "unique_ppid": "host2-1002",
         "process_name": "whoami.exe", "subtype": "create"},
    ]]

    query = "process where child of [process where process_name == 'powershell.exe']"
    output = self.get_output(queries=[parse_query(query)], config=config, events=events)
    event_ids = [event.data['unique_pid'] for event in output]
    self.validate_results(event_ids, ['host1-1003'], "Relationships failed due to pid collision")