Exemplo n.º 1
0
 def test_event_load(self):
     """Test that events can be loaded from valid data buffers or full json."""
     timestamp = 131509374020000000
     buffer_data = {
         "command_line": "\\??\\C:\\Windows\\system32\\conhost.exe",
         "event_type_full": "process_event",
         "parent_process_name": "csrss.exe",
         "parent_process_path": "C:\\Windows\\System32\\csrss.exe",
         "pid": 3080,
         "ppid": 372,
         "process_name": "conhost.exe",
         "process_path": "C:\\Windows\\System32\\conhost.exe",
         "serial_event_id": 49,
         "timestamp": 131509374020000000,
         "user_domain": "vagrant",
         "user_name": "vagrant",
     }
     # Baseline built directly from the constructor.
     baseline = Event('process', timestamp, buffer_data)
     # Same event loaded two ways: full envelope vs. bare data buffer.
     loaded_full = Event.from_data({
         'event_timestamp': timestamp,
         'event_type': 4,
         'data_buffer': buffer_data
     })
     loaded_buffer = Event.from_data(buffer_data)
     self.assertEqual(loaded_full, baseline,
                      "Full event didn't load properly")
     self.assertEqual(loaded_buffer, baseline,
                      "Event buffer didn't load properly")
Exemplo n.º 2
0
    def test_nested_data(self):
        """Test that highly structured is also searchable."""
        # Two events with the same nested shape but different leaf values.
        non_matching = {
            'top': [{'middle': {'abc': 0}},
                    {'middle2': ['def', 'ghi']}]
        }
        matching = {
            'top': [{'middle': {'abc': 123}},
                    {'middle2': ['tuv', 'wxyz']}]
        }
        events = [
            Event(EVENT_TYPE_GENERIC, 1, non_matching),
            Event(EVENT_TYPE_GENERIC, 2, matching)
        ]

        # Query drills through an array index and two dict levels.
        query = parse_query('generic where top[0].middle.abc == 123')
        results = self.get_output(queries=[query],
                                  events=events,
                                  config={'flatten': True})
        self.assertEqual(len(results), 1, "Missing or extra results")
        self.assertEqual(results[0].data, matching,
                         "Failed to match on correct event")
Exemplo n.º 3
0
 def test_post_processor(self):
     """Test that post-processing of analytic results works."""
     query = '| head 10'
     # 100 trivial events; "head 10" should keep only the first ten.
     events = [Event.from_data({'num': value}) for value in range(100)]
     post_process = get_post_processor(query, {'flatten': True})
     self.validate_results(post_process(events), events[:10], query)
Exemplo n.º 4
0
            def count_tuple_callback(events):  # type: (list[Event]) -> None
                """Count occurrences per key, then emit sorted totals on PIPE_EOF.

                Non-EOF calls tally the key derived from ``events`` into
                ``count_table`` (recording the host when ``host_key`` is
                present); the EOF call flushes one summary Event per key,
                ordered by (count, key), then forwards PIPE_EOF downstream.
                """
                if events is PIPE_EOF:
                    # This may seem a little tricky, but we need to effectively learn the type(s) to perform comparison
                    # Python 3 doesn't allow you to use a key function that returns various types
                    converter = get_type_converter(count_table)
                    converted_count_table = {
                        converter(k): v
                        for k, v in count_table.items()
                    }
                    # Grand total across every key, used for the percent field below.
                    total = sum(tbl['count'] for tbl in count_table.values())

                    # Emit in ascending (count, key) order so ties break deterministically.
                    for key, details in sorted(converted_count_table.items(),
                                               key=lambda kv:
                                               (kv[1]['count'], kv[0])):
                        hosts = details.pop('hosts')
                        if len(hosts):
                            # Hosts are accumulated as a set; emit a sorted list plus count.
                            details['hosts'] = list(sorted(hosts))
                            details['total_hosts'] = len(hosts)

                        details['key'] = key
                        details['percent'] = float(details['count']) / total
                        next_pipe([Event(EVENT_TYPE_GENERIC, 0, details)])
                    next_pipe(PIPE_EOF)
                else:
                    key = get_key(events)

                    # count_table appears to default-construct {'count': 0, 'hosts': set()}
                    # entries (defaultdict-style) -- TODO confirm against enclosing scope.
                    count_table[key]['count'] += 1
                    if host_key in events[0].data:
                        count_table[key]['hosts'].add(events[0].data[host_key])
Exemplo n.º 5
0
            def count_tuple_callback(events):  # type: (list[Event]) -> None
                """Reduce mappers' partial count Events into combined totals.

                Non-EOF calls fold one partial result (its ``count`` and host
                information) into ``results``; the EOF call emits one summary
                Event per key, sorted by (count, key), then forwards PIPE_EOF.
                """
                if events is PIPE_EOF:
                    # Normalize keys to one comparable type before sorting;
                    # Python 3 cannot order mixed key types.
                    converter = get_type_converter(results)
                    converted_results = {
                        converter(k): v
                        for k, v in results.items()
                    }

                    # Grand total across every key, used for the percent field below.
                    total = sum(result['count']
                                for result in converted_results.values())

                    # Emit in ascending (count, key) order so ties break deterministically.
                    for key, result in sorted(converted_results.items(),
                                              key=lambda kr:
                                              (kr[1]['count'], kr[0])):
                        hosts = result.pop('hosts')  # type: set
                        if len(hosts) > 0:
                            result['hosts'] = list(sorted(hosts))
                            result['total_hosts'] = len(hosts)
                        result['key'] = key
                        result['percent'] = float(result['count']) / total
                        next_pipe([Event(EVENT_TYPE_GENERIC, 0, result)])
                    next_pipe(PIPE_EOF)
                else:
                    piece = events[0].data
                    key = events[0].data['key']
                    # Multi-argument counts use a hashable tuple key; single counts
                    # keep the raw value.
                    key = tuple(key) if len(node.arguments) > 1 else key
                    results[key]['count'] += piece['count']
                    if host_key in piece:
                        # Mapper emitted a single host value.
                        results[key]['hosts'].add(piece[host_key])
                    elif 'hosts' in piece:
                        # Mapper already aggregated a collection of hosts.
                        results[key]['hosts'].update(piece['hosts'])
Exemplo n.º 6
0
 def stream_events(self, events, finalize=True):
     """Stream :class:`~Event` objects through the engine."""
     for item in events:
         # Raw dicts (or other loadable payloads) are promoted to Events first.
         if isinstance(item, Event):
             self.stream_event(item)
         else:
             self.stream_event(Event.from_data(item))
     if finalize:
         self.finalize()
Exemplo n.º 7
0
    def get_events(cls):
        """Get output events from test_data.json."""
        # Lazily populate the class-level cache on first access.
        if cls._events is None:
            with open(cls.EVENTS_FILE, "r") as handle:
                raw = json.load(handle)
            cls._events = [Event.from_data(item) for item in raw]
        return cls._events
Exemplo n.º 8
0
            def count_total_callback(events):
                """Tally every event into ``summary``; emit it once on PIPE_EOF."""
                if events is not PIPE_EOF:
                    summary['count'] += 1
                    data = events[0].data
                    if host_key in data:
                        hosts.add(data[host_key])
                    return
                # End of stream: attach host details (if any) and flush downstream.
                if hosts:
                    summary['total_hosts'] = len(hosts)
                    summary['hosts'] = list(sorted(hosts))

                next_pipe([Event(EVENT_TYPE_GENERIC, 0, summary)])
                next_pipe(PIPE_EOF)
Exemplo n.º 9
0
            def count_total_aggregates(events):  # type: (list[Event]) -> None
                """Reduce per-host count summaries into one combined total event.

                Non-EOF calls fold a mapper's partial summary (``count`` plus
                host information) into the enclosing ``result`` dict; the EOF
                call finalizes ``result`` (sorted host list, total_hosts) and
                forwards it downstream followed by PIPE_EOF.
                """
                if events is PIPE_EOF:
                    hosts = result.pop('hosts')  # type: set
                    if len(hosts) > 0:
                        result['hosts'] = list(sorted(hosts))
                        result['total_hosts'] = len(hosts)

                    next_pipe([Event(EVENT_TYPE_GENERIC, 0, result)])
                    next_pipe(PIPE_EOF)
                else:
                    piece = events[0].data
                    result['count'] += piece['count']

                    if host_key in piece:
                        result['hosts'].add(piece[host_key])
                    elif 'hosts' in piece:
                        # BUG FIX: previously wrote to ``results['hosts']`` -- the
                        # aggregate being built here is ``result`` (singular), as
                        # every other line in this closure uses. The parallel
                        # count_tuple_callback reducer confirms the pattern.
                        result['hosts'].update(piece['hosts'])
Exemplo n.º 10
0
    def test_map_reduce_analytics(self):
        """Test map reduce functionality of python engines."""
        input_events = defaultdict(list)
        host_results = []

        # Build 10 events for each of 16 simulated hosts.
        # (The index from enumerate() was never used; iterate directly.)
        for host in "abcdefghijklmnop":
            events = []
            for event_number in range(10):
                data = {
                    'number': event_number,
                    'a': host + '-a-' + str(event_number),
                    'b': -event_number
                }
                events.append(Event.from_data(data))
            input_events[host] = events

        query_text = 'generic where true | sort a | head 5 | sort b'
        analytic = parse_analytic({
            'query': query_text,
            'metadata': {
                'id': 'test-analytic'
            }
        })
        host_engine = get_engine(analytic)

        # Map across multiple 'hosts'
        for hostname, host_events in input_events.items():
            for result in host_engine(host_events):  # type: AnalyticOutput
                for event in result.events:
                    event.data['hostname'] = hostname
                host_results.append(result)

        # Reduce across multiple 'hosts'
        reducer = get_reducer(analytic)
        reduced_results = reducer(host_results)

        # First five values of host 'a', reversed by the final "sort b".
        expected_a = ['a-a-{}'.format(value) for value in range(10)][:5][::-1]
        actual_a = [
            event.data['a'] for result in reduced_results
            for event in result.events
        ]
        self.validate_results(actual_a, expected_a, query_text)
Exemplo n.º 11
0
    def test_aggregate_multiple_key_counts(self):
        """Test that counts are aggregated correctly with multiple keys."""
        input_results = [
            ('host1', ['key1', 'key2', 'key3'], 2),
            ('host2', ['key1', 'key2', 'key3'], 4),
            ('host3', ['key1', 'key2', 'key3'], 2),
            ('host4', ['key1', 'key2', 'key5'], 7),
            ('host5', ['key1', 'key2', 'key5'], 9),
            ('host2', ['key2', 'key3', 'key4'], 5),
            ('host1', ['key4', 'key2', 'key5'], 3),
        ]

        # Input order must not matter to the reducer.
        random.shuffle(input_results)
        input_counts = [
            Event.from_data({'hostname': hostname, 'key': keys, 'count': count})
            for hostname, keys, count in input_results
        ]

        # Expected output, ascending by aggregated count.
        expected_counts = [
            (('key4', 'key2', 'key5'), ['host1'], 3),
            (('key2', 'key3', 'key4'), ['host2'], 5),
            (('key1', 'key2', 'key3'), ['host1', 'host2', 'host3'], 8),
            (('key1', 'key2', 'key5'), ['host4', 'host5'], 16),
        ]

        reducer = get_reducer('| count a b c', config={'flatten': True})
        reduced_counts = reducer(input_counts)

        self.assertEqual(len(expected_counts), len(reduced_counts))
        for expected, event in zip(expected_counts, reduced_counts):
            expected_key, expected_hosts, expected_count = expected
            payload = event.data  # type: dict
            self.assertEqual(expected_key, payload['key'])
            self.assertEqual(expected_hosts, payload['hosts'])
            self.assertEqual(len(expected_hosts), payload['total_hosts'])
            self.assertEqual(expected_count, payload['count'])
Exemplo n.º 12
0
    def reduce_events(self, inputs, analytic_id=None, finalize=True):
        """Run an event through the reducers registered with :meth:`~add_reducer` and :meth:`~add_post_processor`.

        :param AnalyticOutput|Event|dict inputs: Mapped results to reduce
        :param str analytic_id: Optional analytic id to add to generated AnalyticOutput results
        :param bool finalize: Send the finalize signal when input is exhausted.
        """
        for item in inputs:
            # Normalize each input into a list of Events, tracking the
            # analytic id when an AnalyticOutput carries one.
            if isinstance(item, AnalyticOutput):
                analytic_id = item.analytic_id or analytic_id
                batch = item.events
            elif isinstance(item, Event):
                batch = [item]
            elif isinstance(item, dict):
                batch = [Event.from_data(item)]
            else:
                raise ValueError("Unable to reduce {}".format(item))

            for hook in self._reducer_hooks[analytic_id]:
                hook(batch)

        if finalize:
            self.finalize()
Exemplo n.º 13
0
    def test_relationship_pid_collision(self):
        """Confirm that the field used for tracking lineage can be dynamically set."""
        config = {
            'flatten': True,
            'pid_key': 'unique_pid',
            'ppid_key': 'unique_ppid'
        }

        # Two hosts spawn process chains with identical numeric pids
        # (1001 -> 1002 -> 1003); only unique_pid/unique_ppid disambiguate.
        process_trees = [
            ('host1', ['explorer.exe', 'powershell.exe', 'whoami.exe']),
            ('host2', ['explorer.exe', 'cmd.exe', 'whoami.exe']),
        ]
        events = []
        for host, process_names in process_trees:
            for offset, process_name in enumerate(process_names):
                pid = 1001 + offset
                events.append(Event.from_data({
                    "event_type": "process",
                    "pid": pid,
                    "ppid": pid - 1,
                    "unique_pid": "{}-{}".format(host, pid),
                    "unique_ppid": "{}-{}".format(host, pid - 1),
                    "process_name": process_name,
                    "subtype": "create"
                }))

        query = "process where child of [process where process_name == 'powershell.exe']"
        output = self.get_output(queries=[parse_query(query)],
                                 config=config,
                                 events=events)
        event_ids = [event.data['unique_pid'] for event in output]
        self.validate_results(event_ids, ['host1-1003'],
                              "Relationships failed due to pid collision")