def testBuffer(self):
  """Tests the circular buffer."""
  items = range(1, 11)
  circular_buffer = bufferlib.CircularBuffer(10)

  self.assertEqual(len(circular_buffer), 10)
  self.assertEqual(circular_buffer.size, 10)
  self.assertIsNone(circular_buffer.GetCurrent())

  for item in items:
    circular_buffer.Append(item)
    self.assertEqual(circular_buffer.GetCurrent(), item)
    self.assertEqual(circular_buffer.size, 10)

  content = list(circular_buffer)
  self.assertEqual(items, content)

  circular_buffer.Append(11)

  self.assertEqual(
      [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], list(circular_buffer.Flush()))
  self.assertIsNone(circular_buffer.GetCurrent())

  new_items = range(1, 51)
  for item in new_items:
    circular_buffer.Append(item)
    self.assertEqual(circular_buffer.GetCurrent(), item)
    self.assertEqual(circular_buffer.size, 10)

  self.assertEqual(range(41, 51), list(circular_buffer))

def ParseOptions(self, options):
  """Parses the options and initializes the front-end.

  Args:
    options: the command line arguments (instance of argparse.Namespace).

  Raises:
    BadConfigOption: if the options are invalid.
  """
  super(PsortFrontend, self).ParseOptions(options)

  self._output_format = getattr(options, u'output_format', None)
  if not self._output_format:
    raise errors.BadConfigOption(u'Missing output format.')

  if not output_manager.OutputManager.HasOutputClass(self._output_format):
    raise errors.BadConfigOption(
        u'Unsupported output format: {0:s}.'.format(self._output_format))

  self._output_filename = getattr(options, u'write', None)

  self._filter_expression = getattr(options, u'filter', None)
  if self._filter_expression:
    self._filter_object = filters.GetFilter(self._filter_expression)
    if not self._filter_object:
      raise errors.BadConfigOption(
          u'Invalid filter expression: {0:s}'.format(self._filter_expression))

  # Check to see if we need to create a circular buffer.
  if getattr(options, u'slicer', None):
    self._slice_size = getattr(options, u'slice_size', 5)
    self._filter_buffer = bufferlib.CircularBuffer(self._slice_size)

  self._preferred_language = getattr(
      options, u'preferred_language', u'en-US')

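# Illustrative usage sketch, not part of the original source: ParseOptions()
# above only reads attributes via getattr(), so any argparse.Namespace-like
# object works. The attribute names mirror the getattr() calls above; the
# values are made-up examples.
import argparse

options = argparse.Namespace(
    output_format=u'dynamic', write=u'events.csv', filter=None,
    slicer=True, slice_size=5, preferred_language=u'en-US')

front_end = PsortFrontend()
front_end.ParseOptions(options)
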
def testBuffer(self):
  """Tests the circular buffer."""
  items = range(1, 11)
  circular_buffer = bufferlib.CircularBuffer(10)

  self.assertEqual(len(circular_buffer), 10)
  self.assertEqual(circular_buffer.size, 10)

  current_item = circular_buffer.GetCurrent()
  self.assertIsNone(current_item)

  for item in items:
    circular_buffer.Append(item)
    current_item = circular_buffer.GetCurrent()
    self.assertEqual(current_item, item)
    self.assertEqual(circular_buffer.size, 10)

  content = list(circular_buffer)
  self.assertEqual(items, content)

  circular_buffer.Append(11)

  expected_items = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
  items = list(circular_buffer.Flush())
  self.assertEqual(items, expected_items)

  self.assertIsNone(circular_buffer.GetCurrent())

  items = range(1, 51)
  for item in items:
    circular_buffer.Append(item)
    self.assertEqual(circular_buffer.GetCurrent(), item)
    self.assertEqual(circular_buffer.size, 10)

  expected_items = range(41, 51)
  items = list(circular_buffer)
  self.assertEqual(items, expected_items)

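# A minimal sketch of a circular buffer with the semantics the tests above
# exercise; the actual implementation lives in plaso/lib/bufferlib.py and may
# differ in detail. Note that __len__ reports the allocated size rather than
# the number of stored items, which is why the tests assert
# len(circular_buffer) == 10 before anything has been appended.
class CircularBuffer(object):
  """Fixed-size buffer that overwrites its oldest item when full."""

  def __init__(self, size):
    self._index = 0
    self._list = []
    self._size = size

  def __iter__(self):
    """Yields the stored items from oldest to newest."""
    for index in range(0, self._size):
      try:
        yield self._list[(self._index + index) % self._size]
      except IndexError:
        pass

  def __len__(self):
    """The allocated size of the buffer, not the number of stored items."""
    return self._size

  @property
  def size(self):
    return self._size

  def Append(self, item):
    """Appends an item, overwriting the oldest one when the buffer is full."""
    if self._index >= self._size:
      self._index = self._index % self._size
    try:
      self._list[self._index] = item
    except IndexError:
      self._list.append(item)
    self._index += 1

  def Clear(self):
    """Removes all stored items."""
    self._index = 0
    self._list = []

  def Flush(self):
    """Yields all stored items from oldest to newest, then clears."""
    for item in self:
      yield item
    self.Clear()

  def GetCurrent(self):
    """Retrieves the most recently appended item, or None when empty."""
    index = self._index - 1
    if index < 0:
      return None
    return self._list[index]
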
def ParseOptions(self, options):
  """Parses the options and initializes the front-end.

  Args:
    options: the command line arguments (instance of argparse.Namespace).

  Raises:
    BadConfigOption: if the options are invalid.
  """
  super(PsortFrontend, self).ParseOptions(options)

  output_format = getattr(options, 'output_format', None)
  if not output_format:
    raise errors.BadConfigOption(u'Missing output format.')

  self._output_module_class = output_lib.GetOutputFormatter(output_format)
  if not self._output_module_class:
    raise errors.BadConfigOption(
        u'Invalid output format: {0:s}.'.format(output_format))

  self._output_stream = getattr(options, 'write', None)
  if not self._output_stream:
    self._output_stream = sys.stdout

  self._filter_expression = getattr(options, 'filter', None)
  if self._filter_expression:
    self._filter_object = filters.GetFilter(self._filter_expression)
    if not self._filter_object:
      raise errors.BadConfigOption(
          u'Invalid filter expression: {0:s}'.format(self._filter_expression))

  # Check to see if we need to create a circular buffer.
  if getattr(options, 'slicer', None):
    self._slice_size = getattr(options, 'slice_size', 5)
    self._filter_buffer = bufferlib.CircularBuffer(self._slice_size)

def _ExportEvents(
    self, storage_reader, output_module, deduplicate_events=True,
    event_filter=None, time_slice=None, use_time_slicer=False):
  """Exports events using an output module.

  Args:
    storage_reader (StorageReader): storage reader.
    output_module (OutputModule): output module.
    deduplicate_events (Optional[bool]): True if events should be
        deduplicated.
    event_filter (Optional[EventObjectFilter]): event filter.
    time_slice (Optional[TimeRange]): time range that defines a time slice
        to filter events.
    use_time_slicer (Optional[bool]): True if the 'time slicer' should be
        used. The 'time slicer' will provide a context of events around
        an event of interest.
  """
  self._status = definitions.STATUS_INDICATOR_EXPORTING

  time_slice_buffer = None
  time_slice_range = None

  if time_slice:
    if time_slice.event_timestamp is not None:
      time_slice_range = storage_time_range.TimeRange(
          time_slice.start_timestamp, time_slice.end_timestamp)

    if use_time_slicer:
      time_slice_buffer = bufferlib.CircularBuffer(time_slice.duration)

  filter_limit = getattr(event_filter, 'limit', None)
  forward_entries = 0

  self._events_status.number_of_filtered_events = 0
  self._events_status.number_of_events_from_time_slice = 0

  for event in storage_reader.GetSortedEvents(time_range=time_slice_range):
    event_data_identifier = event.GetEventDataIdentifier()
    event_data = storage_reader.GetEventDataByIdentifier(
        event_data_identifier)

    event_data_stream_identifier = event_data.GetEventDataStreamIdentifier()
    if event_data_stream_identifier:
      event_data_stream = storage_reader.GetEventDataStreamByIdentifier(
          event_data_stream_identifier)
    else:
      event_data_stream = None

    event_identifier = event.GetIdentifier()
    event_tag = self._event_tag_index.GetEventTagByIdentifier(
        storage_reader, event_identifier)

    if time_slice_range and event.timestamp != time_slice.event_timestamp:
      self._events_status.number_of_events_from_time_slice += 1

    if event_filter:
      filter_match = event_filter.Match(
          event, event_data, event_data_stream, event_tag)
    else:
      filter_match = None

    # pylint: disable=singleton-comparison
    if filter_match == False:
      if not time_slice_buffer:
        self._events_status.number_of_filtered_events += 1

      elif forward_entries == 0:
        time_slice_buffer.Append((event, event_data))
        self._events_status.number_of_filtered_events += 1

      elif forward_entries <= time_slice_buffer.size:
        self._ExportEvent(
            storage_reader, output_module, event, event_data,
            event_data_stream, deduplicate_events=deduplicate_events)
        self._number_of_consumed_events += 1
        self._events_status.number_of_events_from_time_slice += 1
        forward_entries += 1

      else:
        # We reached the maximum size of the time slice and don't need to
        # include other entries.
        self._events_status.number_of_filtered_events += 1
        forward_entries = 0

    else:
      # pylint: disable=singleton-comparison
      if filter_match == True and time_slice_buffer:
        # Empty the time slice buffer.
        for event_in_buffer, event_data_in_buffer in (
            time_slice_buffer.Flush()):
          self._ExportEvent(
              storage_reader, output_module, event_in_buffer,
              event_data_in_buffer, event_data_stream,
              deduplicate_events=deduplicate_events)
          self._number_of_consumed_events += 1
          self._events_status.number_of_filtered_events += 1
          self._events_status.number_of_events_from_time_slice += 1

        forward_entries = 1

      self._ExportEvent(
          storage_reader, output_module, event, event_data,
          event_data_stream, deduplicate_events=deduplicate_events)
      self._number_of_consumed_events += 1

      # pylint: disable=singleton-comparison
      if (filter_match == True and filter_limit and
          filter_limit == self._number_of_consumed_events):
        break

  self._FlushExportBuffer(storage_reader, output_module)

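# The time-slicer branches above interleave with filter and status
# bookkeeping, which makes the underlying pattern easy to miss. Below is a
# distilled, standalone sketch (illustrative names, not the plaso code) of
# that pattern: filtered-out events accumulate in a bounded buffer; when a
# matching event arrives, the buffered events are emitted as leading context,
# and up to the same number of trailing events pass through.
import collections

def slice_events(events, is_match, context_size):
  """Yields each matching event with surrounding context events."""
  leading_context = collections.deque(maxlen=context_size)
  forward_entries = 0

  for event in events:
    if is_match(event):
      # Emit the buffered leading context, then the match itself.
      while leading_context:
        yield leading_context.popleft()
      yield event
      forward_entries = context_size
    elif forward_entries:
      # Still inside the trailing context window of the last match.
      yield event
      forward_entries -= 1
    else:
      leading_context.append(event)

# With a context of 3, the match on 7 is surrounded by 4, 5, 6 before it and
# 8, 9 after it (the input ends before the trailing window is exhausted).
print(list(slice_events(range(10), lambda event: event == 7, 3)))
# Prints: [4, 5, 6, 7, 8, 9]
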
def ProcessStorage(
    self, output_module, storage_file, analysis_plugins,
    event_queue_producers, deduplicate_events=True,
    preferred_encoding=u'utf-8', time_slice=None, use_time_slicer=False):
  """Processes a plaso storage file.

  Args:
    output_module: an output module (instance of OutputModule).
    storage_file: the storage file object (instance of StorageFile).
    analysis_plugins: list of analysis plugin objects (instance of
        AnalysisPlugin).
    event_queue_producers: list of event queue producer objects (instance
        of ItemQueueProducer).
    deduplicate_events: optional boolean value to indicate if the event
        objects should be deduplicated. The default is True.
    preferred_encoding: optional preferred encoding. The default is "utf-8".
    time_slice: optional time slice object (instance of TimeSlice).
        The default is None.
    use_time_slicer: optional boolean value to indicate the 'time slicer'
        should be used. The default is False. The 'time slicer' will provide
        a context of events around an event of interest.

  Returns:
    A counter (an instance of counter.Counter) that contains the analysis
    plugin results or None.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  if time_slice:
    if time_slice.event_timestamp:
      pfilter.TimeRangeCache.SetLowerTimestamp(time_slice.start_timestamp)
      pfilter.TimeRangeCache.SetUpperTimestamp(time_slice.end_timestamp)

    elif use_time_slicer:
      self._filter_buffer = bufferlib.CircularBuffer(time_slice.duration)

  with storage_file:
    storage_file.SetStoreLimit(self._filter_object)

    # TODO: allow for single processing.
    # TODO: add upper queue limit.
    analysis_output_queue = multi_process.MultiProcessingQueue(timeout=5)

    if analysis_plugins:
      logging.info(u'Starting analysis plugins.')
      # Within all preprocessing objects, try to get the last one that has
      # time zone information stored in it, the highest chance of it
      # containing the information we are seeking (defaulting to the last
      # one).
      pre_objs = storage_file.GetStorageInformation()
      pre_obj = pre_objs[-1]
      for obj in pre_objs:
        if getattr(obj, u'time_zone_str', u''):
          pre_obj = obj

      # Fill in the collection information.
      pre_obj.collection_information = {}
      if preferred_encoding:
        cmd_line = u' '.join(sys.argv)
        try:
          pre_obj.collection_information[u'cmd_line'] = cmd_line.decode(
              preferred_encoding)
        except UnicodeDecodeError:
          pass
      pre_obj.collection_information[u'file_processed'] = self._storage_file
      pre_obj.collection_information[u'method'] = u'Running Analysis Plugins'

      analysis_plugin_names = [plugin.NAME for plugin in analysis_plugins]
      pre_obj.collection_information[u'plugins'] = analysis_plugin_names

      time_of_run = timelib.Timestamp.GetNow()
      pre_obj.collection_information[u'time_of_run'] = time_of_run

      pre_obj.counter = collections.Counter()

      # Assign the preprocessing object to the storage.
      # This is normally done in the construction of the storage object,
      # however we cannot do that here since the preprocessing object is
      # stored inside the storage file, so we need to open it first to
      # be able to read it in, before we make changes to it. Thus we need
      # to access this protected member of the class.
      # pylint: disable=protected-access
      storage_file._pre_obj = pre_obj

      knowledge_base_object = knowledge_base.KnowledgeBase(pre_obj=pre_obj)

      # Now we need to start all the plugins.
      for analysis_plugin in analysis_plugins:
        analysis_report_queue_producer = queue.ItemQueueProducer(
            analysis_output_queue)

        completion_event = multiprocessing.Event()
        analysis_mediator_object = analysis_mediator.AnalysisMediator(
            analysis_report_queue_producer, knowledge_base_object,
            data_location=self._data_location,
            completion_event=completion_event)
        analysis_process = multiprocessing.Process(
            name=u'Analysis {0:s}'.format(analysis_plugin.plugin_name),
            target=analysis_plugin.RunPlugin,
            args=(analysis_mediator_object,))

        process_info = PsortAnalysisProcess(
            completion_event, analysis_plugin, analysis_process)
        self._analysis_process_info.append(process_info)

        analysis_process.start()
        logging.info(u'Plugin: [{0:s}] started.'.format(
            analysis_plugin.plugin_name))
    else:
      event_queue_producers = []

    output_buffer = output_interface.EventBuffer(
        output_module, deduplicate_events)
    with output_buffer:
      counter = self.ProcessOutput(
          storage_file, output_buffer, my_filter=self._filter_object,
          filter_buffer=self._filter_buffer,
          analysis_queues=event_queue_producers)

    for information in storage_file.GetStorageInformation():
      if hasattr(information, u'counter'):
        counter[u'Stored Events'] += information.counter[u'total']

    if not self._quiet_mode:
      logging.info(u'Output processing is done.')

    # Get all reports and tags from analysis plugins.
    self._ProcessAnalysisPlugins(
        analysis_plugins, analysis_output_queue, storage_file, counter,
        preferred_encoding=preferred_encoding)

  if self._output_file_object:
    self._output_file_object.close()
    self._output_file_object = None

  if self._filter_object and not counter[u'Limited By']:
    counter[u'Filter By Date'] = (
        counter[u'Stored Events'] - counter[u'Events Included'] -
        counter[u'Events Filtered Out'])

  return counter

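# Illustrative, standalone sketch (not plaso code) of the start-and-track
# pattern in the plugin loop above: each plugin runs in its own process and
# signals completion through a multiprocessing.Event, so the parent can tell
# a finished process from a hung one. All names here are made up.
import multiprocessing

def _RunPluginSketch(completion_event):
  # ... plugin work would happen here ...
  completion_event.set()

if __name__ == '__main__':
  completion_event = multiprocessing.Event()
  process = multiprocessing.Process(
      name='Analysis example', target=_RunPluginSketch,
      args=(completion_event,))
  process.start()

  process.join(timeout=10)
  if completion_event.is_set():
    print('plugin completed cleanly')
  elif process.is_alive():
    # The plugin is hung or still working; a real caller might terminate it.
    process.terminate()
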
def _ExportEvents(
    self, storage_reader, output_module, deduplicate_events=True,
    event_filter=None, time_slice=None, use_time_slicer=False):
  """Exports events using an output module.

  Args:
    storage_reader (StorageReader): storage reader.
    output_module (OutputModule): output module.
    deduplicate_events (Optional[bool]): True if events should be
        deduplicated.
    event_filter (Optional[FilterObject]): event filter.
    time_slice (Optional[TimeRange]): time range that defines a time slice
        to filter events.
    use_time_slicer (Optional[bool]): True if the 'time slicer' should be
        used. The 'time slicer' will provide a context of events around
        an event of interest.

  Returns:
    collections.Counter: counter that tracks the number of unique events
        read from storage.
  """
  self._status = definitions.PROCESSING_STATUS_EXPORTING

  time_slice_buffer = None
  time_slice_range = None

  if time_slice:
    if time_slice.event_timestamp is not None:
      time_slice_range = storage_time_range.TimeRange(
          time_slice.start_timestamp, time_slice.end_timestamp)

    if use_time_slicer:
      time_slice_buffer = bufferlib.CircularBuffer(time_slice.duration)

  filter_limit = getattr(event_filter, 'limit', None)
  forward_entries = 0

  number_of_filtered_events = 0
  number_of_events_from_time_slice = 0

  for event in storage_reader.GetSortedEvents(time_range=time_slice_range):
    event_data_identifier = event.GetEventDataIdentifier()
    if event_data_identifier:
      event_data = storage_reader.GetEventDataByIdentifier(
          event_data_identifier)
      if event_data:
        for attribute_name, attribute_value in event_data.GetAttributes():
          setattr(event, attribute_name, attribute_value)

    event_identifier = event.GetIdentifier()
    event.tag = self._event_tag_index.GetEventTagByIdentifier(
        storage_reader, event_identifier)

    if time_slice_range and event.timestamp != time_slice.event_timestamp:
      number_of_events_from_time_slice += 1

    if event_filter:
      filter_match = event_filter.Match(event)
    else:
      filter_match = None

    # pylint: disable=singleton-comparison
    if filter_match == False:
      if not time_slice_buffer:
        number_of_filtered_events += 1

      elif forward_entries == 0:
        time_slice_buffer.Append(event)
        number_of_filtered_events += 1

      elif forward_entries <= time_slice_buffer.size:
        self._ExportEvent(
            output_module, event, deduplicate_events=deduplicate_events)
        self._number_of_consumed_events += 1
        number_of_events_from_time_slice += 1
        forward_entries += 1

      else:
        # We reached the maximum size of the time slice and don't need to
        # include other entries.
        number_of_filtered_events += 1
        forward_entries = 0

    else:
      # pylint: disable=singleton-comparison
      if filter_match == True and time_slice_buffer:
        # Empty the time slice buffer.
        for event_in_buffer in time_slice_buffer.Flush():
          self._ExportEvent(
              output_module, event_in_buffer,
              deduplicate_events=deduplicate_events)
          self._number_of_consumed_events += 1
          number_of_filtered_events += 1
          number_of_events_from_time_slice += 1

        forward_entries = 1

      self._ExportEvent(
          output_module, event, deduplicate_events=deduplicate_events)
      self._number_of_consumed_events += 1

      # pylint: disable=singleton-comparison
      if (filter_match == True and filter_limit and
          filter_limit == self._number_of_consumed_events):
        break

  self._FlushExportBuffer(output_module)

  events_counter = collections.Counter()
  events_counter['Events filtered'] = number_of_filtered_events
  events_counter['Events from time slice'] = number_of_events_from_time_slice
  events_counter['Events processed'] = self._number_of_consumed_events

  if self._number_of_duplicate_events:
    events_counter['Duplicate events removed'] = (
        self._number_of_duplicate_events)

  if self._number_of_macb_grouped_events:
    events_counter['Events MACB grouped'] = (
        self._number_of_macb_grouped_events)

  if filter_limit:
    events_counter['Limited By'] = filter_limit

  return events_counter

def ProcessStorage(
    self, output_module, storage_file, storage_file_path, analysis_plugins,
    event_queue_producers, command_line_arguments=None,
    deduplicate_events=True, preferred_encoding=u'utf-8', time_slice=None,
    use_time_slicer=False):
  """Processes a plaso storage file.

  Args:
    output_module: an output module (instance of OutputModule).
    storage_file: the storage file object (instance of StorageFile).
    storage_file_path: string containing the path of the storage file.
    analysis_plugins: list of analysis plugin objects (instance of
        AnalysisPlugin).
    event_queue_producers: list of event queue producer objects (instance
        of ItemQueueProducer).
    command_line_arguments: optional string of the command line arguments
        or None if not set.
    deduplicate_events: optional boolean value to indicate if the event
        objects should be deduplicated.
    preferred_encoding: optional preferred encoding.
    time_slice: optional time slice object (instance of TimeSlice).
    use_time_slicer: optional boolean value to indicate the 'time slicer'
        should be used. The 'time slicer' will provide a context of events
        around an event of interest.

  Returns:
    A counter (an instance of collections.Counter) that tracks the number
    of events extracted from storage, and the analysis plugin results.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  if time_slice:
    if time_slice.event_timestamp is not None:
      time_slice = storage_time_range.TimeRange(
          time_slice.start_timestamp, time_slice.end_timestamp)

    elif use_time_slicer:
      self._filter_buffer = bufferlib.CircularBuffer(time_slice.duration)

  with storage_file:
    # TODO: allow for single processing.
    # TODO: add upper queue limit.
    analysis_queue_port = None
    if self._use_zeromq:
      analysis_report_incoming_queue = zeromq_queue.ZeroMQPullBindQueue(
          delay_open=False, port=None, linger_seconds=5)
      analysis_queue_port = analysis_report_incoming_queue.port
    else:
      analysis_report_incoming_queue = multi_process.MultiProcessingQueue(
          timeout=5)

    pre_obj = self._GetLastGoodPreprocess(storage_file)
    if pre_obj is None:
      pre_obj = event.PreprocessObject()

    if analysis_plugins:
      self._StartAnalysisPlugins(
          storage_file_path, analysis_plugins, pre_obj,
          analysis_queue_port=analysis_queue_port,
          analysis_report_incoming_queue=analysis_report_incoming_queue,
          command_line_arguments=command_line_arguments)

      # Assign the preprocessing object to the storage.
      # This is normally done in the construction of the storage object,
      # however we cannot do that here since the preprocessing object is
      # stored inside the storage file, so we need to open it first to
      # be able to read it in, before we make changes to it. Thus we need
      # to access this protected member of the class.
      # pylint: disable=protected-access
      storage_file._pre_obj = pre_obj
    else:
      event_queue_producers = []

    output_buffer = output_event_buffer.EventBuffer(
        output_module, deduplicate_events)
    with output_buffer:
      counter = self.ProcessEventsFromStorage(
          storage_file, output_buffer, analysis_queues=event_queue_producers,
          filter_buffer=self._filter_buffer, my_filter=self._filter_object,
          time_slice=time_slice)

    for information in storage_file.GetStorageInformation():
      if hasattr(information, u'counter'):
        counter[u'Stored Events'] += information.counter[u'total']

    if not self._quiet_mode:
      logging.info(u'Output processing is done.')

    # Get all reports and tags from analysis plugins.
    self._ProcessAnalysisPlugins(
        analysis_plugins, analysis_report_incoming_queue, storage_file,
        counter, preferred_encoding=preferred_encoding)

  if self._filter_object and not counter[u'Limited By']:
    counter[u'Filter By Date'] = (
        counter[u'Stored Events'] - counter[u'Events Included'] -
        counter[u'Events Filtered Out'])

  return counter

def _ExportEvents(
    self, storage_reader, event_buffer, event_filter=None, time_slice=None,
    use_time_slicer=False):
  """Exports events using an output module.

  Args:
    storage_reader (StorageReader): storage reader.
    event_buffer (EventBuffer): event buffer.
    event_filter (Optional[FilterObject]): event filter.
    time_slice (Optional[TimeRange]): time range that defines a time slice
        to filter events.
    use_time_slicer (Optional[bool]): True if the 'time slicer' should be
        used. The 'time slicer' will provide a context of events around
        an event of interest.

  Returns:
    collections.Counter: counter that tracks the number of unique events
        read from storage.
  """
  self._status = definitions.PROCESSING_STATUS_EXPORTING

  time_slice_buffer = None
  if time_slice:
    if time_slice.event_timestamp is not None:
      time_slice = storage_time_range.TimeRange(
          time_slice.start_timestamp, time_slice.end_timestamp)

    if use_time_slicer:
      time_slice_buffer = bufferlib.CircularBuffer(time_slice.duration)

  filter_limit = getattr(event_filter, u'limit', None)
  forward_entries = 0
  number_of_filtered_events = 0
  number_of_events_from_time_slice = 0

  for event in storage_reader.GetEvents(time_range=time_slice):
    if event_filter:
      filter_match = event_filter.Match(event)
    else:
      filter_match = None

    # pylint: disable=singleton-comparison
    if filter_match == False:
      if not time_slice_buffer:
        number_of_filtered_events += 1

      elif forward_entries == 0:
        time_slice_buffer.Append(event)
        number_of_filtered_events += 1

      elif forward_entries <= time_slice_buffer.size:
        event_buffer.Append(event)
        self._number_of_consumed_events += 1
        number_of_events_from_time_slice += 1
        forward_entries += 1

      else:
        # We reached the maximum size of the time slice and don't need to
        # include other entries.
        number_of_filtered_events += 1
        forward_entries = 0

    else:
      # pylint: disable=singleton-comparison
      if filter_match == True and time_slice_buffer:
        # Empty the time slice buffer.
        for event_in_buffer in time_slice_buffer.Flush():
          event_buffer.Append(event_in_buffer)
          self._number_of_consumed_events += 1
          number_of_filtered_events += 1
          number_of_events_from_time_slice += 1

        forward_entries = 1

      event_buffer.Append(event)
      self._number_of_consumed_events += 1

      # pylint: disable=singleton-comparison
      if (filter_match == True and filter_limit and
          filter_limit == self._number_of_consumed_events):
        break

  events_counter = collections.Counter()
  events_counter[u'Events filtered'] = number_of_filtered_events
  events_counter[u'Events from time slice'] = (
      number_of_events_from_time_slice)
  events_counter[u'Events processed'] = self._number_of_consumed_events

  if event_buffer.duplicate_counter:
    events_counter[u'Duplicate events removed'] = (
        event_buffer.duplicate_counter)

  if filter_limit:
    events_counter[u'Limited By'] = filter_limit

  return events_counter