def __init__(self): """Initializes a psort multi-processing engine.""" super(PsortMultiProcessEngine, self).__init__() self._analysis_plugins = {} self._completed_analysis_processes = set() self._data_location = None self._event_filter_expression = None self._event_queues = {} self._event_tag_index = event_tag_index.EventTagIndex() self._events_status = processing_status.EventsStatus() # The export event heap is used to make sure the events are sorted in # a deterministic way. self._export_event_heap = PsortEventHeap() self._export_event_timestamp = 0 self._knowledge_base = None self._memory_profiler = None self._merge_task = None self._number_of_consumed_event_tags = 0 self._number_of_consumed_events = 0 self._number_of_consumed_reports = 0 self._number_of_consumed_sources = 0 self._number_of_consumed_warnings = 0 self._number_of_produced_event_tags = 0 self._number_of_produced_events = 0 self._number_of_produced_reports = 0 self._number_of_produced_sources = 0 self._number_of_produced_warnings = 0 self._processing_configuration = None self._processing_profiler = None self._serializers_profiler = None self._status = definitions.STATUS_INDICATOR_IDLE self._status_update_callback = None self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
def __init__(self): """Initializes an output and formatting multi-processing engine.""" super(OutputAndFormattingMultiProcessEngine, self).__init__() # The export event heap is used to make sure the events are sorted in # a deterministic way. self._event_tag_index = event_tag_index.EventTagIndex() self._events_status = processing_status.EventsStatus() self._export_event_heap = PsortEventHeap() self._export_event_timestamp = 0 self._knowledge_base = None self._number_of_consumed_events = 0 self._processing_configuration = None self._status = definitions.STATUS_INDICATOR_IDLE self._status_update_callback = None
def __init__(self, worker_memory_limit=None, worker_timeout=None):
  """Initializes a psort multi-processing engine.

  Args:
    worker_memory_limit (Optional[int]): maximum amount of memory a worker
        is allowed to consume, where None represents the default memory
        limit and 0 represents no limit.
    worker_timeout (Optional[float]): number of minutes before a worker
        process that is not providing status updates is considered
        inactive, where None or 0.0 represents the default timeout.
  """
  if worker_memory_limit is None:
    worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

  if not worker_timeout:
    worker_timeout = definitions.DEFAULT_WORKER_TIMEOUT

  super(PsortMultiProcessEngine, self).__init__()
  self._analysis_plugins = {}
  self._completed_analysis_processes = set()
  self._data_location = None
  self._event_filter_expression = None
  self._event_queues = {}
  self._event_tag_index = event_tag_index.EventTagIndex()
  self._events_status = processing_status.EventsStatus()
  # The export event heap is used to make sure the events are sorted in
  # a deterministic way.
  self._export_event_heap = PsortEventHeap()
  self._export_event_timestamp = 0
  self._knowledge_base = None
  self._memory_profiler = None
  self._merge_task = None
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_consumed_warnings = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_produced_warnings = 0
  self._processing_configuration = None
  self._processing_profiler = None
  self._serializers_profiler = None
  self._status = definitions.STATUS_INDICATOR_IDLE
  self._status_update_callback = None
  self._worker_memory_limit = worker_memory_limit
  self._worker_timeout = worker_timeout
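# A minimal usage sketch for the constructor above; the engine class is
# assumed to be importable from its plaso module and the values shown are
# illustrative, not the defaults. Per the docstring, worker_memory_limit=None
# selects the default limit and 0 disables it, while a worker_timeout of
# None or 0.0 selects the default timeout.
engine = PsortMultiProcessEngine(
    worker_memory_limit=2 * 1024 * 1024 * 1024,  # 2 GiB per worker process.
    worker_timeout=15.0)  # Minutes without status updates before inactive.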
def __init__(self, use_zeromq=True):
  """Initializes an engine object.

  Args:
    use_zeromq (Optional[bool]): True if ZeroMQ should be used for queuing
        instead of Python's multiprocessing queue.
  """
  super(PsortMultiProcessEngine, self).__init__()
  self._analysis_plugins = {}
  self._completed_analysis_processes = set()
  self._data_location = None
  self._event_filter_expression = None
  self._event_queues = {}
  self._event_tag_index = event_tag_index.EventTagIndex()
  self._events_status = processing_status.EventsStatus()
  # The export event heap is used to make sure the events are sorted in
  # a deterministic way.
  self._export_event_heap = PsortEventHeap()
  self._export_event_timestamp = 0
  self._guppy_memory_profiler = None
  self._knowledge_base = None
  self._memory_profiler = None
  self._merge_task = None
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_consumed_warnings = 0
  self._number_of_duplicate_events = 0
  self._number_of_macb_grouped_events = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_produced_warnings = 0
  self._processing_configuration = None
  self._processing_profiler = None
  self._serializers_profiler = None
  self._status = definitions.PROCESSING_STATUS_IDLE
  self._status_update_callback = None
  self._use_zeromq = use_zeromq
  self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
def ExportEvents(
    self, knowledge_base_object, storage_reader, output_module,
    processing_configuration, deduplicate_events=True, event_filter=None,
    status_update_callback=None, time_slice=None, use_time_slicer=False):
  """Exports events using an output module.

  Args:
    knowledge_base_object (KnowledgeBase): contains information from
        the source data needed for processing.
    storage_reader (StorageReader): storage reader.
    output_module (OutputModule): output module.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    deduplicate_events (Optional[bool]): True if events should be
        deduplicated.
    event_filter (Optional[EventObjectFilter]): event filter.
    status_update_callback (Optional[function]): callback function for status
        updates.
    time_slice (Optional[TimeSlice]): slice of time to output.
    use_time_slicer (Optional[bool]): True if the 'time slicer' should be
        used. The 'time slicer' will provide a context of events around
        an event of interest.
  """
  self._events_status = processing_status.EventsStatus()
  self._knowledge_base = knowledge_base_object
  self._processing_configuration = processing_configuration
  self._status_update_callback = status_update_callback

  total_number_of_events = 0
  for session in storage_reader.GetSessions():
    total_number_of_events += session.parsers_counter['total']

  self._events_status.total_number_of_events = total_number_of_events

  output_module.WriteHeader()

  self._StartStatusUpdateThread()

  self._StartProfiling(self._processing_configuration.profiling)

  try:
    self._ExportEvents(
        storage_reader, output_module, deduplicate_events=deduplicate_events,
        event_filter=event_filter, time_slice=time_slice,
        use_time_slicer=use_time_slicer)

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  output_module.WriteFooter()

  self._StopProfiling()

  self._UpdateForemanProcessStatus()

  if self._status_update_callback:
    self._status_update_callback(self._processing_status)

  # Reset values.
  self._status_update_callback = None
  self._processing_configuration = None
  self._knowledge_base = None
  self._events_status = None
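# A usage sketch for ExportEvents above. The helpers that produce the
# knowledge base, storage reader, output module and processing configuration
# are hypothetical placeholders, not plaso APIs; they stand in for setup that
# happens elsewhere in the tool.
knowledge_base_object = CreateKnowledgeBase()  # Hypothetical helper.
storage_reader = OpenStorageReader('timeline.plaso')  # Hypothetical helper.
output_module = CreateOutputModule('dynamic')  # Hypothetical helper.
configuration = CreateProcessingConfiguration()  # Hypothetical helper.

engine = PsortMultiProcessEngine()
engine.ExportEvents(
    knowledge_base_object, storage_reader, output_module, configuration,
    deduplicate_events=True)

# Passing an event_filter together with use_time_slicer=True makes matching
# events act as anchors, with surrounding events emitted as context (per the
# docstring above).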
def AnalyzeEvents(
    self, knowledge_base_object, storage_writer, data_location,
    analysis_plugins, processing_configuration, event_filter=None,
    event_filter_expression=None, status_update_callback=None):
  """Analyzes events in a plaso storage.

  Args:
    knowledge_base_object (KnowledgeBase): contains information from
        the source data needed for processing.
    storage_writer (StorageWriter): storage writer.
    data_location (str): path to the location that data files should
        be loaded from.
    analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
        should be run and their names.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    event_filter (Optional[EventObjectFilter]): event filter.
    event_filter_expression (Optional[str]): event filter expression.
    status_update_callback (Optional[function]): callback function for status
        updates.

  Raises:
    KeyboardInterrupt: if a keyboard interrupt was raised.
  """
  if not analysis_plugins:
    return

  keyboard_interrupt = False

  self._analysis_plugins = {}
  self._data_location = data_location
  self._event_filter_expression = event_filter_expression
  self._events_status = processing_status.EventsStatus()
  self._knowledge_base = knowledge_base_object
  self._status_update_callback = status_update_callback
  self._processing_configuration = processing_configuration

  self._StartProfiling(self._processing_configuration.profiling)

  # Set up the storage writer before the analysis processes.
  storage_writer.StartTaskStorage()

  self._StartAnalysisProcesses(storage_writer, analysis_plugins)

  # Start the status update thread after open of the storage writer
  # so we don't have to clean up the thread if the open fails.
  self._StartStatusUpdateThread()

  try:
    # Open the storage file after creating the worker processes otherwise
    # the session store will remain locked as long as the worker processes
    # are alive.
    storage_writer.Open()
    storage_writer.WriteSessionStart()

    try:
      storage_writer.WriteSessionConfiguration()

      self._AnalyzeEvents(
          storage_writer, analysis_plugins, event_filter=event_filter)

      self._status = definitions.STATUS_INDICATOR_FINALIZING

    except KeyboardInterrupt:
      keyboard_interrupt = True
      self._abort = True

      self._processing_status.aborted = True
      if self._status_update_callback:
        self._status_update_callback(self._processing_status)

    finally:
      storage_writer.WriteSessionCompletion(aborted=self._abort)

      storage_writer.Close()

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  try:
    self._StopAnalysisProcesses(abort=self._abort)

  except KeyboardInterrupt:
    keyboard_interrupt = True

    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  self._StopProfiling()

  # Reset values.
  self._analysis_plugins = {}
  self._data_location = None
  self._event_filter_expression = None
  self._knowledge_base = None
  self._processing_configuration = None
  self._status_update_callback = None

  if keyboard_interrupt:
    raise KeyboardInterrupt
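# A minimal sketch of a status update callback as accepted by ExportEvents
# and AnalyzeEvents above. The callback receives the engine's
# ProcessingStatus; only the aborted attribute, which the methods above set,
# is relied on here.
def PrintStatusUpdate(processing_status):
  """Prints a short summary for each status update."""
  if processing_status.aborted:
    print('Processing aborted.')
  else:
    print('Processing...')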
def AnalyzeEvents(
    self, session, knowledge_base_object, storage_writer, data_location,
    analysis_plugins, processing_configuration, event_filter=None,
    event_filter_expression=None, status_update_callback=None,
    storage_file_path=None):
  """Analyzes events in a Plaso storage.

  Args:
    session (Session): session in which the events are analyzed.
    knowledge_base_object (KnowledgeBase): contains information from
        the source data needed for processing.
    storage_writer (StorageWriter): storage writer.
    data_location (str): path to the location that data files should
        be loaded from.
    analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
        should be run and their names.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    event_filter (Optional[EventObjectFilter]): event filter.
    event_filter_expression (Optional[str]): event filter expression.
    status_update_callback (Optional[function]): callback function for status
        updates.
    storage_file_path (Optional[str]): path to the session storage file.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    KeyboardInterrupt: if a keyboard interrupt was raised.
    ValueError: if analysis plugins are missing.
  """
  if not analysis_plugins:
    raise ValueError('Missing analysis plugins')

  abort_kill = False
  keyboard_interrupt = False
  queue_full = False

  self._analysis_plugins = {}
  self._data_location = data_location
  self._event_filter_expression = event_filter_expression
  self._events_status = processing_status.EventsStatus()
  self._knowledge_base = knowledge_base_object
  self._processing_configuration = processing_configuration
  self._session = session
  self._status_update_callback = status_update_callback
  self._storage_file_path = storage_file_path

  stored_event_labels_counter = {}
  if storage_writer.HasAttributeContainers('event_label_count'):
    stored_event_labels_counter = {
        event_label_count.label: event_label_count
        for event_label_count in storage_writer.GetAttributeContainers(
            'event_label_count')}

  self._event_labels_counter = collections.Counter()

  if storage_writer.HasAttributeContainers('parser_count'):
    parsers_counter = {
        parser_count.name: parser_count.number_of_events
        for parser_count in storage_writer.GetAttributeContainers(
            'parser_count')}

    total_number_of_events = parsers_counter['total']

  else:
    total_number_of_events = 0
    for stored_session in storage_writer.GetSessions():
      total_number_of_events += stored_session.parsers_counter['total']

  self._events_status.total_number_of_events = total_number_of_events

  # Set up the storage writer before the analysis processes.
  self._StartTaskStorage(definitions.STORAGE_FORMAT_SQLITE)

  self._StartAnalysisProcesses(analysis_plugins)

  self._StartProfiling(self._processing_configuration.profiling)

  # Start the status update thread after open of the storage writer
  # so we don't have to clean up the thread if the open fails.
  self._StartStatusUpdateThread()

  try:
    self._AnalyzeEvents(
        storage_writer, analysis_plugins, event_filter=event_filter)

    for key, value in self._event_labels_counter.items():
      event_label_count = stored_event_labels_counter.get(key, None)
      if event_label_count:
        event_label_count.number_of_events += value
        storage_writer.UpdateAttributeContainer(event_label_count)
      else:
        event_label_count = counts.EventLabelCount(
            label=key, number_of_events=value)
        storage_writer.AddAttributeContainer(event_label_count)

    self._status = definitions.STATUS_INDICATOR_FINALIZING

  except errors.QueueFull:
    queue_full = True
    self._abort = True

  except KeyboardInterrupt:
    keyboard_interrupt = True
    self._abort = True

  finally:
    self._processing_status.aborted = self._abort
    session.aborted = self._abort

    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

  self._StopProfiling()

  # Update the status view one last time before the analysis processes are
  # stopped.
  self._UpdateStatus()

  if queue_full:
    # TODO: handle abort on queue full more elegantly.
    abort_kill = True
  else:
    try:
      self._StopAnalysisProcesses(abort=self._abort)

    except KeyboardInterrupt:
      keyboard_interrupt = True
      abort_kill = True

  if abort_kill:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  try:
    self._StopTaskStorage(
        definitions.STORAGE_FORMAT_SQLITE, session.identifier,
        abort=self._abort)
  except (IOError, OSError) as exception:
    logger.error('Unable to stop task storage with error: {0!s}'.format(
        exception))

  if self._abort:
    logger.debug('Analysis aborted.')
  else:
    logger.debug('Analysis completed.')

  # Update the status view one last time.
  self._UpdateStatus()

  # Reset values.
  self._analysis_plugins = {}
  self._data_location = None
  self._event_filter_expression = None
  self._knowledge_base = None
  self._processing_configuration = None
  self._session = None
  self._status_update_callback = None
  self._storage_file_path = None

  if keyboard_interrupt:
    raise KeyboardInterrupt

  return self._processing_status
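# A usage sketch for the session-aware AnalyzeEvents above. The engine class
# name and all setup helpers are hypothetical placeholders; the plugin name
# 'tagging' and the data location path are illustrative only.
session = CreateSession()  # Hypothetical helper.
knowledge_base_object = CreateKnowledgeBase()  # Hypothetical helper.
storage_writer = OpenStorageWriter('timeline.plaso')  # Hypothetical helper.
analysis_plugins = {'tagging': CreateAnalysisPlugin('tagging')}  # Hypothetical.
configuration = CreateProcessingConfiguration()  # Hypothetical helper.

engine = PsortMultiProcessEngine()
processing_status = engine.AnalyzeEvents(
    session, knowledge_base_object, storage_writer, '/usr/share/plaso',
    analysis_plugins, configuration, storage_file_path='timeline.plaso',
    status_update_callback=PrintStatusUpdate)

# The returned ProcessingStatus records whether the run was aborted.
if processing_status.aborted:
  print('Analysis was aborted.')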