def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
  """Processes a path specification.

  Args:
    extraction_worker (worker.ExtractionWorker): extraction worker.
    parser_mediator (ParserMediator): parser mediator.
    path_spec (dfvfs.PathSpec): path specification.
  """
  self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
      path_spec)

  try:
    extraction_worker.ProcessPathSpec(parser_mediator, path_spec)

  except dfvfs_errors.CacheFullError:
    # TODO: signal engine of failure.
    self._abort = True
    logger.error((
        'ABORT: detected cache full error while processing path spec: '
        '{0:s}').format(self._current_display_name))

  except Exception as exception:  # pylint: disable=broad-except
    parser_mediator.ProduceExtractionWarning((
        'unable to process path specification with error: '
        '{0!s}').format(exception), path_spec=path_spec)

    if self._processing_configuration.debug_output:
      logger.warning((
          'Unhandled exception while processing path specification: '
          '{0:s}.').format(self._current_display_name))
      logger.exception(exception)
def _StartWorkerProcess(self, process_name, storage_writer):
  """Creates, starts, monitors and registers a worker process.

  Args:
    process_name (str): process name.
    storage_writer (StorageWriter): storage writer for a session storage
        used to create task storage.

  Returns:
    MultiProcessWorkerProcess: extraction worker process or None if the
        process could not be started.
  """
  process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
  logger.debug('Starting worker process {0:s}'.format(process_name))

  if self._use_zeromq:
    queue_name = '{0:s} task queue'.format(process_name)
    task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
        delay_open=True, linger_seconds=0, name=queue_name,
        port=self._task_queue_port,
        timeout_seconds=self._TASK_QUEUE_TIMEOUT_SECONDS)
  else:
    task_queue = self._task_queue

  process = worker_process.WorkerProcess(
      task_queue, storage_writer, self.knowledge_base,
      self._session_identifier, self._processing_configuration,
      enable_sigsegv_handler=self._enable_sigsegv_handler, name=process_name)

  # Remove all possible log handlers to prevent a child process from logging
  # to the main process log file and garbling the log. The log handlers are
  # recreated after the worker process has been started.
  # Iterate over a copy of the handler list since removeHandler() modifies
  # the list while it is being iterated.
  for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)
    handler.close()

  process.start()

  loggers.ConfigureLogging(
      debug_output=self._debug_output, filename=self._log_filename, mode='a',
      quiet_mode=self._quiet_mode)

  try:
    self._StartMonitoringProcess(process)

  except (IOError, KeyError) as exception:
    pid = process.pid
    logger.error((
        'Unable to monitor replacement worker process: {0:s} '
        '(PID: {1:d}) with error: {2!s}').format(
            process_name, pid, exception))

    self._TerminateProcess(process)
    return None

  self._RegisterProcess(process)

  self._last_worker_number += 1

  return process
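# A minimal, self-contained sketch of the handler-detach pattern used above:
# the parent removes (and closes) its root log handlers before starting a
# child process so the child does not inherit the parent's open log file
# handle, then reconfigures its own logging afterwards. The function and
# file names below are illustrative, not part of the engine.
import logging
import multiprocessing


def _worker():
  # The child process would configure its own logging here.
  pass


def start_child_with_clean_logging(log_filename='parent.log'):
  for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)
    handler.close()

  process = multiprocessing.Process(target=_worker)
  process.start()

  # Re-create the parent's log handler after the child has been started.
  logging.basicConfig(filename=log_filename, filemode='a')
  return process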
def _StartWorkerProcess(self, process_name, storage_writer):
  """Creates, starts, monitors and registers a worker process.

  Args:
    process_name (str): process name.
    storage_writer (StorageWriter): storage writer for a session storage
        used to create task storage.

  Returns:
    MultiProcessWorkerProcess: extraction worker process or None on error.
  """
  analysis_plugin = self._analysis_plugins.get(process_name, None)
  if not analysis_plugin:
    logger.error('Missing analysis plugin: {0:s}'.format(process_name))
    return None

  queue_name = '{0:s} output event queue'.format(process_name)
  output_event_queue = zeromq_queue.ZeroMQPushBindQueue(
      name=queue_name, timeout_seconds=self._QUEUE_TIMEOUT)
  # Open the queue so it can bind to a random port, and we can get the
  # port number to use in the input queue.
  output_event_queue.Open()

  self._event_queues[process_name] = output_event_queue

  queue_name = '{0:s} input event queue'.format(process_name)
  input_event_queue = zeromq_queue.ZeroMQPullConnectQueue(
      name=queue_name, delay_open=True, port=output_event_queue.port,
      timeout_seconds=self._QUEUE_TIMEOUT)

  process = analysis_process.AnalysisProcess(
      input_event_queue, storage_writer, self._knowledge_base,
      analysis_plugin, self._processing_configuration,
      data_location=self._data_location,
      event_filter_expression=self._event_filter_expression,
      name=process_name)

  process.start()

  logger.info('Started analysis plugin: {0:s} (PID: {1:d}).'.format(
      process_name, process.pid))

  try:
    self._StartMonitoringProcess(process)
  except (IOError, KeyError) as exception:
    logger.error((
        'Unable to monitor analysis plugin: {0:s} (PID: {1:d}) '
        'with error: {2!s}').format(process_name, process.pid, exception))

    process.terminate()
    return None

  self._RegisterProcess(process)
  return process
def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
  """Processes a path specification.

  Args:
    extraction_worker (worker.ExtractionWorker): extraction worker.
    parser_mediator (ParserMediator): parser mediator.
    path_spec (dfvfs.PathSpec): path specification.
  """
  self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
      path_spec)

  try:
    extraction_worker.ProcessPathSpec(parser_mediator, path_spec)

  except dfvfs_errors.CacheFullError:
    # TODO: signal engine of failure.
    self._abort = True
    logger.error((
        'ABORT: detected cache full error while processing path spec: '
        '{0:s}').format(self._current_display_name))

  except Exception as exception:  # pylint: disable=broad-except
    parser_mediator.ProduceExtractionError((
        'unable to process path specification with error: '
        '{0!s}').format(exception), path_spec=path_spec)

    if self._processing_configuration.debug_output:
      logger.warning((
          'Unhandled exception while processing path specification: '
          '{0:s}.').format(self._current_display_name))
      logger.exception(exception)
def _KillProcess(self, pid):
  """Issues a SIGKILL or equivalent to the process.

  Args:
    pid (int): process identifier (PID).
  """
  if sys.platform.startswith('win'):
    process_terminate = 1
    handle = ctypes.windll.kernel32.OpenProcess(
        process_terminate, False, pid)
    ctypes.windll.kernel32.TerminateProcess(handle, -1)
    ctypes.windll.kernel32.CloseHandle(handle)

  else:
    try:
      os.kill(pid, signal.SIGKILL)
    except OSError as exception:
      logger.error('Unable to kill process {0:d} with error: {1!s}'.format(
          pid, exception))
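# A standalone sketch of the same cross-platform "hard kill" logic, outside
# the engine class, for illustration only. PROCESS_TERMINATE (0x0001) is the
# access right requested above; error handling is deliberately minimal.
import ctypes
import os
import signal
import sys


def kill_process(pid):
  """Forcefully terminates a process by PID (TerminateProcess or SIGKILL)."""
  if sys.platform.startswith('win'):
    PROCESS_TERMINATE = 0x0001
    handle = ctypes.windll.kernel32.OpenProcess(PROCESS_TERMINATE, False, pid)
    if handle:
      ctypes.windll.kernel32.TerminateProcess(handle, -1)
      ctypes.windll.kernel32.CloseHandle(handle)
  else:
    try:
      os.kill(pid, signal.SIGKILL)
    except OSError:
      pass  # The process may already have exited.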
def _StartAnalysisProcesses(self, storage_writer, analysis_plugins):
  """Starts the analysis processes.

  Args:
    storage_writer (StorageWriter): storage writer.
    analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
        should be run and their names.
  """
  logger.info('Starting analysis plugins.')

  for analysis_plugin in analysis_plugins.values():
    self._analysis_plugins[analysis_plugin.NAME] = analysis_plugin

    process = self._StartWorkerProcess(analysis_plugin.NAME, storage_writer)
    if not process:
      logger.error('Unable to create analysis process: {0:s}'.format(
          analysis_plugin.NAME))

  logger.info('Analysis plugins running')
def _StartProcessStatusRPCServer(self):
  """Starts the process status RPC server."""
  if self._rpc_server:
    return

  self._rpc_server = plaso_xmlrpc.XMLProcessStatusRPCServer(self._GetStatus)

  hostname = 'localhost'

  # Try the PID as port number first otherwise pick something random
  # between 1024 and 60000.
  if self._pid < 1024 or self._pid > 60000:
    port = random.randint(1024, 60000)
  else:
    port = self._pid

  if not self._rpc_server.Start(hostname, port):
    port = 0
    for _ in range(self._NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
      port = random.randint(1024, 60000)
      if self._rpc_server.Start(hostname, port):
        break

      port = 0

  if not port:
    logger.error((
        'Unable to start a process status RPC server for {0!s} '
        '(PID: {1:d})').format(self._name, self._pid))
    self._rpc_server = None
    return

  self.rpc_port.value = port

  logger.debug('Process: {0!s} process status RPC server started'.format(
      self._name))
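# A small sketch of the port-selection heuristic documented above: use the
# process PID as the RPC port when it falls inside the usable range,
# otherwise (or when binding fails) fall back to random ports for a limited
# number of attempts. The start_server callable is a stand-in for
# XMLProcessStatusRPCServer.Start and is assumed to return True on success.
import random


def pick_rpc_port(pid, start_server, maximum_attempts=3):
  """Returns a port the server was started on, or None on failure."""
  if pid < 1024 or pid > 60000:
    port = random.randint(1024, 60000)
  else:
    port = pid

  if start_server('localhost', port):
    return port

  for _ in range(maximum_attempts):
    port = random.randint(1024, 60000)
    if start_server('localhost', port):
      return port

  return None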
def ProcessSources(
    self, session_identifier, source_path_specs, storage_writer,
    processing_configuration, enable_sigsegv_handler=False,
    number_of_worker_processes=0, status_update_callback=None,
    worker_memory_limit=None):
  """Processes the sources and extract events.

  Args:
    session_identifier (str): identifier of the session.
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
        should be enabled.
    number_of_worker_processes (Optional[int]): number of worker processes.
    status_update_callback (Optional[function]): callback function for status
        updates.
    worker_memory_limit (Optional[int]): maximum amount of memory a worker is
        allowed to consume, where None represents the default memory limit
        and 0 represents no limit.

  Returns:
    ProcessingStatus: processing status.
  """
  if number_of_worker_processes < 1:
    # One worker for each "available" CPU (minus other processes).
    # The number here is derived from the fact that the engine starts up:
    # * A main process.
    #
    # If we want to utilize all CPUs on the system we therefore need to
    # start up workers that amounts to the total number of CPUs - the other
    # processes.
    try:
      cpu_count = multiprocessing.cpu_count() - 1

      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
        cpu_count = self._WORKER_PROCESSES_MAXIMUM

    except NotImplementedError:
      logger.error((
          'Unable to determine number of CPUs defaulting to {0:d} worker '
          'processes.').format(self._WORKER_PROCESSES_MINIMUM))
      cpu_count = self._WORKER_PROCESSES_MINIMUM

    number_of_worker_processes = cpu_count

  self._enable_sigsegv_handler = enable_sigsegv_handler
  self._number_of_worker_processes = number_of_worker_processes

  if worker_memory_limit is None:
    self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
  else:
    self._worker_memory_limit = worker_memory_limit

  # Keep track of certain values so we can spawn new extraction workers.
  self._processing_configuration = processing_configuration

  self._debug_output = processing_configuration.debug_output
  self._log_filename = processing_configuration.log_filename
  self._session_identifier = session_identifier
  self._status_update_callback = status_update_callback
  self._storage_writer = storage_writer

  # Set up the task queue.
  if not self._use_zeromq:
    self._task_queue = multi_process_queue.MultiProcessingQueue(
        maximum_number_of_queued_items=self._maximum_number_of_tasks)

  else:
    task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
        delay_open=True, linger_seconds=0, maximum_items=1,
        name='main_task_queue',
        timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
    self._task_queue = task_outbound_queue

    # The ZeroMQ backed queue must be started first, so we can save its port.
    # TODO: raises: attribute-defined-outside-init
    # self._task_queue.name = 'Task queue'
    self._task_queue.Open()
    self._task_queue_port = self._task_queue.port

  self._StartProfiling(self._processing_configuration.profiling)
  self._task_manager.StartProfiling(
      self._processing_configuration.profiling, self._name)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  # Set up the storage writer before the worker processes.
  storage_writer.StartTaskStorage()

  for worker_number in range(number_of_worker_processes):
    # First argument to _StartWorkerProcess is not used.
    extraction_process = self._StartWorkerProcess('', storage_writer)
    if not extraction_process:
      logger.error('Unable to create worker process: {0:d}'.format(
          worker_number))

  self._StartStatusUpdateThread()

  try:
    # Open the storage file after creating the worker processes otherwise
    # the ZIP storage file will remain locked as long as the worker
    # processes are alive.
    storage_writer.Open()
    storage_writer.WriteSessionStart()

    try:
      storage_writer.WritePreprocessingInformation(self.knowledge_base)

      self._ProcessSources(source_path_specs, storage_writer)

    finally:
      storage_writer.WriteSessionCompletion(aborted=self._abort)

      storage_writer.Close()

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(None)

    self._task_manager.StopProfiling()
    self._StopProfiling()

  try:
    self._StopExtractionProcesses(abort=self._abort)

  except KeyboardInterrupt:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  # The task queue should be closed by _StopExtractionProcesses, this
  # close is a failsafe, primarily due to MultiProcessingQueue's
  # blocking behavior.
  self._task_queue.Close(abort=True)

  if self._processing_status.error_path_specs:
    task_storage_abort = True
  else:
    task_storage_abort = self._abort

  try:
    storage_writer.StopTaskStorage(abort=task_storage_abort)
  except (IOError, OSError) as exception:
    logger.error('Unable to stop task storage with error: {0!s}'.format(
        exception))

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  # Reset values.
  self._enable_sigsegv_handler = None
  self._number_of_worker_processes = None
  self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

  self._processing_configuration = None

  self._session_identifier = None
  self._status_update_callback = None
  self._storage_writer = None

  return self._processing_status
def _UpdateProcessingStatus(self, pid, process_status, used_memory):
  """Updates the processing status.

  Args:
    pid (int): process identifier (PID) of the worker process.
    process_status (dict[str, object]): status values received from the
        worker process.
    used_memory (int): size of used memory in bytes.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  self._RaiseIfNotRegistered(pid)

  if not process_status:
    return

  process = self._processes_per_pid[pid]

  processing_status = process_status.get('processing_status', None)

  self._RaiseIfNotMonitored(pid)

  display_name = process_status.get('display_name', '')

  number_of_consumed_event_tags = process_status.get(
      'number_of_consumed_event_tags', None)
  number_of_produced_event_tags = process_status.get(
      'number_of_produced_event_tags', None)

  number_of_consumed_events = process_status.get(
      'number_of_consumed_events', None)
  number_of_produced_events = process_status.get(
      'number_of_produced_events', None)

  number_of_consumed_reports = process_status.get(
      'number_of_consumed_reports', None)
  number_of_produced_reports = process_status.get(
      'number_of_produced_reports', None)

  number_of_consumed_sources = process_status.get(
      'number_of_consumed_sources', None)
  number_of_produced_sources = process_status.get(
      'number_of_produced_sources', None)

  number_of_consumed_warnings = process_status.get(
      'number_of_consumed_warnings', None)
  number_of_produced_warnings = process_status.get(
      'number_of_produced_warnings', None)

  if processing_status != definitions.STATUS_INDICATOR_IDLE:
    last_activity_timestamp = process_status.get(
        'last_activity_timestamp', 0.0)

    if last_activity_timestamp:
      last_activity_timestamp += self._PROCESS_WORKER_TIMEOUT

      current_timestamp = time.time()
      if current_timestamp > last_activity_timestamp:
        logger.error((
            'Process {0:s} (PID: {1:d}) has not reported activity within '
            'the timeout period.').format(process.name, pid))
        processing_status = definitions.STATUS_INDICATOR_NOT_RESPONDING

  self._processing_status.UpdateWorkerStatus(
      process.name, processing_status, pid, used_memory, display_name,
      number_of_consumed_sources, number_of_produced_sources,
      number_of_consumed_events, number_of_produced_events,
      number_of_consumed_event_tags, number_of_produced_event_tags,
      number_of_consumed_reports, number_of_produced_reports,
      number_of_consumed_warnings, number_of_produced_warnings)

  task_identifier = process_status.get('task_identifier', '')
  if not task_identifier:
    return

  try:
    self._task_manager.UpdateTaskAsProcessingByIdentifier(task_identifier)
    return
  except KeyError:
    logger.debug(
        'Worker {0:s} is processing unknown task: {1:s}.'.format(
            process.name, task_identifier))
def _MergeTaskStorage(self, storage_writer):
  """Merges a task storage with the session storage.

  This function checks all task stores that are ready to merge and updates
  the scheduled tasks. Note that to prevent this function holding up the task
  scheduling loop only the first available task storage is merged.

  Args:
    storage_writer (StorageWriter): storage writer for a session storage used
        to merge task storage.
  """
  if self._processing_profiler:
    self._processing_profiler.StartTiming('merge_check')

  for task_identifier in storage_writer.GetProcessedTaskIdentifiers():
    try:
      task = self._task_manager.GetProcessedTaskByIdentifier(task_identifier)

      self._task_manager.SampleTaskStatus(task, 'processed')

      to_merge = self._task_manager.CheckTaskToMerge(task)
      if not to_merge:
        storage_writer.RemoveProcessedTaskStorage(task)

        self._task_manager.RemoveTask(task)
        self._task_manager.SampleTaskStatus(task, 'removed_processed')

      else:
        storage_writer.PrepareMergeTaskStorage(task)
        self._task_manager.UpdateTaskAsPendingMerge(task)

    except KeyError:
      logger.error(
          'Unable to retrieve task: {0:s} to prepare it to be merged.'.format(
              task_identifier))
      continue

  if self._processing_profiler:
    self._processing_profiler.StopTiming('merge_check')

  task = None
  if not self._storage_merge_reader_on_hold:
    task = self._task_manager.GetTaskPendingMerge(self._merge_task)

  # Limit the number of attribute containers from a single task-based
  # storage file that are merged per loop to keep tasks flowing.
  if task or self._storage_merge_reader:
    self._status = definitions.STATUS_INDICATOR_MERGING

    if self._processing_profiler:
      self._processing_profiler.StartTiming('merge')

    if task:
      if self._storage_merge_reader:
        self._merge_task_on_hold = self._merge_task
        self._storage_merge_reader_on_hold = self._storage_merge_reader

        self._task_manager.SampleTaskStatus(
            self._merge_task_on_hold, 'merge_on_hold')

      self._merge_task = task
      try:
        self._storage_merge_reader = storage_writer.StartMergeTaskStorage(
            task)

        self._task_manager.SampleTaskStatus(task, 'merge_started')

      except IOError as exception:
        logger.error((
            'Unable to merge results of task: {0:s} '
            'with error: {1!s}').format(task.identifier, exception))
        self._storage_merge_reader = None

    if self._storage_merge_reader:
      fully_merged = self._storage_merge_reader.MergeAttributeContainers(
          maximum_number_of_containers=self._MAXIMUM_NUMBER_OF_CONTAINERS)
    else:
      # TODO: Do something more sensible when this happens, perhaps
      # retrying the task once that is implemented. For now, we mark the task
      # as fully merged because we can't continue with it.
      fully_merged = True

    if self._processing_profiler:
      self._processing_profiler.StopTiming('merge')

    if fully_merged:
      try:
        self._task_manager.CompleteTask(self._merge_task)

      except KeyError as exception:
        logger.error(
            'Unable to complete task: {0:s} with error: {1!s}'.format(
                self._merge_task.identifier, exception))

      if not self._storage_merge_reader_on_hold:
        self._merge_task = None
        self._storage_merge_reader = None
      else:
        self._merge_task = self._merge_task_on_hold
        self._storage_merge_reader = self._storage_merge_reader_on_hold

        self._merge_task_on_hold = None
        self._storage_merge_reader_on_hold = None

        self._task_manager.SampleTaskStatus(
            self._merge_task, 'merge_resumed')

    self._status = definitions.STATUS_INDICATOR_RUNNING
    self._number_of_produced_events = storage_writer.number_of_events
    self._number_of_produced_sources = storage_writer.number_of_event_sources
    self._number_of_produced_warnings = storage_writer.number_of_warnings
def _GetEventIdentifiers(self, event):
  """Retrieves different identifiers of the event.

  Every event contains event data, which consists of attributes and values.
  These attributes and values can be represented as a string and used for
  sorting and uniquely identifying events. This function determines multiple
  identifiers:

  * an identifier of the attributes and values without the timestamp
    description (or usage). This is referred to as the MACB group
    identifier.
  * an identifier of the attributes and values including the timestamp
    description (or usage). This is referred to as the event content
    identifier.

  The identifier without the timestamp description can be used to group
  events that have the same MACB (modification, access, change, birth)
  timestamps. The PsortEventHeap will store these events individually and
  relies on PsortMultiProcessEngine to do the actual grouping of events.

  Args:
    event (EventObject): event.

  Returns:
    tuple: contains:

      str: identifier of the event MACB group or None if the event cannot
          be grouped.
      str: identifier of the event content.
  """
  attributes = []

  attribute_string = 'data_type: {0:s}'.format(event.data_type)
  attributes.append(attribute_string)

  for attribute_name, attribute_value in sorted(event.GetAttributes()):
    if attribute_name in self._IDENTIFIER_EXCLUDED_ATTRIBUTES:
      continue

    if not attribute_value:
      continue

    if attribute_name == 'pathspec':
      attribute_value = attribute_value.comparable

    elif isinstance(attribute_value, dict):
      attribute_value = sorted(attribute_value.items())

    elif isinstance(attribute_value, set):
      attribute_value = sorted(list(attribute_value))

    elif isinstance(attribute_value, py2to3.BYTES_TYPE):
      attribute_value = repr(attribute_value)

    try:
      attribute_string = '{0:s}: {1!s}'.format(
          attribute_name, attribute_value)
    except UnicodeDecodeError:
      logger.error('Failed to decode attribute {0:s}'.format(attribute_name))

    attributes.append(attribute_string)

  # The 'atime', 'ctime', 'crtime', 'mtime' are included for backwards
  # compatibility with the filestat parser.
  if event.timestamp_desc in (
      'atime', 'ctime', 'crtime', 'mtime',
      definitions.TIME_DESCRIPTION_LAST_ACCESS,
      definitions.TIME_DESCRIPTION_CHANGE,
      definitions.TIME_DESCRIPTION_CREATION,
      definitions.TIME_DESCRIPTION_MODIFICATION):
    macb_group_identifier = ', '.join(attributes)
  else:
    macb_group_identifier = None

  attributes.insert(0, event.timestamp_desc)
  content_identifier = ', '.join(attributes)

  return macb_group_identifier, content_identifier
def _CheckStatusAnalysisProcess(self, pid):
  """Checks the status of an analysis process.

  Args:
    pid (int): process ID (PID) of a registered analysis process.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  # TODO: Refactor this method, simplify and separate concerns (monitoring
  # vs management).
  self._RaiseIfNotRegistered(pid)

  if pid in self._completed_analysis_processes:
    status_indicator = definitions.PROCESSING_STATUS_COMPLETED
    process_status = {
        'processing_status': status_indicator}
    used_memory = 0

  else:
    process = self._processes_per_pid[pid]

    process_status = self._QueryProcessStatus(process)
    if process_status is None:
      process_is_alive = False
    else:
      process_is_alive = True

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
      logger.warning((
          'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
          'memory limit: {2:d}.').format(
              process.name, pid, self._worker_memory_limit))
      self._KillProcess(pid)

    if isinstance(process_status, dict):
      self._rpc_errors_per_pid[pid] = 0
      status_indicator = process_status.get('processing_status', None)

      if status_indicator == definitions.PROCESSING_STATUS_COMPLETED:
        self._completed_analysis_processes.add(pid)

    else:
      rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
      self._rpc_errors_per_pid[pid] = rpc_errors

      if rpc_errors > self._MAXIMUM_RPC_ERRORS:
        process_is_alive = False

      if process_is_alive:
        rpc_port = process.rpc_port.value
        logger.warning((
            'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
            'RPC socket: http://localhost:{2:d}').format(
                process.name, pid, rpc_port))

        processing_status_string = 'RPC error'
        status_indicator = definitions.PROCESSING_STATUS_RUNNING
      else:
        processing_status_string = 'killed'
        status_indicator = definitions.PROCESSING_STATUS_KILLED

      process_status = {
          'processing_status': processing_status_string}

  self._UpdateProcessingStatus(pid, process_status, used_memory)

  if status_indicator in definitions.PROCESSING_ERROR_STATUS:
    logger.error((
        'Process {0:s} (PID: {1:d}) is not functioning correctly. '
        'Status code: {2!s}.').format(process.name, pid, status_indicator))

    self._TerminateProcessByPid(pid)
def _UpdateProcessingStatus(self, pid, process_status, used_memory):
  """Updates the processing status.

  Args:
    pid (int): process identifier (PID) of the worker process.
    process_status (dict[str, object]): status values received from the
        worker process.
    used_memory (int): size of used memory in bytes.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  self._RaiseIfNotRegistered(pid)

  if not process_status:
    return

  process = self._processes_per_pid[pid]

  status_indicator = process_status.get('processing_status', None)

  self._RaiseIfNotMonitored(pid)

  display_name = process_status.get('display_name', '')

  number_of_consumed_event_tags = process_status.get(
      'number_of_consumed_event_tags', None)
  number_of_produced_event_tags = process_status.get(
      'number_of_produced_event_tags', None)

  number_of_consumed_events = process_status.get(
      'number_of_consumed_events', None)
  number_of_produced_events = process_status.get(
      'number_of_produced_events', None)

  number_of_consumed_reports = process_status.get(
      'number_of_consumed_reports', None)
  number_of_produced_reports = process_status.get(
      'number_of_produced_reports', None)

  number_of_consumed_sources = process_status.get(
      'number_of_consumed_sources', None)
  number_of_produced_sources = process_status.get(
      'number_of_produced_sources', None)

  number_of_consumed_warnings = process_status.get(
      'number_of_consumed_warnings', None)
  number_of_produced_warnings = process_status.get(
      'number_of_produced_warnings', None)

  if status_indicator != definitions.STATUS_INDICATOR_IDLE:
    last_activity_timestamp = process_status.get(
        'last_activity_timestamp', 0.0)

    if last_activity_timestamp:
      last_activity_timestamp += self._worker_timeout

      current_timestamp = time.time()
      if current_timestamp > last_activity_timestamp:
        logger.error((
            'Process {0:s} (PID: {1:d}) has not reported activity within '
            'the timeout period.').format(process.name, pid))
        status_indicator = definitions.STATUS_INDICATOR_NOT_RESPONDING

  self._processing_status.UpdateWorkerStatus(
      process.name, status_indicator, pid, used_memory, display_name,
      number_of_consumed_sources, number_of_produced_sources,
      number_of_consumed_events, number_of_produced_events,
      number_of_consumed_event_tags, number_of_produced_event_tags,
      number_of_consumed_reports, number_of_produced_reports,
      number_of_consumed_warnings, number_of_produced_warnings)
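# A minimal illustration of the inactivity check above: a worker is flagged
# as not responding when its last reported activity plus the configured
# timeout lies in the past. The timeout value below is an arbitrary example,
# not the engine's default.
import time


def is_worker_responding(last_activity_timestamp, worker_timeout=900.0):
  """Returns False when the worker has been silent longer than the timeout."""
  if not last_activity_timestamp:
    return True
  return time.time() <= last_activity_timestamp + worker_timeout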
def _GetEventIdentifiers(self, event, event_data, event_data_stream):
  """Retrieves different identifiers of the event.

  The event data attributes and values can be represented as a string and
  used for sorting and uniquely identifying events. This function determines
  multiple identifiers:

  * an identifier of the attributes and values without the timestamp
    description (or usage). This is referred to as the MACB group
    identifier.
  * an identifier of the attributes and values including the timestamp
    description (or usage). This is referred to as the event content
    identifier.

  The identifier without the timestamp description can be used to group
  events that have the same MACB (modification, access, change, birth)
  timestamps. The PsortEventHeap will store these events individually and
  relies on PsortMultiProcessEngine to do the actual grouping of events.

  Args:
    event (EventObject): event.
    event_data (EventData): event data.
    event_data_stream (EventDataStream): event data stream.

  Returns:
    tuple: containing:

      str: identifier of the event MACB group or None if the event cannot
          be grouped.
      str: identifier of the event content.
  """
  attributes = []

  attribute_string = 'data_type: {0:s}'.format(event_data.data_type)
  attributes.append(attribute_string)

  event_attributes = list(event_data.GetAttributes())
  if event_data_stream:
    event_data_stream_attributes = event_data_stream.GetAttributes()
    event_attributes.extend(event_data_stream_attributes)

  for attribute_name, attribute_value in sorted(event_attributes):
    # The filestat parser operates on file entry level and has no event data
    # stream with a path specification. Therefore we need filename and inode
    # to make sure events of different file entries are considered unique.
    if event_data.data_type == 'fs:stat' and attribute_name in (
        'filename', 'inode'):
      pass

    elif attribute_name in self._IDENTIFIER_EXCLUDED_ATTRIBUTES:
      continue

    if not attribute_value:
      continue

    # Note that support for event_data.pathspec is kept for backwards
    # compatibility. The current value is event_data_stream.path_spec.
    if attribute_name in ('path_spec', 'pathspec'):
      attribute_value = attribute_value.comparable

    elif isinstance(attribute_value, dict):
      attribute_value = sorted(attribute_value.items())

    elif isinstance(attribute_value, set):
      attribute_value = sorted(list(attribute_value))

    elif isinstance(attribute_value, bytes):
      attribute_value = repr(attribute_value)

    try:
      attribute_string = '{0:s}: {1!s}'.format(
          attribute_name, attribute_value)
    except UnicodeDecodeError:
      logger.error('Failed to decode attribute {0:s}'.format(
          attribute_name))

    attributes.append(attribute_string)

  # The 'atime', 'ctime', 'crtime', 'mtime' are included for backwards
  # compatibility with the filestat parser.
  if event.timestamp_desc in (
      'atime', 'ctime', 'crtime', 'mtime',
      definitions.TIME_DESCRIPTION_LAST_ACCESS,
      definitions.TIME_DESCRIPTION_CHANGE,
      definitions.TIME_DESCRIPTION_CREATION,
      definitions.TIME_DESCRIPTION_MODIFICATION):
    macb_group_identifier = ', '.join(attributes)
  else:
    macb_group_identifier = None

  timestamp_desc = event.timestamp_desc
  if timestamp_desc is None:
    logger.warning('Missing timestamp_desc attribute')
    timestamp_desc = definitions.TIME_DESCRIPTION_UNKNOWN

  attributes.insert(0, timestamp_desc)
  content_identifier = ', '.join(attributes)

  return macb_group_identifier, content_identifier
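# A worked example with hypothetical values: two events that share the same
# event data but have different timestamp descriptions end up with the same
# MACB group identifier and different content identifiers, which is what
# lets the heap group MACB timestamps while still telling the events apart.
attributes = ['data_type: fs:stat', 'filename: /tmp/example.txt']

macb_group_identifier = ', '.join(attributes)
# 'data_type: fs:stat, filename: /tmp/example.txt' for both events.

content_identifier_access = ', '.join(['atime'] + attributes)
content_identifier_modification = ', '.join(['mtime'] + attributes)
# The two content identifiers differ only in the leading timestamp
# description ('atime' versus 'mtime').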
def _AnalyzeEvents(self, storage_writer, analysis_plugins, event_filter=None):
  """Analyzes events in a plaso storage.

  Args:
    storage_writer (StorageWriter): storage writer.
    analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
        should be run and their names.
    event_filter (Optional[EventObjectFilter]): event filter.

  Returns:
    collections.Counter: counter containing information about the events
        processed and filtered.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  self._status = definitions.STATUS_INDICATOR_RUNNING
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_consumed_warnings = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_produced_warnings = 0

  number_of_filtered_events = 0

  logger.debug('Processing events.')

  filter_limit = getattr(event_filter, 'limit', None)

  for event in storage_writer.GetSortedEvents():
    event_data_identifier = event.GetEventDataIdentifier()
    event_data = storage_writer.GetEventDataByIdentifier(
        event_data_identifier)

    event_data_stream_identifier = event_data.GetEventDataStreamIdentifier()
    if event_data_stream_identifier:
      event_data_stream = storage_writer.GetEventDataStreamByIdentifier(
          event_data_stream_identifier)
    else:
      event_data_stream = None

    event_identifier = event.GetIdentifier()
    event_tag = self._event_tag_index.GetEventTagByIdentifier(
        storage_writer, event_identifier)

    if event_filter:
      filter_match = event_filter.Match(
          event, event_data, event_data_stream, event_tag)
    else:
      filter_match = None

    # pylint: disable=singleton-comparison
    if filter_match == False:
      number_of_filtered_events += 1
      continue

    for event_queue in self._event_queues.values():
      # TODO: Check for premature exit of analysis plugins.
      event_queue.PushItem((event, event_data, event_data_stream))

    self._number_of_consumed_events += 1

    if (event_filter and filter_limit and
        filter_limit == self._number_of_consumed_events):
      break

  logger.debug('Finished pushing events to analysis plugins.')
  # Signal that we have finished adding events.
  for event_queue in self._event_queues.values():
    event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

  logger.debug('Processing analysis plugin results.')

  # TODO: use a task based approach.
  plugin_names = list(analysis_plugins.keys())
  while plugin_names:
    for plugin_name in list(plugin_names):
      if self._abort:
        break

      # TODO: temporary solution.
      task = tasks.Task()
      task.storage_format = definitions.STORAGE_FORMAT_SQLITE
      task.identifier = plugin_name

      merge_ready = storage_writer.CheckTaskReadyForMerge(task)
      if merge_ready:
        storage_writer.PrepareMergeTaskStorage(task)
        self._status = definitions.STATUS_INDICATOR_MERGING

        event_queue = self._event_queues[plugin_name]
        del self._event_queues[plugin_name]

        event_queue.Close()

        storage_merge_reader = storage_writer.StartMergeTaskStorage(task)

        storage_merge_reader.MergeAttributeContainers(
            callback=self._MergeEventTag)

        # TODO: temporary solution.
        plugin_names.remove(plugin_name)

        self._status = definitions.STATUS_INDICATOR_RUNNING

        self._number_of_produced_event_tags = (
            storage_writer.number_of_event_tags)
        self._number_of_produced_reports = (
            storage_writer.number_of_analysis_reports)

  try:
    storage_writer.StopTaskStorage(abort=self._abort)
  except (IOError, OSError) as exception:
    logger.error('Unable to stop task storage with error: {0!s}'.format(
        exception))

  if self._abort:
    logger.debug('Processing aborted.')
  else:
    logger.debug('Processing completed.')

  events_counter = collections.Counter()
  events_counter['Events filtered'] = number_of_filtered_events
  events_counter['Events processed'] = self._number_of_consumed_events

  return events_counter
def ProcessSources(
    self, session, source_path_specs, storage_writer,
    processing_configuration, enable_sigsegv_handler=False,
    status_update_callback=None):
  """Processes the sources and extract events.

  Args:
    session (Session): session in which the sources are processed.
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
        should be enabled.
    status_update_callback (Optional[function]): callback function for status
        updates.

  Returns:
    ProcessingStatus: processing status.
  """
  self._enable_sigsegv_handler = enable_sigsegv_handler

  # Keep track of certain values so we can spawn new extraction workers.
  self._processing_configuration = processing_configuration

  self._debug_output = processing_configuration.debug_output
  self._log_filename = processing_configuration.log_filename
  self._session_identifier = session.identifier
  self._status_update_callback = status_update_callback
  self._storage_writer = storage_writer

  # Set up the task queue.
  task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
      delay_open=True, linger_seconds=0, maximum_items=1,
      name='main_task_queue',
      timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
  self._task_queue = task_outbound_queue

  # The ZeroMQ backed queue must be started first, so we can save its port.
  # TODO: raises: attribute-defined-outside-init
  # self._task_queue.name = 'Task queue'
  self._task_queue.Open()
  self._task_queue_port = self._task_queue.port

  self._StartProfiling(self._processing_configuration.profiling)
  self._task_manager.StartProfiling(
      self._processing_configuration.profiling, self._name)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  # Set up the storage writer before the worker processes.
  storage_writer.StartTaskStorage()

  for worker_number in range(self._number_of_worker_processes):
    # First argument to _StartWorkerProcess is not used.
    extraction_process = self._StartWorkerProcess('', storage_writer)
    if not extraction_process:
      logger.error('Unable to create worker process: {0:d}'.format(
          worker_number))

  self._StartStatusUpdateThread()

  # TODO: decouple session and storage writer?
  session.source_configurations = (
      self.knowledge_base.GetSourceConfigurationArtifacts())

  try:
    # Open the storage file after creating the worker processes otherwise
    # the ZIP storage file will remain locked as long as the worker
    # processes are alive.
    storage_writer.Open()
    storage_writer.WriteSessionStart()

    try:
      storage_writer.WriteSessionConfiguration()

      self._ProcessSources(source_path_specs, storage_writer)

    finally:
      storage_writer.WriteSessionCompletion(aborted=self._abort)

      storage_writer.Close()

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(None)

    self._task_manager.StopProfiling()
    self._StopProfiling()

  try:
    self._StopExtractionProcesses(abort=self._abort)

  except KeyboardInterrupt:
    self._AbortKill()

    # The abort can leave the main process unresponsive
    # due to incorrectly finalized IPC.
    self._KillProcess(os.getpid())

  # The task queue should be closed by _StopExtractionProcesses, this
  # close is a failsafe.
  self._task_queue.Close(abort=True)

  if self._processing_status.error_path_specs:
    task_storage_abort = True
  else:
    task_storage_abort = self._abort

  try:
    storage_writer.StopTaskStorage(abort=task_storage_abort)
  except (IOError, OSError) as exception:
    logger.error('Unable to stop task storage with error: {0!s}'.format(
        exception))

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  # Reset values.
  self._enable_sigsegv_handler = None

  self._processing_configuration = None

  self._session_identifier = None
  self._status_update_callback = None
  self._storage_writer = None

  return self._processing_status
def __init__(
    self, maximum_number_of_tasks=None, number_of_worker_processes=0,
    worker_memory_limit=None, worker_timeout=None):
  """Initializes an engine.

  Args:
    maximum_number_of_tasks (Optional[int]): maximum number of concurrent
        tasks, where 0 represents no limit.
    number_of_worker_processes (Optional[int]): number of worker processes.
    worker_memory_limit (Optional[int]): maximum amount of memory a worker is
        allowed to consume, where None represents the default memory limit
        and 0 represents no limit.
    worker_timeout (Optional[float]): number of minutes before a worker
        process that is not providing status updates is considered inactive,
        where None or 0.0 represents the default timeout.
  """
  if maximum_number_of_tasks is None:
    maximum_number_of_tasks = self._MAXIMUM_NUMBER_OF_TASKS

  if number_of_worker_processes < 1:
    # One worker for each "available" CPU (minus other processes).
    # The number here is derived from the fact that the engine starts up:
    # * A main process.
    #
    # If we want to utilize all CPUs on the system we therefore need to
    # start up workers that amounts to the total number of CPUs - the other
    # processes.
    try:
      cpu_count = multiprocessing.cpu_count() - 1

      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
        cpu_count = self._WORKER_PROCESSES_MAXIMUM

    except NotImplementedError:
      logger.error((
          'Unable to determine number of CPUs defaulting to {0:d} worker '
          'processes.').format(self._WORKER_PROCESSES_MINIMUM))
      cpu_count = self._WORKER_PROCESSES_MINIMUM

    number_of_worker_processes = cpu_count

  if worker_memory_limit is None:
    worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

  if not worker_timeout:
    worker_timeout = definitions.DEFAULT_WORKER_TIMEOUT

  super(TaskMultiProcessEngine, self).__init__()
  self._enable_sigsegv_handler = False
  self._last_worker_number = 0
  self._maximum_number_of_tasks = maximum_number_of_tasks
  self._merge_task = None
  self._merge_task_on_hold = None
  self._number_of_consumed_event_tags = 0
  self._number_of_consumed_events = 0
  self._number_of_consumed_reports = 0
  self._number_of_consumed_sources = 0
  self._number_of_consumed_warnings = 0
  self._number_of_produced_event_tags = 0
  self._number_of_produced_events = 0
  self._number_of_produced_reports = 0
  self._number_of_produced_sources = 0
  self._number_of_produced_warnings = 0
  self._number_of_worker_processes = number_of_worker_processes
  self._path_spec_extractor = extractors.PathSpecExtractor()
  self._processing_configuration = None
  self._redis_client = None
  self._resolver_context = context.Context()
  self._session_identifier = None
  self._status = definitions.STATUS_INDICATOR_IDLE
  self._storage_merge_reader = None
  self._storage_merge_reader_on_hold = None
  self._task_queue = None
  self._task_queue_port = None
  self._task_manager = task_manager.TaskManager()
  self._worker_memory_limit = worker_memory_limit
  self._worker_timeout = worker_timeout
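# A small sketch of the worker-count heuristic in the initializer above:
# reserve one CPU for the main (foreman) process and clamp the result to a
# configured minimum and maximum. The bounds used here are example values,
# not the engine's actual constants.
import multiprocessing


def default_worker_count(minimum=2, maximum=99):
  """Returns the number of worker processes to start by default."""
  try:
    cpu_count = multiprocessing.cpu_count() - 1
  except NotImplementedError:
    return minimum
  return max(minimum, min(cpu_count, maximum))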
def _StartWorkerProcess(self, process_name, storage_writer):
  """Creates, starts, monitors and registers a worker process.

  Args:
    process_name (str): process name.
    storage_writer (StorageWriter): storage writer for a session storage
        used to create task storage.

  Returns:
    MultiProcessWorkerProcess: extraction worker process or None on error.
  """
  analysis_plugin = self._analysis_plugins.get(process_name, None)
  if not analysis_plugin:
    logger.error('Missing analysis plugin: {0:s}'.format(process_name))
    return None

  if self._use_zeromq:
    queue_name = '{0:s} output event queue'.format(process_name)
    output_event_queue = zeromq_queue.ZeroMQPushBindQueue(
        name=queue_name, timeout_seconds=self._QUEUE_TIMEOUT)
    # Open the queue so it can bind to a random port, and we can get the
    # port number to use in the input queue.
    output_event_queue.Open()

  else:
    output_event_queue = multi_process_queue.MultiProcessingQueue(
        timeout=self._QUEUE_TIMEOUT)

  self._event_queues[process_name] = output_event_queue

  if self._use_zeromq:
    queue_name = '{0:s} input event queue'.format(process_name)
    input_event_queue = zeromq_queue.ZeroMQPullConnectQueue(
        name=queue_name, delay_open=True, port=output_event_queue.port,
        timeout_seconds=self._QUEUE_TIMEOUT)

  else:
    input_event_queue = output_event_queue

  process = analysis_process.AnalysisProcess(
      input_event_queue, storage_writer, self._knowledge_base,
      analysis_plugin, self._processing_configuration,
      data_location=self._data_location,
      event_filter_expression=self._event_filter_expression,
      name=process_name)

  process.start()

  logger.info('Started analysis plugin: {0:s} (PID: {1:d}).'.format(
      process_name, process.pid))

  try:
    self._StartMonitoringProcess(process)
  except (IOError, KeyError) as exception:
    logger.error((
        'Unable to monitor analysis plugin: {0:s} (PID: {1:d}) '
        'with error: {2!s}').format(process_name, process.pid, exception))

    process.terminate()
    return None

  self._RegisterProcess(process)
  return process
def _UpdateProcessingStatus(self, pid, process_status, used_memory):
  """Updates the processing status.

  Args:
    pid (int): process identifier (PID) of the worker process.
    process_status (dict[str, object]): status values received from the
        worker process.
    used_memory (int): size of used memory in bytes.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  self._RaiseIfNotRegistered(pid)

  if not process_status:
    return

  process = self._processes_per_pid[pid]

  processing_status = process_status.get('processing_status', None)

  self._RaiseIfNotMonitored(pid)

  display_name = process_status.get('display_name', '')

  number_of_consumed_errors = process_status.get(
      'number_of_consumed_errors', None)
  number_of_produced_errors = process_status.get(
      'number_of_produced_errors', None)

  number_of_consumed_event_tags = process_status.get(
      'number_of_consumed_event_tags', None)
  number_of_produced_event_tags = process_status.get(
      'number_of_produced_event_tags', None)

  number_of_consumed_events = process_status.get(
      'number_of_consumed_events', None)
  number_of_produced_events = process_status.get(
      'number_of_produced_events', None)

  number_of_consumed_reports = process_status.get(
      'number_of_consumed_reports', None)
  number_of_produced_reports = process_status.get(
      'number_of_produced_reports', None)

  number_of_consumed_sources = process_status.get(
      'number_of_consumed_sources', None)
  number_of_produced_sources = process_status.get(
      'number_of_produced_sources', None)

  if processing_status != definitions.PROCESSING_STATUS_IDLE:
    last_activity_timestamp = process_status.get(
        'last_activity_timestamp', 0.0)

    if last_activity_timestamp:
      last_activity_timestamp += self._PROCESS_WORKER_TIMEOUT

      current_timestamp = time.time()
      if current_timestamp > last_activity_timestamp:
        logger.error((
            'Process {0:s} (PID: {1:d}) has not reported activity within '
            'the timeout period.').format(process.name, pid))
        processing_status = definitions.PROCESSING_STATUS_NOT_RESPONDING

  self._processing_status.UpdateWorkerStatus(
      process.name, processing_status, pid, used_memory, display_name,
      number_of_consumed_sources, number_of_produced_sources,
      number_of_consumed_events, number_of_produced_events,
      number_of_consumed_event_tags, number_of_produced_event_tags,
      number_of_consumed_errors, number_of_produced_errors,
      number_of_consumed_reports, number_of_produced_reports)
def _CheckStatusWorkerProcess(self, pid):
  """Checks the status of a worker process.

  If a worker process is not responding the process is terminated and
  a replacement process is started.

  Args:
    pid (int): process ID (PID) of a registered worker process.

  Raises:
    KeyError: if the process is not registered with the engine.
  """
  # TODO: Refactor this method, simplify and separate concerns (monitoring
  # vs management).
  self._RaiseIfNotRegistered(pid)

  process = self._processes_per_pid[pid]

  process_status = self._QueryProcessStatus(process)
  if process_status is None:
    process_is_alive = False
  else:
    process_is_alive = True

  process_information = self._process_information_per_pid[pid]
  used_memory = process_information.GetUsedMemory() or 0

  if self._worker_memory_limit and used_memory > self._worker_memory_limit:
    logger.warning((
        'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
        'memory limit: {2:d}.').format(
            process.name, pid, self._worker_memory_limit))
    self._KillProcess(pid)

  if isinstance(process_status, dict):
    self._rpc_errors_per_pid[pid] = 0
    status_indicator = process_status.get('processing_status', None)

  else:
    rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
    self._rpc_errors_per_pid[pid] = rpc_errors

    if rpc_errors > self._MAXIMUM_RPC_ERRORS:
      process_is_alive = False

    if process_is_alive:
      rpc_port = process.rpc_port.value
      logger.warning((
          'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
          'RPC socket: http://localhost:{2:d}').format(
              process.name, pid, rpc_port))

      processing_status_string = 'RPC error'
      status_indicator = definitions.PROCESSING_STATUS_RUNNING
    else:
      processing_status_string = 'killed'
      status_indicator = definitions.PROCESSING_STATUS_KILLED

    process_status = {
        'processing_status': processing_status_string}

  self._UpdateProcessingStatus(pid, process_status, used_memory)

  # _UpdateProcessingStatus can also change the status of the worker,
  # So refresh the status if applicable.
  for worker_status in self._processing_status.workers_status:
    if worker_status.pid == pid:
      status_indicator = worker_status.status
      break

  if status_indicator in definitions.PROCESSING_ERROR_STATUS:
    logger.error((
        'Process {0:s} (PID: {1:d}) is not functioning correctly. '
        'Status code: {2!s}.').format(process.name, pid, status_indicator))

    self._TerminateProcessByPid(pid)

    logger.info('Starting replacement worker process for {0:s}'.format(
        process.name))
    replacement_process_attempts = 0
    replacement_process = None
    while replacement_process_attempts < self._MAXIMUM_REPLACEMENT_RETRIES:
      replacement_process_attempts += 1

      replacement_process = self._StartWorkerProcess(
          process.name, self._storage_writer)
      if not replacement_process:
        time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)
        break

    if not replacement_process:
      logger.error(
          'Unable to create replacement worker process for: {0:s}'.format(
              process.name))
  def _CheckStatusAnalysisProcess(self, pid):
    """Checks the status of an analysis process.

    Args:
      pid (int): process ID (PID) of a registered analysis process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    if pid in self._completed_analysis_processes:
      status_indicator = definitions.STATUS_INDICATOR_COMPLETED
      process_status = {
          'processing_status': status_indicator}
      used_memory = 0

    else:
      process = self._processes_per_pid[pid]

      process_status = self._QueryProcessStatus(process)
      if process_status is None:
        process_is_alive = False
      else:
        process_is_alive = True

      process_information = self._process_information_per_pid[pid]
      used_memory = process_information.GetUsedMemory() or 0

      if self._worker_memory_limit and used_memory > self._worker_memory_limit:
        logger.warning((
            'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
            'memory limit: {2:d}.').format(
                process.name, pid, self._worker_memory_limit))
        self._KillProcess(pid)

      if isinstance(process_status, dict):
        self._rpc_errors_per_pid[pid] = 0
        status_indicator = process_status.get('processing_status', None)

        if status_indicator == definitions.STATUS_INDICATOR_COMPLETED:
          self._completed_analysis_processes.add(pid)

      else:
        rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
        self._rpc_errors_per_pid[pid] = rpc_errors

        if rpc_errors > self._MAXIMUM_RPC_ERRORS:
          process_is_alive = False

        if process_is_alive:
          rpc_port = process.rpc_port.value
          logger.warning((
              'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
              'RPC socket: http://localhost:{2:d}').format(
                  process.name, pid, rpc_port))

          processing_status_string = 'RPC error'
          status_indicator = definitions.STATUS_INDICATOR_RUNNING
        else:
          processing_status_string = 'killed'
          status_indicator = definitions.STATUS_INDICATOR_KILLED

        process_status = {
            'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    if status_indicator in definitions.ERROR_STATUS_INDICATORS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(
              process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)
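# Illustrative sketch (not part of plaso): how repeated RPC failures are turned
# into a process status, mirroring the bookkeeping in the two methods above.
# The function name, the default maximum and the returned strings are
# assumptions made for this example.
def classify_process_status(process_status, rpc_errors, maximum_rpc_errors=10):
  """Classifies a process based on its RPC status response.

  Args:
    process_status (dict|None): status dict returned via RPC, or None when the
        RPC call failed.
    rpc_errors (int): number of consecutive RPC errors seen so far.
    maximum_rpc_errors (int): number of consecutive errors after which the
        process is considered dead.

  Returns:
    tuple[str, int]: status indicator and updated RPC error count.
  """
  if isinstance(process_status, dict):
    # A successful RPC response resets the error count.
    return process_status.get('processing_status', 'running'), 0

  rpc_errors += 1
  if rpc_errors > maximum_rpc_errors:
    return 'killed', rpc_errors
  return 'RPC error', rpc_errors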
  def _Main(self):
    """The main loop."""
    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
      resolver.Resolver.key_chain.SetCredential(
          credential_configuration.path_spec,
          credential_configuration.credential_type,
          credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        artifacts_filter_helper=self._artifacts_filter_helper,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.STATUS_INDICATOR_RUNNING

    try:
      logger.debug('{0!s} (PID: {1:d}) started monitoring task queue.'.format(
          self._name, self._pid))

      while not self._abort:
        try:
          task = self._task_queue.PopItem()
        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0:s}.'.format(
              type(exception)))
          break

        if isinstance(task, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessTask(task)

      logger.debug('{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
          self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
      self._status = definitions.STATUS_INDICATOR_ABORTED
    else:
      self._status = definitions.STATUS_INDICATOR_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
      self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
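# Illustrative sketch (not part of plaso): the PopItem/QueueAbort consumption
# loop used in the worker main loop above, reduced to a standard library queue
# with a sentinel object. The names consume_tasks, process_task and
# QUEUE_ABORT are assumptions made for this example.
import queue

QUEUE_ABORT = object()  # stands in for plaso_queue.QueueAbort


def consume_tasks(task_queue, process_task, timeout=1.0):
  """Consumes tasks until the queue is empty or a sentinel is dequeued."""
  while True:
    try:
      task = task_queue.get(timeout=timeout)
    except queue.Empty:
      # Equivalent of errors.QueueEmpty: stop monitoring the queue.
      break

    if task is QUEUE_ABORT:
      # Equivalent of dequeuing a QueueAbort object.
      break

    process_task(task)


# Example usage:
#   work_queue = queue.Queue()
#   work_queue.put('task-1')
#   work_queue.put(QUEUE_ABORT)
#   consume_tasks(work_queue, print)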
  def _MergeTaskStorage(self, storage_writer):
    """Merges a task storage with the session storage.

    This function checks all task stores that are ready to merge and updates
    the scheduled tasks. Note that, to prevent this function from holding up
    the task scheduling loop, only the first available task storage is merged.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage
          used to merge task storage.
    """
    if self._processing_profiler:
      self._processing_profiler.StartTiming('merge_check')

    for task_identifier in storage_writer.GetProcessedTaskIdentifiers():
      try:
        task = self._task_manager.GetProcessedTaskByIdentifier(task_identifier)

        self._task_manager.SampleTaskStatus(task, 'processed')

        to_merge = self._task_manager.CheckTaskToMerge(task)
        if not to_merge:
          storage_writer.RemoveProcessedTaskStorage(task)

          self._task_manager.RemoveTask(task)
          self._task_manager.SampleTaskStatus(task, 'removed_processed')

        else:
          storage_writer.PrepareMergeTaskStorage(task)
          self._task_manager.UpdateTaskAsPendingMerge(task)

      except KeyError:
        logger.error(
            'Unable to retrieve task: {0:s} to prepare it to be merged.'.format(
                task_identifier))
        continue

    if self._processing_profiler:
      self._processing_profiler.StopTiming('merge_check')

    task = None
    if not self._storage_merge_reader_on_hold:
      task = self._task_manager.GetTaskPendingMerge(self._merge_task)

    # Limit the number of attribute containers from a single task-based
    # storage file that are merged per loop to keep tasks flowing.
    if task or self._storage_merge_reader:
      self._status = definitions.STATUS_INDICATOR_MERGING

      if self._processing_profiler:
        self._processing_profiler.StartTiming('merge')

      if task:
        if self._storage_merge_reader:
          self._merge_task_on_hold = self._merge_task
          self._storage_merge_reader_on_hold = self._storage_merge_reader

          self._task_manager.SampleTaskStatus(
              self._merge_task_on_hold, 'merge_on_hold')

        self._merge_task = task
        try:
          self._storage_merge_reader = storage_writer.StartMergeTaskStorage(
              task)

          self._task_manager.SampleTaskStatus(task, 'merge_started')

        except IOError as exception:
          logger.error((
              'Unable to merge results of task: {0:s} '
              'with error: {1!s}').format(task.identifier, exception))
          self._storage_merge_reader = None

      if self._storage_merge_reader:
        fully_merged = self._storage_merge_reader.MergeAttributeContainers(
            maximum_number_of_containers=self._MAXIMUM_NUMBER_OF_CONTAINERS)
      else:
        # TODO: Do something more sensible when this happens, perhaps
        # retrying the task once that is implemented. For now, we mark the task
        # as fully merged because we can't continue with it.
        fully_merged = True

      if self._processing_profiler:
        self._processing_profiler.StopTiming('merge')

      if fully_merged:
        try:
          self._task_manager.CompleteTask(self._merge_task)

        except KeyError as exception:
          logger.error(
              'Unable to complete task: {0:s} with error: {1!s}'.format(
                  self._merge_task.identifier, exception))

        if not self._storage_merge_reader_on_hold:
          self._merge_task = None
          self._storage_merge_reader = None
        else:
          self._merge_task = self._merge_task_on_hold
          self._storage_merge_reader = self._storage_merge_reader_on_hold

          self._merge_task_on_hold = None
          self._storage_merge_reader_on_hold = None

          self._task_manager.SampleTaskStatus(
              self._merge_task, 'merge_resumed')

      self._status = definitions.STATUS_INDICATOR_RUNNING
      self._number_of_produced_events = storage_writer.number_of_events
      self._number_of_produced_sources = storage_writer.number_of_event_sources
      self._number_of_produced_warnings = storage_writer.number_of_warnings
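# Illustrative sketch (not part of plaso): the on-hold bookkeeping used by
# _MergeTaskStorage above, where a directly scheduled merge can preempt the
# merge currently in progress and the preempted merge is resumed once the new
# one has fully merged. The MergeTracker class and its attribute names are
# assumptions made for this example.
class MergeTracker(object):
  """Tracks the active merge and at most one merge placed on hold."""

  def __init__(self):
    self.active_task = None
    self.active_reader = None
    self._task_on_hold = None
    self._reader_on_hold = None

  def preempt(self, new_task, new_reader):
    """Puts the current merge on hold and makes the new merge active."""
    if self.active_reader:
      self._task_on_hold = self.active_task
      self._reader_on_hold = self.active_reader
    self.active_task = new_task
    self.active_reader = new_reader

  def complete(self):
    """Finishes the active merge, resuming the on-hold merge if present."""
    if self._reader_on_hold:
      self.active_task = self._task_on_hold
      self.active_reader = self._reader_on_hold
      self._task_on_hold = None
      self._reader_on_hold = None
    else:
      self.active_task = None
      self.active_reader = None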
  def _Main(self):
    """The main loop."""
    self._StartProfiling(self._processing_configuration.profiling)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
        self._name, self._pid))

    # Creating the threading event in the constructor will cause a pickle
    # error on Windows when an analysis process is created.
    self._foreman_status_wait_event = threading.Event()
    self._status = definitions.PROCESSING_STATUS_ANALYZING

    task = tasks.Task()
    # TODO: temporary solution.
    task.identifier = self._analysis_plugin.plugin_name

    self._task = task

    storage_writer = self._storage_writer.CreateTaskStorage(task)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(self._storage_profiler)

    storage_writer.Open()

    self._analysis_mediator = analysis_mediator.AnalysisMediator(
        storage_writer, self._knowledge_base,
        data_location=self._data_location)

    # TODO: set event_filter_expression in mediator.

    storage_writer.WriteTaskStart()

    try:
      logger.debug(
          '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
              self._name, self._pid))

      while not self._abort:
        try:
          event = self._event_queue.PopItem()

        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0:s}.'.format(
              type(exception)))
          break

        if isinstance(event, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessEvent(self._analysis_mediator, event)

        self._number_of_consumed_events += 1

        if self._guppy_memory_profiler:
          self._guppy_memory_profiler.Sample()

      logger.debug(
          '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
              self._name, self._pid))

      if not self._abort:
        self._status = definitions.PROCESSING_STATUS_REPORTING

        self._analysis_mediator.ProduceAnalysisReport(self._analysis_plugin)

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    finally:
      storage_writer.WriteTaskCompletion(aborted=self._abort)

      storage_writer.Close()

      if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        storage_writer.SetStorageProfiler(None)

    try:
      self._storage_writer.FinalizeTaskStorage(task)
    except IOError:
      pass

    if self._abort:
      self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
      self._status = definitions.PROCESSING_STATUS_COMPLETED

    self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
        self._name, self._pid))

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()

    self._analysis_mediator = None
    self._foreman_status_wait_event = None
    self._storage_writer = None
    self._task = None

    try:
      self._event_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
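# Illustrative sketch (not part of plaso): creating the threading.Event inside
# the child process, as the analysis _Main above does, instead of in the
# constructor, so the process object stays picklable on Windows, and waiting on
# it with a timeout before shutting down. The run_child name and the timeout
# value are assumptions made for this example.
import threading


def run_child(foreman_status_timeout=5.0):
  # Created here, after the process has started, so the parent never has to
  # pickle a threading primitive when spawning the child.
  foreman_status_wait_event = threading.Event()

  # ... do the actual analysis work here ...

  # Give the foreman a chance to collect the final status before exiting;
  # wait() returns early if the event is set, otherwise after the timeout.
  foreman_status_wait_event.wait(foreman_status_timeout)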
  def _AnalyzeEvents(
      self, storage_writer, analysis_plugins, event_filter=None):
    """Analyzes events in a plaso storage.

    Args:
      storage_writer (StorageWriter): storage writer.
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
      event_filter (Optional[FilterObject]): event filter.

    Returns:
      collections.Counter: counter containing information about the events
          processed and filtered.

    Raises:
      RuntimeError: if a non-recoverable situation is encountered.
    """
    self._status = definitions.PROCESSING_STATUS_RUNNING
    self._number_of_consumed_errors = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_produced_errors = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0

    number_of_filtered_events = 0

    logger.debug('Processing events.')

    filter_limit = getattr(event_filter, 'limit', None)

    for event in storage_writer.GetSortedEvents():
      event_data_identifier = event.GetEventDataIdentifier()
      if event_data_identifier:
        event_data = storage_writer.GetEventDataByIdentifier(
            event_data_identifier)
        if event_data:
          for attribute_name, attribute_value in event_data.GetAttributes():
            setattr(event, attribute_name, attribute_value)

      event_identifier = event.GetIdentifier()
      event.tag = self._event_tag_index.GetEventTagByIdentifier(
          storage_writer, event_identifier)

      if event_filter:
        filter_match = event_filter.Match(event)
      else:
        filter_match = None

      # pylint: disable=singleton-comparison
      if filter_match == False:
        number_of_filtered_events += 1
        continue

      for event_queue in self._event_queues.values():
        # TODO: Check for premature exit of analysis plugins.
        event_queue.PushItem(event)

      self._number_of_consumed_events += 1

      if (event_filter and filter_limit and
          filter_limit == self._number_of_consumed_events):
        break

    logger.debug('Finished pushing events to analysis plugins.')
    # Signal that we have finished adding events.
    for event_queue in self._event_queues.values():
      event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

    logger.debug('Processing analysis plugin results.')

    # TODO: use a task based approach.
    plugin_names = [plugin_name for plugin_name in analysis_plugins.keys()]
    while plugin_names:
      for plugin_name in list(plugin_names):
        if self._abort:
          break

        # TODO: temporary solution.
        task = tasks.Task()
        task.identifier = plugin_name

        merge_ready = storage_writer.CheckTaskReadyForMerge(task)
        if merge_ready:
          storage_writer.PrepareMergeTaskStorage(task)
          self._status = definitions.PROCESSING_STATUS_MERGING

          event_queue = self._event_queues[plugin_name]
          del self._event_queues[plugin_name]

          event_queue.Close()

          storage_merge_reader = storage_writer.StartMergeTaskStorage(task)

          storage_merge_reader.MergeAttributeContainers(
              callback=self._MergeEventTag)

          # TODO: temporary solution.
          plugin_names.remove(plugin_name)

    self._status = definitions.PROCESSING_STATUS_RUNNING

    self._number_of_produced_event_tags = (
        storage_writer.number_of_event_tags)
    self._number_of_produced_reports = (
        storage_writer.number_of_analysis_reports)

    try:
      storage_writer.StopTaskStorage(abort=self._abort)
    except (IOError, OSError) as exception:
      logger.error('Unable to stop task storage with error: {0!s}'.format(
          exception))

    if self._abort:
      logger.debug('Processing aborted.')
    else:
      logger.debug('Processing completed.')

    events_counter = collections.Counter()
    events_counter['Events filtered'] = number_of_filtered_events
    events_counter['Events processed'] = self._number_of_consumed_events

    return events_counter
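# Illustrative sketch (not part of plaso): fanning events out to one queue per
# analysis plugin, honoring an optional filter and filter limit, and closing
# with a sentinel per queue, as _AnalyzeEvents above does. The names
# fan_out_events and QUEUE_ABORT are assumptions for this example, and
# event_filter is simplified to a plain callable returning True for a match
# rather than a FilterObject.
QUEUE_ABORT = object()  # stands in for plaso_queue.QueueAbort


def fan_out_events(events, event_queues, event_filter=None, filter_limit=None):
  """Pushes events to every plugin queue and returns (consumed, filtered)."""
  number_of_consumed_events = 0
  number_of_filtered_events = 0

  for event in events:
    if event_filter and not event_filter(event):
      number_of_filtered_events += 1
      continue

    for event_queue in event_queues.values():
      event_queue.put(event)

    number_of_consumed_events += 1
    if filter_limit and number_of_consumed_events == filter_limit:
      break

  # Signal every plugin that no more events will be added.
  for event_queue in event_queues.values():
    event_queue.put(QUEUE_ABORT)

  return number_of_consumed_events, number_of_filtered_events


# Example usage (with the standard library queue module):
#   import queue
#   queues = {'tagging': queue.Queue(), 'sessionize': queue.Queue()}
#   fan_out_events(['event-1', 'event-2'], queues)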