Example #1
    def _StartWorkerProcess(self, process_name):
        """Creates, starts, monitors and registers a worker process.

    Args:
      process_name (str): process name.

    Returns:
      MultiProcessWorkerProcess: extraction worker process or None if the
          process could not be started.
    """
        logger.debug('Starting worker process {0:s}'.format(process_name))

        queue_name = '{0:s} task queue'.format(process_name)
        task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
            delay_open=True,
            linger_seconds=0,
            name=queue_name,
            port=self._task_queue_port,
            timeout_seconds=self._TASK_QUEUE_TIMEOUT_SECONDS)

        process = extraction_process.ExtractionWorkerProcess(
            task_queue,
            self.collection_filters_helper,
            self.knowledge_base,
            self._processing_configuration,
            enable_sigsegv_handler=self._enable_sigsegv_handler,
            name=process_name)

        # Remove all possible log handlers to prevent a child process from logging
        # to the main process log file and garbling the log. The log handlers are
        # recreated after the worker process has been started.
        for handler in list(logging.root.handlers):
            logging.root.removeHandler(handler)
            handler.close()

        process.start()

        loggers.ConfigureLogging(debug_output=self._debug_output,
                                 filename=self._log_filename,
                                 mode='a',
                                 quiet_mode=self._quiet_mode)

        try:
            self._StartMonitoringProcess(process)

        except (IOError, KeyError) as exception:
            pid = process.pid
            logger.error(
                ('Unable to monitor replacement worker process: {0:s} '
                 '(PID: {1:d}) with error: {2!s}').format(
                     process_name, pid, exception))

            self._TerminateProcess(process)
            return None

        self._RegisterProcess(process)

        self._last_worker_number += 1

        return process
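The handler shuffle around process.start() above matters because, with the fork start method, a child process inherits the parent's log handlers and would otherwise write into the same log file. Below is a minimal, self-contained sketch of that pattern using only the standard logging and multiprocessing modules; the helper names and log path handling are illustrative, not plaso's loggers module.

import logging
import multiprocessing


def _WorkerMain():
    """Stand-in for the worker main loop."""


def StartWorkerWithoutInheritedHandlers(log_filename):
    # Copy the handler list first; removing handlers while iterating over
    # logging.root.handlers directly would skip entries.
    for handler in list(logging.root.handlers):
        logging.root.removeHandler(handler)
        handler.close()

    process = multiprocessing.Process(target=_WorkerMain)
    process.start()

    # Recreate the parent's log handler after the child has been started.
    logging.basicConfig(filename=log_filename, level=logging.DEBUG)
    return process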
Example #2
    def _StartWorkerProcess(self, process_name):
        """Creates, starts, monitors and registers a worker process.

    Args:
      process_name (str): process name.

    Returns:
      MultiProcessWorkerProcess: analysis worker process or None on error.
    """
        analysis_plugin = self._analysis_plugins.get(process_name, None)
        if not analysis_plugin:
            logger.error('Missing analysis plugin: {0:s}'.format(process_name))
            return None

        queue_name = '{0:s} output event queue'.format(process_name)
        output_event_queue = zeromq_queue.ZeroMQPushBindQueue(
            name=queue_name, timeout_seconds=self._QUEUE_TIMEOUT)
        # Open the queue so it can bind to a random port, and we can get the
        # port number to use in the input queue.
        output_event_queue.Open()

        self._event_queues[process_name] = output_event_queue

        queue_name = '{0:s} input event queue'.format(process_name)
        input_event_queue = zeromq_queue.ZeroMQPullConnectQueue(
            name=queue_name,
            delay_open=True,
            port=output_event_queue.port,
            timeout_seconds=self._QUEUE_TIMEOUT)

        process = analysis_process.AnalysisProcess(
            input_event_queue,
            self._knowledge_base,
            self._session,
            analysis_plugin,
            self._processing_configuration,
            data_location=self._data_location,
            event_filter_expression=self._event_filter_expression,
            name=process_name)

        process.start()

        logger.info('Started analysis plugin: {0:s} (PID: {1:d}).'.format(
            process_name, process.pid))

        try:
            self._StartMonitoringProcess(process)
        except (IOError, KeyError) as exception:
            logger.error(
                ('Unable to monitor analysis plugin: {0:s} (PID: {1:d}) '
                 'with error: {2!s}').format(process_name, process.pid,
                                             exception))

            process.terminate()
            return None

        self._RegisterProcess(process)
        return process
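The output queue is opened before the input queue is created so that it can bind to a random port, which the input queue then connects to. A rough equivalent with plain pyzmq (assuming pyzmq is installed; the PUSH/PULL socket types and loopback address are illustrative and not necessarily what plaso's queue classes use internally):

import zmq

context = zmq.Context()

# The PUSH side binds to a random free port and reports which one it got,
# mirroring ZeroMQPushBindQueue.Open() exposing output_event_queue.port.
push_socket = context.socket(zmq.PUSH)
port = push_socket.bind_to_random_port('tcp://127.0.0.1')

# The PULL side connects to that port, like ZeroMQPullConnectQueue.
pull_socket = context.socket(zmq.PULL)
pull_socket.connect('tcp://127.0.0.1:{0:d}'.format(port))

push_socket.send_pyobj({'event': 'example'})
print(pull_socket.recv_pyobj())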
Example #3
    def _StartAnalysisProcesses(self, analysis_plugins):
        """Starts the analysis processes.

    Args:
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
    """
        logger.info('Starting analysis plugins.')

        for analysis_plugin in analysis_plugins.values():
            self._analysis_plugins[analysis_plugin.NAME] = analysis_plugin

            process = self._StartWorkerProcess(analysis_plugin.NAME)
            if not process:
                logger.error('Unable to create analysis process: {0:s}'.format(
                    analysis_plugin.NAME))

        logger.info('Analysis plugins running')
Example #4
File: engine.py Project: cshanahan/plaso
  def _KillProcess(self, pid):
    """Issues a SIGKILL or equivalent to the process.

    Args:
      pid (int): process identifier (PID).
    """
    if sys.platform.startswith('win'):
      process_terminate = 1
      handle = ctypes.windll.kernel32.OpenProcess(
          process_terminate, False, pid)
      ctypes.windll.kernel32.TerminateProcess(handle, -1)
      ctypes.windll.kernel32.CloseHandle(handle)

    else:
      try:
        os.kill(pid, signal.SIGKILL)
      except OSError as exception:
        logger.error('Unable to kill process {0:d} with error: {1!s}'.format(
            pid, exception))
Example #5
    def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
        """Processes a path specification.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
        excluded_find_specs = None
        if self._collection_filters_helper:
            excluded_find_specs = (
                self._collection_filters_helper.excluded_file_system_find_specs)

        self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
            path_spec)

        try:
            self._CacheFileSystem(path_spec)

            extraction_worker.ProcessPathSpec(
                parser_mediator,
                path_spec,
                excluded_find_specs=excluded_find_specs)

        except dfvfs_errors.CacheFullError:
            # TODO: signal engine of failure.
            self._abort = True
            logger.error((
                'ABORT: detected cache full error while processing path spec: '
                '{0:s}').format(self._current_display_name))

        except Exception as exception:  # pylint: disable=broad-except
            parser_mediator.ProduceExtractionWarning(
                ('unable to process path specification with error: '
                 '{0!s}').format(exception),
                path_spec=path_spec)

            if self._processing_configuration.debug_output:
                logger.warning((
                    'Unhandled exception while processing path specification: '
                    '{0:s}.').format(self._current_display_name))
                logger.exception(exception)
Example #6
    def _StartProcessStatusRPCServer(self):
        """Starts the process status RPC server."""
        if self._rpc_server:
            return

        self._rpc_server = plaso_xmlrpc.XMLProcessStatusRPCServer(
            self._GetStatus)

        hostname = 'localhost'

        # Try the PID as the port number first; otherwise pick something
        # random between 1024 and 60000.
        if self._pid < 1024 or self._pid > 60000:
            port = random.randint(1024, 60000)
        else:
            port = self._pid

        if not self._rpc_server.Start(hostname, port):
            port = 0
            for _ in range(self._NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
                port = random.randint(1024, 60000)
                if self._rpc_server.Start(hostname, port):
                    break

                port = 0

        if not port:
            logger.error(
                ('Unable to start a process status RPC server for {0!s} '
                 '(PID: {1:d})').format(self._name, self._pid))
            self._rpc_server = None
            return

        self.rpc_port.value = port

        logger.debug('Process: {0!s} process status RPC server started'.format(
            self._name))
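The port selection above (try the PID, then fall back to random ports) reduces to a bounded retry loop around a start function. A minimal sketch of that loop, with a hypothetical try_start_server callable standing in for the RPC server's Start method and an assumed value for the number of attempts:

import random

_NUMBER_OF_RPC_SERVER_START_ATTEMPTS = 3  # assumed value


def PickServerPort(pid, try_start_server):
    """Returns the port the server was started on, or 0 on failure."""
    # Prefer the PID so the port is predictable, but only if it falls in
    # the unprivileged range the engine uses.
    if 1024 <= pid <= 60000:
        port = pid
    else:
        port = random.randint(1024, 60000)

    if try_start_server(port):
        return port

    for _ in range(_NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
        port = random.randint(1024, 60000)
        if try_start_server(port):
            return port

    return 0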
Example #7
    def _CheckStatusAnalysisProcess(self, pid):
        """Checks the status of an analysis process.

    Args:
      pid (int): process ID (PID) of a registered analysis process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
        # TODO: Refactor this method, simplify and separate concerns (monitoring
        # vs management).
        self._RaiseIfNotRegistered(pid)

        if pid in self._completed_analysis_processes:
            status_indicator = definitions.STATUS_INDICATOR_COMPLETED
            process_status = {'processing_status': status_indicator}
            used_memory = 0

        else:
            process = self._processes_per_pid[pid]

            process_status = self._QueryProcessStatus(process)
            if process_status is None:
                process_is_alive = False
            else:
                process_is_alive = True

            process_information = self._process_information_per_pid[pid]
            used_memory = process_information.GetUsedMemory() or 0

            if self._worker_memory_limit and used_memory > self._worker_memory_limit:
                logger.warning((
                    'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
                    'memory limit: {2:d}.').format(process.name, pid,
                                                   self._worker_memory_limit))
                self._KillProcess(pid)

            if isinstance(process_status, dict):
                self._rpc_errors_per_pid[pid] = 0
                status_indicator = process_status.get('processing_status',
                                                      None)

                if status_indicator == definitions.STATUS_INDICATOR_COMPLETED:
                    self._completed_analysis_processes.add(pid)

            else:
                rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
                self._rpc_errors_per_pid[pid] = rpc_errors

                if rpc_errors > self._MAXIMUM_RPC_ERRORS:
                    process_is_alive = False

                if process_is_alive:
                    rpc_port = process.rpc_port.value
                    logger.warning((
                        'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
                        'RPC socket: http://localhost:{2:d}').format(
                            process.name, pid, rpc_port))

                    processing_status_string = 'RPC error'
                    status_indicator = definitions.STATUS_INDICATOR_RUNNING
                else:
                    processing_status_string = 'killed'
                    status_indicator = definitions.STATUS_INDICATOR_KILLED

                process_status = {
                    'processing_status': processing_status_string
                }

        self._UpdateProcessingStatus(pid, process_status, used_memory)

        if status_indicator in definitions.ERROR_STATUS_INDICATORS:
            logger.error(
                ('Process {0:s} (PID: {1:d}) is not functioning correctly. '
                 'Status code: {2!s}.').format(process.name, pid,
                                               status_indicator))

            self._TerminateProcessByPid(pid)
Example #8
    def ProcessSources(self,
                       source_configurations,
                       storage_writer,
                       session_identifier,
                       processing_configuration,
                       enable_sigsegv_handler=False,
                       status_update_callback=None,
                       storage_file_path=None):
        """Processes the sources and extract events.

    Args:
      source_configurations (list[SourceConfigurationArtifact]): configurations
          of the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      session_identifier (str): the identifier of the session the tasks are
          part of.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
          should be enabled.
      status_update_callback (Optional[function]): callback function for status
          updates.
      storage_file_path (Optional[str]): path to the session storage file.

    Returns:
      ProcessingStatus: processing status.
    """
        self._enable_sigsegv_handler = enable_sigsegv_handler

        # Keep track of certain values so we can spawn new extraction workers.
        self._processing_configuration = processing_configuration

        self._debug_output = processing_configuration.debug_output
        self._log_filename = processing_configuration.log_filename
        self._status_update_callback = status_update_callback
        self._storage_file_path = storage_file_path
        self._storage_writer = storage_writer
        self._task_storage_format = processing_configuration.task_storage_format

        # Set up the task queue.
        task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
            delay_open=True,
            linger_seconds=0,
            maximum_items=1,
            name='main_task_queue',
            timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
        self._task_queue = task_outbound_queue

        # The ZeroMQ backed queue must be started first, so we can save its port.
        # TODO: raises: attribute-defined-outside-init
        # self._task_queue.name = 'Task queue'
        self._task_queue.Open()
        self._task_queue_port = self._task_queue.port

        # Set up the task storage before the worker processes.
        self._StartTaskStorage(self._task_storage_format)

        for worker_number in range(self._number_of_worker_processes):
            process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
            worker_process = self._StartWorkerProcess(process_name)
            if not worker_process:
                logger.error('Unable to create worker process: {0:d}'.format(
                    worker_number))

        self._StartProfiling(self._processing_configuration.profiling)
        self._task_manager.StartProfiling(
            self._processing_configuration.profiling, self._name)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(self._storage_profiler)

        self._StartStatusUpdateThread()

        self._parsers_counter = collections.Counter({
            parser_count.name: parser_count
            for parser_count in self._storage_writer.GetAttributeContainers(
                'parser_count')
        })

        try:
            self._ProcessSources(source_configurations, storage_writer,
                                 session_identifier)

        finally:
            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                storage_writer.SetStorageProfiler(None)

            self._task_manager.StopProfiling()
            self._StopProfiling()

        try:
            self._StopExtractionProcesses(abort=self._abort)

        except KeyboardInterrupt:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        # The task queue should be closed by _StopExtractionProcesses, this
        # close is a failsafe.
        self._task_queue.Close(abort=True)

        if self._processing_status.error_path_specs:
            task_storage_abort = True
        else:
            task_storage_abort = self._abort

        try:
            self._StopTaskStorage(self._task_storage_format,
                                  session_identifier,
                                  abort=task_storage_abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logger.debug('Processing completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        # Reset values.
        self._enable_sigsegv_handler = None

        self._processing_configuration = None

        self._status_update_callback = None
        self._storage_file_path = None
        self._storage_writer = None
        self._task_storage_format = None

        return self._processing_status
Example #9
    def _MergeTaskStorage(self,
                          storage_writer,
                          session_identifier,
                          maximum_number_of_containers=0):
        """Merges a task storage with the session storage.

    This function checks all task stores that are ready to merge and updates
    the scheduled tasks. Note that to prevent this function holding up
    the task scheduling loop only the first available task storage is merged.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage used
          to merge task storage.
      session_identifier (str): the identifier of the session the tasks are
          part of.
      maximum_number_of_containers (Optional[int]): maximum number of
          containers to merge, where 0 represent no limit.

    Returns:
      int: number of containers merged.
    """
        if self._processing_profiler:
            self._processing_profiler.StartTiming('merge_check')

        task_identifiers = self._GetProcessedTaskIdentifiers(
            self._task_storage_format, session_identifier)

        for task_identifier in task_identifiers:
            try:
                task = self._task_manager.GetProcessedTaskByIdentifier(
                    task_identifier)

                self._task_manager.SampleTaskStatus(task, 'processed')

                to_merge = self._task_manager.CheckTaskToMerge(task)
                if not to_merge:
                    self._RemoveProcessedTaskStorage(self._task_storage_format,
                                                     task)

                    self._task_manager.RemoveTask(task)
                    self._task_manager.SampleTaskStatus(
                        task, 'removed_processed')

                else:
                    self._PrepareMergeTaskStorage(self._task_storage_format,
                                                  task)
                    self._task_manager.UpdateTaskAsPendingMerge(task)

            except KeyError as exception:
                logger.error(
                    'Unable to retrieve task: {0:s} to prepare it to be merged '
                    'with error: {1!s}.'.format(task_identifier, exception))
                continue

        if self._processing_profiler:
            self._processing_profiler.StopTiming('merge_check')

        number_of_containers = 0

        task = None
        if not self._storage_merge_reader_on_hold:
            task = self._task_manager.GetTaskPendingMerge(self._merge_task)

        if task or self._storage_merge_reader:
            self._status = definitions.STATUS_INDICATOR_MERGING

            if self._processing_profiler:
                self._processing_profiler.StartTiming('merge')

            if task:
                if self._storage_merge_reader:
                    self._merge_task_on_hold = self._merge_task
                    self._storage_merge_reader_on_hold = self._storage_merge_reader

                    self._task_manager.SampleTaskStatus(
                        self._merge_task_on_hold, 'merge_on_hold')

                self._merge_task = task
                try:
                    self._storage_merge_reader = self._StartMergeTaskStorage(
                        self._storage_writer, self._task_storage_format, task)

                    self._task_manager.SampleTaskStatus(task, 'merge_started')

                except IOError as exception:
                    logger.error(
                        ('Unable to merge results of task: {0:s} '
                         'with error: {1!s}').format(task.identifier,
                                                     exception))
                    self._storage_merge_reader = None

            if self._storage_merge_reader:
                fully_merged = self._storage_merge_reader.MergeAttributeContainers(
                    maximum_number_of_containers=maximum_number_of_containers)

                number_of_containers = self._storage_merge_reader.number_of_containers

            else:
                # TODO: Do something more sensible when this happens, perhaps
                # retrying the task once that is implemented. For now, we mark the task
                # as fully merged because we can't continue with it.
                fully_merged = True

            if self._processing_profiler:
                self._processing_profiler.StopTiming('merge')

            if fully_merged:
                self._storage_merge_reader.Close()

                for key, value in self._storage_merge_reader.parsers_counter.items():
                    parser_count = self._parsers_counter.get(key, None)
                    if parser_count:
                        parser_count.number_of_events += value
                        self._storage_writer.UpdateAttributeContainer(
                            parser_count)
                    else:
                        parser_count = counts.ParserCount(
                            name=key, number_of_events=value)
                        self._parsers_counter[key] = parser_count
                        self._storage_writer.AddAttributeContainer(
                            parser_count)

                self._RemoveMergeTaskStorage(self._task_storage_format,
                                             self._merge_task)

                try:
                    self._task_manager.CompleteTask(self._merge_task)

                except KeyError as exception:
                    logger.error(
                        'Unable to complete task: {0:s} with error: {1!s}'.
                        format(self._merge_task.identifier, exception))

                if not self._storage_merge_reader_on_hold:
                    self._merge_task = None
                    self._storage_merge_reader = None
                else:
                    self._merge_task = self._merge_task_on_hold
                    self._storage_merge_reader = self._storage_merge_reader_on_hold

                    self._merge_task_on_hold = None
                    self._storage_merge_reader_on_hold = None

                    self._task_manager.SampleTaskStatus(
                        self._merge_task, 'merge_resumed')

            self._status = definitions.STATUS_INDICATOR_RUNNING
            self._number_of_produced_events = storage_writer.number_of_events
            self._number_of_produced_sources = storage_writer.number_of_event_sources
            self._number_of_produced_extraction_warnings = (
                storage_writer.number_of_extraction_warnings)

        return number_of_containers
Example #10
    def _MergeAttributeContainer(self, storage_writer, merge_helper,
                                 container):
        """Merges an attribute container from a task store into the storage writer.

    Args:
      storage_writer (StorageWriter): storage writer.
      merge_helper (ExtractionTaskMergeHelper): helper to merge attribute
          containers.
      container (AttributeContainer): attribute container.
    """
        if container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT:
            event_data_identifier = container.GetEventDataIdentifier()
            event_data_lookup_key = event_data_identifier.CopyToString()

            event_data_identifier = merge_helper.GetAttributeContainerIdentifier(
                event_data_lookup_key)

            if event_data_identifier:
                container.SetEventDataIdentifier(event_data_identifier)
            else:
                identifier = container.GetIdentifier()
                identifier_string = identifier.CopyToString()

                # TODO: store this as a merge warning so this is preserved
                # in the storage file.
                logger.error(
                    ('Unable to merge event attribute container: {0:s} since '
                     'corresponding event data: {1:s} could not be found.'
                     ).format(identifier_string, event_data_lookup_key))
                return

        elif container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_DATA:
            event_data_stream_identifier = (
                container.GetEventDataStreamIdentifier())
            event_data_stream_lookup_key = None
            if event_data_stream_identifier:
                event_data_stream_lookup_key = (
                    event_data_stream_identifier.CopyToString())

                event_data_stream_identifier = (
                    merge_helper.GetAttributeContainerIdentifier(
                        event_data_stream_lookup_key))

            if event_data_stream_identifier:
                container.SetEventDataStreamIdentifier(
                    event_data_stream_identifier)
            elif event_data_stream_lookup_key:
                identifier = container.GetIdentifier()
                identifier_string = identifier.CopyToString()

                # TODO: store this as a merge warning so this is preserved
                # in the storage file.
                logger.error((
                    'Unable to merge event data attribute container: {0:s} since '
                    'corresponding event data stream: {1:s} could not be '
                    'found.').format(identifier_string,
                                     event_data_stream_lookup_key))
                return

        elif container.CONTAINER_TYPE == 'windows_eventlog_message_string':
            message_file_identifier = container.GetMessageFileIdentifier()
            message_file_lookup_key = message_file_identifier.CopyToString()

            message_file_identifier = merge_helper.GetAttributeContainerIdentifier(
                message_file_lookup_key)

            if message_file_identifier:
                container.SetMessageFileIdentifier(message_file_identifier)
            else:
                identifier = container.GetIdentifier()
                identifier_string = identifier.CopyToString()

                # TODO: store this as a merge warning so this is preserved
                # in the storage file.
                logger.error((
                    'Unable to merge Windows EventLog message string attribute '
                    'container: {0:s} since corresponding Windows EventLog message '
                    'file: {1:s} could not be found.').format(
                        identifier_string, message_file_lookup_key))
                return

        lookup_key = None
        if container.CONTAINER_TYPE in (self._CONTAINER_TYPE_EVENT_DATA,
                                        self._CONTAINER_TYPE_EVENT_DATA_STREAM,
                                        'windows_eventlog_message_file'):
            # Preserve the lookup key before adding it to the attribute container
            # store.
            identifier = container.GetIdentifier()
            lookup_key = identifier.CopyToString()

        storage_writer.AddAttributeContainer(container)

        if lookup_key:
            identifier = container.GetIdentifier()
            merge_helper.SetAttributeContainerIdentifier(
                lookup_key, identifier)

        if container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT:
            parser_name = merge_helper.event_data_parser_mappings.get(
                event_data_lookup_key, 'N/A')
            self._parsers_counter[parser_name] += 1
            self._parsers_counter['total'] += 1

            self._number_of_produced_events += 1

        elif container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_DATA:
            parser_name = container.parser.split('/')[-1]
            merge_helper.event_data_parser_mappings[lookup_key] = parser_name

        elif container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_SOURCE:
            self._number_of_produced_sources += 1
Example #11
    def _Main(self):
        """The main loop."""
        # We need a resolver context per process to prevent multiprocessing
        # issues with file objects stored in images.
        self._resolver_context = context.Context()

        for credential_configuration in self._processing_configuration.credentials:
            resolver.Resolver.key_chain.SetCredential(
                credential_configuration.path_spec,
                credential_configuration.credential_type,
                credential_configuration.credential_data)

        self._parser_mediator = self._CreateParserMediator(
            self._knowledge_base, self._resolver_context,
            self._processing_configuration)

        # We need to initialize the parser and hasher objects after the process
        # has forked otherwise on Windows the "fork" will fail with
        # a PickleError for Python modules that cannot be pickled.
        self._extraction_worker = worker.EventExtractionWorker(
            parser_filter_expression=(
                self._processing_configuration.parser_filter_expression))

        self._extraction_worker.SetExtractionConfiguration(
            self._processing_configuration.extraction)

        self._parser_mediator.StartProfiling(
            self._processing_configuration.profiling, self._name,
            self._process_information)
        self._StartProfiling(self._processing_configuration.profiling)

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(
                self._analyzers_profiler)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(
                self._processing_profiler)

        logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
            self._name, self._pid))

        self._status = definitions.STATUS_INDICATOR_RUNNING

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring task queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    task = self._task_queue.PopItem()
                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception: {0!s}.'.format(
                            type(exception)))
                    break

                if isinstance(task, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessTask(task)

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
                    self._name, self._pid))

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(None)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(None)

        self._StopProfiling()
        self._parser_mediator.StopProfiling()

        self._extraction_worker = None
        self._file_system_cache = []
        self._parser_mediator = None
        self._resolver_context = None

        if self._abort:
            self._status = definitions.STATUS_INDICATOR_ABORTED
        else:
            self._status = definitions.STATUS_INDICATOR_COMPLETED

        logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
            self._name, self._pid))

        try:
            self._task_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))
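The main loop above keeps popping items until the queue closes, empties, or a QueueAbort sentinel arrives. A self-contained sketch of that consume-until-sentinel shape, using a plain multiprocessing queue and a hypothetical _QueueAbort marker instead of plaso's queue classes:

import multiprocessing
import queue


class _QueueAbort(object):
    """Marker object that tells the consumer to stop."""


def ConsumeItems(task_queue):
    while True:
        try:
            task = task_queue.get(timeout=5)
        except queue.Empty:
            break

        if isinstance(task, _QueueAbort):
            break

        print('processing task: {0!s}'.format(task))


if __name__ == '__main__':
    task_queue = multiprocessing.Queue()
    for task in ('task1', 'task2', _QueueAbort()):
        task_queue.put(task)

    process = multiprocessing.Process(target=ConsumeItems, args=(task_queue,))
    process.start()
    process.join()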
Example #12
    def _GetEventDataContentIdentifier(self, event_data, event_data_stream):
        """Retrieves the event data content identifier.

    Args:
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.

    Returns:
      str: identifier of the event data content.
    """
        event_data_identifier = event_data.GetIdentifier()
        lookup_key = event_data_identifier.CopyToString()

        content_identifier = self._event_data_content_identifier_cache.get(
            lookup_key, None)
        if not content_identifier:
            event_attributes = list(event_data.GetAttributes())
            if event_data_stream:
                event_data_stream_attributes = (
                    event_data_stream.GetAttributes())
                event_attributes.extend(event_data_stream_attributes)

            attributes = ['data_type: {0:s}'.format(event_data.data_type)]

            for attribute_name, attribute_value in sorted(event_attributes):
                if (attribute_name in self._IDENTIFIER_EXCLUDED_ATTRIBUTES
                        or attribute_value is None):
                    continue

                # Note that support for event_data.pathspec is kept for backwards
                # compatibility. The current value is event_data_stream.path_spec.
                if attribute_name in ('path_spec', 'pathspec'):
                    attribute_value = attribute_value.comparable

                elif isinstance(attribute_value, dict):
                    attribute_value = sorted(attribute_value.items())

                elif isinstance(attribute_value, set):
                    attribute_value = sorted(list(attribute_value))

                elif isinstance(attribute_value, bytes):
                    attribute_value = repr(attribute_value)

                try:
                    attribute_string = '{0:s}: {1!s}'.format(
                        attribute_name, attribute_value)
                except UnicodeDecodeError:
                    logger.error('Failed to decode attribute {0:s}'.format(
                        attribute_name))
                    continue

                attributes.append(attribute_string)

            content_identifier = ', '.join(attributes)

            if len(self._event_data_content_identifier_cache) >= (
                    self._MAXIMUM_CACHED_IDENTIFIERS):
                self._event_data_content_identifier_cache.popitem(last=True)

            self._event_data_content_identifier_cache[
                lookup_key] = content_identifier

        self._event_data_content_identifier_cache.move_to_end(lookup_key,
                                                              last=False)
        return content_identifier
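The cache bookkeeping above is the OrderedDict LRU recipe with the most recently used entry kept at the front: move_to_end(key, last=False) on access and popitem(last=True) to evict from the back. A stripped-down sketch of the same idea, with an assumed cache size rather than plaso's _MAXIMUM_CACHED_IDENTIFIERS:

import collections

_MAXIMUM_CACHED_IDENTIFIERS = 4  # assumed value


class FrontLoadedLRUCache(object):
    """Small LRU cache that keeps the most recently used key first."""

    def __init__(self):
        self._cache = collections.OrderedDict()

    def Get(self, key):
        value = self._cache.get(key, None)
        if value is not None:
            # Move the key to the front so it is evicted last.
            self._cache.move_to_end(key, last=False)
        return value

    def Set(self, key, value):
        if len(self._cache) >= _MAXIMUM_CACHED_IDENTIFIERS:
            # Evict from the back, which holds the least recently used key.
            self._cache.popitem(last=True)
        self._cache[key] = value
        self._cache.move_to_end(key, last=False)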
Example #13
File: engine.py Project: cshanahan/plaso
  def _CheckStatusWorkerProcess(self, pid):
    """Checks the status of a worker process.

    If a worker process is not responding the process is terminated and
    a replacement process is started.

    Args:
      pid (int): process ID (PID) of a registered worker process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    process = self._processes_per_pid[pid]

    process_status = self._QueryProcessStatus(process)
    if process_status is None:
      process_is_alive = False
    else:
      process_is_alive = True

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
      logger.warning((
          'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
          'memory limit: {2:d}.').format(
              process.name, pid, self._worker_memory_limit))
      self._KillProcess(pid)

    if isinstance(process_status, dict):
      self._rpc_errors_per_pid[pid] = 0
      status_indicator = process_status.get('processing_status', None)

    else:
      rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
      self._rpc_errors_per_pid[pid] = rpc_errors

      if rpc_errors > self._MAXIMUM_RPC_ERRORS:
        process_is_alive = False

      if process_is_alive:
        rpc_port = process.rpc_port.value
        logger.warning((
            'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
            'RPC socket: http://localhost:{2:d}').format(
                process.name, pid, rpc_port))

        processing_status_string = 'RPC error'
        status_indicator = definitions.STATUS_INDICATOR_RUNNING
      else:
        processing_status_string = 'killed'
        status_indicator = definitions.STATUS_INDICATOR_KILLED

      process_status = {
          'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # _UpdateProcessingStatus can also change the status of the worker,
    # so refresh the status if applicable.
    for worker_status in self._processing_status.workers_status:
      if worker_status.pid == pid:
        status_indicator = worker_status.status
        break

    if status_indicator in definitions.ERROR_STATUS_INDICATORS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)

      replacement_process = None
      for replacement_process_attempt in range(
          self._MAXIMUM_REPLACEMENT_RETRIES):
        logger.info((
            'Attempt: {0:d} to start replacement worker process for '
            '{1:s}').format(replacement_process_attempt + 1, process.name))

        replacement_process = self._StartWorkerProcess(process.name)
        if replacement_process:
          break

        time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)

      if not replacement_process:
        logger.error(
            'Unable to create replacement worker process for: {0:s}'.format(
                process.name))
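The replacement logic at the end is a bounded retry loop with a fixed delay between attempts. A generic sketch of that shape, with assumed values for the retry constants and a hypothetical start_callback standing in for _StartWorkerProcess:

import time

_MAXIMUM_REPLACEMENT_RETRIES = 3       # assumed value
_REPLACEMENT_WORKER_RETRY_DELAY = 1.0  # assumed value, in seconds


def StartWithRetries(start_callback, process_name):
    """Calls start_callback until it returns a process or retries run out."""
    for attempt in range(_MAXIMUM_REPLACEMENT_RETRIES):
        print('Attempt: {0:d} to start replacement worker process for {1:s}'.format(
            attempt + 1, process_name))

        process = start_callback(process_name)
        if process:
            return process

        time.sleep(_REPLACEMENT_WORKER_RETRY_DELAY)

    return None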
Example #14
    def _Main(self):
        """The main loop."""
        self._StartProfiling(self._processing_configuration.profiling)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        # Creating the threading event in the constructor will cause a pickle
        # error on Windows when an analysis process is created.
        self._foreman_status_wait_event = threading.Event()
        self._status = definitions.STATUS_INDICATOR_ANALYZING

        task = tasks.Task()
        task.storage_format = definitions.STORAGE_FORMAT_SQLITE
        # TODO: temporary solution.
        task.identifier = self._analysis_plugin.plugin_name

        self._task = task

        task_storage_writer = self._storage_factory.CreateTaskStorageWriter(
            definitions.STORAGE_FORMAT_SQLITE)

        if self._serializers_profiler:
            task_storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        if self._storage_profiler:
            task_storage_writer.SetStorageProfiler(self._storage_profiler)

        storage_file_path = self._GetTaskStorageFilePath(
            definitions.STORAGE_FORMAT_SQLITE, task)
        task_storage_writer.Open(path=storage_file_path)

        self._analysis_mediator = self._CreateAnalysisMediator(
            self._session, self._knowledge_base,
            self._processing_configuration, self._data_location)
        self._analysis_mediator.SetStorageWriter(task_storage_writer)

        # TODO: set event_filter_expression in mediator.

        task_storage_writer.AddAttributeContainer(task)

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    queued_object = self._event_queue.PopItem()

                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception {0!s}.'.format(
                            type(exception)))
                    break

                if isinstance(queued_object, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessEvent(self._analysis_mediator, *queued_object)

                self._number_of_consumed_events += 1

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
                    self._name, self._pid))

            if not self._abort:
                self._status = definitions.STATUS_INDICATOR_REPORTING

                self._analysis_mediator.ProduceAnalysisReport(
                    self._analysis_plugin)

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        finally:
            task.aborted = self._abort
            task_storage_writer.UpdateAttributeContainer(task)

            task_storage_writer.Close()

            if self._serializers_profiler:
                task_storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                task_storage_writer.SetStorageProfiler(None)

        try:
            self._FinalizeTaskStorageWriter(definitions.STORAGE_FORMAT_SQLITE,
                                            task)
        except IOError as exception:
            logger.warning(
                'Unable to finalize task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            self._status = definitions.STATUS_INDICATOR_ABORTED
        else:
            self._status = definitions.STATUS_INDICATOR_COMPLETED

        logger.debug('Wait for foreman status wait event')
        self._foreman_status_wait_event.clear()
        self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        self._StopProfiling()

        self._analysis_mediator = None
        self._foreman_status_wait_event = None
        self._task = None

        try:
            self._event_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))
Example #15
    def _MergeTaskStorage(self, storage_writer, session_identifier):
        """Merges a task storage with the session storage.

    This function checks all task stores that are ready to merge and updates
    the scheduled tasks. Note that to prevent this function holding up
    the task scheduling loop only the first available task storage is merged.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage used
          to merge task storage.
      session_identifier (str): the identifier of the session the tasks are
          part of.
    """
        if self._processing_profiler:
            self._processing_profiler.StartTiming('merge_check')

        task_identifiers = self._GetProcessedTaskIdentifiers(
            self._task_storage_format, session_identifier)

        for task_identifier in task_identifiers:
            try:
                task = self._task_manager.GetProcessedTaskByIdentifier(
                    task_identifier)

                self._task_manager.SampleTaskStatus(task, 'processed')

                to_merge = self._task_manager.CheckTaskToMerge(task)
                if not to_merge:
                    self._RemoveProcessedTaskStorage(self._task_storage_format,
                                                     task)

                    self._task_manager.RemoveTask(task)
                    self._task_manager.SampleTaskStatus(
                        task, 'removed_processed')

                else:
                    self._PrepareMergeTaskStorage(self._task_storage_format,
                                                  task)
                    self._task_manager.UpdateTaskAsPendingMerge(task)

            except KeyError as exception:
                logger.error(
                    'Unable to retrieve task: {0:s} to prepare it to be merged '
                    'with error: {1!s}.'.format(task_identifier, exception))
                continue

        if self._processing_profiler:
            self._processing_profiler.StopTiming('merge_check')

        task = None
        if not self._task_merge_helper_on_hold:
            task = self._task_manager.GetTaskPendingMerge(self._merge_task)

        if task or self._task_merge_helper:
            self._status = definitions.STATUS_INDICATOR_MERGING

            if self._processing_profiler:
                self._processing_profiler.StartTiming('merge')

            if task:
                if self._task_merge_helper:
                    self._merge_task_on_hold = self._merge_task
                    self._task_merge_helper_on_hold = self._task_merge_helper

                    self._task_manager.SampleTaskStatus(
                        self._merge_task_on_hold, 'merge_on_hold')

                self._merge_task = task
                try:
                    task_storage_reader = self._GetMergeTaskStorage(
                        self._task_storage_format, task)

                    self._task_merge_helper = merge_helpers.ExtractionTaskMergeHelper(
                        task_storage_reader, task.identifier)

                    self._task_manager.SampleTaskStatus(task, 'merge_started')

                except IOError as exception:
                    logger.error(
                        ('Unable to merge results of task: {0:s} '
                         'with error: {1!s}').format(task.identifier,
                                                     exception))
                    self._task_merge_helper = None

            if self._task_merge_helper:
                merge_duration = time.time()

                number_of_containers = self._MergeAttributeContainers(
                    storage_writer, self._task_merge_helper,
                    maximum_number_of_containers=(
                        self._maximum_number_of_containers))

                merge_duration = time.time() - merge_duration

                fully_merged = self._task_merge_helper.fully_merged

                if merge_duration > 0.0 and number_of_containers > 0:
                    # Limit the number of attribute containers from a single task-based
                    # storage file that are merged per loop to keep tasks flowing.
                    containers_per_second = number_of_containers / merge_duration
                    maximum_number_of_containers = int(0.5 *
                                                       containers_per_second)

                    if fully_merged:
                        self._maximum_number_of_containers = max(
                            self._maximum_number_of_containers,
                            maximum_number_of_containers)

                    else:
                        self._maximum_number_of_containers = maximum_number_of_containers

            else:
                # TODO: Do something more sensible when this happens, perhaps
                # retrying the task once that is implemented. For now, we mark the task
                # as fully merged because we can't continue with it.
                fully_merged = True

            if self._processing_profiler:
                self._processing_profiler.StopTiming('merge')

            if fully_merged:
                self._task_merge_helper.Close()

                self._RemoveMergeTaskStorage(self._task_storage_format,
                                             self._merge_task)

                try:
                    self._task_manager.CompleteTask(self._merge_task)

                except KeyError as exception:
                    logger.error(
                        'Unable to complete task: {0:s} with error: {1!s}'.
                        format(self._merge_task.identifier, exception))

                if not self._task_merge_helper_on_hold:
                    self._merge_task = None
                    self._task_merge_helper = None
                else:
                    self._merge_task = self._merge_task_on_hold
                    self._task_merge_helper = self._task_merge_helper_on_hold

                    self._merge_task_on_hold = None
                    self._task_merge_helper_on_hold = None

                    self._task_manager.SampleTaskStatus(
                        self._merge_task, 'merge_resumed')

            self._status = definitions.STATUS_INDICATOR_RUNNING
Example #16
    def _UpdateProcessingStatus(self, pid, process_status, used_memory):
        """Updates the processing status.

    Args:
      pid (int): process identifier (PID) of the worker process.
      process_status (dict[str, object]): status values received from
          the worker process.
      used_memory (int): size of used memory in bytes.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
        self._RaiseIfNotRegistered(pid)

        if not process_status:
            return

        process = self._processes_per_pid[pid]

        status_indicator = process_status.get('processing_status', None)

        self._RaiseIfNotMonitored(pid)

        display_name = process_status.get('display_name', '')

        number_of_consumed_event_tags = process_status.get(
            'number_of_consumed_event_tags', None)
        number_of_produced_event_tags = process_status.get(
            'number_of_produced_event_tags', None)

        number_of_consumed_events = process_status.get(
            'number_of_consumed_events', None)
        number_of_produced_events = process_status.get(
            'number_of_produced_events', None)

        number_of_consumed_reports = process_status.get(
            'number_of_consumed_reports', None)
        number_of_produced_reports = process_status.get(
            'number_of_produced_reports', None)

        number_of_consumed_sources = process_status.get(
            'number_of_consumed_sources', None)
        number_of_produced_sources = process_status.get(
            'number_of_produced_sources', None)

        if status_indicator != definitions.STATUS_INDICATOR_IDLE:
            last_activity_timestamp = process_status.get(
                'last_activity_timestamp', 0.0)

            if last_activity_timestamp:
                last_activity_timestamp += self._worker_timeout

                current_timestamp = time.time()
                if current_timestamp > last_activity_timestamp:
                    logger.error((
                        'Process {0:s} (PID: {1:d}) has not reported activity within '
                        'the timeout period.').format(process.name, pid))
                    status_indicator = definitions.STATUS_INDICATOR_NOT_RESPONDING

        self._processing_status.UpdateWorkerStatus(
            process.name, status_indicator, pid, used_memory, display_name,
            number_of_consumed_sources, number_of_produced_sources,
            number_of_consumed_events, number_of_produced_events,
            number_of_consumed_event_tags, number_of_produced_event_tags,
            number_of_consumed_reports, number_of_produced_reports)
Example #17
    def AnalyzeEvents(self,
                      session,
                      knowledge_base_object,
                      storage_writer,
                      data_location,
                      analysis_plugins,
                      processing_configuration,
                      event_filter=None,
                      event_filter_expression=None,
                      status_update_callback=None,
                      storage_file_path=None):
        """Analyzes events in a Plaso storage.

    Args:
      session (Session): session in which the events are analyzed.
      knowledge_base_object (KnowledgeBase): contains information from
          the source data needed for processing.
      storage_writer (StorageWriter): storage writer.
      data_location (str): path to the location that data files should
          be loaded from.
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      event_filter (Optional[EventObjectFilter]): event filter.
      event_filter_expression (Optional[str]): event filter expression.
      status_update_callback (Optional[function]): callback function for status
          updates.
      storage_file_path (Optional[str]): path to the session storage file.

    Returns:
      ProcessingStatus: processing status.

    Raises:
      KeyboardInterrupt: if a keyboard interrupt was raised.
      ValueError: if analysis plugins are missing.
    """
        if not analysis_plugins:
            raise ValueError('Missing analysis plugins')

        abort_kill = False
        keyboard_interrupt = False
        queue_full = False

        self._analysis_plugins = {}
        self._data_location = data_location
        self._event_filter_expression = event_filter_expression
        self._events_status = processing_status.EventsStatus()
        self._knowledge_base = knowledge_base_object
        self._processing_configuration = processing_configuration
        self._session = session
        self._status_update_callback = status_update_callback
        self._storage_file_path = storage_file_path

        stored_event_labels_counter = {}
        if storage_writer.HasAttributeContainers('event_label_count'):
            stored_event_labels_counter = {
                event_label_count.label: event_label_count
                for event_label_count in storage_writer.GetAttributeContainers(
                    'event_label_count')
            }

        self._event_labels_counter = collections.Counter()

        if storage_writer.HasAttributeContainers('parser_count'):
            parsers_counter = {
                parser_count.name: parser_count.number_of_events
                for parser_count in storage_writer.GetAttributeContainers(
                    'parser_count')
            }

            total_number_of_events = parsers_counter['total']

        else:
            total_number_of_events = 0
            for stored_session in storage_writer.GetSessions():
                total_number_of_events += stored_session.parsers_counter[
                    'total']

        self._events_status.total_number_of_events = total_number_of_events

        # Set up the storage writer before the analysis processes.
        self._StartTaskStorage(definitions.STORAGE_FORMAT_SQLITE)

        self._StartAnalysisProcesses(analysis_plugins)

        self._StartProfiling(self._processing_configuration.profiling)

        # Start the status update thread after open of the storage writer
        # so we don't have to clean up the thread if the open fails.
        self._StartStatusUpdateThread()

        try:
            self._AnalyzeEvents(storage_writer,
                                analysis_plugins,
                                event_filter=event_filter)

            for key, value in self._event_labels_counter.items():
                event_label_count = stored_event_labels_counter.get(key, None)
                if event_label_count:
                    event_label_count.number_of_events += value
                    storage_writer.UpdateAttributeContainer(event_label_count)
                else:
                    event_label_count = counts.EventLabelCount(
                        label=key, number_of_events=value)
                    storage_writer.AddAttributeContainer(event_label_count)

            self._status = definitions.STATUS_INDICATOR_FINALIZING

        except errors.QueueFull:
            queue_full = True
            self._abort = True

        except KeyboardInterrupt:
            keyboard_interrupt = True
            self._abort = True

        finally:
            self._processing_status.aborted = self._abort
            session.aborted = self._abort

            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            self._StopProfiling()

        # Update the status view one last time before the analysis processes
        # are stopped.
        self._UpdateStatus()

        if queue_full:
            # TODO: handle abort on queue full more elegantly.
            abort_kill = True
        else:
            try:
                self._StopAnalysisProcesses(abort=self._abort)

            except KeyboardInterrupt:
                keyboard_interrupt = True
                abort_kill = True

        if abort_kill:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        try:
            self._StopTaskStorage(definitions.STORAGE_FORMAT_SQLITE,
                                  session.identifier,
                                  abort=self._abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Analysis aborted.')
        else:
            logger.debug('Analysis completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        # Reset values.
        self._analysis_plugins = {}
        self._data_location = None
        self._event_filter_expression = None
        self._knowledge_base = None
        self._processing_configuration = None
        self._session = None
        self._status_update_callback = None
        self._storage_file_path = None

        if keyboard_interrupt:
            raise KeyboardInterrupt

        return self._processing_status
Example #18
    def __init__(self,
                 maximum_number_of_tasks=None,
                 number_of_worker_processes=0,
                 worker_memory_limit=None,
                 worker_timeout=None):
        """Initializes an engine.

    Args:
      maximum_number_of_tasks (Optional[int]): maximum number of concurrent
          tasks, where 0 represents no limit.
      number_of_worker_processes (Optional[int]): number of worker processes.
      worker_memory_limit (Optional[int]): maximum amount of memory a worker is
          allowed to consume, where None represents the default memory limit
          and 0 represents no limit.
      worker_timeout (Optional[float]): number of minutes before a worker
          process that is not providing status updates is considered inactive,
          where None or 0.0 represents the default timeout.
    """
        if maximum_number_of_tasks is None:
            maximum_number_of_tasks = self._MAXIMUM_NUMBER_OF_TASKS

        if number_of_worker_processes < 1:
            # One worker for each "available" CPU (minus other processes).
            # The number here is derived from the fact that the engine starts up:
            # * A main process.
            #
            # If we want to utilize all CPUs on the system we therefore need to
            # start up a number of workers that amounts to the total number of
            # CPUs minus the other processes.
            try:
                cpu_count = multiprocessing.cpu_count() - 1

                if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
                    cpu_count = self._WORKER_PROCESSES_MINIMUM

                elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
                    cpu_count = self._WORKER_PROCESSES_MAXIMUM

            except NotImplementedError:
                logger.error((
                    'Unable to determine number of CPUs defaulting to {0:d} worker '
                    'processes.').format(self._WORKER_PROCESSES_MINIMUM))
                cpu_count = self._WORKER_PROCESSES_MINIMUM

            number_of_worker_processes = cpu_count

        if worker_memory_limit is None:
            worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

        if not worker_timeout:
            worker_timeout = definitions.DEFAULT_WORKER_TIMEOUT

        super(ExtractionMultiProcessEngine, self).__init__()
        self._enable_sigsegv_handler = False
        self._last_worker_number = 0
        self._maximum_number_of_containers = 50
        self._maximum_number_of_tasks = maximum_number_of_tasks
        self._merge_task = None
        self._merge_task_on_hold = None
        self._number_of_consumed_events = 0
        self._number_of_consumed_event_tags = 0
        self._number_of_consumed_extraction_warnings = 0
        self._number_of_consumed_reports = 0
        self._number_of_consumed_sources = 0
        self._number_of_produced_events = 0
        self._number_of_produced_event_tags = 0
        self._number_of_produced_extraction_warnings = 0
        self._number_of_produced_reports = 0
        self._number_of_produced_sources = 0
        self._number_of_worker_processes = number_of_worker_processes
        self._parsers_counter = None
        self._path_spec_extractor = extractors.PathSpecExtractor()
        self._resolver_context = context.Context()
        self._status = definitions.STATUS_INDICATOR_IDLE
        self._storage_merge_reader = None
        self._storage_merge_reader_on_hold = None
        self._task_manager = task_manager.TaskManager()
        self._task_queue = None
        self._task_queue_port = None
        self._task_storage_format = None
        self._worker_memory_limit = worker_memory_limit
        self._worker_timeout = worker_timeout
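The worker-count logic in the constructor reduces to taking the CPU count minus one for the main process and clamping it to a configured range. A compact sketch, with assumed minimum and maximum values rather than the constants the class defines:

import multiprocessing

_WORKER_PROCESSES_MINIMUM = 2   # assumed value
_WORKER_PROCESSES_MAXIMUM = 15  # assumed value


def DetermineNumberOfWorkerProcesses():
    """Returns a worker count based on the available CPUs."""
    try:
        # Reserve one CPU for the main (foreman) process.
        cpu_count = multiprocessing.cpu_count() - 1
    except NotImplementedError:
        cpu_count = _WORKER_PROCESSES_MINIMUM

    return max(_WORKER_PROCESSES_MINIMUM,
               min(cpu_count, _WORKER_PROCESSES_MAXIMUM))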