Example No. 1
  def _AbortTerminate(self):
    """Aborts all registered processes by sending a SIGTERM or equivalent."""
    for pid, process in self._processes_per_pid.items():
      if not process.is_alive():
        continue

      logger.warning('Terminating process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      process.terminate()
Example No. 2
  def _AbortKill(self):
    """Aborts all registered processes by sending a SIGKILL or equivalent."""
    for pid, process in self._processes_per_pid.items():
      if not process.is_alive():
        continue

      logger.warning('Killing process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      self._KillProcess(pid)
Example No. 3
  def _GetEventIdentifiers(self, event, event_data, event_data_stream):
    """Retrieves different identifiers of the event.

    The event data attributes and values can be represented as a string and used
    for sorting and uniquely identifying events. This function determines
    multiple identifiers:
    * an identifier of the attributes and values without the timestamp
      description (or usage). This is referred to as the MACB group
      identifier.
    * an identifier of the attributes and values including the timestamp
      description (or usage). This is referred to as the event content
      identifier.

    The identifier without the timestamp description can be used to group
    events that have the same MACB (modification, access, change, birth)
    timestamps. The PsortEventHeap will store these events individually and
    relies on PsortMultiProcessEngine to do the actual grouping of events.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.

    Returns:
      tuple: containing:

        str: identifier of the event MACB group or None if the event cannot
            be grouped.
        str: identifier of the event content.
    """
    content_identifier = self._GetEventDataContentIdentifier(
        event_data, event_data_stream)

    if event.timestamp_desc in (
        definitions.TIME_DESCRIPTION_LAST_ACCESS,
        definitions.TIME_DESCRIPTION_CHANGE,
        definitions.TIME_DESCRIPTION_CREATION,
        definitions.TIME_DESCRIPTION_MODIFICATION):
      macb_group_identifier = content_identifier
    else:
      macb_group_identifier = None

    timestamp_desc = event.timestamp_desc
    if timestamp_desc is None:
      logger.warning('Missing timestamp_desc attribute')
      timestamp_desc = definitions.TIME_DESCRIPTION_UNKNOWN

    content_identifier = ', '.join([timestamp_desc, content_identifier])

    return macb_group_identifier, content_identifier
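The docstring above describes how Plaso collapses events that share modification, access, change and birth timestamps into one MACB group. A minimal standalone sketch of the same idea, using plain dictionaries and an assumed set of timestamp description strings instead of Plaso's EventObject and definitions module, shows how the two identifiers relate:

# Hypothetical illustration; the description strings and dict layout are
# assumptions, not Plaso's actual constants or event classes.
MACB_TIMESTAMP_DESCRIPTIONS = frozenset([
    'Creation Time', 'Last Access Time', 'Metadata Modification Time',
    'Content Modification Time'])


def GetEventIdentifiers(event):
    """Returns (macb_group_identifier, content_identifier) for a dict event."""
    content_identifier = ', '.join(
        '{0:s}: {1!s}'.format(name, value)
        for name, value in sorted(event['attributes'].items()))

    macb_group_identifier = None
    if event['timestamp_desc'] in MACB_TIMESTAMP_DESCRIPTIONS:
        macb_group_identifier = content_identifier

    content_identifier = ', '.join(
        [event['timestamp_desc'], content_identifier])
    return macb_group_identifier, content_identifier


events = [
    {'timestamp_desc': 'Creation Time',
     'attributes': {'filename': '/tmp/a.txt', 'inode': 5}},
    {'timestamp_desc': 'Last Access Time',
     'attributes': {'filename': '/tmp/a.txt', 'inode': 5}}]

# Both events share one MACB group identifier; their content identifiers
# differ because the timestamp description is prepended to each.
for event in events:
    print(GetEventIdentifiers(event))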
Example No. 4
  def _TerminateProcess(self, process):
    """Terminate a process.

    Args:
      process (MultiProcessBaseProcess): process to terminate.
    """
    pid = process.pid
    logger.warning('Terminating process: (PID: {0:d}).'.format(pid))
    process.terminate()

    # Wait for the process to exit.
    process.join(timeout=self._PROCESS_JOIN_TIMEOUT)

    if process.is_alive():
      logger.warning('Killing process: (PID: {0:d}).'.format(pid))
      self._KillProcess(pid)
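_KillProcess itself is not among these examples. A plausible sketch of such a helper, offered as an assumption rather than Plaso's actual implementation, sends SIGKILL on POSIX systems and falls back to TerminateProcess on Windows:

import ctypes
import os
import signal
import sys


def KillProcess(pid):
    """Forcibly terminates the process with the given PID (hypothetical)."""
    if sys.platform.startswith('win'):
        # Windows has no SIGKILL; TerminateProcess is the closest equivalent.
        PROCESS_TERMINATE = 1
        handle = ctypes.windll.kernel32.OpenProcess(
            PROCESS_TERMINATE, False, pid)
        ctypes.windll.kernel32.TerminateProcess(handle, -1)
        ctypes.windll.kernel32.CloseHandle(handle)
    else:
        try:
            os.kill(pid, signal.SIGKILL)
        except OSError as exception:
            print('Unable to kill PID {0:d} with error: {1!s}'.format(
                pid, exception))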
Example No. 5
    def CallFunction(self):
        """Calls the function via RPC."""
        if self._xmlrpc_proxy is None:
            return None

        rpc_call = getattr(self._xmlrpc_proxy, self._RPC_FUNCTION_NAME, None)
        if rpc_call is None:
            return None

        try:
            return rpc_call()  # pylint: disable=not-callable
        except (expat.ExpatError, SocketServer.socket.error,
                xmlrpclib.Fault) as exception:
            logger.warning(
                'Unable to make RPC call with error: {0!s}'.format(exception))
            return None
Example No. 6
  def _ProcessEvent(self, mediator, event, event_data, event_data_stream):
    """Processes an event.

    Args:
      mediator (AnalysisMediator): mediates interactions between
          analysis plugins and other components, such as storage and dfvfs.
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
    """
    try:
      self._analysis_plugin.ExamineEvent(
          mediator, event, event_data, event_data_stream)

    except Exception as exception:  # pylint: disable=broad-except
      # TODO: write analysis error and change logger to debug only.

      logger.warning('Unhandled exception while processing event object.')
      logger.exception(exception)
Example No. 7
    def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
        """Processes a path specification.

        Args:
          extraction_worker (worker.ExtractionWorker): extraction worker.
          parser_mediator (ParserMediator): parser mediator.
          path_spec (dfvfs.PathSpec): path specification.
        """
        excluded_find_specs = None
        if self._collection_filters_helper:
            excluded_find_specs = (self._collection_filters_helper.
                                   excluded_file_system_find_specs)

        self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
            path_spec)

        try:
            self._CacheFileSystem(path_spec)

            extraction_worker.ProcessPathSpec(
                parser_mediator,
                path_spec,
                excluded_find_specs=excluded_find_specs)

        except dfvfs_errors.CacheFullError:
            # TODO: signal engine of failure.
            self._abort = True
            logger.error((
                'ABORT: detected cache full error while processing path spec: '
                '{0:s}').format(self._current_display_name))

        except Exception as exception:  # pylint: disable=broad-except
            parser_mediator.ProduceExtractionWarning(
                ('unable to process path specification with error: '
                 '{0!s}').format(exception),
                path_spec=path_spec)

            if self._processing_configuration.debug_output:
                logger.warning((
                    'Unhandled exception while processing path specification: '
                    '{0:s}.').format(self._current_display_name))
                logger.exception(exception)
Example No. 8
    def _Open(self, hostname, port):
        """Opens the RPC communication channel for clients.

        Args:
          hostname (str): hostname or IP address to connect to for requests.
          port (int): port to connect to for requests.

        Returns:
          bool: True if the communication channel was successfully opened.
        """
        try:
            self._xmlrpc_server = SimpleXMLRPCServer.SimpleXMLRPCServer(
                (hostname, port), logRequests=False, allow_none=True)
        except SocketServer.socket.error as exception:
            logger.warning(
                ('Unable to bind a RPC server on {0:s}:{1:d} with error: '
                 '{2!s}').format(hostname, port, exception))
            return False

        self._xmlrpc_server.register_function(self._callback,
                                              self._RPC_FUNCTION_NAME)
        return True
Example No. 9
    def Open(self, hostname, port):
        """Opens a RPC communication channel to the server.

        Args:
          hostname (str): hostname or IP address to connect to for requests.
          port (int): port to connect to for requests.

        Returns:
          bool: True if the communication channel was established.
        """
        server_url = 'http://{0:s}:{1:d}'.format(hostname, port)

        try:
            self._xmlrpc_proxy = xmlrpclib.ServerProxy(server_url,
                                                       allow_none=True)
        except SocketServer.socket.error as exception:
            logger.warning(
                ('Unable to connect to RPC server on {0:s}:{1:d} with error: '
                 '{2!s}').format(hostname, port, exception))
            return False

        return True
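Examples 5, 8 and 9 are the two halves of a small XML-RPC status channel: the server binds a SimpleXMLRPCServer and registers a single callback under a fixed name, and the client wraps a ServerProxy and looks the same name up with getattr. A minimal self-contained sketch of that pattern using the Python 3 standard library (module names differ from the Python 2 style imports above; the function name and port are illustrative) could look like this:

import threading
from xmlrpc.client import ServerProxy
from xmlrpc.server import SimpleXMLRPCServer

RPC_FUNCTION_NAME = 'status'  # assumed name, for illustration only


def GetStatus():
    """Callback exposed over RPC; returns a status dictionary."""
    return {'processing_status': 'running'}


# Server side: bind locally and register the single callback by name.
server = SimpleXMLRPCServer(
    ('localhost', 8675), logRequests=False, allow_none=True)
server.register_function(GetStatus, RPC_FUNCTION_NAME)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()

# Client side: look the function up on the proxy by name and call it.
proxy = ServerProxy('http://localhost:8675', allow_none=True)
rpc_call = getattr(proxy, RPC_FUNCTION_NAME, None)
if rpc_call is not None:
    print(rpc_call())  # {'processing_status': 'running'}

server.shutdown()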
Example No. 10
    def _StopExtractionProcesses(self, abort=False):
        """Stops the extraction processes.

        Args:
          abort (bool): True to indicate the stop is issued on abort.
        """
        logger.debug('Stopping extraction processes.')
        self._StopMonitoringProcesses()

        if abort:
            # Signal all the processes to abort.
            self._AbortTerminate()

        logger.debug('Emptying task queue.')
        self._task_queue.Empty()

        # Wake the processes to make sure that they are not blocking,
        # waiting for new items on the queue.
        for _ in self._processes_per_pid:
            try:
                self._task_queue.PushItem(plaso_queue.QueueAbort(),
                                          block=False)
            except errors.QueueFull:
                logger.warning(
                    'Task queue full, unable to push abort message.')

        # Try waiting for the processes to exit normally.
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        self._task_queue.Close(abort=abort)

        if not abort:
            # Check if the processes are still alive and terminate them if necessary.
            self._AbortTerminate()
            self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
            self._task_queue.Close(abort=True)

        # Kill any lingering processes.
        self._AbortKill()
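The shutdown above escalates in stages: wake each worker with a QueueAbort sentinel, join with a timeout, terminate what is still alive, and kill as a last resort. A condensed sketch of the same sentinel-based shutdown with the standard multiprocessing module, in which the Worker and Shutdown helpers are hypothetical stand-ins, is shown below:

import multiprocessing
import queue


def Worker(task_queue):
    """Consumes tasks until a None sentinel is dequeued."""
    while True:
        task = task_queue.get()
        if task is None:  # plays the role of plaso_queue.QueueAbort
            break
        # ... process the task ...


def Shutdown(task_queue, processes, join_timeout=5.0):
    """Wakes, joins, terminates and finally kills worker processes."""
    for _ in processes:
        try:
            task_queue.put_nowait(None)
        except queue.Full:
            pass  # queue is full; the worker will be terminated below

    for process in processes:
        process.join(timeout=join_timeout)

    for process in processes:
        if process.is_alive():
            process.terminate()
            process.join(timeout=join_timeout)
        if process.is_alive():
            process.kill()  # SIGKILL equivalent, Python 3.7+


if __name__ == '__main__':
    task_queue = multiprocessing.Queue()
    processes = [
        multiprocessing.Process(target=Worker, args=(task_queue,))
        for _ in range(2)]
    for process in processes:
        process.start()
    Shutdown(task_queue, processes)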
Example No. 11
    def _CheckStatusAnalysisProcess(self, pid):
        """Checks the status of an analysis process.

        Args:
          pid (int): process ID (PID) of a registered analysis process.

        Raises:
          KeyError: if the process is not registered with the engine.
        """
        # TODO: Refactor this method, simplify and separate concerns (monitoring
        # vs management).
        self._RaiseIfNotRegistered(pid)

        if pid in self._completed_analysis_processes:
            status_indicator = definitions.STATUS_INDICATOR_COMPLETED
            process_status = {'processing_status': status_indicator}
            used_memory = 0

        else:
            process = self._processes_per_pid[pid]

            process_status = self._QueryProcessStatus(process)
            if process_status is None:
                process_is_alive = False
            else:
                process_is_alive = True

            process_information = self._process_information_per_pid[pid]
            used_memory = process_information.GetUsedMemory() or 0

            if self._worker_memory_limit and used_memory > self._worker_memory_limit:
                logger.warning((
                    'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
                    'memory limit: {2:d}.').format(process.name, pid,
                                                   self._worker_memory_limit))
                self._KillProcess(pid)

            if isinstance(process_status, dict):
                self._rpc_errors_per_pid[pid] = 0
                status_indicator = process_status.get('processing_status',
                                                      None)

                if status_indicator == definitions.STATUS_INDICATOR_COMPLETED:
                    self._completed_analysis_processes.add(pid)

            else:
                rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
                self._rpc_errors_per_pid[pid] = rpc_errors

                if rpc_errors > self._MAXIMUM_RPC_ERRORS:
                    process_is_alive = False

                if process_is_alive:
                    rpc_port = process.rpc_port.value
                    logger.warning((
                        'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
                        'RPC socket: http://localhost:{2:d}').format(
                            process.name, pid, rpc_port))

                    processing_status_string = 'RPC error'
                    status_indicator = definitions.STATUS_INDICATOR_RUNNING
                else:
                    processing_status_string = 'killed'
                    status_indicator = definitions.STATUS_INDICATOR_KILLED

                process_status = {
                    'processing_status': processing_status_string
                }

        self._UpdateProcessingStatus(pid, process_status, used_memory)

        if status_indicator in definitions.ERROR_STATUS_INDICATORS:
            logger.error(
                ('Process {0:s} (PID: {1:d}) is not functioning correctly. '
                 'Status code: {2!s}.').format(process.name, pid,
                                               status_indicator))

            self._TerminateProcessByPid(pid)
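GetUsedMemory comes from a process information helper that is not shown in these examples. A comparable measurement can be taken with psutil; the helper, placeholder PID and limit below are assumptions for illustration, not the engine's actual code:

import psutil


def GetUsedMemory(pid):
    """Returns the resident set size of the process in bytes, or None."""
    try:
        return psutil.Process(pid).memory_info().rss
    except psutil.NoSuchProcess:
        return None


WORKER_MEMORY_LIMIT = 2048 * 1024 * 1024  # 2 GiB, example value

used_memory = GetUsedMemory(1234) or 0  # 1234 is a placeholder PID
if used_memory > WORKER_MEMORY_LIMIT:
    print('Memory limit exceeded: {0:d} bytes used.'.format(used_memory))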
Example No. 12
    def _Main(self):
        """The main loop."""
        # We need a resolver context per process to prevent multiprocessing
        # issues with file objects stored in images.
        self._resolver_context = context.Context()

        for credential_configuration in self._processing_configuration.credentials:
            resolver.Resolver.key_chain.SetCredential(
                credential_configuration.path_spec,
                credential_configuration.credential_type,
                credential_configuration.credential_data)

        self._parser_mediator = self._CreateParserMediator(
            self._knowledge_base, self._resolver_context,
            self._processing_configuration)

        # We need to initialize the parser and hasher objects after the process
        # has forked otherwise on Windows the "fork" will fail with
        # a PickleError for Python modules that cannot be pickled.
        self._extraction_worker = worker.EventExtractionWorker(
            parser_filter_expression=(
                self._processing_configuration.parser_filter_expression))

        self._extraction_worker.SetExtractionConfiguration(
            self._processing_configuration.extraction)

        self._parser_mediator.StartProfiling(
            self._processing_configuration.profiling, self._name,
            self._process_information)
        self._StartProfiling(self._processing_configuration.profiling)

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(
                self._analyzers_profiler)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(
                self._processing_profiler)

        logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
            self._name, self._pid))

        self._status = definitions.STATUS_INDICATOR_RUNNING

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring task queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    task = self._task_queue.PopItem()
                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception: {0!s}.'.format(
                            type(exception)))
                    break

                if isinstance(task, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessTask(task)

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
                    self._name, self._pid))

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(None)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(None)

        self._StopProfiling()
        self._parser_mediator.StopProfiling()

        self._extraction_worker = None
        self._file_system_cache = []
        self._parser_mediator = None
        self._resolver_context = None

        if self._abort:
            self._status = definitions.STATUS_INDICATOR_ABORTED
        else:
            self._status = definitions.STATUS_INDICATOR_COMPLETED

        logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
            self._name, self._pid))

        try:
            self._task_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))
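The comment about initializing the parser and hasher objects after the fork reflects a general multiprocessing constraint: on Windows the process object is pickled and sent to the child, so anything unpicklable must be created inside run() (here _Main), not in the constructor. A small generic sketch of that pattern, with illustrative names rather than Plaso's classes, follows:

import multiprocessing
import os


class WorkerProcess(multiprocessing.Process):
    """Worker that defers creation of unpicklable state until run()."""

    def __init__(self, name):
        super().__init__(name=name)
        # Only picklable configuration belongs here; on Windows this object
        # is pickled and sent to the child process when start() is called.
        self._output_file = None

    def run(self):
        # Unpicklable helpers (open files, locks, parser objects) are
        # created here, inside the child process, mirroring _Main() above.
        self._output_file = open(os.devnull, 'w')
        try:
            pass  # ... consume tasks from a queue ...
        finally:
            self._output_file.close()


if __name__ == '__main__':
    process = WorkerProcess('Worker_00')
    process.start()
    process.join()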
Example No. 13
  def _CheckStatusWorkerProcess(self, pid):
    """Checks the status of a worker process.

    If a worker process is not responding, the process is terminated and
    a replacement process is started.

    Args:
      pid (int): process ID (PID) of a registered worker process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    process = self._processes_per_pid[pid]

    process_status = self._QueryProcessStatus(process)
    if process_status is None:
      process_is_alive = False
    else:
      process_is_alive = True

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
      logger.warning((
          'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
          'memory limit: {2:d}.').format(
              process.name, pid, self._worker_memory_limit))
      self._KillProcess(pid)

    if isinstance(process_status, dict):
      self._rpc_errors_per_pid[pid] = 0
      status_indicator = process_status.get('processing_status', None)

    else:
      rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
      self._rpc_errors_per_pid[pid] = rpc_errors

      if rpc_errors > self._MAXIMUM_RPC_ERRORS:
        process_is_alive = False

      if process_is_alive:
        rpc_port = process.rpc_port.value
        logger.warning((
            'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
            'RPC socket: http://localhost:{2:d}').format(
                process.name, pid, rpc_port))

        processing_status_string = 'RPC error'
        status_indicator = definitions.STATUS_INDICATOR_RUNNING
      else:
        processing_status_string = 'killed'
        status_indicator = definitions.STATUS_INDICATOR_KILLED

      process_status = {
          'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # _UpdateProcessingStatus can also change the status of the worker,
    # so refresh the status if applicable.
    for worker_status in self._processing_status.workers_status:
      if worker_status.pid == pid:
        status_indicator = worker_status.status
        break

    if status_indicator in definitions.ERROR_STATUS_INDICATORS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)

      replacement_process = None
      for replacement_process_attempt in range(
          self._MAXIMUM_REPLACEMENT_RETRIES):
        logger.info((
            'Attempt: {0:d} to start replacement worker process for '
            '{1:s}').format(replacement_process_attempt + 1, process.name))

        replacement_process = self._StartWorkerProcess(process.name)
        if replacement_process:
          break

        time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)

      if not replacement_process:
        logger.error(
            'Unable to create replacement worker process for: {0:s}'.format(
                process.name))
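The replacement logic retries _StartWorkerProcess a bounded number of times, sleeping between attempts. That retry-with-delay shape is easy to isolate; the helper below is a generic sketch rather than the engine's code:

import time


def StartWithRetries(start_callback, maximum_retries=3, retry_delay=1.0):
    """Calls start_callback until it returns a truthy value or retries run out.

    Args:
        start_callback (callable): returns the new process or None on failure.
        maximum_retries (int): number of attempts before giving up.
        retry_delay (float): seconds to sleep between failed attempts.

    Returns:
        object: the value returned by start_callback, or None if every
            attempt failed.
    """
    for _ in range(maximum_retries):
        replacement = start_callback()
        if replacement:
            return replacement
        time.sleep(retry_delay)
    return None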
Example No. 14
    def _Main(self):
        """The main loop."""
        self._StartProfiling(self._processing_configuration.profiling)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        # Creating the threading event in the constructor will cause a pickle
        # error on Windows when an analysis process is created.
        self._foreman_status_wait_event = threading.Event()
        self._status = definitions.STATUS_INDICATOR_ANALYZING

        task = tasks.Task()
        task.storage_format = definitions.STORAGE_FORMAT_SQLITE
        # TODO: temporary solution.
        task.identifier = self._analysis_plugin.plugin_name

        self._task = task

        task_storage_writer = self._storage_factory.CreateTaskStorageWriter(
            definitions.STORAGE_FORMAT_SQLITE)

        if self._serializers_profiler:
            task_storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        if self._storage_profiler:
            task_storage_writer.SetStorageProfiler(self._storage_profiler)

        storage_file_path = self._GetTaskStorageFilePath(
            definitions.STORAGE_FORMAT_SQLITE, task)
        task_storage_writer.Open(path=storage_file_path)

        self._analysis_mediator = self._CreateAnalysisMediator(
            self._session, self._knowledge_base,
            self._processing_configuration, self._data_location)
        self._analysis_mediator.SetStorageWriter(task_storage_writer)

        # TODO: set event_filter_expression in mediator.

        task_storage_writer.AddAttributeContainer(task)

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    queued_object = self._event_queue.PopItem()

                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception {0!s}.'.format(
                            type(exception)))
                    break

                if isinstance(queued_object, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessEvent(self._analysis_mediator, *queued_object)

                self._number_of_consumed_events += 1

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
                    self._name, self._pid))

            if not self._abort:
                self._status = definitions.STATUS_INDICATOR_REPORTING

                self._analysis_mediator.ProduceAnalysisReport(
                    self._analysis_plugin)

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        finally:
            task.aborted = self._abort
            task_storage_writer.UpdateAttributeContainer(task)

            task_storage_writer.Close()

            if self._serializers_profiler:
                task_storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                task_storage_writer.SetStorageProfiler(None)

        try:
            self._FinalizeTaskStorageWriter(definitions.STORAGE_FORMAT_SQLITE,
                                            task)
        except IOError as exception:
            logger.warning(
                'Unable to finalize task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            self._status = definitions.STATUS_INDICATOR_ABORTED
        else:
            self._status = definitions.STATUS_INDICATOR_COMPLETED

        logger.debug('Wait for foreman status wait event')
        self._foreman_status_wait_event.clear()
        self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        self._StopProfiling()

        self._analysis_mediator = None
        self._foreman_status_wait_event = None
        self._task = None

        try:
            self._event_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))