Example #1
  def _StopMonitoringProcess(self, process):
    """Stops monitoring a process.

    Args:
      process (MultiProcessBaseProcess): process.

    Raises:
      KeyError: if the process is not monitored.
      ValueError: if the process is missing.
    """
    if process is None:
      raise ValueError('Missing process.')

    pid = process.pid

    self._RaiseIfNotMonitored(pid)

    del self._process_information_per_pid[pid]

    rpc_client = self._rpc_clients_per_pid.get(pid, None)
    if rpc_client:
      rpc_client.Close()
      del self._rpc_clients_per_pid[pid]

    if pid in self._rpc_errors_per_pid:
      del self._rpc_errors_per_pid[pid]

    logger.debug('Stopped monitoring process: {0:s} (PID: {1:d})'.format(
        process.name, pid))
Example #2
    def CreateRetryTask(self):
        """Creates a task that to retry a previously abandoned task.

    Returns:
      Task: a task that was abandoned but should be retried or None if there are
          no abandoned tasks that should be retried.
    """
        with self._lock:
            abandoned_task = self._GetTaskPendingRetry()
            if not abandoned_task:
                return None

            # The abandoned task is kept in _tasks_abandoned so it can still be
            # identified in CheckTaskToMerge and UpdateTaskAsPendingMerge.

            retry_task = abandoned_task.CreateRetryTask()
            logger.debug('Retrying task {0:s} as {1:s}.'.format(
                abandoned_task.identifier, retry_task.identifier))

            self._tasks_queued[retry_task.identifier] = retry_task
            self._total_number_of_tasks += 1

            self.SampleTaskStatus(retry_task, 'created_retry')

            return retry_task
Example #3
    def CreateTask(self,
                   session_identifier,
                   storage_format=definitions.STORAGE_FORMAT_SQLITE):
        """Creates a task.

    Args:
      session_identifier (str): the identifier of the session the task is
          part of.
      storage_format (Optional[str]): the storage format that the task should be
          stored in.

    Returns:
      Task: task attribute container.
    """
        task = tasks.Task(session_identifier)
        task.storage_format = storage_format
        logger.debug('Created task: {0:s}.'.format(task.identifier))

        with self._lock:
            self._tasks_queued[task.identifier] = task
            self._total_number_of_tasks += 1

            self.SampleTaskStatus(task, 'created')

        return task
Example #4
    def _AbandonInactiveProcessingTasks(self):
        """Marks processing tasks that exceed the inactive time as abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
        if self._tasks_processing:
            inactive_time = time.time() - self._TASK_INACTIVE_TIME
            inactive_time = int(inactive_time *
                                definitions.MICROSECONDS_PER_SECOND)

            # Abandon all tasks after they're identified so as not to modify the
            # dict while iterating over it.
            tasks_to_abandon = []
            for task_identifier, task in self._tasks_processing.items():
                if task.last_processing_time < inactive_time:
                    logger.debug('Abandoned processing task: {0:s}.'.format(
                        task_identifier))

                    self.SampleTaskStatus(task, 'abandoned_processing')
                    tasks_to_abandon.append((task_identifier, task))

            for task_identifier, task in tasks_to_abandon:
                self._tasks_abandoned[task_identifier] = task
                del self._tasks_processing[task_identifier]
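
Note: the loop above first collects the stale tasks and only then mutates the dicts, because deleting from a dict while iterating over it raises a RuntimeError in Python 3. A minimal, self-contained sketch of the same pattern, using plain dicts and a made-up cutoff instead of the task manager's state:

import time

# Stand-ins for the manager's state: identifier -> last processing time.
tasks_processing = {'task-1': time.time() - 600, 'task-2': time.time()}
tasks_abandoned = {}

INACTIVE_SECONDS = 300
cutoff = time.time() - INACTIVE_SECONDS

# First pass: identify, without modifying the dict while iterating over it.
tasks_to_abandon = [
    (identifier, last_time)
    for identifier, last_time in tasks_processing.items()
    if last_time < cutoff]

# Second pass: move the stale entries.
for identifier, last_time in tasks_to_abandon:
    tasks_abandoned[identifier] = last_time
    del tasks_processing[identifier]

print(sorted(tasks_abandoned))  # ['task-1']
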
Example #5
    def _StopAnalysisProcesses(self, abort=False):
        """Stops the analysis processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
        logger.debug('Stopping analysis processes.')
        self._StopMonitoringProcesses()

        if abort:
            # Signal all the processes to abort.
            self._AbortTerminate()

        # Wake the processes to make sure that they are not blocking,
        # waiting for new items on the queue.
        for event_queue in self._event_queues.values():
            event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

        # Try waiting for the processes to exit normally.
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        for event_queue in self._event_queues.values():
            event_queue.Close(abort=abort)

        if abort:
            # Kill any remaining processes.
            self._AbortKill()
        else:
            # Check if the processes are still alive and terminate them if necessary.
            self._AbortTerminate()
            self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)

            for event_queue in self._event_queues.values():
                event_queue.Close(abort=True)
Example #6
    def _StartWorkerProcess(self, process_name):
        """Creates, starts, monitors and registers a worker process.

    Args:
      process_name (str): process name.

    Returns:
      MultiProcessWorkerProcess: extraction worker process or None if the
          process could not be started.
    """
        logger.debug('Starting worker process {0:s}'.format(process_name))

        queue_name = '{0:s} task queue'.format(process_name)
        task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
            delay_open=True,
            linger_seconds=0,
            name=queue_name,
            port=self._task_queue_port,
            timeout_seconds=self._TASK_QUEUE_TIMEOUT_SECONDS)

        process = extraction_process.ExtractionWorkerProcess(
            task_queue,
            self.collection_filters_helper,
            self.knowledge_base,
            self._processing_configuration,
            enable_sigsegv_handler=self._enable_sigsegv_handler,
            name=process_name)

        # Remove all possible log handlers to prevent a child process from logging
        # to the main process log file and garbling the log. The log handlers are
        # recreated after the worker process has been started.
        for handler in logging.root.handlers:
            logging.root.removeHandler(handler)
            handler.close()

        process.start()

        loggers.ConfigureLogging(debug_output=self._debug_output,
                                 filename=self._log_filename,
                                 mode='a',
                                 quiet_mode=self._quiet_mode)

        try:
            self._StartMonitoringProcess(process)

        except (IOError, KeyError) as exception:
            pid = process.pid
            logger.error(
                ('Unable to monitor replacement worker process: {0:s} '
                 '(PID: {1:d}) with error: {2!s}').format(
                     process_name, pid, exception))

            self._TerminateProcess(process)
            return None

        self._RegisterProcess(process)

        self._last_worker_number += 1

        return process
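
Note: the handler shuffle above exists because a child process would otherwise inherit the parent's open log handlers and write into (and garble) the same log file. A standalone sketch of the idea, independent of Plaso's loggers module; the file names parent.log and worker.log are only for illustration:

import logging
import multiprocessing


def worker():
    # The child sets up its own logging once it is running.
    logging.basicConfig(filename='worker.log', level=logging.DEBUG)
    logging.debug('hello from the worker')


if __name__ == '__main__':
    logging.basicConfig(filename='parent.log', level=logging.DEBUG)

    # Snapshot the handler list before removing, then close each handler so
    # the child does not start with an open handle to parent.log.
    for handler in list(logging.root.handlers):
        logging.root.removeHandler(handler)
        handler.close()

    process = multiprocessing.Process(target=worker)
    process.start()

    # Re-create the parent's logging after the child has been started.
    logging.basicConfig(filename='parent.log', level=logging.DEBUG)
    process.join()
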
Example #7
    def SignalAbort(self):
        """Signals the process to abort."""
        self._abort = True

        if self._foreman_status_wait_event:
            logger.debug('Abort foreman status wait event')
            self._foreman_status_wait_event.set()

        if self._analysis_mediator:
            self._analysis_mediator.SignalAbort()
Example #8
  def _GetStatus(self):
    """Retrieves status information.

    Returns:
      dict[str, object]: status attributes, indexed by name.
    """
    logger.debug('Status update requested')

    if self._analysis_mediator:
      number_of_produced_event_tags = (
          self._analysis_mediator.number_of_produced_event_tags)
      number_of_produced_reports = (
          self._analysis_mediator.number_of_produced_analysis_reports)
    else:
      number_of_produced_event_tags = None
      number_of_produced_reports = None

    if self._process_information:
      used_memory = self._process_information.GetUsedMemory() or 0
    else:
      used_memory = 0

    if self._memory_profiler:
      self._memory_profiler.Sample('main', used_memory)

    # XML RPC does not support integer values > 2 GiB so we format them
    # as a string.
    used_memory = '{0:d}'.format(used_memory)

    status = {
        'display_name': '',
        'identifier': self._name,
        'number_of_consumed_analysis_warnings': None,
        'number_of_consumed_event_tags': None,
        'number_of_consumed_events': self._number_of_consumed_events,
        'number_of_consumed_reports': None,
        'number_of_consumed_sources': None,
        'number_of_produced_analysis_warnings': None,
        'number_of_produced_event_tags': number_of_produced_event_tags,
        'number_of_produced_events': None,
        'number_of_produced_reports': number_of_produced_reports,
        'number_of_produced_sources': None,
        'processing_status': self._status,
        'task_identifier': None,
        'used_memory': used_memory}

    if self._status in (
        definitions.STATUS_INDICATOR_ABORTED,
        definitions.STATUS_INDICATOR_COMPLETED):
      logger.debug('Set foreman status wait event')
      self._foreman_status_wait_event.set()

    return status
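
Note: the comment about XML-RPC is accurate; the protocol only marshals signed 32-bit integers, so larger counters have to travel as strings. A small demonstration using the standard library's xmlrpc.client:

import xmlrpc.client

print(xmlrpc.client.MAXINT)  # 2147483647, the largest integer XML-RPC allows

used_memory = 3 * 1024 ** 3  # 3 GiB, larger than MAXINT

try:
    xmlrpc.client.dumps((used_memory,))
except OverflowError:
    # Send the value as a formatted string instead, as the status dict does.
    payload = xmlrpc.client.dumps(('{0:d}'.format(used_memory),))
    print('transported as string: {0:d} characters'.format(len(payload)))
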
Example #9
  def _AbortJoin(self, timeout=None):
    """Aborts all registered processes by joining with the parent process.

    Args:
      timeout (int): number of seconds to wait for processes to join, where
          None represents no timeout.
    """
    for pid, process in self._processes_per_pid.items():
      logger.debug('Waiting for process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      process.join(timeout=timeout)
      if not process.is_alive():
        logger.debug('Process {0:s} (PID: {1:d}) stopped.'.format(
            process.name, pid))
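
Note: join() with a timeout returns even when the process is still running, which is why the code checks is_alive() afterwards. A self-contained sketch of the same join-then-check pattern (slow_worker is a made-up target):

import multiprocessing
import time


def slow_worker():
    time.sleep(5)


if __name__ == '__main__':
    process = multiprocessing.Process(target=slow_worker, name='Worker_00')
    process.start()

    # join() with a timeout does not raise if the process keeps running,
    # so is_alive() is the only way to tell whether it actually stopped.
    process.join(timeout=1)
    if process.is_alive():
        print('{0:s} (PID: {1:d}) did not stop in time.'.format(
            process.name, process.pid))
        process.terminate()
        process.join()
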
Example #10
    def HasPendingTasks(self):
        """Determines if there are tasks running or in need of retrying.

    Returns:
      bool: True if there are tasks that are active, ready to be merged or
          need to be retried.
    """
        logger.debug('Checking for pending tasks')
        with self._lock:
            self._AbandonInactiveProcessingTasks()

            if self._tasks_processing:
                return True

            # There are no tasks being processed, but we might be
            # waiting for some tasks to be merged.
            if self._HasTasksPendingMerge():
                return True

            # There are no tasks processing or pending merge, but there may
            # still be some waiting to be retried, so we check that.
            if self._HasTasksPendingRetry():
                return True

            # It is possible that a worker has processed a task and the foreman has
            # not been informed about it, since there is no feedback from the worker
            # when it pops a task from the queue.

            # If we believe all the workers are idle for longer than the task
            # inactive time (timeout) abandon all queued tasks. This ensures
            # that processing actually stops when the foreman never gets an
            # update from a worker.

            if self._tasks_queued:
                inactive_time = time.time() - self._TASK_INACTIVE_TIME
                inactive_time = int(inactive_time *
                                    definitions.MICROSECONDS_PER_SECOND)

                if self._latest_task_processing_time < inactive_time:
                    self._AbandonQueuedTasks()

            if self._tasks_queued:
                return True

            if self._tasks_merging:
                return True

        # There are no tasks pending any work.
        return False
Example #11
  def _StopProcessStatusRPCServer(self):
    """Stops the process status RPC server."""
    if not self._rpc_server:
      return

    # Make sure the engine gets one more status update so it knows
    # the worker has completed.
    self._WaitForStatusNotRunning()

    self._rpc_server.Stop()
    self._rpc_server = None
    self.rpc_port.value = 0

    logger.debug(
        'Process: {0!s} process status RPC server stopped'.format(self._name))
Example #12
    def _RunProcess(self):
        """Runs the process."""
        # Prevent KeyboardInterrupt from being raised inside the process.
        # This prevents the process from generating a traceback when interrupted.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        # A SIGTERM signal handler is necessary to make sure IPC is cleaned up
        # correctly on terminate.
        signal.signal(signal.SIGTERM, self._SigTermHandler)

        # A SIGSEGV signal handler is necessary to try to indicate where the
        # worker failed.
        # WARNING: the SIGSEGV handler will deadlock the process on a real segfault.
        if self._enable_sigsegv_handler:
            self._original_sigsegv_handler = signal.signal(
                signal.SIGSEGV, self._SigSegvHandler)

        self._pid = os.getpid()
        self._process_information = process_info.ProcessInfo(self._pid)

        # We need to set the is running status explicitly to True in case
        # the process completes before the engine is able to determine
        # the status of the process, such as in the unit tests.
        self._status_is_running = True

        # Logging needs to be configured before the first output otherwise we
        # mess up the logging of the parent process.
        loggers.ConfigureLogging(debug_output=self._debug_output,
                                 filename=self._log_filename,
                                 quiet_mode=self._quiet_mode)

        logger.debug('Process: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        self._StartProcessStatusRPCServer()

        self._Main()

        self._StopProcessStatusRPCServer()

        logger.debug('Process: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        # Make sure log files are cleanly closed.
        logging.shutdown()

        self._status_is_running = False
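
Note: a rough standalone sketch of the signal setup above: ignore SIGINT so the child does not print a traceback on Ctrl+C, and install a SIGTERM handler so terminate() leads to a clean shutdown. The handler body here is illustrative only:

import os
import signal
import sys
import time


def _SigTermHandler(signal_number, stack_frame):
    # Clean up IPC resources here before exiting.
    print('Received SIGTERM, shutting down cleanly.')
    sys.exit(0)


if __name__ == '__main__':
    # Prevent KeyboardInterrupt from being raised inside this process.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Make sure terminate() results in an orderly shutdown.
    signal.signal(signal.SIGTERM, _SigTermHandler)

    print('Running as PID: {0:d}'.format(os.getpid()))
    time.sleep(30)
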
Example #13
    def UpdateTaskAsProcessingByIdentifier(self, task_identifier):
        """Updates the task manager to reflect the task is processing.

    Args:
      task_identifier (str): unique identifier of the task.

    Raises:
      KeyError: if the task is not known to the task manager.
    """
        with self._lock:
            task_processing = self._tasks_processing.get(task_identifier, None)
            if task_processing:
                task_processing.UpdateProcessingTime()
                self._UpdateLatestProcessingTime(task_processing)
                return

            task_queued = self._tasks_queued.get(task_identifier, None)
            if task_queued:
                logger.debug('Task {0:s} was queued, now processing.'.format(
                    task_identifier))
                self._tasks_processing[task_identifier] = task_queued
                del self._tasks_queued[task_identifier]

                task_queued.UpdateProcessingTime()
                self._UpdateLatestProcessingTime(task_queued)
                return

            task_abandoned = self._tasks_abandoned.get(task_identifier, None)
            if task_abandoned:
                del self._tasks_abandoned[task_identifier]
                self._tasks_processing[task_identifier] = task_abandoned
                logger.debug(
                    'Task {0:s} was abandoned, but now processing.'.format(
                        task_identifier))

                task_abandoned.UpdateProcessingTime()
                self._UpdateLatestProcessingTime(task_abandoned)
                return

            if task_identifier in self._tasks_pending_merge:
                # No need to update the processing time, as this task has already
                # finished processing and is just waiting to be merged.
                return

        # If we get here, we don't know what state the task is in, so raise.
        raise KeyError(
            'Status of task {0:s} is unknown.'.format(task_identifier))
Example #14
    def _AbandonQueuedTasks(self):
        """Marks queued tasks abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
        # Abandon all tasks after they're identified so as not to modify the
        # dict while iterating over it.
        tasks_to_abandon = []
        for task_identifier, task in self._tasks_queued.items():
            logger.debug(
                'Abandoned queued task: {0:s}.'.format(task_identifier))
            tasks_to_abandon.append((task_identifier, task))

        for task_identifier, task in tasks_to_abandon:
            self._tasks_abandoned[task_identifier] = task
            del self._tasks_queued[task_identifier]
Example #15
    def _FillEventSourceHeap(self,
                             storage_writer,
                             event_source_heap,
                             start_with_first=False):
        """Fills the event source heap with the available written event sources.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage.
      event_source_heap (_EventSourceHeap): event source heap.
      start_with_first (Optional[bool]): True if the function should start
          with the first written event source.
    """
        if self._processing_profiler:
            self._processing_profiler.StartTiming('fill_event_source_heap')

        if self._processing_profiler:
            self._processing_profiler.StartTiming('get_event_source')

        if start_with_first:
            event_source = storage_writer.GetFirstWrittenEventSource()
        else:
            event_source = storage_writer.GetNextWrittenEventSource()

        if self._processing_profiler:
            self._processing_profiler.StopTiming('get_event_source')

        while event_source:
            event_source_heap.PushEventSource(event_source)
            if event_source_heap.IsFull():
                logger.debug('Event source heap is full.')
                break

            if self._processing_profiler:
                self._processing_profiler.StartTiming('get_event_source')

            event_source = storage_writer.GetNextWrittenEventSource()

            if self._processing_profiler:
                self._processing_profiler.StopTiming('get_event_source')

        if self._processing_profiler:
            self._processing_profiler.StopTiming('fill_event_source_heap')
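
Note: the loop above keeps pulling written event sources until the producer runs dry or the heap reports it is full. A minimal stand-in, with a toy BoundedHeap replacing _EventSourceHeap and a plain iterator replacing the storage writer:

import heapq


class BoundedHeap(object):
    """Toy bounded heap mirroring the IsFull/Push/Pop interface used above."""

    def __init__(self, maximum_number_of_items=3):
        self._heap = []
        self._maximum_number_of_items = maximum_number_of_items

    def IsFull(self):
        return len(self._heap) >= self._maximum_number_of_items

    def PushEventSource(self, event_source):
        heapq.heappush(self._heap, event_source)

    def PopEventSource(self):
        return heapq.heappop(self._heap) if self._heap else None


heap = BoundedHeap()
written_event_sources = iter([5, 1, 4, 2, 3])

event_source = next(written_event_sources, None)
while event_source:
    heap.PushEventSource(event_source)
    if heap.IsFull():
        break
    event_source = next(written_event_sources, None)

print(heap.PopEventSource())  # 1: the smallest item comes out first
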
Example #16
    def CompleteTask(self, task):
        """Completes a task.

    The task is complete and can be removed from the task manager.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not merging.
    """
        with self._lock:
            if task.identifier not in self._tasks_merging:
                raise KeyError('Task {0:s} was not merging.'.format(
                    task.identifier))

            self.SampleTaskStatus(task, 'completed')

            del self._tasks_merging[task.identifier]

            logger.debug('Completed task {0:s}.'.format(task.identifier))
Example #17
    def _StopExtractionProcesses(self, abort=False):
        """Stops the extraction processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
        logger.debug('Stopping extraction processes.')
        self._StopMonitoringProcesses()

        if abort:
            # Signal all the processes to abort.
            self._AbortTerminate()

        logger.debug('Emptying task queue.')
        self._task_queue.Empty()

        # Wake the processes to make sure that they are not blocking,
        # waiting for new items on the queue.
        for _ in self._processes_per_pid:
            try:
                self._task_queue.PushItem(plaso_queue.QueueAbort(),
                                          block=False)
            except errors.QueueFull:
                logger.warning(
                    'Task queue full, unable to push abort message.')

        # Try waiting for the processes to exit normally.
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        self._task_queue.Close(abort=abort)

        if not abort:
            # Check if the processes are still alive and terminate them if necessary.
            self._AbortTerminate()
            self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
            self._task_queue.Close(abort=True)

        # Kill any lingering processes.
        self._AbortKill()
Example #18
    def RemoveTask(self, task):
        """Removes an abandoned task.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not abandoned or the task was abandoned and
          was not retried.
    """
        with self._lock:
            if task.identifier not in self._tasks_abandoned:
                raise KeyError('Task {0:s} was not abandoned.'.format(
                    task.identifier))

            if not task.has_retry:
                raise KeyError(
                    'Will not remove a task {0:s} without retry task.'.format(
                        task.identifier))

            del self._tasks_abandoned[task.identifier]

            logger.debug('Removed task {0:s}.'.format(task.identifier))
Example #19
    def _StartProcessStatusRPCServer(self):
        """Starts the process status RPC server."""
        if self._rpc_server:
            return

        self._rpc_server = plaso_xmlrpc.XMLProcessStatusRPCServer(
            self._GetStatus)

        hostname = 'localhost'

        # Try the PID as the port number first, otherwise pick a random port
        # between 1024 and 60000.
        if self._pid < 1024 or self._pid > 60000:
            port = random.randint(1024, 60000)
        else:
            port = self._pid

        if not self._rpc_server.Start(hostname, port):
            port = 0
            for _ in range(self._NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
                port = random.randint(1024, 60000)
                if self._rpc_server.Start(hostname, port):
                    break

                port = 0

        if not port:
            logger.error(
                ('Unable to start a process status RPC server for {0!s} '
                 '(PID: {1:d})').format(self._name, self._pid))
            self._rpc_server = None
            return

        self.rpc_port.value = port

        logger.debug('Process: {0!s} process status RPC server started'.format(
            self._name))
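
Note: the port selection tries the PID as the port first and then falls back to random ports. A self-contained sketch of the same retry-on-bind-failure idea, using the standard library's SimpleXMLRPCServer in place of Plaso's XMLProcessStatusRPCServer:

import random
from xmlrpc.server import SimpleXMLRPCServer


def StartRPCServer(preferred_port, number_of_attempts=20):
    """Tries the preferred port first, then falls back to random ports."""
    ports_to_try = [preferred_port] + [
        random.randint(1024, 60000) for _ in range(number_of_attempts)]

    for port in ports_to_try:
        try:
            server = SimpleXMLRPCServer(
                ('localhost', port), allow_none=True, logRequests=False)
            return port, server
        except OSError:
            # The port is already in use (or otherwise unavailable).
            continue

    return 0, None


port, rpc_server = StartRPCServer(preferred_port=8765)
print('RPC server listening on port: {0:d}'.format(port))
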
Example #20
    def UpdateTaskAsPendingMerge(self, task):
        """Updates the task manager to reflect that the task is ready to be merged.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not queued, processing or abandoned, or
          the task was abandoned and has a retry task.
    """
        with self._lock:
            is_abandoned = task.identifier in self._tasks_abandoned
            is_processing = task.identifier in self._tasks_processing
            is_queued = task.identifier in self._tasks_queued

            if not is_queued and not is_processing and not is_abandoned:
                raise KeyError('Status of task {0:s} is unknown.'.format(
                    task.identifier))

            if is_abandoned and task.has_retry:
                raise KeyError(
                    'Will not merge a task {0:s} with retry task.'.format(
                        task.identifier))

            if is_queued:
                logger.debug('Task {0:s} was queued, now merging.'.format(
                    task.identifier))
                del self._tasks_queued[task.identifier]

            if is_processing:
                logger.debug('Task {0:s} was processing, now merging.'.format(
                    task.identifier))
                del self._tasks_processing[task.identifier]

            if is_abandoned:
                logger.debug('Task {0:s} was abandoned, now merging.'.format(
                    task.identifier))
                del self._tasks_abandoned[task.identifier]

            self._tasks_pending_merge.PushTask(task)

            self.SampleTaskStatus(task, 'pending_merge')

            task.UpdateProcessingTime()
            self._UpdateLatestProcessingTime(task)
Example #21
    def _AnalyzeEvents(self,
                       storage_writer,
                       analysis_plugins,
                       event_filter=None):
        """Analyzes events in a Plaso storage.

    Args:
      storage_writer (StorageWriter): storage writer.
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
      event_filter (Optional[EventObjectFilter]): event filter.

    Returns:
      collections.Counter: counter containing information about the events
          processed and filtered.

    Raises:
      RuntimeError: if a non-recoverable situation is encountered.
    """
        self._status = definitions.STATUS_INDICATOR_RUNNING
        self._number_of_consumed_analysis_reports = 0
        self._number_of_consumed_events = 0
        self._number_of_consumed_event_tags = 0
        self._number_of_consumed_sources = 0
        self._number_of_produced_analysis_reports = 0
        self._number_of_produced_events = 0
        self._number_of_produced_event_tags = 0
        self._number_of_produced_sources = 0

        number_of_filtered_events = 0

        logger.debug('Processing events.')

        filter_limit = getattr(event_filter, 'limit', None)

        for event in storage_writer.GetSortedEvents():
            event_data_identifier = event.GetEventDataIdentifier()
            event_data = storage_writer.GetAttributeContainerByIdentifier(
                events.EventData.CONTAINER_TYPE, event_data_identifier)

            event_data_stream_identifier = (
                event_data.GetEventDataStreamIdentifier())
            if event_data_stream_identifier:
                event_data_stream = storage_writer.GetAttributeContainerByIdentifier(
                    events.EventDataStream.CONTAINER_TYPE,
                    event_data_stream_identifier)
            else:
                event_data_stream = None

            event_identifier = event.GetIdentifier()
            event_tag = self._event_tag_index.GetEventTagByIdentifier(
                storage_writer, event_identifier)

            if event_filter:
                filter_match = event_filter.Match(event, event_data,
                                                  event_data_stream, event_tag)
            else:
                filter_match = None

            # pylint: disable=singleton-comparison
            if filter_match == False:
                number_of_filtered_events += 1
                continue

            for event_queue in self._event_queues.values():
                # TODO: Check for premature exit of analysis plugins.
                event_queue.PushItem((event, event_data, event_data_stream))

            self._number_of_consumed_events += 1

            if (event_filter and filter_limit
                    and filter_limit == self._number_of_consumed_events):
                break

        logger.debug('Finished pushing events to analysis plugins.')
        # Signal that we have finished adding events.
        for event_queue in self._event_queues.values():
            event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

        logger.debug('Processing analysis plugin results.')

        # TODO: use a task based approach.
        plugin_names = list(analysis_plugins.keys())
        while plugin_names:
            for plugin_name in list(plugin_names):
                if self._abort:
                    break

                # TODO: temporary solution.
                task = tasks.Task()
                task.storage_format = definitions.STORAGE_FORMAT_SQLITE
                task.identifier = plugin_name

                merge_ready = self._CheckTaskReadyForMerge(
                    definitions.STORAGE_FORMAT_SQLITE, task)
                if merge_ready:
                    self._PrepareMergeTaskStorage(
                        definitions.STORAGE_FORMAT_SQLITE, task)
                    self._status = definitions.STATUS_INDICATOR_MERGING

                    event_queue = self._event_queues[plugin_name]
                    del self._event_queues[plugin_name]

                    event_queue.Close()

                    task_storage_reader = self._GetMergeTaskStorage(
                        definitions.STORAGE_FORMAT_SQLITE, task)

                    try:
                        merge_helper = merge_helpers.AnalysisTaskMergeHelper(
                            task_storage_reader, task.identifier)

                        logger.debug('Starting merge of task: {0:s}'.format(
                            merge_helper.task_identifier))

                        number_of_containers = self._MergeAttributeContainers(
                            storage_writer, merge_helper)

                        logger.debug(
                            'Merged {0:d} containers of task: {1:s}'.format(
                                number_of_containers,
                                merge_helper.task_identifier))

                    finally:
                        task_storage_reader.Close()

                    self._RemoveMergeTaskStorage(
                        definitions.STORAGE_FORMAT_SQLITE, task)

                    self._status = definitions.STATUS_INDICATOR_RUNNING

                    # TODO: temporary solution.
                    plugin_names.remove(plugin_name)

        events_counter = collections.Counter()
        events_counter['Events filtered'] = number_of_filtered_events
        events_counter['Events processed'] = self._number_of_consumed_events

        return events_counter
Example #22
    def AnalyzeEvents(self,
                      session,
                      knowledge_base_object,
                      storage_writer,
                      data_location,
                      analysis_plugins,
                      processing_configuration,
                      event_filter=None,
                      event_filter_expression=None,
                      status_update_callback=None,
                      storage_file_path=None):
        """Analyzes events in a Plaso storage.

    Args:
      session (Session): session in which the events are analyzed.
      knowledge_base_object (KnowledgeBase): contains information from
          the source data needed for processing.
      storage_writer (StorageWriter): storage writer.
      data_location (str): path to the location that data files should
          be loaded from.
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      event_filter (Optional[EventObjectFilter]): event filter.
      event_filter_expression (Optional[str]): event filter expression.
      status_update_callback (Optional[function]): callback function for status
          updates.
      storage_file_path (Optional[str]): path to the session storage file.

    Returns:
      ProcessingStatus: processing status.

    Raises:
      KeyboardInterrupt: if a keyboard interrupt was raised.
      ValueError: if analysis plugins are missing.
    """
        if not analysis_plugins:
            raise ValueError('Missing analysis plugins')

        abort_kill = False
        keyboard_interrupt = False
        queue_full = False

        self._analysis_plugins = {}
        self._data_location = data_location
        self._event_filter_expression = event_filter_expression
        self._events_status = processing_status.EventsStatus()
        self._knowledge_base = knowledge_base_object
        self._processing_configuration = processing_configuration
        self._session = session
        self._status_update_callback = status_update_callback
        self._storage_file_path = storage_file_path

        stored_event_labels_counter = {}
        if storage_writer.HasAttributeContainers('event_label_count'):
            stored_event_labels_counter = {
                event_label_count.label: event_label_count
                for event_label_count in storage_writer.GetAttributeContainers(
                    'event_label_count')
            }

        self._event_labels_counter = collections.Counter()

        if storage_writer.HasAttributeContainers('parser_count'):
            parsers_counter = {
                parser_count.name: parser_count.number_of_events
                for parser_count in storage_writer.GetAttributeContainers(
                    'parser_count')
            }

            total_number_of_events = parsers_counter['total']

        else:
            total_number_of_events = 0
            for stored_session in storage_writer.GetSessions():
                total_number_of_events += stored_session.parsers_counter[
                    'total']

        self._events_status.total_number_of_events = total_number_of_events

        # Set up the storage writer before the analysis processes.
        self._StartTaskStorage(definitions.STORAGE_FORMAT_SQLITE)

        self._StartAnalysisProcesses(analysis_plugins)

        self._StartProfiling(self._processing_configuration.profiling)

        # Start the status update thread after open of the storage writer
        # so we don't have to clean up the thread if the open fails.
        self._StartStatusUpdateThread()

        try:
            self._AnalyzeEvents(storage_writer,
                                analysis_plugins,
                                event_filter=event_filter)

            for key, value in self._event_labels_counter.items():
                event_label_count = stored_event_labels_counter.get(key, None)
                if event_label_count:
                    event_label_count.number_of_events += value
                    storage_writer.UpdateAttributeContainer(event_label_count)
                else:
                    event_label_count = counts.EventLabelCount(
                        label=key, number_of_events=value)
                    storage_writer.AddAttributeContainer(event_label_count)

            self._status = definitions.STATUS_INDICATOR_FINALIZING

        except errors.QueueFull:
            queue_full = True
            self._abort = True

        except KeyboardInterrupt:
            keyboard_interrupt = True
            self._abort = True

        finally:
            self._processing_status.aborted = self._abort
            session.aborted = self._abort

            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            self._StopProfiling()

        # Update the status view one last time before the analysis processes are
        # stopped.
        self._UpdateStatus()

        if queue_full:
            # TODO: handle abort on queue full more elegantly.
            abort_kill = True
        else:
            try:
                self._StopAnalysisProcesses(abort=self._abort)

            except KeyboardInterrupt:
                keyboard_interrupt = True
                abort_kill = True

        if abort_kill:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        try:
            self._StopTaskStorage(definitions.STORAGE_FORMAT_SQLITE,
                                  session.identifier,
                                  abort=self._abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Analysis aborted.')
        else:
            logger.debug('Analysis completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        # Reset values.
        self._analysis_plugins = {}
        self._data_location = None
        self._event_filter_expression = None
        self._knowledge_base = None
        self._processing_configuration = None
        self._session = None
        self._status_update_callback = None
        self._storage_file_path = None

        if keyboard_interrupt:
            raise KeyboardInterrupt

        return self._processing_status
Example #23
    def _Main(self):
        """The main loop."""
        self._StartProfiling(self._processing_configuration.profiling)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        # Creating the threading event in the constructor will cause a pickle
        # error on Windows when an analysis process is created.
        self._foreman_status_wait_event = threading.Event()
        self._status = definitions.STATUS_INDICATOR_ANALYZING

        task = tasks.Task()
        task.storage_format = definitions.STORAGE_FORMAT_SQLITE
        # TODO: temporary solution.
        task.identifier = self._analysis_plugin.plugin_name

        self._task = task

        task_storage_writer = self._storage_factory.CreateTaskStorageWriter(
            definitions.STORAGE_FORMAT_SQLITE)

        if self._serializers_profiler:
            task_storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        if self._storage_profiler:
            task_storage_writer.SetStorageProfiler(self._storage_profiler)

        storage_file_path = self._GetTaskStorageFilePath(
            definitions.STORAGE_FORMAT_SQLITE, task)
        task_storage_writer.Open(path=storage_file_path)

        self._analysis_mediator = self._CreateAnalysisMediator(
            self._session, self._knowledge_base,
            self._processing_configuration, self._data_location)
        self._analysis_mediator.SetStorageWriter(task_storage_writer)

        # TODO: set event_filter_expression in mediator.

        task_storage_writer.AddAttributeContainer(task)

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    queued_object = self._event_queue.PopItem()

                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception {0!s}.'.format(
                            type(exception)))
                    break

                if isinstance(queued_object, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessEvent(self._analysis_mediator, *queued_object)

                self._number_of_consumed_events += 1

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
                    self._name, self._pid))

            if not self._abort:
                self._status = definitions.STATUS_INDICATOR_REPORTING

                self._analysis_mediator.ProduceAnalysisReport(
                    self._analysis_plugin)

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        finally:
            task.aborted = self._abort
            task_storage_writer.UpdateAttributeContainer(task)

            task_storage_writer.Close()

            if self._serializers_profiler:
                task_storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                task_storage_writer.SetStorageProfiler(None)

        try:
            self._FinalizeTaskStorageWriter(definitions.STORAGE_FORMAT_SQLITE,
                                            task)
        except IOError as exception:
            logger.warning(
                'Unable to finalize task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            self._status = definitions.STATUS_INDICATOR_ABORTED
        else:
            self._status = definitions.STATUS_INDICATOR_COMPLETED

        logger.debug('Wait for foreman status wait event')
        self._foreman_status_wait_event.clear()
        self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        self._StopProfiling()

        self._analysis_mediator = None
        self._foreman_status_wait_event = None
        self._task = None

        try:
            self._event_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))
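
Note: the event loop above is a sentinel-terminated consumer: it pops items until the queue closes, stays empty for too long, or a QueueAbort marker arrives. A stripped-down sketch with queue.Queue and a stand-in sentinel class:

import queue


class QueueAbort(object):
    """Sentinel object pushed onto the queue to tell consumers to stop."""


def ConsumeItems(event_queue):
    while True:
        try:
            queued_object = event_queue.get(timeout=1)
        except queue.Empty:
            break

        if isinstance(queued_object, QueueAbort):
            break

        print('processing: {0!s}'.format(queued_object))


event_queue = queue.Queue()
for item in ('event-1', 'event-2', QueueAbort()):
    event_queue.put(item)

ConsumeItems(event_queue)
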
Example #24
    def ProcessSources(self,
                       source_configurations,
                       storage_writer,
                       session_identifier,
                       processing_configuration,
                       enable_sigsegv_handler=False,
                       status_update_callback=None,
                       storage_file_path=None):
        """Processes the sources and extract events.

    Args:
      source_configurations (list[SourceConfigurationArtifact]): configurations
          of the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      session_identifier (str): the identifier of the session the tasks are
          part of.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
          should be enabled.
      status_update_callback (Optional[function]): callback function for status
          updates.
      storage_file_path (Optional[str]): path to the session storage file.

    Returns:
      ProcessingStatus: processing status.
    """
        self._enable_sigsegv_handler = enable_sigsegv_handler

        # Keep track of certain values so we can spawn new extraction workers.
        self._processing_configuration = processing_configuration

        self._debug_output = processing_configuration.debug_output
        self._log_filename = processing_configuration.log_filename
        self._status_update_callback = status_update_callback
        self._storage_file_path = storage_file_path
        self._storage_writer = storage_writer
        self._task_storage_format = processing_configuration.task_storage_format

        # Set up the task queue.
        task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
            delay_open=True,
            linger_seconds=0,
            maximum_items=1,
            name='main_task_queue',
            timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
        self._task_queue = task_outbound_queue

        # The ZeroMQ backed queue must be started first, so we can save its port.
        # TODO: raises: attribute-defined-outside-init
        # self._task_queue.name = 'Task queue'
        self._task_queue.Open()
        self._task_queue_port = self._task_queue.port

        # Set up the task storage before the worker processes.
        self._StartTaskStorage(self._task_storage_format)

        for worker_number in range(self._number_of_worker_processes):
            process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
            worker_process = self._StartWorkerProcess(process_name)
            if not worker_process:
                logger.error('Unable to create worker process: {0:d}'.format(
                    worker_number))

        self._StartProfiling(self._processing_configuration.profiling)
        self._task_manager.StartProfiling(
            self._processing_configuration.profiling, self._name)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(self._storage_profiler)

        self._StartStatusUpdateThread()

        self._parsers_counter = collections.Counter({
            parser_count.name: parser_count
            for parser_count in self._storage_writer.GetAttributeContainers(
                'parser_count')
        })

        try:
            self._ProcessSources(source_configurations, storage_writer,
                                 session_identifier)

        finally:
            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                storage_writer.SetStorageProfiler(None)

            self._task_manager.StopProfiling()
            self._StopProfiling()

        try:
            self._StopExtractionProcesses(abort=self._abort)

        except KeyboardInterrupt:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        # The task queue should be closed by _StopExtractionProcesses; this
        # close is a failsafe.
        self._task_queue.Close(abort=True)

        if self._processing_status.error_path_specs:
            task_storage_abort = True
        else:
            task_storage_abort = self._abort

        try:
            self._StopTaskStorage(self._task_storage_format,
                                  session_identifier,
                                  abort=task_storage_abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logger.debug('Processing completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        # Reset values.
        self._enable_sigsegv_handler = None

        self._processing_configuration = None

        self._status_update_callback = None
        self._storage_file_path = None
        self._storage_writer = None
        self._task_storage_format = None

        return self._processing_status
Example #25
    def _UpdateProcessingStatus(self, pid, process_status, used_memory):
        """Updates the processing status.

    Args:
      pid (int): process identifier (PID) of the worker process.
      process_status (dict[str, object]): status values received from
          the worker process.
      used_memory (int): size of used memory in bytes.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
        self._RaiseIfNotRegistered(pid)

        if not process_status:
            return

        process = self._processes_per_pid[pid]

        processing_status = process_status.get('processing_status', None)

        self._RaiseIfNotMonitored(pid)

        display_name = process_status.get('display_name', '')

        number_of_consumed_event_tags = process_status.get(
            'number_of_consumed_event_tags', None)
        number_of_produced_event_tags = process_status.get(
            'number_of_produced_event_tags', None)

        number_of_consumed_events = process_status.get(
            'number_of_consumed_events', None)
        number_of_produced_events = process_status.get(
            'number_of_produced_events', None)

        number_of_consumed_extraction_warnings = process_status.get(
            'number_of_consumed_extraction_warnings', None)
        number_of_produced_extraction_warnings = process_status.get(
            'number_of_produced_extraction_warnings', None)

        number_of_consumed_reports = process_status.get(
            'number_of_consumed_reports', None)
        number_of_produced_reports = process_status.get(
            'number_of_produced_reports', None)

        number_of_consumed_sources = process_status.get(
            'number_of_consumed_sources', None)
        number_of_produced_sources = process_status.get(
            'number_of_produced_sources', None)

        if processing_status != definitions.STATUS_INDICATOR_IDLE:
            last_activity_timestamp = process_status.get(
                'last_activity_timestamp', 0.0)

            if last_activity_timestamp:
                last_activity_timestamp += self._worker_timeout

                current_timestamp = time.time()
                if current_timestamp > last_activity_timestamp:
                    logger.error((
                        'Process {0:s} (PID: {1:d}) has not reported activity within '
                        'the timeout period.').format(process.name, pid))
                    processing_status = definitions.STATUS_INDICATOR_NOT_RESPONDING

        self._processing_status.UpdateWorkerStatus(
            process.name, processing_status, pid, used_memory, display_name,
            number_of_consumed_sources, number_of_produced_sources,
            number_of_consumed_events, number_of_produced_events,
            number_of_consumed_event_tags, number_of_produced_event_tags,
            number_of_consumed_reports, number_of_produced_reports,
            number_of_consumed_extraction_warnings,
            number_of_produced_extraction_warnings)

        task_identifier = process_status.get('task_identifier', '')
        if not task_identifier:
            return

        try:
            self._task_manager.UpdateTaskAsProcessingByIdentifier(
                task_identifier)
            return
        except KeyError:
            logger.debug(
                'Worker {0:s} is processing unknown task: {1:s}.'.format(
                    process.name, task_identifier))
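
Note: the freshness check treats a worker as not responding when its last reported activity plus the worker timeout lies in the past. The same arithmetic in isolation, with made-up timestamps and a made-up timeout value:

import time

WORKER_TIMEOUT = 15 * 60  # seconds a worker may stay silent

# Pretend the last status update arrived 20 minutes ago.
last_activity_timestamp = time.time() - (20 * 60)

if last_activity_timestamp:
    last_activity_timestamp += WORKER_TIMEOUT

    if time.time() > last_activity_timestamp:
        print('worker has not reported activity within the timeout period')
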
Example #26
    def _Main(self):
        """The main loop."""
        # We need a resolver context per process to prevent multiprocessing
        # issues with file objects stored in images.
        self._resolver_context = context.Context()

        for credential_configuration in self._processing_configuration.credentials:
            resolver.Resolver.key_chain.SetCredential(
                credential_configuration.path_spec,
                credential_configuration.credential_type,
                credential_configuration.credential_data)

        self._parser_mediator = self._CreateParserMediator(
            self._knowledge_base, self._resolver_context,
            self._processing_configuration)

        # We need to initialize the parser and hasher objects after the process
        # has forked otherwise on Windows the "fork" will fail with
        # a PickleError for Python modules that cannot be pickled.
        self._extraction_worker = worker.EventExtractionWorker(
            parser_filter_expression=(
                self._processing_configuration.parser_filter_expression))

        self._extraction_worker.SetExtractionConfiguration(
            self._processing_configuration.extraction)

        self._parser_mediator.StartProfiling(
            self._processing_configuration.profiling, self._name,
            self._process_information)
        self._StartProfiling(self._processing_configuration.profiling)

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(
                self._analyzers_profiler)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(
                self._processing_profiler)

        logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
            self._name, self._pid))

        self._status = definitions.STATUS_INDICATOR_RUNNING

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring task queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    task = self._task_queue.PopItem()
                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception: {0!s}.'.format(
                            type(exception)))
                    break

                if isinstance(task, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessTask(task)

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
                    self._name, self._pid))

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(None)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(None)

        self._StopProfiling()
        self._parser_mediator.StopProfiling()

        self._extraction_worker = None
        self._file_system_cache = []
        self._parser_mediator = None
        self._resolver_context = None

        if self._abort:
            self._status = definitions.STATUS_INDICATOR_ABORTED
        else:
            self._status = definitions.STATUS_INDICATOR_COMPLETED

        logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
            self._name, self._pid))

        try:
            self._task_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))
Example #27
    def _ScheduleTasks(self, storage_writer, session_identifier):
        """Schedules tasks.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage.
      session_identifier (str): the identifier of the session the tasks are
          part of.
    """
        logger.debug('Task scheduler started')

        self._status = definitions.STATUS_INDICATOR_RUNNING

        # TODO: make tasks persistent.

        # TODO: protect task scheduler loop by catch all and
        # handle abort path.

        event_source_heap = _EventSourceHeap()

        self._FillEventSourceHeap(storage_writer,
                                  event_source_heap,
                                  start_with_first=True)

        event_source = event_source_heap.PopEventSource()

        task = None
        has_pending_tasks = True

        while event_source or has_pending_tasks:
            if self._abort:
                break

            try:
                if not task:
                    task = self._task_manager.CreateRetryTask()

                if not task and event_source:
                    task = self._task_manager.CreateTask(
                        session_identifier,
                        storage_format=self._task_storage_format)
                    task.file_entry_type = event_source.file_entry_type
                    task.path_spec = event_source.path_spec
                    event_source = None

                    self._number_of_consumed_sources += 1

                if task:
                    if not self._ScheduleTask(task):
                        self._task_manager.SampleTaskStatus(
                            task, 'schedule_attempted')

                    else:
                        path_spec_string = self._GetPathSpecificationString(
                            task.path_spec)
                        logger.debug(
                            'Scheduled task: {0:s} for path specification: {1:s}'
                            .format(task.identifier,
                                    path_spec_string.replace('\n', ' ')))

                        self._task_manager.SampleTaskStatus(task, 'scheduled')

                        task = None

                # Limit the number of attribute containers from a single task-based
                # storage file that are merged per loop to keep tasks flowing.
                merge_duration = time.time()

                number_of_containers = self._MergeTaskStorage(
                    storage_writer,
                    session_identifier,
                    maximum_number_of_containers=(
                        self._maximum_number_of_containers))

                merge_duration = time.time() - merge_duration

                if merge_duration > 0.0 and number_of_containers > 0:
                    containers_per_second = number_of_containers / merge_duration
                    self._maximum_number_of_containers = int(
                        0.5 * containers_per_second)

                if not event_source_heap.IsFull():
                    self._FillEventSourceHeap(storage_writer,
                                              event_source_heap)
                else:
                    logger.debug('Event source heap is full.')

                if not task and not event_source:
                    event_source = event_source_heap.PopEventSource()

                has_pending_tasks = self._task_manager.HasPendingTasks()

            except KeyboardInterrupt:
                if self._debug_output:
                    traceback.print_exc()
                self._abort = True

                self._processing_status.aborted = True
                if self._status_update_callback:
                    self._status_update_callback(self._processing_status)

        for task in self._task_manager.GetFailedTasks():
            warning = warnings.ExtractionWarning(
                message='Worker failed to process path specification',
                path_spec=task.path_spec)
            self._storage_writer.AddAttributeContainer(warning)
            self._processing_status.error_path_specs.append(task.path_spec)

        self._status = definitions.STATUS_INDICATOR_IDLE

        if self._abort:
            logger.debug('Task scheduler aborted')
        else:
            logger.debug('Task scheduler stopped')
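
Note: the merge limit is adapted from measured throughput; the next loop is allowed roughly half a second's worth of containers (0.5 * containers per second). A toy sketch of that feedback step, with merge_containers standing in for _MergeTaskStorage:

import time


def merge_containers(maximum_number_of_containers):
    """Pretend merge step; returns how many containers were merged."""
    number_merged = min(maximum_number_of_containers, 800)
    time.sleep(0.01 * (number_merged / 100.0))  # simulate the merge taking time
    return number_merged


maximum_number_of_containers = 50

for _ in range(3):
    merge_duration = time.time()
    number_of_containers = merge_containers(maximum_number_of_containers)
    merge_duration = time.time() - merge_duration

    # Feedback step: target roughly 0.5 seconds of merging per scheduler loop.
    if merge_duration > 0.0 and number_of_containers > 0:
        containers_per_second = number_of_containers / merge_duration
        maximum_number_of_containers = int(0.5 * containers_per_second)

    print('next merge limit: {0:d}'.format(maximum_number_of_containers))
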
Example #28
    def _ProcessTask(self, task):
        """Processes a task.

    Args:
      task (Task): task.
    """
        logger.debug('Started processing task: {0:s}.'.format(task.identifier))

        if self._tasks_profiler:
            self._tasks_profiler.Sample(task, 'processing_started')

        task.storage_format = self._processing_configuration.task_storage_format

        self._task = task

        task_storage_writer = self._storage_factory.CreateTaskStorageWriter(
            self._processing_configuration.task_storage_format)

        if self._serializers_profiler:
            task_storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        if self._storage_profiler:
            task_storage_writer.SetStorageProfiler(self._storage_profiler)

        self._parser_mediator.SetStorageWriter(task_storage_writer)

        storage_file_path = self._GetTaskStorageFilePath(
            self._processing_configuration.task_storage_format, task)
        task_storage_writer.Open(path=storage_file_path,
                                 session_identifier=task.session_identifier,
                                 task_identifier=task.identifier)

        try:
            task_storage_writer.AddAttributeContainer(task)

            # TODO: add support for more task types.
            self._ProcessPathSpec(self._extraction_worker,
                                  self._parser_mediator, task.path_spec)
            self._number_of_consumed_sources += 1

        finally:
            task.aborted = self._abort
            task_storage_writer.UpdateAttributeContainer(task)

            task_storage_writer.Close()

        self._parser_mediator.SetStorageWriter(None)

        try:
            self._FinalizeTaskStorageWriter(
                self._processing_configuration.task_storage_format, task)
        except IOError:
            pass

        self._task = None

        if self._tasks_profiler:
            self._tasks_profiler.Sample(task, 'processing_completed')

        logger.debug('Completed processing task: {0:s}.'.format(
            task.identifier))