Example #1
  def __init__(self):
    """Initializes a psort multi-processing engine."""
    super(PsortMultiProcessEngine, self).__init__()
    self._analysis_plugins = {}
    self._completed_analysis_processes = set()
    self._data_location = None
    self._event_filter_expression = None
    self._event_queues = {}
    self._event_tag_index = event_tag_index.EventTagIndex()
    self._events_status = processing_status.EventsStatus()
    # The export event heap is used to make sure the events are sorted in
    # a deterministic way.
    self._export_event_heap = PsortEventHeap()
    self._export_event_timestamp = 0
    self._knowledge_base = None
    self._memory_profiler = None
    self._merge_task = None
    self._number_of_consumed_event_tags = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_consumed_warnings = 0
    self._number_of_produced_event_tags = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0
    self._number_of_produced_warnings = 0
    self._processing_configuration = None
    self._processing_profiler = None
    self._serializers_profiler = None
    self._status = definitions.STATUS_INDICATOR_IDLE
    self._status_update_callback = None
    self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
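
The comment about the export event heap carries the one piece of reasoning in this constructor: keying a heap on event timestamps plus tie-breaking content yields the same pop order regardless of the order events were pushed. A minimal sketch of that idea using Python's heapq, with a simplified key rather than plaso's actual PsortEventHeap internals:

import heapq

class SketchEventHeap(object):
  """Simplified stand-in for PsortEventHeap (not plaso's real API)."""

  def __init__(self):
    self._heap = []
    self._push_count = 0

  def PushEvent(self, timestamp, content, event):
    # Key on (timestamp, content) so pop order is deterministic; the push
    # counter only breaks exact ties so heapq never compares raw events.
    self._push_count += 1
    heapq.heappush(self._heap, (timestamp, content, self._push_count, event))

  def PopEvent(self):
    # Events come back sorted by timestamp, then content.
    if not self._heap:
      return None
    _, _, _, event = heapq.heappop(self._heap)
    return event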
Example #2
  def __init__(self):
    """Initializes an output and formatting multi-processing engine."""
    super(OutputAndFormattingMultiProcessEngine, self).__init__()
    self._event_tag_index = event_tag_index.EventTagIndex()
    self._events_status = processing_status.EventsStatus()
    # The export event heap is used to make sure the events are sorted in
    # a deterministic way.
    self._export_event_heap = PsortEventHeap()
    self._export_event_timestamp = 0
    self._knowledge_base = None
    self._number_of_consumed_events = 0
    self._processing_configuration = None
    self._status = definitions.STATUS_INDICATOR_IDLE
    self._status_update_callback = None
Example #3
  def __init__(self, worker_memory_limit=None, worker_timeout=None):
    """Initializes a psort multi-processing engine.

    Args:
      worker_memory_limit (Optional[int]): maximum amount of memory a worker is
          allowed to consume, where None represents the default memory limit
          and 0 represents no limit.
      worker_timeout (Optional[float]): number of minutes before a worker
          process that is not providing status updates is considered inactive,
          where None or 0.0 represents the default timeout.
    """
    if worker_memory_limit is None:
      worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

    if not worker_timeout:
      worker_timeout = definitions.DEFAULT_WORKER_TIMEOUT

    super(PsortMultiProcessEngine, self).__init__()
    self._analysis_plugins = {}
    self._completed_analysis_processes = set()
    self._data_location = None
    self._event_filter_expression = None
    self._event_queues = {}
    self._event_tag_index = event_tag_index.EventTagIndex()
    self._events_status = processing_status.EventsStatus()
    # The export event heap is used to make sure the events are sorted in
    # a deterministic way.
    self._export_event_heap = PsortEventHeap()
    self._export_event_timestamp = 0
    self._knowledge_base = None
    self._memory_profiler = None
    self._merge_task = None
    self._number_of_consumed_event_tags = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_consumed_warnings = 0
    self._number_of_produced_event_tags = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0
    self._number_of_produced_warnings = 0
    self._processing_configuration = None
    self._processing_profiler = None
    self._serializers_profiler = None
    self._status = definitions.STATUS_INDICATOR_IDLE
    self._status_update_callback = None
    self._worker_memory_limit = worker_memory_limit
    self._worker_timeout = worker_timeout
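
Note the asymmetry in the fallback logic above: worker_memory_limit is replaced only when it is None, so an explicit 0 survives and means "no limit", while worker_timeout is replaced on any falsy value, so 0.0 cannot be used to disable the timeout. Hypothetical calls illustrating the difference:

# Hypothetical calls, assuming plaso's definitions module provides the defaults:
engine = PsortMultiProcessEngine()                       # default limit and timeout
engine = PsortMultiProcessEngine(worker_memory_limit=0)  # keeps 0: no memory limit
engine = PsortMultiProcessEngine(worker_timeout=0.0)     # falls back to the default timeout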
Example #4
File: psort.py  Project: x35029/plaso
    def __init__(self, use_zeromq=True):
        """Initializes an engine object.

        Args:
          use_zeromq (Optional[bool]): True if ZeroMQ should be used for queuing
              instead of Python's multiprocessing queue.
        """
        super(PsortMultiProcessEngine, self).__init__()
        self._analysis_plugins = {}
        self._completed_analysis_processes = set()
        self._data_location = None
        self._event_filter_expression = None
        self._event_queues = {}
        self._event_tag_index = event_tag_index.EventTagIndex()
        self._events_status = processing_status.EventsStatus()
        # The export event heap is used to make sure the events are sorted in
        # a deterministic way.
        self._export_event_heap = PsortEventHeap()
        self._export_event_timestamp = 0
        self._guppy_memory_profiler = None
        self._knowledge_base = None
        self._memory_profiler = None
        self._merge_task = None
        self._number_of_consumed_event_tags = 0
        self._number_of_consumed_events = 0
        self._number_of_consumed_reports = 0
        self._number_of_consumed_sources = 0
        self._number_of_consumed_warnings = 0
        self._number_of_duplicate_events = 0
        self._number_of_macb_grouped_events = 0
        self._number_of_produced_event_tags = 0
        self._number_of_produced_events = 0
        self._number_of_produced_reports = 0
        self._number_of_produced_sources = 0
        self._number_of_produced_warnings = 0
        self._processing_configuration = None
        self._processing_profiler = None
        self._serializers_profiler = None
        self._status = definitions.PROCESSING_STATUS_IDLE
        self._status_update_callback = None
        self._use_zeromq = use_zeromq
        self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
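
The only knob in this older variant is the queuing backend; a hypothetical call that disables ZeroMQ in favor of Python's multiprocessing queues:

# Hypothetical call:
engine = PsortMultiProcessEngine(use_zeromq=False)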
Example #5
  def ExportEvents(
      self, knowledge_base_object, storage_reader, output_module,
      processing_configuration, deduplicate_events=True, event_filter=None,
      status_update_callback=None, time_slice=None, use_time_slicer=False):
    """Exports events using an output module.

    Args:
      knowledge_base_object (KnowledgeBase): contains information from
          the source data needed for processing.
      storage_reader (StorageReader): storage reader.
      output_module (OutputModule): output module.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      deduplicate_events (Optional[bool]): True if events should be
          deduplicated.
      event_filter (Optional[EventObjectFilter]): event filter.
      status_update_callback (Optional[function]): callback function for status
          updates.
      time_slice (Optional[TimeSlice]): slice of time to output.
      use_time_slicer (Optional[bool]): True if the 'time slicer' should be
          used. The 'time slicer' will provide a context of events around
          an event of interest.
    """
    self._events_status = processing_status.EventsStatus()
    self._knowledge_base = knowledge_base_object
    self._processing_configuration = processing_configuration
    self._status_update_callback = status_update_callback

    total_number_of_events = 0
    for session in storage_reader.GetSessions():
      total_number_of_events += session.parsers_counter['total']

    self._events_status.total_number_of_events = total_number_of_events

    output_module.WriteHeader()

    self._StartStatusUpdateThread()

    self._StartProfiling(self._processing_configuration.profiling)

    try:
      self._ExportEvents(
          storage_reader, output_module, deduplicate_events=deduplicate_events,
          event_filter=event_filter, time_slice=time_slice,
          use_time_slicer=use_time_slicer)

    finally:
      # Stop the status update thread after close of the storage writer
      # so we include the storage sync to disk in the status updates.
      self._StopStatusUpdateThread()

    output_module.WriteFooter()

    self._StopProfiling()

    self._UpdateForemanProcessStatus()

    if self._status_update_callback:
      self._status_update_callback(self._processing_status)

    # Reset values.
    self._status_update_callback = None
    self._processing_configuration = None
    self._knowledge_base = None
    self._events_status = None
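
A hedged sketch of driving ExportEvents end to end; every name below is a placeholder the caller would have built beforehand (storage reader, output module, knowledge base, configuration), not code taken from plaso itself:

# Hypothetical driver; all arguments are placeholders created elsewhere.
def RunExport(engine, knowledge_base, storage_reader, output_module,
              processing_configuration):
  engine.ExportEvents(
      knowledge_base, storage_reader, output_module, processing_configuration,
      deduplicate_events=True,
      status_update_callback=lambda status: None)  # plug in a real callback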
Example #6
  def AnalyzeEvents(
      self, knowledge_base_object, storage_writer, data_location,
      analysis_plugins, processing_configuration, event_filter=None,
      event_filter_expression=None, status_update_callback=None):
    """Analyzes events in a plaso storage.

    Args:
      knowledge_base_object (KnowledgeBase): contains information from
          the source data needed for processing.
      storage_writer (StorageWriter): storage writer.
      data_location (str): path to the location that data files should
          be loaded from.
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      event_filter (Optional[EventObjectFilter]): event filter.
      event_filter_expression (Optional[str]): event filter expression.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Raises:
      KeyboardInterrupt: if a keyboard interrupt was raised.
    """
    if not analysis_plugins:
      return

    keyboard_interrupt = False

    self._analysis_plugins = {}
    self._data_location = data_location
    self._event_filter_expression = event_filter_expression
    self._events_status = processing_status.EventsStatus()
    self._knowledge_base = knowledge_base_object
    self._status_update_callback = status_update_callback
    self._processing_configuration = processing_configuration

    self._StartProfiling(self._processing_configuration.profiling)

    # Set up the storage writer before the analysis processes.
    storage_writer.StartTaskStorage()

    self._StartAnalysisProcesses(storage_writer, analysis_plugins)

    # Start the status update thread after open of the storage writer
    # so we don't have to clean up the thread if the open fails.
    self._StartStatusUpdateThread()

    try:
      # Open the storage file after creating the worker processes otherwise
      # the session store will remain locked as long as the worker processes
      # are alive.
      storage_writer.Open()
      storage_writer.WriteSessionStart()

      try:
        storage_writer.WriteSessionConfiguration()

        self._AnalyzeEvents(
            storage_writer, analysis_plugins, event_filter=event_filter)

        self._status = definitions.STATUS_INDICATOR_FINALIZING

      except KeyboardInterrupt:
        keyboard_interrupt = True
        self._abort = True

        self._processing_status.aborted = True
        if self._status_update_callback:
          self._status_update_callback(self._processing_status)

      finally:
        storage_writer.WriteSessionCompletion(aborted=self._abort)

        storage_writer.Close()

    finally:
      # Stop the status update thread after close of the storage writer
      # so we include the storage sync to disk in the status updates.
      self._StopStatusUpdateThread()

    try:
      self._StopAnalysisProcesses(abort=self._abort)

    except KeyboardInterrupt:
      keyboard_interrupt = True

      self._AbortKill()

      # The abort can leave the main process unresponsive
      # due to incorrectly finalized IPC.
      self._KillProcess(os.getpid())

    self._StopProfiling()

    # Reset values.
    self._analysis_plugins = {}
    self._data_location = None
    self._event_filter_expression = None
    self._knowledge_base = None
    self._processing_configuration = None
    self._status_update_callback = None

    if keyboard_interrupt:
      raise KeyboardInterrupt
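
Calling this variant is fire-and-forget: it returns nothing and re-raises KeyboardInterrupt only after its own cleanup has run, so the caller needs at most a final user-facing handler. A hedged sketch with placeholder arguments:

# Hypothetical call; every argument is a placeholder set up by the caller.
try:
  engine.AnalyzeEvents(
      knowledge_base, storage_writer, data_location, analysis_plugins,
      processing_configuration)
except KeyboardInterrupt:
  # The engine has already stopped its analysis processes at this point.
  print('Analysis interrupted by user.')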
Example #7
    def AnalyzeEvents(self,
                      session,
                      knowledge_base_object,
                      storage_writer,
                      data_location,
                      analysis_plugins,
                      processing_configuration,
                      event_filter=None,
                      event_filter_expression=None,
                      status_update_callback=None,
                      storage_file_path=None):
        """Analyzes events in a Plaso storage.

        Args:
          session (Session): session in which the events are analyzed.
          knowledge_base_object (KnowledgeBase): contains information from
              the source data needed for processing.
          storage_writer (StorageWriter): storage writer.
          data_location (str): path to the location that data files should
              be loaded from.
          analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
              should be run and their names.
          processing_configuration (ProcessingConfiguration): processing
              configuration.
          event_filter (Optional[EventObjectFilter]): event filter.
          event_filter_expression (Optional[str]): event filter expression.
          status_update_callback (Optional[function]): callback function for
              status updates.
          storage_file_path (Optional[str]): path to the session storage file.

        Returns:
          ProcessingStatus: processing status.

        Raises:
          KeyboardInterrupt: if a keyboard interrupt was raised.
          ValueError: if analysis plugins are missing.
        """
        if not analysis_plugins:
            raise ValueError('Missing analysis plugins')

        abort_kill = False
        keyboard_interrupt = False
        queue_full = False

        self._analysis_plugins = {}
        self._data_location = data_location
        self._event_filter_expression = event_filter_expression
        self._events_status = processing_status.EventsStatus()
        self._knowledge_base = knowledge_base_object
        self._processing_configuration = processing_configuration
        self._session = session
        self._status_update_callback = status_update_callback
        self._storage_file_path = storage_file_path

        stored_event_labels_counter = {}
        if storage_writer.HasAttributeContainers('event_label_count'):
            stored_event_labels_counter = {
                event_label_count.label: event_label_count
                for event_label_count in storage_writer.GetAttributeContainers(
                    'event_label_count')
            }

        self._event_labels_counter = collections.Counter()

        if storage_writer.HasAttributeContainers('parser_count'):
            parsers_counter = {
                parser_count.name: parser_count.number_of_events
                for parser_count in storage_writer.GetAttributeContainers(
                    'parser_count')
            }

            total_number_of_events = parsers_counter['total']

        else:
            total_number_of_events = 0
            for stored_session in storage_writer.GetSessions():
                total_number_of_events += stored_session.parsers_counter[
                    'total']

        self._events_status.total_number_of_events = total_number_of_events

        # Set up the storage writer before the analysis processes.
        self._StartTaskStorage(definitions.STORAGE_FORMAT_SQLITE)

        self._StartAnalysisProcesses(analysis_plugins)

        self._StartProfiling(self._processing_configuration.profiling)

        # Start the status update thread after open of the storage writer
        # so we don't have to clean up the thread if the open fails.
        self._StartStatusUpdateThread()

        try:
            self._AnalyzeEvents(storage_writer,
                                analysis_plugins,
                                event_filter=event_filter)

            for key, value in self._event_labels_counter.items():
                event_label_count = stored_event_labels_counter.get(key, None)
                if event_label_count:
                    event_label_count.number_of_events += value
                    storage_writer.UpdateAttributeContainer(event_label_count)
                else:
                    event_label_count = counts.EventLabelCount(
                        label=key, number_of_events=value)
                    storage_writer.AddAttributeContainer(event_label_count)

            self._status = definitions.STATUS_INDICATOR_FINALIZING

        except errors.QueueFull:
            queue_full = True
            self._abort = True

        except KeyboardInterrupt:
            keyboard_interrupt = True
            self._abort = True

        finally:
            self._processing_status.aborted = self._abort
            session.aborted = self._abort

            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            self._StopProfiling()

        # Update the status view one last time before the analysis processes
        # are stopped.
        self._UpdateStatus()

        if queue_full:
            # TODO: handle abort on queue full more elegantly.
            abort_kill = True
        else:
            try:
                self._StopAnalysisProcesses(abort=self._abort)

            except KeyboardInterrupt:
                keyboard_interrupt = True
                abort_kill = True

        if abort_kill:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        try:
            self._StopTaskStorage(definitions.STORAGE_FORMAT_SQLITE,
                                  session.identifier,
                                  abort=self._abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Analysis aborted.')
        else:
            logger.debug('Analysis completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        # Reset values.
        self._analysis_plugins = {}
        self._data_location = None
        self._event_filter_expression = None
        self._knowledge_base = None
        self._processing_configuration = None
        self._session = None
        self._status_update_callback = None
        self._storage_file_path = None

        if keyboard_interrupt:
            raise KeyboardInterrupt

        return self._processing_status
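
Unlike the earlier variant, this one requires a session, raises ValueError when no analysis plugins are given, and returns the ProcessingStatus so the caller can check for an abort. A hedged sketch with placeholder arguments:

# Hypothetical call; session, writer, plugins and configuration come from
# the caller's own setup code.
processing_status = engine.AnalyzeEvents(
    session, knowledge_base, storage_writer, data_location,
    analysis_plugins, processing_configuration,
    storage_file_path=storage_file_path)

if processing_status.aborted:
  print('Analysis was aborted.')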