Python TaskMultiProcessEngine示例，plaso.multi_processing.task_engine.TaskMultiProcessEngine Python示例

示例#1

0

显示文件

文件： task_engine.py 项目： olivierh59500/plaso

    def testProcessSources(self):
        """Tests the PreprocessSources and ProcessSources function."""
        test_engine = task_engine.TaskMultiProcessEngine(
            maximum_number_of_tasks=100)

        source_path = self._GetTestFilePath([u'ímynd.dd'])
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location=u'/',
            parent=os_path_spec)

        test_engine.PreprocessSources([source_path_spec])

        session = sessions.Session()

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, u'storage.plaso')
            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, temp_file)

            test_engine.ProcessSources(session.identifier, [source_path_spec],
                                       storage_writer,
                                       parser_filter_expression=u'filestat')

示例#2

0

显示文件

    def testProcessSources(self):
        """Tests the PreprocessSources and ProcessSources function."""
        registry = artifacts_registry.ArtifactDefinitionsRegistry()
        reader = artifacts_reader.YamlArtifactsReader()
        path = shared_test_lib.GetTestFilePath(['artifacts'])
        registry.ReadFromDirectory(reader, path)

        test_engine = task_engine.TaskMultiProcessEngine(
            maximum_number_of_tasks=100)

        source_path = self._GetTestFilePath(['ímynd.dd'])
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location='/',
            parent=os_path_spec)

        test_engine.PreprocessSources(registry, [source_path_spec])

        session = sessions.Session()

        configuration = configurations.ProcessingConfiguration()
        configuration.parser_filter_expression = 'filestat'

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, 'storage.plaso')
            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, temp_file)

            test_engine.ProcessSources(session.identifier, [source_path_spec],
                                       storage_writer, configuration)

示例#3

0

显示文件

    def _CreateEngine(self, single_process_mode):
        """Creates an engine based on the front end settings.

    Args:
      single_process_mode (bool): True if the front-end should run in single
          process mode.

    Returns:
      BaseEngine: engine.
    """
        if single_process_mode:
            engine = single_process.SingleProcessEngine(
                debug_output=self._debug_mode,
                enable_profiling=self._enable_profiling,
                profiling_directory=self._profiling_directory,
                profiling_sample_rate=self._profiling_sample_rate,
                profiling_type=self._profiling_type)
        else:
            engine = multi_process_engine.TaskMultiProcessEngine(
                debug_output=self._debug_mode,
                enable_profiling=self._enable_profiling,
                profiling_directory=self._profiling_directory,
                profiling_sample_rate=self._profiling_sample_rate,
                profiling_type=self._profiling_type,
                use_zeromq=self._use_zeromq)

        return engine

示例#4

0

显示文件

文件： extraction_frontend.py 项目： marcurdy/plaso

    def _CreateEngine(self, single_process_mode):
        """Creates an engine based on the front end settings.

    Args:
      single_process_mode (bool): True if the front-end should run in single
          process mode.

    Returns:
      BaseEngine: engine.
    """
        if single_process_mode:
            engine = single_process.SingleProcessEngine()
        else:
            engine = multi_process_engine.TaskMultiProcessEngine(
                use_zeromq=self._use_zeromq)

        return engine

示例#5

0

显示文件

  def ExtractEventsFromSources(self):
    """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    scan_context = self.ScanSource(self._source_path)
    self._source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
        self._storage_format, session, self._storage_file_path)
    if not storage_writer:
      raise errors.BadConfigOption(
          'Unsupported storage format: {0:s}'.format(self._storage_format))

    single_process_mode = self._single_process_mode
    if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True

    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration(
        extraction_engine.knowledge_base)

    self._SetExtractionParsersAndPlugins(configuration, session)
    self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_file_object = filter_file.FilterFile(configuration.filter_file)
      filter_find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    processing_status = None
    if single_process_mode:
      logger.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logger.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    self._status_view.PrintExtractionSummary(processing_status)

示例#6

0

显示文件

    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported or an invalid collection filter was specified.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        scan_context = self.ScanSource(self._source_path)
        self._source_type = scan_context.source_type

        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            debug_mode=self._debug_mode,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year,
            text_prepend=self._text_prepend)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            if not self._process_archives or not is_archive:
                single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

        # If the source is a storage media image or device, or directory
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        session.enabled_parser_names = (
            configuration.parser_filter_expression.split(','))
        session.parser_filter_expression = self._parser_filter_expression

        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extraction_engine.knowledge_base.SetMountPath()
        extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        processing_status = None
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets confused
            # about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)

示例#7

0

显示文件

    def ExtractEventsFromSources(self):
        """Processes the sources and extract events.

    This is a stripped down copy of tools/log2timeline.py that doesn't support
    the full set of flags. The defaults for these are hard coded in the
    constructor of this class.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported or an invalid collection filter was specified.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        scan_context = self.ScanSource(self._source_path)
        source_type = scan_context.source_type

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        single_process_mode = self._single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine()

        # If the source is a directory or a storage media image
        # run pre-processing.
        if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        self._SetExtractionParsersAndPlugins(configuration, session)
        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        processing_status = None
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)

示例#8

0

显示文件

文件： psteal_tool.py 项目： Team-Firebugs/plaso

    def ExtractEventsFromSources(self):
        """Processes the sources and extract events.

    This is a stripped down copy of tools/log2timeline.py that doesn't support
    the full set of flags. The defaults for these are hard coded in the
    constructor of this class.

    Raises:
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(self._storage_file_path)

        scan_context = self.ScanSource()
        source_type = scan_context.source_type

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(self._source_path,
                                               source_type,
                                               filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write(u'\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write(u'Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            command_line_arguments=self._command_line_arguments,
            filter_file=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_zip_file.ZIPStorageFileWriter(
            session, self._storage_file_path)

        configuration = self._CreateProcessingConfiguration()

        single_process_mode = self._single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                use_zeromq=self._use_zeromq)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if (self._force_preprocessing
                or source_type in self._SOURCE_TYPES_TO_PREPROCESS):
            self._PreprocessSources(extraction_engine)

        if not configuration.parser_filter_expression:
            operating_system = extraction_engine.knowledge_base.GetValue(
                u'operating_system')
            operating_system_product = extraction_engine.knowledge_base.GetValue(
                u'operating_system_product')
            operating_system_version = extraction_engine.knowledge_base.GetValue(
                u'operating_system_version')
            parser_filter_expression = (
                self._parsers_manager.GetPresetForOperatingSystem(
                    operating_system, operating_system_product,
                    operating_system_version))

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

            configuration.parser_filter_expression = parser_filter_expression
            session.enabled_parser_names = list(
                self._parsers_manager.GetParserAndPluginNames(
                    parser_filter_expression=configuration.
                    parser_filter_expression))
            session.parser_filter_expression = configuration.parser_filter_expression

        if session.preferred_time_zone:
            try:
                extraction_engine.knowledge_base.SetTimeZone(
                    session.preferred_time_zone)
            except ValueError:
                # pylint: disable=protected-access
                logging.warning(
                    u'Unsupported time zone: {0:s}, defaulting to {1:s}'.
                    format(session.preferred_time_zone,
                           extraction_engine.knowledge_base._time_zone.zone))

        filter_find_specs = None
        if configuration.filter_file:
            environment_variables = (
                extraction_engine.knowledge_base.GetEnvironmentVariables())
            filter_find_specs = frontend_utils.BuildFindSpecsFromFile(
                configuration.filter_file,
                environment_variables=environment_variables)

        processing_status = None
        if single_process_mode:
            logging.debug(u'Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                filter_find_specs=filter_find_specs,
                status_update_callback=status_update_callback)

        else:
            logging.debug(u'Starting extraction in multi process mode.')

            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                filter_find_specs=filter_find_specs,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)

示例#9

0

显示文件

  def ExtractEventsFromSources(self):
    """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    scan_context = self.ScanSource(self._source_path)
    self._source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    if self._storage_format == definitions.STORAGE_FORMAT_SQLITE:
      storage_writer = storage_sqlite_file.SQLiteStorageFileWriter(
          session, self._storage_file_path)

    else:
      storage_writer = storage_zip_file.ZIPStorageFileWriter(
          session, self._storage_file_path)

    single_process_mode = self._single_process_mode
    if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True

    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration()

    if not configuration.parser_filter_expression:
      operating_system = extraction_engine.knowledge_base.GetValue(
          'operating_system')
      operating_system_product = extraction_engine.knowledge_base.GetValue(
          'operating_system_product')
      operating_system_version = extraction_engine.knowledge_base.GetValue(
          'operating_system_version')
      parser_filter_expression = (
          parsers_manager.ParsersManager.GetPresetForOperatingSystem(
              operating_system, operating_system_product,
              operating_system_version))

      if parser_filter_expression:
        logging.info('Parser filter expression changed to: {0:s}'.format(
            parser_filter_expression))

      configuration.parser_filter_expression = parser_filter_expression

      names_generator = parsers_manager.ParsersManager.GetParserAndPluginNames(
          parser_filter_expression=parser_filter_expression)

      session.enabled_parser_names = list(names_generator)
      session.parser_filter_expression = parser_filter_expression

    # Note session.preferred_time_zone will default to UTC but
    # self._preferred_time_zone is None when not set.
    if self._preferred_time_zone:
      try:
        extraction_engine.knowledge_base.SetTimeZone(self._preferred_time_zone)
      except ValueError:
        # pylint: disable=protected-access
        logging.warning(
            'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                self._preferred_time_zone,
                extraction_engine.knowledge_base._time_zone.zone))

    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_file_object = filter_file.FilterFile(configuration.filter_file)
      filter_find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    processing_status = None
    if single_process_mode:
      logging.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logging.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    self._status_view.PrintExtractionSummary(processing_status)