Example #1
  def testFileSystemWithFilterCollection(self):
    """Test collection on the file system with a filter."""
    dirname = u'.'
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/test_data/testdir/filter_.+.txt\n')
      temp_file.write('/test_data/.+evtx\n')
      temp_file.write('/AUTHORS\n')
      temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')

    test_collection_queue = queue.SingleThreadedQueue()
    test_store = queue.SingleThreadedQueue()
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_store, dirname, path_spec,
        resolver_context=resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    test_collector.SetFilter(find_specs)

    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumePathSpecs()

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    # Two files matching test_data/testdir/filter_*.txt, plus AUTHORS
    # and test_data/System.evtx.
    self.assertEqual(test_collector_queue_consumer.number_of_path_specs, 4)

    paths = test_collector_queue_consumer.GetFilePaths()

    current_directory = os.getcwd()

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_1.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_2.txt')
    self.assertFalse(expected_path in paths)

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_3.txt')
    self.assertTrue(expected_path in paths)

    expected_path = os.path.join(
        current_directory, 'AUTHORS')
    self.assertTrue(expected_path in paths)
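
Note: TestCollectorQueueConsumer is used by these tests but not defined in the
snippets. A minimal sketch of such a helper, assuming the SingleThreadedQueue
exposes a PopItem() method and raises errors.QueueEmpty when drained (both
assumptions, not shown in the source):

from plaso.lib import errors  # assumed import; matches errors.UserAbort usage elsewhere


class TestCollectorQueueConsumer(object):
  """Sketch of a test helper that drains path specifications from a queue."""

  def __init__(self, process_queue):
    """Initializes the consumer with the queue to drain."""
    self._queue = process_queue
    self.path_specs = []

  def ConsumePathSpecs(self):
    """Pops every queued path specification and keeps it for assertions."""
    while True:
      try:
        self.path_specs.append(self._queue.PopItem())
      except errors.QueueEmpty:
        break

  @property
  def number_of_path_specs(self):
    return len(self.path_specs)

  def GetFilePaths(self):
    """Returns the location attribute of each collected path specification."""
    return [
        getattr(path_spec, u'location', u'') for path_spec in self.path_specs]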
Example #2
  def testImageWithFilterCollection(self):
    """Test collection on a storage media image file with a filter."""
    test_file = self._GetTestFilePath(['image.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/a_directory/.+zip\n')
      temp_file.write('/a_directory/another.+\n')
      temp_file.write('/passwords.txt\n')

    test_collection_queue = queue.SingleThreadedQueue()
    test_storage_queue = queue.SingleThreadedQueue()
    test_storage_queue_producer = queue.EventObjectQueueProducer(
        test_storage_queue)
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_storage_queue_producer, test_file,
        path_spec, resolver_context=resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    test_collector.SetFilter(find_specs)

    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumePathSpecs()

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    self.assertEqual(test_collector_queue_consumer.number_of_path_specs, 2)

    paths = test_collector_queue_consumer.GetFilePaths()

    # path_specs[0]
    # type: TSK
    # file_path: '/a_directory/another_file'
    # container_path: 'test_data/image.dd'
    # image_offset: 0
    self.assertEqual(paths[0], u'/a_directory/another_file')

    # path_specs[1]
    # type: TSK
    # file_path: '/passwords.txt'
    # container_path: 'test_data/image.dd'
    # image_offset: 0
    self.assertEqual(paths[1], u'/passwords.txt')
Example #3
  def testBuildFindSpecsFromFile(self):
    """Tests the BuildFindSpecsFromFile function."""
    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      # 2 hits.
      temp_file.write('/test_data/testdir/filter_.+.txt\n')
      # A single hit.
      temp_file.write('/test_data/.+evtx\n')
      # A single hit.
      temp_file.write('/AUTHORS\n')
      temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
      # This should not compile properly since it is missing file information.
      temp_file.write('failing/\n')
      # This should not fail during initial loading, but fail later on.
      temp_file.write('bad re (no close on that parenthesis/file\n')

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning(
          u'Unable to remove temporary file: {0:s} with error: {1:s}'.format(
              filter_name, exception))

    self.assertEqual(len(find_specs), 4)

    dirname = u'.'
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
    file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
    searcher = file_system_searcher.FileSystemSearcher(
        file_system, path_spec)

    path_spec_generator = searcher.Find(find_specs=find_specs)
    self.assertNotEqual(path_spec_generator, None)

    path_specs = list(path_spec_generator)
    # One evtx, one AUTHORS, two filter_*.txt files, total 4 files.
    self.assertEqual(len(path_specs), 4)

    with self.assertRaises(IOError):
      _ = engine_utils.BuildFindSpecsFromFile('thisfiledoesnotexist')

    file_system.Close()
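
The filter files written in these tests contain one path filter per line: an
absolute path whose segments may be regular expressions. A hedged sketch of
how a single line could be compiled into a dfVFS find specification (the real
BuildFindSpecsFromFile also performs attribute expansion and error reporting;
the FindSpec keyword arguments are assumed from dfVFS usage of this era):

from dfvfs.helpers import file_system_searcher


def BuildFindSpecFromLine(line):
  """Sketch: compiles one filter line into a find specification."""
  line = line.strip()
  if not line or line.endswith(u'/'):
    # A line such as 'failing/' has no file information and cannot compile.
    return None
  # Each path segment is treated as a regular expression, for example:
  # ['test_data', 'testdir', 'filter_.+.txt'].
  location_regex = line.lstrip(u'/').split(u'/')
  return file_system_searcher.FindSpec(
      location_regex=location_regex, case_sensitive=False)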
Example #4
    def testExtractPathSpecsFileSystemWithFilter(self):
        """Tests the ExtractPathSpecs function on the file system with a filter."""
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=u'.')

        filter_name = ''
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            filter_name = temp_file.name
            temp_file.write('/test_data/testdir/filter_.+.txt\n')
            temp_file.write('/test_data/.+evtx\n')
            temp_file.write('/AUTHORS\n')
            temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')

        resolver_context = context.Context()
        test_extractor = extractors.PathSpecExtractor(resolver_context)

        find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
        path_specs = list(
            test_extractor.ExtractPathSpecs([source_path_spec],
                                            find_specs=find_specs))

        try:
            os.remove(filter_name)
        except (OSError, IOError) as exception:
            logging.warning(
                (u'Unable to remove temporary file: {0:s} with error: {1:s}'
                 ).format(filter_name, exception))

        # Two files matching test_data/testdir/filter_*.txt, plus AUTHORS
        # and test_data/System.evtx.
        self.assertEqual(len(path_specs), 4)

        paths = self._GetFilePaths(path_specs)

        current_directory = os.getcwd()

        expected_path = os.path.join(current_directory, u'test_data',
                                     u'testdir', u'filter_1.txt')
        self.assertTrue(expected_path in paths)

        expected_path = os.path.join(current_directory, u'test_data',
                                     u'testdir', u'filter_2.txt')
        self.assertFalse(expected_path in paths)

        expected_path = os.path.join(current_directory, u'test_data',
                                     u'testdir', u'filter_3.txt')
        self.assertTrue(expected_path in paths)

        expected_path = os.path.join(current_directory, u'AUTHORS')
        self.assertTrue(expected_path in paths)
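
Note: _GetFilePaths is a test-class helper that is not shown in these
snippets. A plausible sketch, assuming each path specification carries its
location directly (the real helper may differ):

    def _GetFilePaths(self, path_specs):
        """Sketch: retrieves the file paths out of the path specifications."""
        paths = []
        for path_spec in path_specs:
            location = getattr(path_spec, u'location', None)
            if location is not None:
                paths.append(location)
        return paths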
Example #5
    def testExtractPathSpecsStorageMediaImageWithFilter(self):
        """Tests the ExtractPathSpecs function on an image file with a filter."""
        test_file = self._GetTestFilePath([u'ímynd.dd'])

        volume_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location=u'/',
            parent=volume_path_spec)

        filter_name = ''
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            filter_name = temp_file.name
            temp_file.write('/a_directory/.+zip\n')
            temp_file.write('/a_directory/another.+\n')
            temp_file.write('/passwords.txt\n')

        resolver_context = context.Context()
        test_extractor = extractors.PathSpecExtractor(resolver_context)

        find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
        path_specs = list(
            test_extractor.ExtractPathSpecs([source_path_spec],
                                            find_specs=find_specs))

        try:
            os.remove(filter_name)
        except (OSError, IOError) as exception:
            logging.warning(
                (u'Unable to remove temporary file: {0:s} with error: {1:s}'
                 ).format(filter_name, exception))

        self.assertEqual(len(path_specs), 2)

        paths = self._GetFilePaths(path_specs)

        # path_specs[0]
        # type: TSK
        # file_path: '/a_directory/another_file'
        # container_path: 'test_data/ímynd.dd'
        # image_offset: 0
        self.assertEqual(paths[0], u'/a_directory/another_file')

        # path_specs[1]
        # type: TSK
        # file_path: '/passwords.txt'
        # container_path: 'test_data/ímynd.dd'
        # image_offset: 0
        self.assertEqual(paths[1], u'/passwords.txt')
Example #6
    def _ExtractWithFilter(self,
                           source_path_specs,
                           destination_path,
                           output_writer,
                           filter_file_path,
                           skip_duplicates=True):
        """Extracts files using a filter expression.

    This method runs the file extraction process on the image and
    potentially on every VSS if that is wanted.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
      destination_path (str): path where the extracted files should be stored.
      output_writer (CLIOutputWriter): output writer.
      filter_file_path (str): path of the file that contains the filter
          expressions.
      skip_duplicates (Optional[bool]): True if files with duplicate content
          should be skipped.
    """
        for source_path_spec in source_path_specs:
            file_system, mount_point = self._GetSourceFileSystem(
                source_path_spec, resolver_context=self._resolver_context)

            if self._knowledge_base is None:
                self._Preprocess(file_system, mount_point)

            display_name = path_helper.PathHelper.GetDisplayNameForPathSpec(
                source_path_spec)
            output_writer.Write(
                u'Extracting file entries from: {0:s}\n'.format(display_name))

            path_attributes = self._knowledge_base.GetPathAttributes()
            find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file_path, path_attributes=path_attributes)

            searcher = file_system_searcher.FileSystemSearcher(
                file_system, mount_point)
            for path_spec in searcher.Find(find_specs=find_specs):
                self._ExtractFileEntry(path_spec,
                                       destination_path,
                                       output_writer,
                                       skip_duplicates=skip_duplicates)

            file_system.Close()
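
The path_attributes argument allows the filter expressions to contain
expandable placeholders such as {systemroot}. The expansion logic is not
shown in this snippet; a hedged illustration of the idea (function name and
placeholder format are assumptions):

def ExpandPathAttributes(filter_line, path_attributes):
    """Sketch: expands {attribute} placeholders in a single filter line."""
    for name, value in path_attributes.items():
        # Replace e.g. '{systemroot}' with '/Windows'.
        filter_line = filter_line.replace(u'{{{0:s}}}'.format(name), value)
    return filter_line

# ExpandPathAttributes(u'{systemroot}/System32/config/.+',
#                      {u'systemroot': u'/Windows'})
# returns u'/Windows/System32/config/.+'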
Example #7
    def _ExtractWithFilter(self,
                           source_path_specs,
                           destination_path,
                           filter_file_path,
                           remove_duplicates=True):
        """Extracts files using a filter expression.

    This method runs the file extraction process on the image and
    potentially on every VSS if that is wanted.

    Args:
      source_path_specs: list of path specifications (instances of
                         dfvfs.PathSpec) to process.
      destination_path: The path where the extracted files should be stored.
      filter_file_path: The path of the file that contains the filter
                        expressions.
      remove_duplicates: optional boolean value to indicate if files with
                         duplicate content should be removed. The default
                         is True.
    """
        for source_path_spec in source_path_specs:
            file_system, mount_point = self._GetSourceFileSystem(
                source_path_spec, resolver_context=self._resolver_context)

            if self._knowledge_base is None:
                self._Preprocess(file_system, mount_point)

            if not os.path.isdir(destination_path):
                os.makedirs(destination_path)

            find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file_path, pre_obj=self._knowledge_base.pre_obj)

            # Save the regular files.
            file_saver = FileSaver(skip_duplicates=remove_duplicates)

            searcher = file_system_searcher.FileSystemSearcher(
                file_system, mount_point)
            for path_spec in searcher.Find(find_specs=find_specs):
                self._ExtractFile(file_saver, path_spec, destination_path)

            file_system.Close()
Example #8
    def ProcessSources(
            self,
            source_path_specs,
            source_type,
            enable_sigsegv_handler=False,
            filter_file=None,
            hasher_names_string=None,
            parser_filter_string=None,
            preferred_encoding=u'utf-8',
            single_process_mode=False,
            status_update_callback=None,
            storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF,
            timezone=pytz.UTC):
        """Processes the sources.

        Args:
          source_path_specs: list of path specifications (instances of
                             dfvfs.PathSpec) to process.
          source_type: the dfVFS source type definition.
          enable_sigsegv_handler: optional boolean value to indicate the
                                  SIGSEGV handler should be enabled. The
                                  default is False.
          filter_file: optional path to a file that contains find
                       specifications. The default is None.
          hasher_names_string: optional comma separated string of names of
                               hashers to enable. The default is None.
          parser_filter_string: optional parser filter string. The default
                                is None.
          preferred_encoding: optional preferred encoding. The default is
                              UTF-8.
          single_process_mode: optional boolean value to indicate if the
                               front-end should run in single process mode.
                               The default is False.
          status_update_callback: optional callback function for status
                                  updates. The default is None.
          storage_serializer_format: optional storage serializer format.
                                     The default is protobuf.
          timezone: optional preferred timezone. The default is UTC.

        Returns:
          The processing status (instance of ProcessingStatus) or None.

        Raises:
          SourceScannerError: if the source scanner could not find a
                              supported file system.
          UserAbort: if the user initiated an abort.
        """
        # If the source is a directory or a storage media image
        # run pre-processing.
        # TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
        # to definitions.SOURCE_TYPE_.
        source_scanner_context = source_scanner.SourceScannerContext
        if source_type in [
                source_scanner_context.SOURCE_TYPE_DIRECTORY,
                source_scanner_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
                source_scanner_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
            self.SetEnablePreprocessing(True)
        else:
            self.SetEnablePreprocessing(False)

        self._CheckStorageFile(self._storage_file_path)

        self._single_process_mode = single_process_mode
        # TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
        # to definitions.SOURCE_TYPE_.
        if source_type == source_scanner.SourceScannerContext.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            self._single_process_mode = True

        if self._single_process_mode:
            self._engine = single_process.SingleProcessEngine(self._queue_size)
        else:
            self._engine = multi_process.MultiProcessEngine(
                maximum_number_of_queued_items=self._queue_size)

        self._engine.SetEnableDebugOutput(self._debug_mode)
        self._engine.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate,
            profiling_type=self._profiling_type)

        pre_obj = self._PreprocessSource(source_path_specs, source_type)

        self._operating_system = getattr(pre_obj, u'guessed_os', None)

        if not parser_filter_string:
            guessed_os = self._operating_system
            os_version = getattr(pre_obj, u'osversion', u'')
            parser_filter_string = self._GetParserFilterPreset(
                os_guess=guessed_os, os_version=os_version)

            if parser_filter_string:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_string))

        self._parser_names = []
        for _, parser_class in parsers_manager.ParsersManager.GetParsers(
                parser_filter_string=parser_filter_string):
            self._parser_names.append(parser_class.NAME)

        if u'filestat' in self._parser_names:
            include_directory_stat = True
        else:
            include_directory_stat = False

        self._hasher_names = []
        hasher_manager = hashers_manager.HashersManager
        for hasher_name in hasher_manager.GetHasherNamesFromString(
                hasher_names_string=hasher_names_string):
            self._hasher_names.append(hasher_name)

        self._PreprocessSetTimezone(pre_obj, timezone=timezone)

        if filter_file:
            filter_find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file, pre_obj=pre_obj)
        else:
            filter_find_specs = None

        self._PreprocessSetCollectionInformation(
            pre_obj,
            source_type,
            self._engine,
            filter_file=filter_file,
            parser_filter_string=parser_filter_string,
            preferred_encoding=preferred_encoding)

        if self._output_module:
            storage_writer = storage.BypassStorageWriter(
                self._engine.event_object_queue,
                self._storage_file_path,
                output_module_string=self._output_module,
                pre_obj=pre_obj)
        else:
            storage_writer = storage.FileStorageWriter(
                self._engine.event_object_queue,
                self._storage_file_path,
                buffer_size=self._buffer_size,
                pre_obj=pre_obj,
                serializer_format=storage_serializer_format)

            storage_writer.SetEnableProfiling(
                self._enable_profiling, profiling_type=self._profiling_type)

        processing_status = None
        try:
            if self._single_process_mode:
                logging.debug(u'Starting extraction in single process mode.')

                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    include_directory_stat=include_directory_stat,
                    mount_path=self._mount_path,
                    parser_filter_string=parser_filter_string,
                    process_archive_files=self._process_archive_files,
                    resolver_context=self._resolver_context,
                    status_update_callback=status_update_callback,
                    text_prepend=self._text_prepend)

            else:
                logging.debug(u'Starting extraction in multi process mode.')

                # TODO: pass number_of_extraction_workers.
                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    enable_sigsegv_handler=enable_sigsegv_handler,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    include_directory_stat=include_directory_stat,
                    mount_path=self._mount_path,
                    parser_filter_string=parser_filter_string,
                    process_archive_files=self._process_archive_files,
                    status_update_callback=status_update_callback,
                    show_memory_usage=self._show_worker_memory_information,
                    text_prepend=self._text_prepend)

        except KeyboardInterrupt:
            self._CleanUpAfterAbort()
            raise errors.UserAbort

        # TODO: check if this still works and if still needed.
        except Exception as exception:
            if not self._single_process_mode:
                raise

            # The tool should generally not be run in single process mode
            # for reasons other than debugging. Hence the general error
            # catching.
            logging.error(
                u'An uncaught exception occurred: {0:s}.\n{1:s}'.format(
                    exception, traceback.format_exc()))
            if self._debug_mode:
                pdb.post_mortem()

        return processing_status
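
For context, _GetParserFilterPreset is called above but not defined in these
snippets. Conceptually it maps the guessed operating system and version to a
parser preset name; the sketch below is purely illustrative and the mapping
and preset names are assumptions:

def _GetParserFilterPreset(os_guess=u'', os_version=u''):
    """Sketch: maps an operating system guess to a parser preset name."""
    # Illustrative mapping only; plaso's real preset definitions are more
    # granular.
    if os_guess == u'Windows':
        if os_version.startswith(u'5.'):
            return u'winxp'
        return u'win7'
    if os_guess == u'MacOSX':
        return u'macosx'
    if os_guess == u'Linux':
        return u'linux'
    return None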
Example #9
  def ProcessSource(
      self, filter_file=None, parser_filter_string=None,
      hasher_names_string=None,
      storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF,
      timezone=pytz.UTC):
    """Processes the source.

    Args:
      filter_file: a path to a file that contains find specifications.
                   The default is None.
      parser_filter_string: optional parser filter string. The default is None.
      hasher_names_string: optional comma separated string of names of
                           hashers to enable. The default is None.
      storage_serializer_format: optional storage serializer format.
                                 The default is protobuf.
      timezone: optional preferred timezone. The default is UTC.

    Raises:
      SourceScannerError: if the source scanner could not find a supported
                          file system.
      UserAbort: if the user initiated an abort.
    """
    if self.SourceIsDirectory() or self.SourceIsStorageMediaImage():
      # If the source is a directory or a storage media image
      # run pre-processing.
      self._preprocess = True
    else:
      self._preprocess = False

    self._CheckStorageFile(self._storage_file_path)

    if self.SourceIsFile():
      # No need to multi process a single file source.
      self._single_process_mode = True

    if self._single_process_mode:
      self._engine = self._InitializeSingleProcessModeEngine()
    else:
      self._engine = self._InitializeMultiProcessModeEngine()

    pre_obj = self._PreprocessSource()

    if not parser_filter_string:
      guessed_os = getattr(pre_obj, u'guessed_os', u'')
      os_version = getattr(pre_obj, u'osversion', u'')
      parser_filter_string = self._GetParserFilterPreset(
          os_guess=guessed_os, os_version=os_version)

      if parser_filter_string:
        logging.info(u'Parser filter expression changed to: {0:s}'.format(
            parser_filter_string))

    self._parser_names = []
    for _, parser_class in parsers_manager.ParsersManager.GetParsers(
        parser_filter_string=parser_filter_string):
      self._parser_names.append(parser_class.NAME)

    if u'filestat' in self._parser_names:
      include_directory_stat = True
    else:
      include_directory_stat = False

    self._hasher_names = []
    hasher_manager = hashers_manager.HashersManager
    for hasher_name in hasher_manager.GetHasherNamesFromString(
        hasher_names_string=hasher_names_string):
      self._hasher_names.append(hasher_name)

    self._PreprocessSetTimezone(pre_obj, timezone=timezone)

    if filter_file:
      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
          filter_file, pre_obj=pre_obj)
    else:
      filter_find_specs = None

    self._PreprocessSetCollectionInformation(
        pre_obj, self._engine, filter_file=filter_file,
        parser_filter_string=parser_filter_string)

    if self._single_process_mode:
      self._ProcessSourceSingleProcessMode(
          pre_obj, filter_find_specs=filter_find_specs,
          include_directory_stat=include_directory_stat,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string,
          storage_serializer_format=storage_serializer_format)
    else:
      self._ProcessSourceMultiProcessMode(
          pre_obj, filter_find_specs=filter_find_specs,
          include_directory_stat=include_directory_stat,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string,
          storage_serializer_format=storage_serializer_format)
Example #10
    def _ExtractWithFilter(self, filter_file_path, destination_path):
        """Extracts files using a filter expression.

    This method runs the file extraction process on the image and
    potentially on every VSS if that is wanted.

    Args:
      filter_file_path: The path of the file that contains the filter
                        expressions.
      destination_path: The path where the extracted files should be stored.
    """
        # TODO: add support to handle multiple partitions.
        self._source_path_spec = self.GetSourcePathSpec()

        searcher = self._GetSourceFileSystemSearcher(
            resolver_context=self._resolver_context)

        if self._knowledge_base is None:
            self._Preprocess(searcher)

        if not os.path.isdir(destination_path):
            os.makedirs(destination_path)

        find_specs = engine_utils.BuildFindSpecsFromFile(
            filter_file_path, pre_obj=self._knowledge_base.pre_obj)

        # Save the regular files.
        FileSaver.calc_md5 = self._remove_duplicates

        for path_spec in searcher.Find(find_specs=find_specs):
            FileSaver.WriteFile(path_spec, destination_path)

        if self._process_vss and self._vss_stores:
            volume_path_spec = self._source_path_spec.parent

            logging.info(u'Extracting files from VSS.')
            vss_path_spec = path_spec_factory.Factory.NewPathSpec(
                dfvfs_definitions.TYPE_INDICATOR_VSHADOW,
                location=u'/',
                parent=volume_path_spec)

            vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
                vss_path_spec)

            number_of_vss = vss_file_entry.number_of_sub_file_entries

            # In plaso, 1 represents the first store index, while in dfvfs
            # and pyvshadow, 0 represents the first store index, so 1 is
            # subtracted.
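            # For example, self._vss_stores = [1, 2] yields
            # vss_store_range = [0, 1].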
            vss_store_range = [store_nr - 1 for store_nr in self._vss_stores]

            for store_index in vss_store_range:
                logging.info(
                    u'Extracting files from VSS {0:d} out of {1:d}'.format(
                        store_index + 1, number_of_vss))

                vss_path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_VSHADOW,
                    store_index=store_index,
                    parent=volume_path_spec)
                path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_TSK,
                    location=u'/',
                    parent=vss_path_spec)

                filename_prefix = 'vss_{0:d}'.format(store_index)

                file_system = path_spec_resolver.Resolver.OpenFileSystem(
                    path_spec, resolver_context=self._resolver_context)
                searcher = file_system_searcher.FileSystemSearcher(
                    file_system, vss_path_spec)

                for path_spec in searcher.Find(find_specs=find_specs):
                    FileSaver.WriteFile(path_spec,
                                        destination_path,
                                        filename_prefix=filename_prefix)
Example #11
    def ProcessSources(self,
                       source_path_specs,
                       source_type,
                       command_line_arguments=None,
                       enable_sigsegv_handler=False,
                       filter_file=None,
                       hasher_names_string=None,
                       number_of_extraction_workers=0,
                       preferred_encoding=u'utf-8',
                       parser_filter_expression=None,
                       single_process_mode=False,
                       status_update_callback=None,
                       timezone=pytz.UTC):
        """Processes the sources.

        Args:
          source_path_specs: list of path specifications (instances of
                             dfvfs.PathSpec) to process.
          source_type: the dfVFS source type definition.
          command_line_arguments: optional string of the command line
                                  arguments or None if not set.
          enable_sigsegv_handler: optional boolean value to indicate the
                                  SIGSEGV handler should be enabled.
          filter_file: optional path to a file that contains find
                       specifications.
          hasher_names_string: optional comma separated string of names of
                               hashers to enable.
          number_of_extraction_workers: the number of extraction workers to
                                        run. If 0, the number will be
                                        selected automatically.
          preferred_encoding: optional preferred encoding.
          parser_filter_expression: optional string containing the parser
                                    filter expression, where None represents
                                    all parsers and plugins.
          single_process_mode: optional boolean value to indicate if the
                               front-end should run in single process mode.
          status_update_callback: optional callback function for status
                                  updates.
          timezone: optional preferred timezone.

        Returns:
          The processing status (instance of ProcessingStatus) or None.

        Raises:
          SourceScannerError: if the source scanner could not find a
                              supported file system.
          UserAbort: if the user initiated an abort.
        """
        # If the source is a directory or a storage media image
        # run pre-processing.
        if source_type in [
                dfvfs_definitions.SOURCE_TYPE_DIRECTORY,
                dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
                dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE
        ]:
            self.SetEnablePreprocessing(True)
        else:
            self.SetEnablePreprocessing(False)

        self._CheckStorageFile(self._storage_file_path)

        self._single_process_mode = single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            self._single_process_mode = True

        if self._single_process_mode:
            self._engine = single_process.SingleProcessEngine(self._queue_size)
        else:
            self._engine = multi_process.MultiProcessEngine(
                maximum_number_of_queued_items=self._queue_size,
                use_zeromq=self._use_zeromq)

        self._engine.SetEnableDebugOutput(self._debug_mode)
        self._engine.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate,
            profiling_type=self._profiling_type)

        pre_obj = self._PreprocessSources(source_path_specs, source_type)

        self._operating_system = getattr(pre_obj, u'guessed_os', None)

        if not parser_filter_expression:
            guessed_os = self._operating_system
            os_version = getattr(pre_obj, u'osversion', u'')
            parser_filter_expression = self._GetParserFilterPreset(
                os_guess=guessed_os, os_version=os_version)

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

        self._parser_names = []
        for _, parser_class in parsers_manager.ParsersManager.GetParsers(
                parser_filter_expression=parser_filter_expression):
            self._parser_names.append(parser_class.NAME)

        self._hasher_names = []
        hasher_manager = hashers_manager.HashersManager
        for hasher_name in hasher_manager.GetHasherNamesFromString(
                hasher_names_string=hasher_names_string):
            self._hasher_names.append(hasher_name)

        self._PreprocessSetTimezone(pre_obj, timezone=timezone)

        if filter_file:
            filter_find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file, pre_obj=pre_obj)
        else:
            filter_find_specs = None

        # TODO: deprecate the need for this function.
        self._PreprocessSetCollectionInformation(pre_obj)

        session_start = self._CreateSessionStart(
            command_line_arguments=command_line_arguments,
            filter_file=filter_file,
            parser_filter_expression=parser_filter_expression,
            preferred_encoding=preferred_encoding)

        storage_writer = storage_zip_file.ZIPStorageFileWriter(
            self._storage_file_path, pre_obj, buffer_size=self._buffer_size)

        storage_writer.SetEnableProfiling(self._enable_profiling,
                                          profiling_type=self._profiling_type)

        storage_writer.Open()
        storage_writer.WriteSessionStart(session_start)

        processing_status = None
        try:
            if self._single_process_mode:
                logging.debug(u'Starting extraction in single process mode.')

                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    mount_path=self._mount_path,
                    parser_filter_expression=parser_filter_expression,
                    process_archive_files=self._process_archive_files,
                    resolver_context=self._resolver_context,
                    status_update_callback=status_update_callback,
                    text_prepend=self._text_prepend)

            else:
                logging.debug(u'Starting extraction in multi process mode.')

                # TODO: pass number_of_extraction_workers.
                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    enable_sigsegv_handler=enable_sigsegv_handler,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    mount_path=self._mount_path,
                    number_of_extraction_workers=number_of_extraction_workers,
                    parser_filter_expression=parser_filter_expression,
                    process_archive_files=self._process_archive_files,
                    status_update_callback=status_update_callback,
                    show_memory_usage=self._show_worker_memory_information,
                    text_prepend=self._text_prepend)

        except KeyboardInterrupt:
            self._CleanUpAfterAbort()
            raise errors.UserAbort

        # TODO: check if this still works and if still needed.
        except Exception as exception:  # pylint: disable=broad-except
            if not self._single_process_mode:
                raise

            # The tool should generally not be run in single process mode
            # for reasons other than debugging. Hence the general error
            # catching.
            logging.error(
                u'An uncaught exception occurred: {0:s}.\n{1:s}'.format(
                    exception, traceback.format_exc()))
            if self._debug_mode:
                pdb.post_mortem()

        return processing_status
Example #12
  def _StartSingleThread(self, options):
    """Starts everything up in a single process.

    This mode should not normally be used, since running the tool in a
    single process buffers everything in memory until the storage phase
    is reached.

    To be clear: collection is started and completed first, then the
    worker extracts all EventObjects and stores them in memory, and only
    when that is done is the storage function called to drain the buffer.
    Hence the tool's excessive use of memory in this mode, and the reason
    it is not recommended except for debugging (and mostly to get into
    the debugger).

    This is therefore mostly useful during debugging sessions for some
    limited parsing.

    Args:
      options: the command line arguments (instance of argparse.Namespace).
    """
    self._engine = single_process.SingleProcessEngine(self._queue_size)
    self._engine.SetEnableDebugOutput(self._debug_mode)
    self._engine.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)
    self._engine.SetProcessArchiveFiles(self._process_archive_files)

    if self._filter_object:
      self._engine.SetFilterObject(self._filter_object)

    if self._mount_path:
      self._engine.SetMountPath(self._mount_path)

    if self._text_prepend:
      self._engine.SetTextPrepend(self._text_prepend)

    # TODO: add support to handle multiple partitions.
    self._engine.SetSource(
        self.GetSourcePathSpec(), resolver_context=self._resolver_context)

    logging.debug(u'Starting preprocessing.')
    pre_obj = self.PreprocessSource(options)

    logging.debug(u'Preprocessing done.')

    # TODO: make sure parsers option is not set by preprocessing.
    parser_filter_string = getattr(options, 'parsers', '')

    self._parser_names = []
    for _, parser_class in parsers_manager.ParsersManager.GetParsers(
        parser_filter_string=parser_filter_string):
      self._parser_names.append(parser_class.NAME)

    self._PreprocessSetCollectionInformation(options, pre_obj)

    if 'filestat' in self._parser_names:
      include_directory_stat = True
    else:
      include_directory_stat = False

    filter_file = getattr(options, 'file_filter', None)
    if filter_file:
      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
          filter_file, pre_obj=pre_obj)
    else:
      filter_find_specs = None

    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self._vss_stores,
        filter_find_specs=filter_find_specs,
        resolver_context=self._resolver_context)

    self._DebugPrintCollector(options)

    if self._output_module:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)
    else:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=self._storage_serializer_format)

    hasher_names_string = getattr(options, u'hashers', u'')

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')

    finally:
      self._resolver_context.Empty()
Example #13
  def _ProcessSourceMultiProcessMode(self, options):
    """Processes the source in a multiple process.

    Multiprocessing is used to start up separate processes.

    Args:
      options: the command line arguments (instance of argparse.Namespace).
    """
    # TODO: replace by an option.
    start_collection_process = True

    self._number_of_worker_processes = getattr(options, 'workers', 0)

    logging.info(u'Starting extraction in multi process mode.')

    self._engine = multi_process.MultiProcessEngine(
        maximum_number_of_queued_items=self._queue_size)

    self._engine.SetEnableDebugOutput(self._debug_mode)
    self._engine.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)
    self._engine.SetProcessArchiveFiles(self._process_archive_files)

    if self._filter_object:
      self._engine.SetFilterObject(self._filter_object)

    if self._mount_path:
      self._engine.SetMountPath(self._mount_path)

    if self._text_prepend:
      self._engine.SetTextPrepend(self._text_prepend)
    # TODO: add support to handle multiple partitions.
    self._engine.SetSource(
        self.GetSourcePathSpec(), resolver_context=self._resolver_context)

    logging.debug(u'Starting preprocessing.')
    pre_obj = self.PreprocessSource(options)
    logging.debug(u'Preprocessing done.')

    # TODO: make sure parsers option is not set by preprocessing.
    parser_filter_string = getattr(options, 'parsers', '')

    self._parser_names = []
    for _, parser_class in parsers_manager.ParsersManager.GetParsers(
        parser_filter_string=parser_filter_string):
      self._parser_names.append(parser_class.NAME)

    hasher_names_string = getattr(options, u'hashers', u'')

    self._hasher_names = []
    hasher_manager = hashers_manager.HashersManager
    for hasher_name in hasher_manager.GetHasherNamesFromString(
        hasher_names_string=hasher_names_string):
      self._hasher_names.append(hasher_name)

    self._PreprocessSetCollectionInformation(options, pre_obj)

    if 'filestat' in self._parser_names:
      include_directory_stat = True
    else:
      include_directory_stat = False

    filter_file = getattr(options, 'file_filter', None)
    if filter_file:
      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
          filter_file, pre_obj=pre_obj)
    else:
      filter_find_specs = None

    if start_collection_process:
      resolver_context = context.Context()
    else:
      resolver_context = self._resolver_context

    # TODO: create multi process collector.
    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self._vss_stores,
        filter_find_specs=filter_find_specs, resolver_context=resolver_context)

    self._DebugPrintCollector(options)

    if self._output_module:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)
    else:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=self._storage_serializer_format)

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string,
          number_of_extraction_workers=self._number_of_worker_processes,
          have_collection_process=start_collection_process,
          have_foreman_process=self._run_foreman,
          show_memory_usage=self._show_worker_memory_information)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')
Example #14
    def ProcessSources(self,
                       session,
                       storage_writer,
                       source_path_specs,
                       source_type,
                       enable_sigsegv_handler=False,
                       force_preprocessing=False,
                       hasher_names_string=None,
                       number_of_extraction_workers=0,
                       process_archives=False,
                       process_compressed_streams=True,
                       single_process_mode=False,
                       status_update_callback=None,
                       temporary_directory=None,
                       timezone=u'UTC',
                       yara_rules_string=None):
        """Processes the sources.

        Args:
          session (Session): session the storage changes are part of.
          storage_writer (StorageWriter): storage writer.
          source_path_specs (list[dfvfs.PathSpec]): path specifications of
              the sources to process.
          source_type (str): the dfVFS source type definition.
          enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV
              handler should be enabled.
          force_preprocessing (Optional[bool]): True if preprocessing should
              be forced.
          hasher_names_string (Optional[str]): comma separated string of
              names of hashers to use during processing.
          number_of_extraction_workers (Optional[int]): number of extraction
              workers to run. If 0, the number will be selected
              automatically.
          process_archives (Optional[bool]): True if archive files should be
              scanned for file entries.
          process_compressed_streams (Optional[bool]): True if file content
              in compressed streams should be processed.
          single_process_mode (Optional[bool]): True if the front-end should
              run in single process mode.
          status_update_callback (Optional[function]): callback function for
              status updates.
          temporary_directory (Optional[str]): path of the directory for
              temporary files.
          timezone (Optional[datetime.tzinfo]): timezone.
          yara_rules_string (Optional[str]): unparsed yara rule definitions.

        Returns:
          ProcessingStatus: processing status or None.

        Raises:
          SourceScannerError: if the source scanner could not find a
              supported file system.
          UserAbort: if the user initiated an abort.
        """
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        engine = self._CreateEngine(single_process_mode)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if force_preprocessing or source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(engine, source_path_specs)

        if not session.parser_filter_expression:
            operating_system = engine.knowledge_base.GetValue(
                u'operating_system')
            operating_system_product = engine.knowledge_base.GetValue(
                u'operating_system_product')
            operating_system_version = engine.knowledge_base.GetValue(
                u'operating_system_version')
            session.parser_filter_expression = self._GetParserFilterPreset(
                operating_system, operating_system_product,
                operating_system_version)

            if session.parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        session.parser_filter_expression))

        self._parser_names = []
        for _, parser_class in parsers_manager.ParsersManager.GetParsers(
                parser_filter_expression=session.parser_filter_expression):
            self._parser_names.append(parser_class.NAME)

        self._SetTimezone(engine.knowledge_base, timezone)

        if session.filter_file:
            path_attributes = engine.knowledge_base.GetPathAttributes()
            filter_find_specs = engine_utils.BuildFindSpecsFromFile(
                session.filter_file, path_attributes=path_attributes)
        else:
            filter_find_specs = None

        processing_status = None
        if single_process_mode:
            logging.debug(u'Starting extraction in single process mode.')

            # TODO: check if preferred_encoding should be passed.
            processing_status = engine.ProcessSources(
                source_path_specs,
                storage_writer,
                self._resolver_context,
                filter_find_specs=filter_find_specs,
                filter_object=self._filter_object,
                hasher_names_string=hasher_names_string,
                mount_path=self._mount_path,
                parser_filter_expression=session.parser_filter_expression,
                preferred_year=session.preferred_year,
                process_archives=process_archives,
                process_compressed_streams=process_compressed_streams,
                status_update_callback=status_update_callback,
                temporary_directory=temporary_directory,
                text_prepend=self._text_prepend,
                yara_rules_string=yara_rules_string)

        else:
            logging.debug(u'Starting extraction in multi process mode.')

            # TODO: check if preferred_encoding should be passed.
            processing_status = engine.ProcessSources(
                session.identifier,
                source_path_specs,
                storage_writer,
                enable_sigsegv_handler=enable_sigsegv_handler,
                filter_find_specs=filter_find_specs,
                filter_object=self._filter_object,
                hasher_names_string=hasher_names_string,
                mount_path=self._mount_path,
                number_of_worker_processes=number_of_extraction_workers,
                parser_filter_expression=session.parser_filter_expression,
                preferred_year=session.preferred_year,
                process_archives=process_archives,
                process_compressed_streams=process_compressed_streams,
                status_update_callback=status_update_callback,
                show_memory_usage=self._show_worker_memory_information,
                temporary_directory=temporary_directory,
                text_prepend=self._text_prepend,
                yara_rules_string=yara_rules_string)

        return processing_status