Exemplo n.º 1
0
  def _GetVolumeVssStoreIdentifiers(self, scan_context, vss_stores=None):
    """Determines the VSS store identifiers.

    Args:
      scan_context: the scan context (instance of dfvfs.ScanContext).
      vss_stores: Optional list of preferred VSS stored identifiers. The
                  default is None.

    Raises:
      SourceScannerError: if the format of or within the source
                          is not supported or the the scan context
                          is invalid.
    """
    if (not scan_context or not scan_context.last_scan_node or
        not scan_context.last_scan_node.path_spec):
      raise errors.SourceScannerError(u'Invalid scan context.')

    volume_system = vshadow_volume_system.VShadowVolumeSystem()
    volume_system.Open(scan_context.last_scan_node.path_spec)

    volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
        volume_system)
    if not volume_identifiers:
      return

    try:
      self.vss_stores = self._GetVssStoreIdentifiersFromUser(
          volume_system, volume_identifiers, vss_stores=vss_stores)
    except KeyboardInterrupt:
      raise errors.UserAbort(u'File system scan aborted.')

    return
Exemplo n.º 2
0
    def _GetVSSStoreIdentifiers(self, scan_node, vss_stores=None):
        """Determines the VSS store identifiers.

    Args:
      scan_node: the scan node (instance of dfvfs.ScanNode).
      vss_stores: Optional list of preferred VSS store identifiers. The
                  default is None.

    Returns:
      A list of VSS store identifiers.

    Raises:
      SourceScannerError: if the format of or within the source
                          is not supported or the the scan node is invalid.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError(u'Invalid scan node.')

        volume_system = vshadow_volume_system.VShadowVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            return []

        try:
            selected_store_identifiers = self._PromptUserForVSSStoreIdentifiers(
                volume_system, volume_identifiers, vss_stores=vss_stores)
        except KeyboardInterrupt:
            raise errors.UserAbort(u'File system scan aborted.')

        return selected_store_identifiers
Exemplo n.º 3
0
  def _StartSingleThread(
      self, pre_obj, filter_find_specs=None, include_directory_stat=True,
      parser_filter_string=None, hasher_names_string=None,
      storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF):
    """Starts everything up in a single process.

    This should not normally be used, since running the tool in a single
    process buffers up everything into memory until the storage is called.

    Just to make it clear, this starts up the collection, completes that
    before calling the worker that extracts all EventObjects and stores
    them in memory. when that is all done, the storage function is called
    to drain the buffer. Hence the tool's excessive use of memory in this
    mode and the reason why it is not suggested to be used except for
    debugging reasons (and mostly to get into the debugger).

    This is therefore mostly useful during debugging sessions for some
    limited parsing.

    Args:
      pre_obj: the preprocess object (instance of PreprocessObject).
      filter_find_specs: optional list of filter find specifications (instances
                         of dfvfs.FindSpec). The default is None.
      include_directory_stat: Boolean value to indicate whether directory
                              stat information should be collected. The default
                              is True.
      parser_filter_string: optional parser filter string. The default is None.
      hasher_names_string: optional comma separated string of names of
                           hashers to enable. The default is None.
      storage_serializer_format: optional storage serializer format.
                                 The default is protobuf.
    """
    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self.vss_stores,
        filter_find_specs=filter_find_specs,
        resolver_context=self._resolver_context)

    if self._output_module:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)
    else:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=storage_serializer_format)

      storage_writer.SetEnableProfiling(
          self._enable_profiling,
          profiling_type=self._profiling_type)

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')
Exemplo n.º 4
0
    def _GetTSKPartitionIdentifiers(self, scan_node):
        """Determines the TSK partition identifiers.

    This method first checks for the preferred partition number, then
    falls back to prompt the user if no usable preferences were specified.

    Args:
      scan_node (dfvfs.SourceScanNode): scan node.

    Returns:
      list[str]: TSK partition identifiers.

    Raises:
      RuntimeError: if the volume for a specific identifier cannot be
          retrieved.
      SourceScannerError: if the format of or within the source
          is not supported or the the scan node is invalid.
      UserAbort: if the user requested to abort.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError('Invalid scan node.')

        volume_system = tsk_volume_system.TSKVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            return []

        # TODO: refactor self._partitions to use scan options.
        if self._partitions:
            if self._partitions == 'all':
                partitions = range(1, volume_system.number_of_volumes + 1)
            else:
                partitions = self._ParseVolumeIdentifiersString(
                    self._partitions, prefix='p')

            selected_volume_identifiers = self._NormalizedVolumeIdentifiers(
                volume_system, partitions, prefix='p')

            if not set(selected_volume_identifiers).difference(
                    volume_identifiers):
                return selected_volume_identifiers

        if len(volume_identifiers) > 1:
            if self._unattended_mode:
                raise errors.SourceScannerError(
                    'More than 1 parition found but no paritions specified.')

            try:
                volume_identifiers = self._PromptUserForPartitionIdentifiers(
                    volume_system, volume_identifiers)
            except KeyboardInterrupt:
                raise errors.UserAbort('File system scan aborted.')

        return self._NormalizedVolumeIdentifiers(volume_system,
                                                 volume_identifiers,
                                                 prefix='p')
Exemplo n.º 5
0
  def _ProcessSourceMultiProcessMode(
      self, pre_obj, filter_find_specs=None, include_directory_stat=True,
      number_of_worker_processes=0, parser_filter_string=None,
      hasher_names_string=None,
      storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF):
    """Processes the source with multiple processes.

    Args:
      pre_obj: the preprocess object (instance of PreprocessObject).
      filter_find_specs: optional list of filter find specifications (instances
                         of dfvfs.FindSpec). The default is None.
      include_directory_stat: Boolean value to indicate whether directory
                              stat information should be collected. The default
                              is True.
      number_of_worker_processes: optional number of worker processes.
                                  The default is 0 which represents deterimine
                                  automatically.
      parser_filter_string: optional parser filter string. The default is None.
      hasher_names_string: optional comma separated string of names of
                           hashers to enable. The default is None.
      storage_serializer_format: optional storage serializer format.
                                 The default is protobuf.
    """
    logging.info(u'Starting extraction in multi process mode.')

    resolver_context = context.Context()

    # TODO: create multi process collector.
    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self.vss_stores,
        filter_find_specs=filter_find_specs, resolver_context=resolver_context)

    if self._output_module:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)
    else:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=storage_serializer_format)

      storage_writer.SetEnableProfiling(
          self._enable_profiling,
          profiling_type=self._profiling_type)

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string,
          number_of_extraction_workers=number_of_worker_processes,
          show_memory_usage=self._show_worker_memory_information)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')
Exemplo n.º 6
0
    def _GetLVMVolumeIdentifiers(self, scan_node):
        """Determines the LVM volume identifiers.

    Args:
      scan_node (dfvfs.SourceScanNode): scan node.

    Returns:
      list[str]: LVM volume identifiers.

    Raises:
      SourceScannerError: if the scan node is invalid or more than 1 volume
          was found but no volumes were specified.
      UserAbort: if the user requested to abort.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError('Invalid scan node.')

        volume_system = lvm_volume_system.LVMVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            return []

        # TODO: refactor self._volumes to use scan options.
        if self._volumes:
            if self._volumes == 'all':
                volumes = volume_system.volume_identifiers
            else:
                volumes = self._mediator.ParseVolumeIdentifiersString(
                    self._volumes, prefix='lvm')

            selected_volume_identifiers = self._NormalizedVolumeIdentifiers(
                volume_system, volumes, prefix='lvm')

            if not set(selected_volume_identifiers).difference(
                    volume_identifiers):
                return selected_volume_identifiers

        if len(volume_identifiers) > 1:
            if self._unattended_mode:
                raise errors.SourceScannerError(
                    'More than 1 volume found but no volumes specified.')

            try:
                volume_identifiers = self._mediator.GetLVMVolumeIdentifiers(
                    volume_system, volume_identifiers)
            except KeyboardInterrupt:
                raise errors.UserAbort('File system scan aborted.')

        return self._NormalizedVolumeIdentifiers(volume_system,
                                                 volume_identifiers,
                                                 prefix='lvm')
Exemplo n.º 7
0
    def _GetAPFSVolumeIdentifiers(self, scan_node):
        """Determines the APFS volume identifiers.

    Args:
      scan_node (dfvfs.SourceScanNode): scan node.

    Returns:
      list[str]: APFS volume identifiers.

    Raises:
      SourceScannerError: if the format of or within the source is not
          supported or the the scan node is invalid.
      UserAbort: if the user requested to abort.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError('Invalid scan node.')

        volume_system = apfs_volume_system.APFSVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            return []

        # TODO: refactor self._volumes to use scan options.
        if self._volumes:
            if self._volumes == 'all':
                volumes = range(1, volume_system.number_of_volumes + 1)
            else:
                volumes = self._ParseVolumeIdentifiersString(self._volumes,
                                                             prefix='apfs')

            selected_volume_identifiers = self._NormalizedVolumeIdentifiers(
                volume_system, volumes, prefix='apfs')

            if not set(selected_volume_identifiers).difference(
                    volume_identifiers):
                return selected_volume_identifiers

        if len(volume_identifiers) > 1:
            if self._unattended_mode:
                raise errors.SourceScannerError(
                    'More than 1 volume found but no volumes specified.')

            try:
                volume_identifiers = self._PromptUserForAPFSVolumeIdentifiers(
                    volume_system, volume_identifiers)
            except KeyboardInterrupt:
                raise errors.UserAbort('File system scan aborted.')

        return self._NormalizedVolumeIdentifiers(volume_system,
                                                 volume_identifiers,
                                                 prefix='apfs')
Exemplo n.º 8
0
    def _GetVSSStoreIdentifiers(self, scan_node):
        """Determines the VSS store identifiers.

    Args:
      scan_node (dfvfs.SourceScanNode): scan node.

    Returns:
      list[str]: VSS store identifiers.

    Raises:
      SourceScannerError: if the scan node is invalid.
      UserAbort: if the user requested to abort.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError('Invalid scan node.')

        volume_system = vshadow_volume_system.VShadowVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            return []

        # TODO: refactor to use scan options.
        if self._vss_stores:
            if self._vss_stores == 'all':
                vss_stores = volume_system.volume_identifiers
            else:
                vss_stores = self._mediator.ParseVolumeIdentifiersString(
                    self._vss_stores, prefix='vss')

            selected_volume_identifiers = self._NormalizedVolumeIdentifiers(
                volume_system, vss_stores, prefix='vss')

            if not set(selected_volume_identifiers).difference(
                    volume_identifiers):
                return selected_volume_identifiers

        if self._unattended_mode:
            return []

        try:
            volume_identifiers = self._mediator.GetVSSStoreIdentifiers(
                volume_system, volume_identifiers)

        except KeyboardInterrupt:
            raise errors.UserAbort('File system scan aborted.')

        return self._NormalizedVolumeIdentifiers(volume_system,
                                                 volume_identifiers,
                                                 prefix='vss')
Exemplo n.º 9
0
    def _GetVSSStoreIdentifiers(self, scan_node):
        """Determines the VSS store identifiers.

    Args:
      scan_node (dfvfs.SourceScanNode): scan node.

    Returns:
      list[str]: VSS store identifiers.

    Raises:
      SourceScannerError: if the format of or within the source is not
          supported or the scan node is invalid.
      UserAbort: if the user requested to abort.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError('Invalid scan node.')

        volume_system = vshadow_volume_system.VShadowVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            return []

        # TODO: refactor to use scan options.
        if self._vss_stores:
            if self._vss_stores == 'all':
                vss_stores = range(1, volume_system.number_of_volumes + 1)
            else:
                vss_stores = self._vss_stores

            selected_volume_identifiers = self._NormalizedVolumeIdentifiers(
                volume_system, vss_stores, prefix='vss')

            if not set(selected_volume_identifiers).difference(
                    volume_identifiers):
                return selected_volume_identifiers

        try:
            volume_identifiers = self._PromptUserForVSSStoreIdentifiers(
                volume_system, volume_identifiers)

        except KeyboardInterrupt:
            raise errors.UserAbort('File system scan aborted.')

        return self._NormalizedVolumeIdentifiers(volume_system,
                                                 volume_identifiers,
                                                 prefix='vss')
Exemplo n.º 10
0
    def ProcessSources(self):
        """Processes the sources.

    Raises:
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        try:
            self.ScanSource(self._source_path)
        except dfvfs_errors.UserAbort as exception:
            raise errors.UserAbort(exception)

        self._output_writer.Write('Export started.\n')

        if not os.path.isdir(self._destination_path):
            os.makedirs(self._destination_path)

        self._Extract(self._source_path_specs,
                      self._destination_path,
                      self._output_writer,
                      self._artifact_filters,
                      self._filter_file,
                      self._artifact_definitions_path,
                      self._custom_artifacts_path,
                      skip_duplicates=self._skip_duplicates)

        json_data = []

        if not self._no_hashes:
            with open(
                    os.path.join(self._destination_path,
                                 self._HASHES_FILENAME), 'w') as write_file:
                for sha256, paths in self._paths_by_hash.items():
                    json_data.append({'sha256': sha256, 'paths': paths})
                json.dump(json_data, write_file)

        self._output_writer.Write('Export completed.\n')
        self._output_writer.Write('\n')
Exemplo n.º 11
0
    def _GetTSKPartitionIdentifiers(self,
                                    scan_node,
                                    partition_string=None,
                                    partition_offset=None):
        """Determines the TSK partition identifiers.

    This method first checks for the preferred partition number, then for
    the preferred partition offset and falls back to prompt the user if
    no usable preferences were specified.

    Args:
      scan_node: the scan node (instance of dfvfs.ScanNode).
      partition_string: Optional preferred partition number string. The default
                        is None.
      partition_offset: Optional preferred partition byte offset. The default
                        is None.

    Returns:
      A list of partition identifiers.

    Raises:
      RuntimeError: if the volume for a specific identifier cannot be
                    retrieved.
      SourceScannerError: if the format of or within the source
                          is not supported or the the scan node is invalid.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError(u'Invalid scan node.')

        volume_system = tsk_volume_system.TSKVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            self._output_writer.Write(u'[WARNING] No partitions found.\n')
            return

        if partition_string == u'all':
            return volume_identifiers

        if partition_string is not None and not partition_string.startswith(
                u'p'):
            return volume_identifiers

        partition_number = None
        if partition_string:
            try:
                partition_number = int(partition_string[1:], 10)
            except ValueError:
                pass

        if partition_number is not None and partition_number > 0:
            # Plaso uses partition numbers starting with 1 while dfvfs expects
            # the volume index to start with 0.
            volume = volume_system.GetVolumeByIndex(partition_number - 1)
            if volume:
                return [u'p{0:d}'.format(partition_number)]

            self._output_writer.Write(
                u'[WARNING] No such partition: {0:d}.\n'.format(
                    partition_number))

        if partition_offset is not None:
            for volume in volume_system.volumes:
                volume_extent = volume.extents[0]
                if volume_extent.offset == partition_offset:
                    return [volume.identifier]

            self._output_writer.Write(
                (u'[WARNING] No such partition with offset: {0:d} '
                 u'(0x{0:08x}).\n').format(partition_offset))

        if len(volume_identifiers) == 1:
            return volume_identifiers

        try:
            selected_volume_identifier = self._PromptUserForPartitionIdentifier(
                volume_system, volume_identifiers)
        except KeyboardInterrupt:
            raise errors.UserAbort(u'File system scan aborted.')

        if selected_volume_identifier == u'all':
            return volume_identifiers

        return [selected_volume_identifier]
Exemplo n.º 12
0
    def _GetTSKPartitionIdentifiers(self,
                                    scan_node,
                                    partition_offset=None,
                                    partitions=None):
        """Determines the TSK partition identifiers.

    This method first checks for the preferred partition number, then for
    the preferred partition offset and falls back to prompt the user if
    no usable preferences were specified.

    Args:
      scan_node (dfvfs.SourceScanNode): scan node.
      partition_offset (Optional[int]): preferred partition byte offset.
      partitions (Optional[list[str]]): preferred partition identifiers.

    Returns:
      list[str]: partition identifiers.

    Raises:
      RuntimeError: if the volume for a specific identifier cannot be
          retrieved.
      SourceScannerError: if the format of or within the source
          is not supported or the the scan node is invalid.
    """
        if not scan_node or not scan_node.path_spec:
            raise errors.SourceScannerError('Invalid scan node.')

        volume_system = tsk_volume_system.TSKVolumeSystem()
        volume_system.Open(scan_node.path_spec)

        volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
            volume_system)
        if not volume_identifiers:
            self._output_writer.Write('[WARNING] No partitions found.\n')
            return None

        normalized_volume_identifiers = self._GetNormalizedTSKVolumeIdentifiers(
            volume_system, volume_identifiers)

        if partitions:
            if partitions == ['all']:
                partitions = range(1, volume_system.number_of_volumes + 1)

            if not set(partitions).difference(normalized_volume_identifiers):
                return [
                    'p{0:d}'.format(partition_number)
                    for partition_number in partitions
                ]

        if partition_offset is not None:
            for volume in volume_system.volumes:
                volume_extent = volume.extents[0]
                if volume_extent.offset == partition_offset:
                    return [volume.identifier]

            self._output_writer.Write(
                ('[WARNING] No such partition with offset: {0:d} '
                 '(0x{0:08x}).\n').format(partition_offset))

        if len(volume_identifiers) == 1:
            return volume_identifiers

        try:
            selected_volume_identifier = self._PromptUserForPartitionIdentifier(
                volume_system, volume_identifiers)
        except KeyboardInterrupt:
            raise errors.UserAbort('File system scan aborted.')

        if selected_volume_identifier == 'all':
            return volume_identifiers

        return [selected_volume_identifier]
Exemplo n.º 13
0
  def _GetVolumeTSKPartition(
      self, scan_context, partition_number=None, partition_offset=None):
    """Determines the volume path specification.

    Args:
      scan_context: the scan context (instance of dfvfs.ScanContext).
      partition_number: Optional preferred partition number. The default is
                        None.
      partition_offset: Optional preferred partition byte offset. The default
                        is None.

    Returns:
      The volume scan node (instance of dfvfs.SourceScanNode) or None
      if no supported partition was found.

    Raises:
      SourceScannerError: if the format of or within the source
                          is not supported or the the scan context
                          is invalid.
      RuntimeError: if the volume for a specific identifier cannot be
                    retrieved.
    """
    if (not scan_context or not scan_context.last_scan_node or
        not scan_context.last_scan_node.path_spec):
      raise errors.SourceScannerError(u'Invalid scan context.')

    volume_system = tsk_volume_system.TSKVolumeSystem()
    volume_system.Open(scan_context.last_scan_node.path_spec)

    volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
        volume_system)
    if not volume_identifiers:
      logging.info(u'No supported partitions found.')
      return

    if partition_number is not None and partition_number > 0:
      # Plaso uses partition numbers starting with 1 while dfvfs expects
      # the volume index to start with 0.
      volume = volume_system.GetVolumeByIndex(partition_number - 1)
      if volume:
        volume_location = u'/{0:s}'.format(volume.identifier)
        volume_scan_node = scan_context.last_scan_node.GetSubNodeByLocation(
            volume_location)
        if not volume_scan_node:
          raise RuntimeError(
              u'Unable to retrieve volume scan node by location: {0:s}'.format(
                  volume_location))
        return volume_scan_node

      logging.warning(u'No such partition: {0:d}.'.format(partition_number))

    if partition_offset is not None:
      for volume in volume_system.volumes:
        volume_extent = volume.extents[0]
        if volume_extent.offset == partition_offset:
          volume_location = u'/{0:s}'.format(volume.identifier)
          volume_scan_node = scan_context.last_scan_node.GetSubNodeByLocation(
              volume_location)
          if not volume_scan_node:
            raise RuntimeError((
                u'Unable to retrieve volume scan node by location: '
                u'{0:s}').format(volume_location))
          return volume_scan_node

      logging.warning(
          u'No such partition with offset: {0:d} (0x{0:08x}).'.format(
              partition_offset))

    if len(volume_identifiers) == 1:
      volume_location = u'/{0:s}'.format(volume_identifiers[0])

    else:
      try:
        selected_volume_identifier = self._GetPartitionIdentifierFromUser(
            volume_system, volume_identifiers)
      except KeyboardInterrupt:
        raise errors.UserAbort(u'File system scan aborted.')

      volume = volume_system.GetVolumeByIdentifier(selected_volume_identifier)
      if not volume:
        raise RuntimeError(
            u'Unable to retrieve volume by identifier: {0:s}'.format(
                selected_volume_identifier))

      volume_location = u'/{0:s}'.format(selected_volume_identifier)

    volume_scan_node = scan_context.last_scan_node.GetSubNodeByLocation(
        volume_location)
    if not volume_scan_node:
      raise RuntimeError(
          u'Unable to retrieve volume scan node by location: {0:s}'.format(
              volume_location))
    return volume_scan_node
Exemplo n.º 14
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid, or the storage
          format not supported, or there was a failure to writing to the
          storage.
      IOError: if the extraction engine could not write to the storage.
      OSError: if the extraction engine could not write to the storage.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        try:
            self.ScanSource(self._source_path)
        except dfvfs_errors.UserAbort as exception:
            raise errors.UserAbort(exception)

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        # TODO: attach processing configuration to session?
        session = engine.BaseEngine.CreateSession()

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        try:
            storage_writer.Open(path=self._storage_file_path)
        except IOError as exception:
            raise IOError(
                'Unable to open storage with error: {0!s}'.format(exception))

        processing_status = None

        try:
            session_start = session.CreateSessionStart()
            storage_writer.AddAttributeContainer(session_start)

            try:
                processing_status = self._ProcessSources(
                    session, storage_writer)

            finally:
                session.aborted = getattr(processing_status, 'aborted', True)

                session_completion = session.CreateSessionCompletion()
                storage_writer.AddAttributeContainer(session_completion)

        except IOError as exception:
            raise IOError(
                'Unable to write to storage with error: {0!s}'.format(
                    exception))

        finally:
            storage_writer.Close()

        self._status_view.PrintExtractionSummary(processing_status)
Exemplo n.º 15
0
  def _StartSingleThread(self, options):
    """Starts everything up in a single process.

    This should not normally be used, since running the tool in a single
    process buffers up everything into memory until the storage is called.

    Just to make it clear, this starts up the collection, completes that
    before calling the worker that extracts all EventObjects and stores
    them in memory. when that is all done, the storage function is called
    to drain the buffer. Hence the tool's excessive use of memory in this
    mode and the reason why it is not suggested to be used except for
    debugging reasons (and mostly to get into the debugger).

    This is therefore mostly useful during debugging sessions for some
    limited parsing.

    Args:
      options: the command line arguments (instance of argparse.Namespace).
    """
    self._engine = single_process.SingleProcessEngine(self._queue_size)
    self._engine.SetEnableDebugOutput(self._debug_mode)
    self._engine.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)
    self._engine.SetProcessArchiveFiles(self._process_archive_files)

    if self._filter_object:
      self._engine.SetFilterObject(self._filter_object)

    if self._mount_path:
      self._engine.SetMountPath(self._mount_path)

    if self._text_prepend:
      self._engine.SetTextPrepend(self._text_prepend)

    # TODO: add support to handle multiple partitions.
    self._engine.SetSource(
        self.GetSourcePathSpec(), resolver_context=self._resolver_context)

    logging.debug(u'Starting preprocessing.')
    pre_obj = self.PreprocessSource(options)

    logging.debug(u'Preprocessing done.')

    # TODO: make sure parsers option is not set by preprocessing.
    parser_filter_string = getattr(options, 'parsers', '')

    self._parser_names = []
    for _, parser_class in parsers_manager.ParsersManager.GetParsers(
        parser_filter_string=parser_filter_string):
      self._parser_names.append(parser_class.NAME)

    self._PreprocessSetCollectionInformation(options, pre_obj)

    if 'filestat' in self._parser_names:
      include_directory_stat = True
    else:
      include_directory_stat = False

    filter_file = getattr(options, 'file_filter', None)
    if filter_file:
      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
          filter_file, pre_obj=pre_obj)
    else:
      filter_find_specs = None

    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self._vss_stores,
        filter_find_specs=filter_find_specs,
        resolver_context=self._resolver_context)

    self._DebugPrintCollector(options)

    if self._output_module:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)
    else:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=self._storage_serializer_format)

    hasher_names_string = getattr(options, u'hashers', u'')

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')

    finally:
      self._resolver_context.Empty()
Exemplo n.º 16
0
  def _ProcessSourceMultiProcessMode(self, options):
    """Processes the source in a multiple process.

    Multiprocessing is used to start up separate processes.

    Args:
      options: the command line arguments (instance of argparse.Namespace).
    """
    # TODO: replace by an option.
    start_collection_process = True

    self._number_of_worker_processes = getattr(options, 'workers', 0)

    logging.info(u'Starting extraction in multi process mode.')

    self._engine = multi_process.MultiProcessEngine(
        maximum_number_of_queued_items=self._queue_size)

    self._engine.SetEnableDebugOutput(self._debug_mode)
    self._engine.SetEnableProfiling(
        self._enable_profiling,
        profiling_sample_rate=self._profiling_sample_rate)
    self._engine.SetProcessArchiveFiles(self._process_archive_files)

    if self._filter_object:
      self._engine.SetFilterObject(self._filter_object)

    if self._mount_path:
      self._engine.SetMountPath(self._mount_path)

    if self._text_prepend:
      self._engine.SetTextPrepend(self._text_prepend)
    # TODO: add support to handle multiple partitions.
    self._engine.SetSource(
        self.GetSourcePathSpec(), resolver_context=self._resolver_context)

    logging.debug(u'Starting preprocessing.')
    pre_obj = self.PreprocessSource(options)
    logging.debug(u'Preprocessing done.')

    # TODO: make sure parsers option is not set by preprocessing.
    parser_filter_string = getattr(options, 'parsers', '')

    self._parser_names = []
    for _, parser_class in parsers_manager.ParsersManager.GetParsers(
        parser_filter_string=parser_filter_string):
      self._parser_names.append(parser_class.NAME)

    hasher_names_string = getattr(options, u'hashers', u'')

    self._hasher_names = []
    hasher_manager = hashers_manager.HashersManager
    for hasher_name in hasher_manager.GetHasherNamesFromString(
        hasher_names_string=hasher_names_string):
      self._hasher_names.append(hasher_name)

    self._PreprocessSetCollectionInformation(options, pre_obj)

    if 'filestat' in self._parser_names:
      include_directory_stat = True
    else:
      include_directory_stat = False

    filter_file = getattr(options, 'file_filter', None)
    if filter_file:
      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
          filter_file, pre_obj=pre_obj)
    else:
      filter_find_specs = None

    if start_collection_process:
      resolver_context = context.Context()
    else:
      resolver_context = self._resolver_context

    # TODO: create multi process collector.
    self._collector = self._engine.CreateCollector(
        include_directory_stat, vss_stores=self._vss_stores,
        filter_find_specs=filter_find_specs, resolver_context=resolver_context)

    self._DebugPrintCollector(options)

    if self._output_module:
      storage_writer = storage.BypassStorageWriter(
          self._engine.storage_queue, self._storage_file_path,
          output_module_string=self._output_module, pre_obj=pre_obj)
    else:
      storage_writer = storage.StorageFileWriter(
          self._engine.storage_queue, self._storage_file_path,
          buffer_size=self._buffer_size, pre_obj=pre_obj,
          serializer_format=self._storage_serializer_format)

    try:
      self._engine.ProcessSource(
          self._collector, storage_writer,
          parser_filter_string=parser_filter_string,
          hasher_names_string=hasher_names_string,
          number_of_extraction_workers=self._number_of_worker_processes,
          have_collection_process=start_collection_process,
          have_foreman_process=self._run_foreman,
          show_memory_usage=self._show_worker_memory_information)

    except KeyboardInterrupt:
      self._CleanUpAfterAbort()
      raise errors.UserAbort(u'Process source aborted.')