Example #1
Votes: 0
File: extractors.py  Project: dfjxs/plaso
    def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0):
        """Extracts path specification from a directory.

    Args:
      file_entry (dfvfs.FileEntry): file entry that refers to the directory.
      depth (Optional[int]): current depth where 0 represents the file system
          root.

    Yields:
      dfvfs.PathSpec: path specification of a file entry found in the directory.

    Raises:
      MaximumRecursionDepth: when the maximum recursion depth is reached.
    """
        if depth >= self._MAXIMUM_DEPTH:
            raise errors.MaximumRecursionDepth(
                'Maximum recursion depth reached.')

        # Traverse breadth-first: collect directories now, descend only after
        # this level is done, so deep trees do not exhaust Python's call stack.
        pending_directories = []

        for child_entry in file_entry.sub_file_entries:
            try:
                skip_entry = (
                    not child_entry.IsAllocated() or child_entry.IsLink())
            except dfvfs_errors.BackEndError as error:
                spec_text = self._GetPathSpecificationString(
                    child_entry.path_spec)
                logger.warning(
                    'Unable to process file: {0:s} with error: {1!s}'.format(
                        spec_text.replace('\n', ';'), error))
                continue

            if skip_entry:
                continue

            # TSK exposes a virtual /$OrphanFiles directory at the file system
            # root; it is not a real directory, so skip it.
            if (child_entry.type_indicator ==
                    dfvfs_definitions.TYPE_INDICATOR_TSK and
                    file_entry.IsRoot() and
                    child_entry.name == '$OrphanFiles'):
                continue

            if child_entry.IsDirectory():
                pending_directories.append(child_entry)

            yield from self._ExtractPathSpecsFromFile(child_entry)

        for directory_entry in pending_directories:
            try:
                yield from self._ExtractPathSpecsFromDirectory(
                    directory_entry, depth=depth + 1)

            except (IOError, dfvfs_errors.AccessError,
                    dfvfs_errors.BackEndError,
                    dfvfs_errors.PathSpecError) as error:
                logger.warning('{0!s}'.format(error))
Example #2
Votes: 0
    def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0):
        """Extracts path specification from a directory.

    Args:
      file_entry (dfvfs.FileEntry): file entry that refers to the directory.
      depth (Optional[int]): current depth where 0 represents the file system
          root.

    Yields:
      dfvfs.PathSpec: path specification of a file entry found in the directory.

    Raises:
      MaximumRecursionDepth: when the maximum recursion depth is reached.
    """
        if depth >= self._MAXIMUM_DEPTH:
            raise errors.MaximumRecursionDepth(
                'Maximum recursion depth reached.')

        # Need to do a breadth-first search otherwise we'll hit the Python
        # maximum recursion depth.
        sub_directories = []

        for sub_file_entry in file_entry.sub_file_entries:
            try:
                # Skip unallocated entries and links; the back-end itself can
                # fail while answering these queries, hence the try block.
                if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink():
                    continue
            except dfvfs_errors.BackEndError as exception:
                logger.warning(
                    'Unable to process file: {0:s} with error: {1!s}'.format(
                        sub_file_entry.path_spec.comparable.replace('\n', ';'),
                        exception))
                continue

            # For TSK-based file entries only, ignore the virtual /$OrphanFiles
            # directory.
            if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
                if file_entry.IsRoot(
                ) and sub_file_entry.name == '$OrphanFiles':
                    continue

            if sub_file_entry.IsDirectory():
                sub_directories.append(sub_file_entry)

            elif sub_file_entry.IsFile():
                # When duplicate-file checking is enabled (used for VSS
                # processing), compute a hash from the entry's timestamps and
                # skip the file if the same (inode, hash) pair was seen before.
                if self._duplicate_file_check:
                    hash_value = self._CalculateNTFSTimeHash(sub_file_entry)

                    # NOTE(review): path specs without an inode attribute all
                    # share bucket 0 here — confirm that collapsing them into
                    # one bucket is intended.
                    inode = getattr(sub_file_entry.path_spec, 'inode', 0)
                    if inode in self._hashlist:
                        if hash_value in self._hashlist[inode]:
                            continue

                    # First time this (inode, hash) pair is seen; remember it.
                    self._hashlist.setdefault(inode, []).append(hash_value)

            for path_spec in self._ExtractPathSpecsFromFile(sub_file_entry):
                yield path_spec

        # Descend into the queued directories after the current level, catching
        # per-directory back-end failures so one bad directory does not abort
        # the whole traversal.
        for sub_file_entry in sub_directories:
            try:
                for path_spec in self._ExtractPathSpecsFromDirectory(
                        sub_file_entry, depth=(depth + 1)):
                    yield path_spec

            except (IOError, dfvfs_errors.AccessError,
                    dfvfs_errors.BackEndError,
                    dfvfs_errors.PathSpecError) as exception:
                logger.warning('{0!s}'.format(exception))