def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0):
  """Extracts path specification from a directory.

  Args:
    file_entry (dfvfs.FileEntry): file entry that refers to the directory.
    depth (Optional[int]): current depth where 0 represents the file system
        root.

  Yields:
    dfvfs.PathSpec: path specification of a file entry found in the
        directory.

  Raises:
    MaximumRecursionDepth: when the maximum recursion depth is reached.
  """
  if depth >= self._MAXIMUM_DEPTH:
    raise errors.MaximumRecursionDepth('Maximum recursion depth reached.')

  # Breadth-first traversal: directories found at this level are queued and
  # only recursed into after every sibling has been processed, otherwise
  # we'll hit the Python maximum recursion depth.
  pending_directories = []

  for sub_file_entry in file_entry.sub_file_entries:
    try:
      is_wanted = (
          sub_file_entry.IsAllocated() and not sub_file_entry.IsLink())
    except dfvfs_errors.BackEndError as exception:
      # The back-end could not determine allocation/link status; log the
      # offending path specification and skip the entry.
      path_spec_string = self._GetPathSpecificationString(
          sub_file_entry.path_spec)
      logger.warning(
          'Unable to process file: {0:s} with error: {1!s}'.format(
              path_spec_string.replace('\n', ';'), exception))
      continue

    if not is_wanted:
      continue

    # For TSK-based file entries only, ignore the virtual /$OrphanFiles
    # directory.
    if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
      if file_entry.IsRoot() and sub_file_entry.name == '$OrphanFiles':
        continue

    if sub_file_entry.IsDirectory():
      pending_directories.append(sub_file_entry)

    # Every entry, directories included, is handed to the file extraction
    # helper for path specifications of its own.
    yield from self._ExtractPathSpecsFromFile(sub_file_entry)

  for directory_entry in pending_directories:
    try:
      yield from self._ExtractPathSpecsFromDirectory(
          directory_entry, depth=(depth + 1))

    except (
        IOError, dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
        dfvfs_errors.PathSpecError) as exception:
      logger.warning('{0!s}'.format(exception))
def _ExtractPathSpecsFromDirectory(self, file_entry, depth=0):
  """Extracts path specification from a directory.

  Args:
    file_entry (dfvfs.FileEntry): file entry that refers to the directory.
    depth (Optional[int]): current depth where 0 represents the file system
        root.

  Yields:
    dfvfs.PathSpec: path specification of a file entry found in the
        directory.

  Raises:
    MaximumRecursionDepth: when the maximum recursion depth is reached.
  """
  if depth >= self._MAXIMUM_DEPTH:
    raise errors.MaximumRecursionDepth('Maximum recursion depth reached.')

  # Need to do a breadth-first search otherwise we'll hit the Python
  # maximum recursion depth.
  sub_directories = []

  for sub_file_entry in file_entry.sub_file_entries:
    try:
      if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink():
        continue
    except dfvfs_errors.BackEndError as exception:
      # The back-end could not determine allocation/link status; log the
      # comparable form of the path specification and skip the entry.
      logger.warning(
          'Unable to process file: {0:s} with error: {1!s}'.format(
              sub_file_entry.path_spec.comparable.replace('\n', ';'),
              exception))
      continue

    # For TSK-based file entries only, ignore the virtual /$OrphanFiles
    # directory.
    if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK:
      if file_entry.IsRoot() and sub_file_entry.name == '$OrphanFiles':
        continue

    if sub_file_entry.IsDirectory():
      sub_directories.append(sub_file_entry)

    elif sub_file_entry.IsFile():
      # If we are dealing with a VSS we want to calculate a hash
      # value based on available timestamps and compare that to previously
      # calculated hash values, and only include the file into the queue if
      # the hash does not match.
      if self._duplicate_file_check:
        hash_value = self._CalculateNTFSTimeHash(sub_file_entry)

        # NOTE(review): a missing inode falls back to 0, so entries without
        # an inode share one deduplication bucket — confirm this is intended.
        inode = getattr(sub_file_entry.path_spec, 'inode', 0)
        if hash_value in self._hashlist.get(inode, []):
          continue

        self._hashlist.setdefault(inode, []).append(hash_value)

    for path_spec in self._ExtractPathSpecsFromFile(sub_file_entry):
      yield path_spec

  for sub_file_entry in sub_directories:
    try:
      for path_spec in self._ExtractPathSpecsFromDirectory(
          sub_file_entry, depth=(depth + 1)):
        yield path_spec

    except (
        IOError, dfvfs_errors.AccessError, dfvfs_errors.BackEndError,
        dfvfs_errors.PathSpecError) as exception:
      logger.warning('{0!s}'.format(exception))