def file_to_metadata(filepath, collapse_symlinks):
    """Collects the metadata describing one dependency file.

    The path is stat'ed (following symlinks or not, depending on
    collapse_symlinks) and the result is turned into the dict used for an
    entry of the 'files' section of an .isolated file.

    Arguments:
      filepath: File to act on.
      collapse_symlinks: True if symlinked files should be treated like they
          were the normal underlying file.

    Returns:
      A dict holding:
        - 'l': for a symlink, the link destination relative to the directory
          containing the link. No other key is set for symlinks.
        - 'm': for a non-link, the normalized permission bits. Not set on
          Windows.
        - 's': for a non-link, the size in bytes.
      The content hash ('h') is *not* computed here.

    Raises:
      MappingError: if the path cannot be stat'ed.
    """
    # fs.stat follows symbolic links; fs.lstat does not and thus preserves
    # them.
    stat_fn = fs.stat if collapse_symlinks else fs.lstat
    try:
        info = stat_fn(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)

    if stat.S_ISLNK(info.st_mode):
        # The link could be in an incorrect path case. In practice, this only
        # happens on macOS on case insensitive HFS.
        # TODO(maruel): It'd be better if it was only done once, in
        # expand_directory_and_symlink(), so it would not be necessary to do
        # again here.
        link_target = fs.readlink(filepath)  # pylint: disable=no-member
        parent = file_path.get_native_path_case(os.path.dirname(filepath))
        resolved = file_path.fix_native_path_case(parent, link_target)
        return {'l': os.path.relpath(resolved, parent)}

    metadata = {}
    if sys.platform != 'win32':
        # The file mode is ignored on Windows since it's not really useful
        # there.
        # Remove write access for group and all access to 'others'.
        perms = stat.S_IMODE(info.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
        # Only keep the group x bit if both the user x bit and the group read
        # bit are set.
        needed = stat.S_IXUSR | stat.S_IRGRP
        if perms & needed == needed:
            perms |= stat.S_IXGRP
        else:
            perms &= ~stat.S_IXGRP
        metadata['m'] = perms
    metadata['s'] = info.st_size
    return metadata
def get_recursive_size(path):
    """Returns the total data size for the specified path.

    Every file name reported by fs.walk() is lstat'ed and the sizes are added
    up.

    Returns:
      The total size in bytes, or -1 if walking or stat'ing failed.
    """
    try:
        return sum(
            fs.lstat(os.path.join(dirpath, name)).st_size
            for dirpath, _, filenames in fs.walk(path)
            for name in filenames)
    except (IOError, OSError, UnicodeEncodeError) as exc:
        logging.warning(
            'Exception while getting the size of %s:\n%s', path, exc)
        # A negative number makes it clear that something is wrong.
        return -1
def _get_recursive_size(path): """Returns the total data size for the specified path. This function can be surprisingly slow on OSX, so its output should be cached. """ try: total = 0 for root, _, files in fs.walk(path): for f in files: total += fs.lstat(os.path.join(root, f)).st_size return total except (IOError, OSError, UnicodeEncodeError) as exc: logging.warning('Exception while getting the size of %s:\n%s', path, exc) return None
def _get_recursive_size(path):
    """Returns the total data size for the specified path.

    Symlinks are never counted. When scandir is in use, symlinked entries and
    (on Windows) reparse points are neither counted nor descended into.

    This function can be surprisingly slow on OSX, so its output should be
    cached.

    Returns:
      The total size in bytes, or None if the traversal failed.
    """
    try:
        if not _use_scandir():
            # Fallback: walk the tree and lstat() each file, skipping
            # symlinks.
            size = 0
            for dirpath, _, filenames in fs.walk(path):
                for name in filenames:
                    info = fs.lstat(os.path.join(dirpath, name))
                    if not stat.S_ISLNK(info.st_mode):
                        size += info.st_size
            return size

        # Both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
        # windows-only symbols, so the junction test only runs there.
        on_windows = sys.platform == 'win32'
        size = 0
        pending = [path]
        while pending:
            for entry in scandir.scandir(pending.pop()):
                if entry.is_symlink():
                    continue
                if on_windows and (
                        entry.stat().st_file_attributes &
                        scandir.FILE_ATTRIBUTE_REPARSE_POINT):
                    continue
                if entry.is_file():
                    size += entry.stat().st_size
                elif entry.is_dir():
                    pending.append(entry.path)
                else:
                    logging.warning('non directory/file entry: %s', entry)
        return size
    except (IOError, OSError, UnicodeEncodeError) as exc:
        logging.warning(
            'Exception while getting the size of %s:\n%s', path, exc)
        return None
def set_read_only(path, read_only):
    """Sets or resets the write bit on a file or directory.

    When read_only is true, the mode is reduced to the owner's read and
    execute bits (0500), which zaps out all access for 'group' and 'others'.
    Otherwise the owner's write bit (0200) is added to the current mode.

    A symlink is only modified when os.lchmod() exists; without it the link is
    left alone so that its target is never modified by accident.

    Arguments:
      path: File, directory or symlink to act on.
      read_only: True to remove write access, False to grant it to the owner.
    """
    original_mode = fs.lstat(path).st_mode
    # Must be determined before masking: the permission masks below strip the
    # file type bits that stat.S_ISLNK() inspects.
    is_link = stat.S_ISLNK(original_mode)
    # TODO(maruel): Stop removing GO bits.
    # The 0o prefix is valid on both Python 2.6+ and Python 3.
    mode = (original_mode & 0o500) if read_only else (original_mode | 0o200)
    if hasattr(os, 'lchmod'):
        fs.lchmod(path, mode)  # pylint: disable=E1101
        return
    if is_link:
        # Skip symlink without lchmod() support.
        logging.debug(
            'Can\'t change %sw bit on symlink %s',
            '-' if read_only else '+', path)
        return
    # TODO(maruel): Implement proper DACL modification on Windows.
    fs.chmod(path, mode)
def set_read_only(path, read_only):
    """Sets or resets the write bit on a file or directory.

    In read-only mode only the owner's read and execute bits (0500) survive,
    so 'group' and 'others' lose all access. In writable mode the owner's
    write bit (0200) is added and every other bit is kept.

    Symlinks are changed through os.lchmod() where it exists. Where it does
    not, symlinks are skipped rather than chmod'ed, since chmod would follow
    the link and alter its target.

    Arguments:
      path: File, directory or symlink to act on.
      read_only: True to remove write access, False to grant it to the owner.
    """
    current = fs.lstat(path).st_mode
    # TODO(maruel): Stop removing GO bits.
    if read_only:
        # 0o500 (rather than the Python 2-only 0500 spelling) parses on both
        # Python 2.6+ and Python 3.
        mode = current & 0o500
    else:
        mode = current | 0o200

    if hasattr(os, 'lchmod'):
        fs.lchmod(path, mode)  # pylint: disable=E1101
    elif stat.S_ISLNK(current):
        # Skip symlink without lchmod() support. The test uses the unmasked
        # mode because `mode` no longer carries the file type bits in the
        # read-only case.
        logging.debug(
            'Can\'t change %sw bit on symlink %s',
            '-' if read_only else '+', path)
    else:
        # TODO(maruel): Implement proper DACL modification on Windows.
        fs.chmod(path, mode)
def set_read_only(path, read_only):
    """Sets or resets the write bit on a file or directory.

    In read-only mode the mode is reduced to the owner's read and execute bits
    (0500), which zaps out access to 'group' and 'others'. In writable mode
    the owner's read and write bits (0600) are added; outside Windows,
    directories additionally get the owner's execute bit (0100).

    Symlinks are changed through os.lchmod() where it exists and are left
    untouched otherwise, so that the link target is never modified.

    Arguments:
      path: File, directory or symlink to act on.
      read_only: True to remove write access, False to grant it to the owner.
    """
    mode = fs.lstat(path).st_mode
    # Remember the file type now: the read-only mask below clears the type
    # bits, after which stat.S_ISLNK() could never match.
    is_link = stat.S_ISLNK(mode)
    # TODO(maruel): Stop removing GO bits.
    if read_only:
        mode &= stat.S_IRUSR | stat.S_IXUSR  # 0500
    else:
        mode |= stat.S_IRUSR | stat.S_IWUSR  # 0600
        if sys.platform != 'win32' and stat.S_ISDIR(mode):
            mode |= stat.S_IXUSR  # 0100
    if hasattr(os, 'lchmod'):
        fs.lchmod(path, mode)  # pylint: disable=E1101
        return
    if is_link:
        # Skip symlink without lchmod() support.
        return
    # TODO(maruel): Implement proper DACL modification on Windows.
    fs.chmod(path, mode)
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Processes an input file, a dependency, and returns metadata about it.

    Behaviors:
    - Retrieves the file mode, file size and file timestamp, plus the link
      destination for a symlink or the hash of the content for a non-link.
    - Reuses the hash or link destination from prevdict when the timestamp
      (and, for files, the size) did not change.

    Arguments:
      filepath: File to act on.
      prevdict: The previous dictionary for this file. It is used to retrieve
          the cached hash or link destination to skip recalculating them.
          Optional: None is treated like an empty dictionary.
      read_only: One of None, 0, 1 or 2. Any true value removes the owner's
          write bit from the recorded mode. On Windows the mode is not
          recorded at all.
      algo: Hashing algorithm, passed through to hash_file().
      collapse_symlinks: True if symlinked files should be treated like they
          were the normal underlying file.

    Returns:
      The necessary dict to create an entry in the 'files' section of an
      .isolated file: 't' always; 'm' (except on Windows), 's' and 'h' for
      non-links; 'l' for symlinks.

    Raises:
      MappingError: if the path cannot be stat'ed.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only
    # prevdict is documented as optional; without this, None would fail on the
    # .get() calls below.
    if prevdict is None:
        prevdict = {}
    out = {}
    # Always check the file stat and check if it is a link. The timestamp is
    # used to know if the file's content/symlink destination should be looked
    # into. E.g. only reuse from prevdict if the timestamp hasn't changed.
    # There is the risk of the file's timestamp being reset to its last value
    # manually while its content changed. We don't protect against that use
    # case.
    try:
        if collapse_symlinks:
            # os.stat follows symbolic links.
            filestats = fs.stat(filepath)
        else:
            # os.lstat does not follow symbolic links, and thus preserves
            # them.
            filestats = fs.lstat(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(filestats.st_mode)

    if sys.platform != 'win32':
        # Ignore file mode on Windows since it's not really useful there.
        filemode = stat.S_IMODE(filestats.st_mode)
        # Remove write access for group and all access to 'others'.
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            filemode &= ~stat.S_IWUSR
        exec_and_group_read = stat.S_IXUSR | stat.S_IRGRP
        if filemode & exec_and_group_read == exec_and_group_read:
            # Only keep x group bit if both x user bit and group read bit are
            # set.
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = filemode

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    out['t'] = int(round(filestats.st_mtime))

    if not is_link:
        out['s'] = filestats.st_size
        # If the timestamp wasn't updated and the file size is still the same,
        # carry on the hash.
        if prevdict.get('t') == out['t'] and prevdict.get('s') == out['s']:
            # Reuse the previous hash if available.
            out['h'] = prevdict.get('h')
        if not out.get('h'):
            out['h'] = hash_file(filepath, algo)
    else:
        # If the timestamp wasn't updated, carry on the link destination.
        if prevdict.get('t') == out['t']:
            # Reuse the previous link destination if available.
            out['l'] = prevdict.get('l')
        if out.get('l') is None:
            # The link could be in an incorrect path case. In practice, this
            # only happens on OSX on case insensitive HFS.
            # TODO(maruel): It'd be better if it was only done once, in
            # expand_directory_and_symlink(), so it would not be necessary to
            # do again here.
            symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
            filedir = file_path.get_native_path_case(
                os.path.dirname(filepath))
            native_dest = file_path.fix_native_path_case(
                filedir, symlink_value)
            out['l'] = os.path.relpath(native_dest, filedir)
    return out