def file_to_metadata(filepath, collapse_symlinks):
    """Builds the metadata entry describing a single dependency file.

    For a regular file the entry carries the normalized permission bits (never
    on Windows) and the file size. For a symlink it carries the link
    destination, expressed relative to the directory holding the link. No
    content hash and no timestamp are produced here.

    Arguments:
      filepath: File to act on.
      collapse_symlinks: True if symlinked files should be treated like they
          were the normal underlying file, i.e. the path is stat'ed through the
          link instead of describing the link itself.

    Returns:
      The dict needed to create an entry in the 'files' section of an .isolated
      file, *except* 'h' for files. Possible keys are 'm' (mode), 's' (size)
      and 'l' (link destination).

    Raises:
      MappingError: if stat'ing filepath fails with OSError.
    """
    # stat follows symbolic links; lstat does not and thus preserves them.
    stat_fn = fs.stat if collapse_symlinks else fs.lstat
    try:
        file_stat = stat_fn(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)

    entry = {}
    if stat.S_ISLNK(file_stat.st_mode):
        # The link could be in an incorrect path case. In practice, this only
        # happens on macOS on case insensitive HFS.
        # TODO(maruel): It'd be better if it was only done once, in
        # expand_directory_and_symlink(), so it would not be necessary to do
        # again here.
        link_value = fs.readlink(filepath)  # pylint: disable=no-member
        link_dir = file_path.get_native_path_case(os.path.dirname(filepath))
        fixed_dest = file_path.fix_native_path_case(link_dir, link_value)
        entry['l'] = os.path.relpath(fixed_dest, link_dir)
        return entry

    if sys.platform != 'win32':
        # The file mode is ignored on Windows since it's not really useful
        # there. Elsewhere, drop write access for group and every access for
        # 'others'.
        perms = stat.S_IMODE(file_stat.st_mode)
        perms &= ~(stat.S_IWGRP | stat.S_IRWXO)
        # The group x bit is kept only if both the user x bit and the group
        # read bit are set.
        required = stat.S_IXUSR | stat.S_IRGRP
        if (perms & required) == required:
            perms |= stat.S_IXGRP
        else:
            perms &= ~stat.S_IXGRP
        entry['m'] = perms

    entry['s'] = file_stat.st_size
    return entry
def file_to_metadata(filepath, prevdict, read_only, algo):
    """Builds the metadata entry for one dependency, reusing cached values.

    The file is lstat'ed, so a symlink is described as a link and never
    followed. Depending on the kind of file the entry carries the mode, the
    timestamp, the size and the content hash, or the timestamp and the link
    destination.

    Arguments:
      filepath: File to act on.
      prevdict: The entry computed on a previous run. Its 'h' (hash) or 'l'
          (link destination) is reused when the recorded 't' (and, for files,
          's') still match. Must be a dict; pass an empty one when there is no
          previous entry.
      read_only: One of None, 0, 1 or 2. Any truthy value strips the user write
          bit from the stored mode. In practice only one of 4 modes is saved:
          0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On Windows the mode is
          not set at all.
      algo: Hashing algorithm, forwarded as-is to hash_file().

    Returns:
      The dict needed to create an entry in the 'files' section of an .isolated
      file: 'm' (non-Windows only), 't', 's' and 'h' for a file; 't' and 'l'
      for a symlink.

    Raises:
      MappingError: if lstat'ing filepath fails with OSError.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only

    # The timestamp decides whether the content/symlink destination has to be
    # looked into again: cached values are only trusted when it is unchanged.
    # A timestamp manually reset to its previous value after a content change
    # is not detected; that use case is deliberately not protected against.
    try:
        file_stat = os.lstat(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(file_stat.st_mode)

    entry = {}
    if sys.platform != 'win32' and not is_link:
        # The file mode is ignored on Windows since it's not really useful
        # there. Elsewhere, drop write access for group and every access for
        # 'others'.
        perms = stat.S_IMODE(file_stat.st_mode)
        perms &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            perms &= ~stat.S_IWUSR
        # The group x bit is kept only if both the user x bit and the group
        # read bit are set.
        required = stat.S_IXUSR | stat.S_IRGRP
        if (perms & required) == required:
            perms |= stat.S_IXGRP
        else:
            perms &= ~stat.S_IXGRP
        entry['m'] = perms

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    mtime = int(round(file_stat.st_mtime))
    entry['t'] = mtime

    if not is_link:
        size = file_stat.st_size
        entry['s'] = size
        # Carry the previous hash over only when neither the timestamp nor the
        # size moved; otherwise (or when no hash was cached) hash the file.
        cached_hash = None
        if prevdict.get('t') == mtime and prevdict.get('s') == size:
            cached_hash = prevdict.get('h')
        entry['h'] = cached_hash or hash_file(filepath, algo)
        return entry

    # Symlink: carry the previous destination over when the timestamp is
    # unchanged and a destination was actually recorded.
    link_dest = None
    if prevdict.get('t') == mtime:
        link_dest = prevdict.get('l')
    if link_dest is None:
        # The link could be in an incorrect path case. In practice, this only
        # happen on OSX on case insensitive HFS.
        # TODO(maruel): It'd be better if it was only done once, in
        # expand_directory_and_symlink(), so it would not be necessary to do
        # again here.
        link_value = os.readlink(filepath)  # pylint: disable=E1101
        link_dir = file_path.get_native_path_case(os.path.dirname(filepath))
        fixed_dest = file_path.fix_native_path_case(link_dir, link_value)
        link_dest = os.path.relpath(fixed_dest, link_dir)
    entry['l'] = link_dest
    return entry
def expand_symlinks(indir, relfile):
    """Resolves the symlinks found along |relfile|, relative to |indir|.

    Symlinks whose target lies outside of |indir| are not expanded: they are
    walked through as if they were ordinary directories/files. This rule exists
    for the benefit of the Chromium OS ebuild, which symlinks the output
    directory to an unrelated path in the chroot.

    Arguments:
      indir: Root directory the paths are relative to.
      relfile: Path relative to |indir|. A trailing path separator marks it as
          a directory and is preserved on the returned path.

    Returns:
      A tuple (relfile, symlinks): the final path relative to |indir| and the
      list of paths, relative to |indir|, of the symlinks that were expanded on
      the way.

    Raises:
      MappingError: if a symlink target doesn't exist, or if a recursive
          symlink reference is detected. Directory loops are not supported,
          although in theory they could be.
    """
    wants_trailing_sep = relfile.endswith(os.path.sep)
    resolved = indir
    remaining = relfile.strip(os.path.sep)
    found_links = []

    while remaining:
        before, link_name, after = file_path.split_at_symlink(
            resolved, remaining)
        if not link_name:
            # Nothing left to expand: fix the case of the tail and finish.
            remaining = file_path.fix_native_path_case(resolved, remaining)
            resolved = os.path.join(resolved, remaining)
            break

        link_path = os.path.join(resolved, before, link_name)
        after = after.lstrip(os.path.sep)
        # readlink doesn't exist on Windows.
        # pylint: disable=E1101
        link_dir = os.path.normpath(os.path.join(resolved, before))
        raw_dest = os.readlink(link_path)
        if os.path.isabs(raw_dest):
            # Absolute path are considered a normal directories. The use case
            # is generally someone who puts the output directory on a separate
            # drive.
            dest = raw_dest
        else:
            # The symlink itself could be using the wrong path case.
            dest = file_path.fix_native_path_case(link_dir, raw_dest)

        if not os.path.exists(dest):
            raise MappingError(
                'Symlink target doesn\'t exist: %s -> %s' % (link_path, dest))
        dest = file_path.get_native_path_case(dest)

        if not file_path.path_starts_with(indir, dest):
            # Target outside of the tree: keep the link in the path and go on
            # with whatever follows it.
            resolved = link_path
            remaining = after
            continue

        if file_path.path_starts_with(dest, link_path):
            raise MappingError(
                'Can\'t map recursive symlink reference %s -> %s' %
                (link_path, dest))

        logging.info('Found symlink: %s -> %s', link_path, dest)
        found_links.append(os.path.relpath(link_path, indir))

        # Treat the common prefix of the old and new paths as done, and start
        # scanning again from there.
        dest_parts = dest.split(os.path.sep)
        link_parts = link_path.split(os.path.sep)
        shared = 0
        for dest_piece, link_piece in zip(dest_parts, link_parts):
            if dest_piece != link_piece:
                break
            shared += 1
        resolved = os.path.sep.join(dest_parts[:shared])
        remaining = os.path.join(
            os.path.sep.join(dest_parts[shared:]), after)

    result = os.path.relpath(resolved, indir).rstrip(os.path.sep)
    if wants_trailing_sep:
        result += os.path.sep
    return result, found_links