def link_file(outfile, infile, action):
    """Creates |outfile| as a link to, or a copy of, |infile|.

    The kind of link is selected by |action|. A SYMLINK request on Windows is
    served through the hardlink path, which itself degrades to a plain copy
    when the hardlink cannot be created.

    Arguments:
      outfile: Path to create. It must not already be a file.
      infile: Existing file to link to or to copy from.
      action: One of HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK or COPY.

    Returns:
      True if the requested action was carried out, False if a fallback copy
      was made instead.

    Raises:
      ValueError: |action| is not one of the known actions.
      OSError: |infile| is missing, |outfile| already exists, or a HARDLINK
          request could not be satisfied.
    """
    known_actions = (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY)
    if action not in known_actions:
        raise ValueError('Unknown mapping action %s' % action)
    if not fs.isfile(infile):
        raise OSError('%s is missing' % infile)
    if fs.isfile(outfile):
        details = (
            outfile, fs.stat(infile).st_size, fs.stat(outfile).st_size)
        raise OSError('%s already exist; insize:%d; outsize:%d' % details)

    if action == COPY:
        readable_copy(outfile, infile)
        return True

    if action == SYMLINK and sys.platform != 'win32':
        fs.symlink(infile, outfile)  # pylint: disable=E1101
        return True

    # HARDLINK, HARDLINK_WITH_FALLBACK, or SYMLINK on Windows (where symlinks
    # are converted to hardlinks and fail over to copy).
    try:
        hardlink(infile, outfile)
    except OSError as e:
        if action == HARDLINK:
            # A strict hardlink request must not silently degrade to a copy.
            raise OSError(
                'Failed to hardlink %s to %s: %s' % (infile, outfile, e))
        # Probably a different file system.
        message = 'Failed to hardlink, failing back to copy %s to %s' % (
            infile, outfile)
        logging.warning(message)
        readable_copy(outfile, infile)
        # Tell the caller that the fallback copy was used.
        return False
    return True
def link_file(outfile, infile, action):
    """Creates |outfile| as a link to, or a copy of, |infile|.

    The kind of link is selected by |action|. The *_WITH_FALLBACK actions
    degrade to a plain readable copy when the link cannot be created.

    Arguments:
      outfile: Path to create. It must not already be a file.
      infile: Existing file to link to or to copy from.
      action: Mapping action; valid values are in the range [1, COPY].

    Returns:
      True if the requested action was carried out, False if a fallback copy
      was made instead.

    Raises:
      ValueError: |action| is outside the accepted range.
      OSError: |infile| is missing, |outfile| already exists, or a strict
          SYMLINK / HARDLINK request could not be satisfied.
    """
    if not 1 <= action <= COPY:
        raise ValueError('Unknown mapping action %s' % action)
    # TODO(maruel): Skip these checks.
    if not fs.isfile(infile):
        raise OSError('%s is missing' % infile)
    if fs.isfile(outfile):
        details = (
            outfile, fs.stat(infile).st_size, fs.stat(outfile).st_size)
        raise OSError('%s already exist; insize:%d; outsize:%d' % details)

    if action == COPY:
        readable_copy(outfile, infile)
        return True

    if action in (SYMLINK, SYMLINK_WITH_FALLBACK):
        try:
            fs.symlink(infile, outfile)  # pylint: disable=E1101
        except OSError:
            if action == SYMLINK:
                # Strict symlink request: let the original error propagate.
                raise
            message = 'Failed to symlink, falling back to copy %s to %s' % (
                infile, outfile)
            logging.warning(message)
            readable_copy(outfile, infile)
            # Tell the caller that the fallback copy was used.
            return False
        else:
            return True

    # Only HARDLINK and HARDLINK_WITH_FALLBACK remain at this point.
    try:
        hardlink(infile, outfile)
    except OSError as e:
        if action == HARDLINK:
            raise OSError(
                'Failed to hardlink %s to %s: %s' % (infile, outfile, e))
        # Probably a different file system.
        message = 'Failed to hardlink, falling back to copy %s to %s' % (
            infile, outfile)
        logging.warning(message)
        readable_copy(outfile, infile)
        # Tell the caller that the fallback copy was used.
        return False
    else:
        return True
def is_same_filesystem(path1, path2):
    """Returns True if both paths are on the same filesystem.

    This is required to enable the use of hardlinks.

    Arguments:
      path1: Absolute path to an existing filesystem entry.
      path2: Absolute path to an existing filesystem entry.

    Returns:
      True when both paths report the same st_dev. On Windows, False is
      returned early when the drive letters differ.

    Raises:
      AssertionError: a path is not absolute or, on Windows, does not start
          with a drive letter followed by ':\\'.
    """
    assert os.path.isabs(path1), path1
    assert os.path.isabs(path2), path2
    if sys.platform == 'win32':
        # If the drive letter mismatches, assume it's a separate partition.
        # TODO(maruel): It should look at the underlying drive, a drive letter
        # could be a mount point to a directory on another drive.
        #
        # A plain raw string is used instead of the Python-2-only ur'' prefix;
        # the pattern is pure ASCII so it matches identically on Python 2 and 3.
        drive_pattern = r'^[a-zA-Z]\:\\.*'
        assert re.match(drive_pattern, path1), path1
        assert re.match(drive_pattern, path2), path2
        if path1[0].lower() != path2[0].lower():
            return False
    return fs.stat(path1).st_dev == fs.stat(path2).st_dev
def assertFileMode(self, filepath, mode, umask=None):
    """Asserts that |filepath|'s st_mode equals |mode| with |umask| cleared.

    Arguments:
      filepath: Path of the file whose mode is checked.
      mode: Mode the file would have before the umask is applied.
      umask: Mask to clear from |mode|; when None, test_utils.umask() is used.
    """
    if umask is None:
        umask = test_utils.umask()
    actual = fs.stat(filepath).st_mode
    expected = mode & ~umask
    # The message carries octal renderings to make a mismatch easy to read.
    failure_details = (filepath, oct(expected), oct(actual), oct(umask))
    self.assertEqual(expected, actual, failure_details)
def file_to_metadata(filepath, collapse_symlinks):
    """Builds the metadata dict describing one input file.

    Behaviors:
    - Stats the file, following symlinks only when |collapse_symlinks| is set.
    - On non-Windows platforms, records a normalized file mode for regular
      entries: group write and every 'others' bit are cleared, and group
      execute is kept only when both user execute and group read are set.
    - Records the size for non-links, or the link destination relative to the
      link's directory for symlinks.

    Arguments:
      filepath: File to act on.
      collapse_symlinks: True if symlinked files should be treated like they
                         were the normal underlying file.

    Returns:
      The necessary dict to create an entry in the 'files' section of an
      .isolated file *except* 'h' for files. Keys used are 'm' (mode), 's'
      (size) and 'l' (link destination).

    Raises:
      MappingError: the file could not be stat'ed.
    """
    # stat() follows symbolic links; lstat() reports on the link itself and
    # thus preserves it.
    stat_fn = fs.stat if collapse_symlinks else fs.lstat
    try:
        filestats = stat_fn(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(filestats.st_mode)

    out = {}
    if sys.platform != 'win32':
        # File mode is ignored on Windows since it's not really useful there.
        # Drop write access for group and all access for 'others'.
        filemode = stat.S_IMODE(filestats.st_mode)
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        # Group execute survives only when user execute and group read are
        # both present.
        required = stat.S_IXUSR | stat.S_IRGRP
        if filemode & required == required:
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = filemode

    if is_link:
        # The link could be in an incorrect path case. In practice, this only
        # happens on macOS on case insensitive HFS.
        # TODO(maruel): It'd be better if it was only done once, in
        # expand_directory_and_symlink(), so it would not be necessary to do
        # again here.
        symlink_value = fs.readlink(filepath)  # pylint: disable=no-member
        filedir = file_path.get_native_path_case(os.path.dirname(filepath))
        native_dest = file_path.fix_native_path_case(filedir, symlink_value)
        out['l'] = os.path.relpath(native_dest, filedir)
    else:
        out['s'] = filestats.st_size
    return out
def recreate_tree(outdir, indir, infiles, action, as_hash):
    """Creates a new tree with only the input files in it.

    Arguments:
      outdir: Output directory to create the files in. Must be an absolute,
          normalized path; it is created when missing.
      indir: Root directory the infiles are based in.
      infiles: dict of files to map from |indir| to |outdir|. Keys are paths
          relative to |indir|; values are metadata dicts read for their 'h'
          (hash), 's' (size) and 'l' (symlink destination) entries.
      action: One of accepted action of file_path.link_file().
      as_hash: Output filename is the hash instead of relfile.

    Raises:
      isolated_format.MappingError: in |as_hash| mode, the output file already
          exists and its metadata has no 's' entry to compare against.
    """
    logging.info(
        'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_hash=%s)',
        outdir, indir, len(infiles), action, as_hash)

    assert os.path.isabs(outdir) and outdir == os.path.normpath(outdir), outdir
    if not os.path.isdir(outdir):
        logging.info('Creating %s', outdir)
        fs.makedirs(outdir)

    # items() rather than the Python-2-only iteritems() so that the loop works
    # on both Python 2 and Python 3 dicts.
    for relfile, metadata in infiles.items():
        infile = os.path.join(indir, relfile)
        if as_hash:
            # Do the hashtable specific checks.
            if 'l' in metadata:
                # Skip links when storing a hashtable.
                continue
            outfile = os.path.join(outdir, metadata['h'])
            if os.path.isfile(outfile):
                # Just do a quick check that the file size matches. No need to
                # stat() again the input file, grab the value from the dict.
                if 's' not in metadata:
                    raise isolated_format.MappingError(
                        'Misconfigured item %s: %s' % (relfile, metadata))
                if metadata['s'] == fs.stat(outfile).st_size:
                    # Same size: assume the content is already in place.
                    continue
                logging.warning('Overwritting %s', metadata['h'])
                fs.remove(outfile)
        else:
            outfile = os.path.join(outdir, relfile)
            outsubdir = os.path.dirname(outfile)
            if not os.path.isdir(outsubdir):
                fs.makedirs(outsubdir)

        if 'l' in metadata:
            pointed = metadata['l']
            logging.debug('Symlink: %s -> %s', outfile, pointed)
            # symlink doesn't exist on Windows.
            fs.symlink(pointed, outfile)  # pylint: disable=E1101
        else:
            file_path.link_file(outfile, infile, action)
def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
    """Removes the cache file for |digest| and accounts for the freed space.

    Arguments:
      digest: Identifier of the cached item; resolved to a path via
          self._path().
      size: Size of the item. When UNKNOWN_FILE_SIZE, the file is stat'ed and
          a failing stat counts as a size of 0.

    The caller must hold self._lock. An OSError other than ENOENT is logged
    and not re-raised.
    """
    self._lock.assert_locked()
    try:
        target = self._path(digest)
        if size == UNKNOWN_FILE_SIZE:
            try:
                size = fs.stat(target).st_size
            except OSError:
                # Could not stat the file; count it as freeing nothing.
                size = 0
        file_path.try_remove(target)
        self._evicted.append(size)
        self._free_disk += size
    except OSError as e:
        if e.errno == errno.ENOENT:
            # The file is already gone; nothing worth reporting.
            return
        logging.error(
            'Error attempting to delete a file %s:\n%s' % (digest, e))
def is_valid_file(path, size):
    """Tells whether the file at |path| looks valid.

    Currently this only checks that the file exists and, when the expected
    size is known, that its size on disk matches.

    Arguments:
      path: Path of the file to check.
      size: Expected size, or UNKNOWN_FILE_SIZE to only check for existence.

    Returns:
      True if the file passes the checks, False otherwise. Failures on the
      size path are logged as warnings.
    """
    if size == UNKNOWN_FILE_SIZE:
        # Nothing to compare against; existence is all that can be verified.
        return fs.isfile(path)

    try:
        actual_size = fs.stat(path).st_size
    except OSError as e:
        logging.warning(
            'Can\'t read item %s, assuming it\'s invalid: %s',
            os.path.basename(path), e)
        return False

    if size == actual_size:
        return True
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(path), actual_size, size)
    return False
def _add(self, digest, size=UNKNOWN_FILE_SIZE):
    """Registers |digest| in the LRU cache as the newest item.

    Arguments:
      digest: Identifier of the cached item.
      size: Size of the item; when UNKNOWN_FILE_SIZE the file at
          self._path(digest) is stat'ed to find out.

    The caller must hold self._lock.
    """
    self._lock.assert_locked()
    if size == UNKNOWN_FILE_SIZE:
        size = fs.stat(self._path(digest)).st_size
    self._added.append(size)
    self._lru.add(digest, size)
    self._free_disk -= size

    # Do a quicker version of self._trim(). It only enforces free disk space,
    # not cache size limits. It doesn't actually look at real free disk space,
    # only uses its cache values. self._trim() will be called later to enforce
    # real trimming but doing this quick version here makes it possible to map
    # an isolated that is larger than the current amount of free disk space
    # when the cache size is already large.
    while True:
        min_free_space = self.policies.min_free_space
        if not min_free_space or not self._lru:
            break
        if self._free_disk >= min_free_space:
            break
        if self._remove_lru_file(False) == -1:
            # -1 stops the eviction loop.
            break
def readable_copy(outfile, infile):
    """Copies |infile| to |outfile| and makes the copy readable by everyone.

    Arguments:
      outfile: Destination path of the copy.
      infile: Source file to copy.
    """
    fs.copy2(infile, outfile)
    # Add the read bit for user, group and others on top of the copied mode.
    current_mode = fs.stat(outfile).st_mode
    fs.chmod(
        outfile,
        current_mode | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
def try_remove(p):
    """Credits the size of |p| to self._free_disk, then removes it.

    |self| and |old_try_remove| are free variables taken from the enclosing
    scope. The result of old_try_remove(p) is returned unchanged.
    """
    try:
        reclaimed = fs.stat(p).st_size
        self._free_disk += reclaimed
    except OSError:
        # The size could not be read; leave the free disk counter untouched.
        pass
    return old_try_remove(p)
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Builds the metadata dict describing one input file.

    Behaviors:
    - Stats the file, following symlinks only when |collapse_symlinks| is set.
    - On non-Windows platforms, records a normalized file mode for regular
      entries.
    - Records the rounded modification time, then either the size and content
      hash (regular entries) or the link destination (symlinks), reusing the
      values from |prevdict| when the file looks unchanged.

    Arguments:
      filepath: File to act on.
      prevdict: the previous dictionary. It is used to retrieve the cached
                hash (or link destination) to skip recalculating it.
      read_only: If 1 or 2, the user write bit is also removed from the file
                 mode. In practice, only save one of 4 modes: 0755 (rwx),
                 0644 (rw), 0555 (rx), 0444 (r). On windows, mode is not set
                 since all files are 'executable' by default.
      algo: Hashing algorithm used.
      collapse_symlinks: True if symlinked files should be treated like they
                         were the normal underlying file.

    Returns:
      The necessary dict to create an entry in the 'files' section of an
      .isolated file. Keys used are 'm' (mode), 't' (timestamp), 's' (size),
      'h' (hash) and 'l' (link destination).

    Raises:
      MappingError: the file could not be stat'ed.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only

    # Always check the file stat and check if it is a link. The timestamp is
    # used to know if the file's content/symlink destination should be looked
    # into, e.g. only reuse from prevdict if the timestamp hasn't changed.
    # There is the risk of the file's timestamp being reset to its last value
    # manually while its content changed. That use case is not protected
    # against.
    #
    # stat() follows symbolic links; lstat() reports on the link itself and
    # thus preserves it.
    stat_fn = fs.stat if collapse_symlinks else fs.lstat
    try:
        filestats = stat_fn(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(filestats.st_mode)

    out = {}
    if sys.platform != 'win32':
        # File mode is ignored on Windows since it's not really useful there.
        # Drop write access for group and all access for 'others'.
        filemode = stat.S_IMODE(filestats.st_mode)
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            filemode &= ~stat.S_IWUSR
        # Group execute survives only when user execute and group read are
        # both present.
        required = stat.S_IXUSR | stat.S_IRGRP
        if filemode & required == required:
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = filemode

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    mtime = int(round(filestats.st_mtime))
    out['t'] = mtime
    same_mtime = prevdict.get('t') == mtime

    if is_link:
        # If the timestamp wasn't updated, carry on the link destination when
        # the previous run recorded one.
        destination = prevdict.get('l') if same_mtime else None
        if destination is None:
            # The link could be in an incorrect path case. In practice, this
            # only happen on OSX on case insensitive HFS.
            # TODO(maruel): It'd be better if it was only done once, in
            # expand_directory_and_symlink(), so it would not be necessary to
            # do again here.
            symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
            filedir = file_path.get_native_path_case(
                os.path.dirname(filepath))
            native_dest = file_path.fix_native_path_case(
                filedir, symlink_value)
            destination = os.path.relpath(native_dest, filedir)
        out['l'] = destination
        return out

    size = filestats.st_size
    out['s'] = size
    # If the timestamp wasn't updated and the file size is still the same,
    # carry on the hash when the previous run recorded one.
    digest = None
    if same_mtime and prevdict.get('s') == size:
        digest = prevdict.get('h')
    out['h'] = digest if digest else hash_file(filepath, algo)
    return out