def test_clean_cache(self):
    # Starting from an empty cache: install two named caches, fill them,
    # uninstall them, then verify the on-disk layout and the named symlinks.
    cache = self.get_cache(_get_policies())
    dest_dir = os.path.join(self.tempdir, 'dest')
    self.assertEqual([], fs.listdir(cache.cache_dir))

    a_path = os.path.join(dest_dir, u'a')
    b_path = os.path.join(dest_dir, u'b')
    # Nothing is cached yet, so each install reports 0.
    for dest, name in ((a_path, u'1'), (b_path, u'2')):
        self.assertEqual(0, cache.install(dest, name))

    named_root = os.path.join(cache.cache_dir, cache.NAMED_DIR)
    self.assertEqual(False, fs.exists(named_root))
    self.assertEqual({u'a', u'b'}, set(fs.listdir(dest_dir)))
    self.assertFalse(cache.available)
    self.assertEqual([cache.STATE_FILE], fs.listdir(cache.cache_dir))

    # Put one file in each installed directory before handing them back.
    write_file(os.path.join(a_path, u'x'), u'x')
    write_file(os.path.join(b_path, u'y'), u'y')
    for dest, name in ((a_path, u'1'), (b_path, u'2')):
        self.assertEqual(1, cache.uninstall(dest, name))

    self.assertEqual(4, len(fs.listdir(cache.cache_dir)))

    # The content written above must be found under the cache's own
    # directory for each name.
    for name, filename, content in (('1', u'x', 'x'), ('2', u'y', 'y')):
        stored = os.path.join(cache.cache_dir, cache._lru[name][0])
        self.assertEqual(content, read_file(os.path.join(stored, filename)))

    # Each named entry is a relative symlink to the cache's own directory.
    for name in ('1', '2'):
        self.assertEqual(
            os.path.join(u'..', cache._lru[name][0]),
            fs.readlink(cache._get_named_path(name)))

    self.assertEqual([u'1', u'2'], sorted(fs.listdir(named_root)))
def test_symlink_missing_destination_abs(self):
    # Creating a symlink whose absolute target does not exist must work, and
    # the stored target must be readable afterwards.
    target = os.path.join(self.tempdir, 'file')  # Intentionally not created.
    link = os.path.join(self.tempdir, 'lf')

    fs.symlink(target, link)

    self.assertEqual(True, fs.islink(link))
    self.assertEqual(target, fs.readlink(link))
def test_symlink_absolute(self):
    # Symlinks pointing at absolute paths are valid. Layout under tempdir:
    #   /dir
    #   /dir/file
    #   /ld -> /dir
    #   /lf -> /ld/file
    root = self.tempdir
    dirpath = os.path.join(root, 'dir')
    filepath = os.path.join(dirpath, 'file')
    linkdir = os.path.join(root, 'ld')
    linkfile = os.path.join(root, 'lf')
    dstfile = os.path.join(linkdir, 'file')

    fs.mkdir(dirpath)
    write_content(filepath, b'hello')
    # The file link is created before the directory link it goes through.
    fs.symlink(dstfile, linkfile)
    fs.symlink(dirpath, linkdir)

    for link in (linkfile, linkdir):
        self.assertEqual(True, fs.islink(link))
    self.assertEqual(dstfile, fs.readlink(linkfile))
    self.assertEqual(dirpath, fs.readlink(linkdir))
    self.assertEqual(['file'], fs.listdir(linkdir))

    # Opening /lf goes through /ld and ends up reading /dir/file.
    with fs.open(linkfile) as f:
        content = f.read()
    self.assertEqual('hello', content)

    def walk_sorted(follow):
        # Normalizes fs.walk() output so that it is order independent.
        return [
            (top, sorted(dirs), sorted(files))
            for top, dirs, files in sorted(
                fs.walk(root, followlinks=follow))
        ]

    # walk() must honor followlinks: 'ld' is only descended into when True.
    not_followed = [
        (root, ['dir', 'ld'], ['lf']),
        (dirpath, [], ['file']),
    ]
    self.assertEqual(not_followed, walk_sorted(False))

    followed = [
        (root, ['dir', 'ld'], ['lf']),
        (dirpath, [], ['file']),
        (linkdir, [], ['file']),
    ]
    self.assertEqual(followed, walk_sorted(True))
def file_to_metadata(filepath, collapse_symlinks):
    """Builds the metadata entry describing one input file.

    From a single stat call, the following keys are produced:
    - 'm': the sanitized permission bits; only for non-symlinks and never on
      Windows.
    - 's': the file size; only for non-symlinks.
    - 'l': the link destination, relative to the directory containing the
      link; only for symlinks.

    The content hash is not computed here.

    Arguments:
      filepath: File to act on.
      collapse_symlinks: True if symlinked files should be treated like they
          were the normal underlying file.

    Returns:
      The necessary dict to create an entry in the 'files' section of an
      .isolated file *except* 'h' for files.

    Raises:
      MappingError: if stat'ing filepath fails with OSError.
    """
    # fs.stat resolves symbolic links; fs.lstat describes the link itself and
    # thus preserves it.
    stat_fn = fs.stat if collapse_symlinks else fs.lstat
    try:
        st = stat_fn(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)

    out = {}
    is_link = stat.S_ISLNK(st.st_mode)

    if sys.platform != 'win32':
        # The file mode is ignored on Windows since it's not really useful
        # there. Elsewhere, drop group write and everything for 'others'.
        mode = stat.S_IMODE(st.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
        # Group execute is kept only when both user execute and group read
        # are set.
        needed = stat.S_IXUSR | stat.S_IRGRP
        if mode & needed == needed:
            mode |= stat.S_IXGRP
        else:
            mode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = mode

    if not is_link:
        out['s'] = st.st_size
        return out

    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    link_value = fs.readlink(filepath)  # pylint: disable=no-member
    link_dir = file_path.get_native_path_case(os.path.dirname(filepath))
    resolved = file_path.fix_native_path_case(link_dir, link_value)
    out['l'] = os.path.relpath(resolved, link_dir)
    return out
def test_existing_cache(self):
    # Ensures that the code does what is expected under normal use, i.e. when
    # one of the named caches already exists before being installed.
    cache = self.get_cache(_get_policies())
    dest_dir = os.path.join(self.tempdir, 'dest')
    a_path = os.path.join(dest_dir, u'a')
    b_path = os.path.join(dest_dir, u'b')
    a_x = os.path.join(dest_dir, u'a', u'x')

    # Setup; relies on the behavior covered by test_clean_cache: create
    # cache '1' with one file in it and hand it back.
    self.assertEqual(0, cache.install(a_path, u'1'))
    write_file(a_x, u'x')
    self.assertEqual(1, cache.uninstall(a_path, u'1'))

    # Test starts here. Cache '1' exists (1 is reported), cache '2' is new.
    self.assertEqual(1, cache.install(a_path, u'1'))
    self.assertEqual(0, cache.install(b_path, u'2'))

    self.assertEqual({'a', 'b'}, set(fs.listdir(dest_dir)))
    self.assertFalse(cache.available)
    self.assertEqual(
        sorted([cache.NAMED_DIR, cache.STATE_FILE]),
        sorted(fs.listdir(cache.cache_dir)))
    named_root = os.path.join(cache.cache_dir, cache.NAMED_DIR)
    self.assertEqual([], fs.listdir(named_root))

    # The file from the setup phase came back with the installed cache.
    self.assertEqual('x', read_file(a_x))

    write_file(os.path.join(a_path, 'x'), 'x2')
    write_file(os.path.join(b_path, 'y'), 'y')

    self.assertEqual(2, cache.uninstall(a_path, '1'))
    self.assertEqual(1, cache.uninstall(b_path, '2'))

    self.assertEqual(4, len(fs.listdir(cache.cache_dir)))

    # The latest content must be found under the cache's own directory.
    for name, filename, content in (('1', 'x', 'x2'), ('2', 'y', 'y')):
        stored = os.path.join(cache.cache_dir, cache._lru[name][0])
        self.assertEqual(content, read_file(os.path.join(stored, filename)))

    # Each named entry is a relative symlink to the cache's own directory.
    for name in ('1', '2'):
        self.assertEqual(
            os.path.join(u'..', cache._lru[name][0]),
            fs.readlink(cache._get_named_path(name)))

    self.assertEqual([u'1', u'2'], sorted(fs.listdir(named_root)))
def _prepare_named_cache(self, cache):
    """Prepares the cache and returns the short name backing each entry.

    Calls self._prepare_cache(), then reads the symlinks named 1 to 10 in the
    cache's NAMED_DIR to learn the short name each one points to. The mapping
    is checked with self._verify_named_cache() before being returned.

    Returns:
      dict mapping each integer in [1, 10] to the basename of the target of
      the corresponding named symlink.
    """
    self._prepare_cache(cache)

    items = range(1, 11)
    named_root = os.path.join(cache.cache_dir, cache.NAMED_DIR)
    short_names = {}
    for n in items:
        # Figure out the short name via the symlink.
        link_target = fs.readlink(os.path.join(named_root, unicode(n)))
        short_names[n] = os.path.basename(link_target)

    self._verify_named_cache(cache, short_names, items)
    return short_names
def copy_recursively(src, dst):
    """Efficiently copies a file or directory from src to dst.

    `src` may be a file, a directory, or a symlink to a file or directory.
    All symlinks are replaced with their targets, so the resulting directory
    structure under dst will never have any symlinks.

    To increase speed, copy_recursively hardlinks individual files into the
    (newly created) directory structure if possible, unlike Python's
    shutil.copytree().

    Errors are not propagated: OSError is caught and logged, and a symlink
    chain that does not terminate is logged and skipped.

    Arguments:
      src: path to copy from.
      dst: path to copy to; missing directories are created.
    """
    # Upper bound on the number of symlinks followed for a single path, so a
    # cycle (a -> b -> a) cannot spin forever. 40 mirrors the limit the Linux
    # kernel applies during path resolution before failing with ELOOP.
    max_hops = 40

    orig_src = src
    try:
        # Replace symlinks with their final target.
        hops = 0
        while fs.islink(src):
            if hops >= max_hops:
                logging.warning(
                    'Too many levels of symbolic links while resolving %s '
                    '(stopped at %s); skipping', orig_src, src)
                return
            hops += 1
            res = fs.readlink(src)
            # A relative target is relative to the directory holding the
            # link; an absolute one replaces the path entirely.
            src = os.path.join(os.path.dirname(src), res)
        # TODO(sadafm): Explicitly handle directory symlinks that point to
        # one of their ancestors; they still cause unbounded recursion below.
        # NOTE(review): the original comment here stated that fs.isfile
        # raises if src does not exist; not verifiable from this block.
        # A missing path is expected to end up in the ENOENT handler below.
        if fs.isfile(src):
            file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
            return

        if not fs.exists(dst):
            os.makedirs(dst)

        for child in fs.listdir(src):
            copy_recursively(os.path.join(src, child), os.path.join(dst, child))

    except OSError as e:
        if e.errno == errno.ENOENT:
            logging.warning('Path %s does not exist or %s is a broken symlink',
                            src, orig_src)
        else:
            logging.info("Couldn't collect output file %s: %s", src, e)
def _expand_symlinks(indir, relfile):
    """Finds symlinks in relfile.

    Follows symlinks in |relfile|, but treating symlinks that point outside
    the build tree as if they were ordinary directories/files. Returns the
    final symlink-free target and a list of paths to symlinks encountered in
    the process.

    The rule about symlinks outside the build tree is for the benefit of the
    Chromium OS ebuild, which symlinks the output directory to an unrelated
    path in the chroot.

    Fails when a directory loop is detected, although in theory we could
    support that case.

    Arguments:
    - indir: base directory; symlinks in indir are not processed; this is the
             base directory that is considered 'outside of the tree'.
    - relfile: part of the path to expand symlink.

    Returns:
      tuple(relfile, list(symlinks)): relfile is the real path of relfile,
      relative to indir, where all symlinks were evaluated; a trailing path
      separator on the input is preserved. symlinks is the chain of symlinks
      found along the way, if any, each relative to indir.

    Raises:
      MappingError: if a symlink target does not exist, or if a symlink
          resolves to a path that is a prefix of the symlink itself.
    """
    # Remember whether the caller asked for a directory so that the trailing
    # separator can be restored on the result.
    is_directory = relfile.endswith(os.path.sep)
    # 'done' is the part of the path already processed; 'todo' is the
    # remainder, relative to 'done', still to be scanned for symlinks.
    done = indir
    todo = relfile.strip(os.path.sep)
    symlinks = []

    while todo:
        pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
            done, todo)
        if not symlink:
            # No symlink left in the remainder: fix its case and finish.
            todo = file_path.fix_native_path_case(done, todo)
            done = os.path.join(done, todo)
            break
        symlink_path = os.path.join(done, pre_symlink, symlink)
        post_symlink = post_symlink.lstrip(os.path.sep)
        # readlink doesn't exist on Windows.
        # pylint: disable=E1101
        # Start from the directory containing the symlink; a relative link
        # value is resolved against it below.
        target = os.path.normpath(os.path.join(done, pre_symlink))
        symlink_target = fs.readlink(symlink_path)
        if os.path.isabs(symlink_target):
            # Absolute paths are considered normal directories. The use case
            # is generally someone who puts the output directory on a
            # separate drive.
            target = symlink_target
        else:
            # The symlink itself could be using the wrong path case.
            target = file_path.fix_native_path_case(target, symlink_target)

        if not fs.exists(target):
            raise MappingError(
                'Symlink target doesn\'t exist: %s -> %s' %
                (symlink_path, target))
        target = file_path.get_native_path_case(target)
        # NOTE(review): presumably path_starts_with(prefix, path) is True
        # when path is located under prefix -- confirm in file_path.
        if not file_path.path_starts_with(indir, target):
            # The target is outside of indir: keep the symlink as if it were
            # an ordinary path component and continue after it.
            done = symlink_path
            todo = post_symlink
            continue
        if file_path.path_starts_with(target, symlink_path):
            raise MappingError(
                'Can\'t map recursive symlink reference %s -> %s' %
                (symlink_path, target))
        logging.info('Found symlink: %s -> %s', symlink_path, target)
        symlinks.append(os.path.relpath(symlink_path, indir))
        # Treat the common prefix of the old and new paths as done, and start
        # scanning again.
        target = target.split(os.path.sep)
        symlink_path = symlink_path.split(os.path.sep)
        # Count the leading path components shared by both paths.
        prefix_length = 0
        for target_piece, symlink_path_piece in zip(target, symlink_path):
            if target_piece != symlink_path_piece:
                break
            prefix_length += 1
        done = os.path.sep.join(target[:prefix_length])
        todo = os.path.join(
            os.path.sep.join(target[prefix_length:]), post_symlink)

    relfile = os.path.relpath(done, indir)
    # Multiplying by the bool appends the separator only for directories.
    relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
    return relfile, symlinks
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Builds the metadata entry describing one input file.

    The following keys are produced:
    - 'm': the sanitized permission bits; only for non-symlinks and never on
      Windows.
    - 't': the modification time, rounded to an integer.
    - 's' and 'h': the size and the content hash; only for non-symlinks.
    - 'l': the link destination, relative to the directory containing the
      link; only for symlinks.

    The timestamp is used to decide if the hash or the link destination found
    in prevdict can be reused. There is the risk of the file's timestamp
    being reset to its last value manually while its content changed; this
    use case is not protected against.

    Arguments:
      filepath: File to act on.
      prevdict: the previous dictionary. It is used to retrieve the cached
          hash or link destination to skip recalculating it. It may be empty
          but must support .get().
      read_only: one of None, 0, 1 or 2. When truthy, the user write bit is
          removed from the file mode. On Windows, the mode is not set at all.
      algo: Hashing algorithm used; passed through to hash_file().
      collapse_symlinks: True if symlinked files should be treated like they
          were the normal underlying file.

    Returns:
      The necessary dict to create an entry in the 'files' section of an
      .isolated file.

    Raises:
      MappingError: if stat'ing filepath fails with OSError.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only

    # fs.stat resolves symbolic links; fs.lstat describes the link itself and
    # thus preserves it.
    stat_fn = fs.stat if collapse_symlinks else fs.lstat
    try:
        st = stat_fn(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)

    out = {}
    is_link = stat.S_ISLNK(st.st_mode)

    if sys.platform != 'win32':
        # The file mode is ignored on Windows since it's not really useful
        # there. Elsewhere, drop group write and everything for 'others'.
        mode = stat.S_IMODE(st.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            mode &= ~stat.S_IWUSR
        # Group execute is kept only when both user execute and group read
        # are set.
        needed = stat.S_IXUSR | stat.S_IRGRP
        if mode & needed == needed:
            mode |= stat.S_IXGRP
        else:
            mode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = mode

    # Used to skip recalculating the hash or link destination.
    mtime = int(round(st.st_mtime))
    out['t'] = mtime
    unchanged_time = prevdict.get('t') == mtime

    if not is_link:
        size = st.st_size
        out['s'] = size
        # The previous hash is only trusted when both the timestamp and the
        # size are unchanged, and only if it is actually present.
        previous_hash = None
        if unchanged_time and prevdict.get('s') == size:
            previous_hash = prevdict.get('h')
        out['h'] = previous_hash if previous_hash else hash_file(filepath, algo)
        return out

    # Symlink: the previous destination is reused when the timestamp did not
    # change and a destination was recorded.
    destination = prevdict.get('l') if unchanged_time else None
    if destination is None:
        # The link could be in an incorrect path case. In practice, this only
        # happens on OSX on case insensitive HFS.
        # TODO(maruel): It'd be better if it was only done once, in
        # expand_directory_and_symlink(), so it would not be necessary to do
        # again here.
        link_value = fs.readlink(filepath)  # pylint: disable=E1101
        link_dir = file_path.get_native_path_case(os.path.dirname(filepath))
        resolved = file_path.fix_native_path_case(link_dir, link_value)
        destination = os.path.relpath(resolved, link_dir)
    out['l'] = destination
    return out
def cleanup(self):
    """Removes unknown directories and invalid named cache links.

    Does not recalculate the cache size since it's surprisingly slow on some
    OSes.

    Two passes are done while holding self._lock:
    1. Entries in self._lru whose directory is gone are dropped, and items in
       self.cache_dir that are not referenced by self._lru are deleted.
    2. Inside the NAMED_DIR directory, entries that are not a symlink to the
       directory recorded in self._lru, and entries unknown to self._lru, are
       deleted. Missing links are not recreated.

    The state is always saved via self._save(), even if an exception is
    raised.

    Returns:
      True if every removal succeeded, False if at least one item could not
      be removed.
    """
    success = True
    with self._lock:
        try:
            actual = set(fs.listdir(self.cache_dir))
            actual.discard(self.NAMED_DIR)
            actual.discard(self.STATE_FILE)
            # Maps the on-disk directory name back to the cache name.
            expected = {v[0]: k for k, v in self._lru.iteritems()}

            # First, handle the actual cache content.
            # Remove missing entries.
            for missing in (set(expected) - actual):
                self._lru.pop(expected[missing])
            # Remove unexpected items.
            for unexpected in (actual - set(expected)):
                try:
                    p = os.path.join(self.cache_dir, unexpected)
                    if fs.isdir(p) and not fs.islink(p):
                        file_path.rmtree(p)
                    else:
                        fs.remove(p)
                except (IOError, OSError) as e:
                    logging.error('Failed to remove %s: %s', unexpected, e)
                    success = False

            # Second, fix named cache links.
            named = os.path.join(self.cache_dir, self.NAMED_DIR)
            if os.path.isdir(named):
                actual = set(fs.listdir(named))
                expected = set(self._lru)
                # Confirm entries. Do not add missing ones for now.
                for name in expected.intersection(actual):
                    p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
                    expected_link = os.path.join(
                        self.cache_dir, self._lru[name][0])
                    if fs.islink(p):
                        if sys.platform == 'win32':
                            # TODO(maruel): Implement readlink() on Windows
                            # in fs.py, then remove this condition.
                            # https://crbug.com/853721
                            continue
                        link = fs.readlink(p)
                        # The link may be stored relative to its own
                        # directory (e.g. '../<dir>') or as an absolute
                        # path. Resolve it before comparing so that a valid
                        # relative link is not mistaken for a bad one and
                        # deleted.
                        resolved = os.path.normpath(
                            os.path.join(os.path.dirname(p), link))
                        if resolved == os.path.normpath(expected_link):
                            continue
                        logging.warning(
                            'Unexpected symlink for cache %s: %s, expected %s',
                            name, link, expected_link)
                    else:
                        logging.warning(
                            'Unexpected non symlink for cache %s', name)
                    # A removal failure is reported through the return value,
                    # like for the other removals in this method.
                    try:
                        if fs.isdir(p) and not fs.islink(p):
                            file_path.rmtree(p)
                        else:
                            fs.remove(p)
                    except (IOError, OSError) as e:
                        logging.error('Failed to remove %s: %s', name, e)
                        success = False
                # Remove unexpected items.
                for unexpected in (actual - expected):
                    try:
                        p = os.path.join(
                            self.cache_dir, self.NAMED_DIR, unexpected)
                        # A symlink to a directory must be unlinked, not
                        # recursively deleted.
                        if fs.isdir(p) and not fs.islink(p):
                            file_path.rmtree(p)
                        else:
                            fs.remove(p)
                    except (IOError, OSError) as e:
                        logging.error('Failed to remove %s: %s', unexpected, e)
                        success = False
        finally:
            self._save()
    return success
def test_readlink_fail(self):
    # readlink() on a path that does not exist must raise OSError. Obvious,
    # but this makes sure the Windows implementation acts the same.
    missing = os.path.join(self.tempdir, 'not_there')
    self.assertRaises(OSError, fs.readlink, missing)