def test_save_named(self):
  cache = self.get_cache(_get_policies())
  self.assertEqual([], sorted(fs.listdir(cache.cache_dir)))
  self._add_one_item(cache, 2)
  with fs.open(os.path.join(cache.cache_dir, cache.STATE_FILE)) as f:
    old_content = json.load(f)
  # It's immediately saved.
  items = lru.LRUDict.load(os.path.join(cache.cache_dir, cache.STATE_FILE))
  self.assertEqual(1, len(items))
  _key, (v, _timestamp) = items.get_oldest()
  # This depends on the inner format as generated by NamedCache.
  entry_dir_name = v[0]
  self.assertEqual(
      sorted([entry_dir_name, cache.NAMED_DIR, cache.STATE_FILE]),
      sorted(fs.listdir(cache.cache_dir)))

  cache.save()
  self.assertEqual(
      sorted([entry_dir_name, cache.NAMED_DIR, cache.STATE_FILE]),
      sorted(fs.listdir(cache.cache_dir)))
  with fs.open(os.path.join(cache.cache_dir, cache.STATE_FILE)) as f:
    new_content = json.load(f)
  # That's because uninstall() called from self._add_one_item() causes an
  # implicit save(). See uninstall() comments for more details.
  self.assertEqual(new_content, old_content)
def test_clean_cache(self):
  dest_dir = os.path.join(self.tempdir, 'dest')
  cache = self.get_cache(_get_policies())
  self.assertEqual([], fs.listdir(cache.cache_dir))

  a_path = os.path.join(dest_dir, u'a')
  b_path = os.path.join(dest_dir, u'b')
  self.assertEqual(0, cache.install(a_path, u'1'))
  self.assertEqual(0, cache.install(b_path, u'2'))
  self.assertEqual(
      False, fs.exists(os.path.join(cache.cache_dir, cache.NAMED_DIR)))
  self.assertEqual({u'a', u'b'}, set(fs.listdir(dest_dir)))
  self.assertFalse(cache.available)
  self.assertEqual([cache.STATE_FILE], fs.listdir(cache.cache_dir))

  write_file(os.path.join(a_path, u'x'), u'x')
  write_file(os.path.join(b_path, u'y'), u'y')

  self.assertEqual(1, cache.uninstall(a_path, u'1'))
  self.assertEqual(1, cache.uninstall(b_path, u'2'))

  self.assertEqual(4, len(fs.listdir(cache.cache_dir)))
  path1 = os.path.join(cache.cache_dir, cache._lru['1'][0])
  self.assertEqual('x', read_file(os.path.join(path1, u'x')))
  path2 = os.path.join(cache.cache_dir, cache._lru['2'][0])
  self.assertEqual('y', read_file(os.path.join(path2, u'y')))
  self.assertEqual(
      os.path.join(u'..', cache._lru['1'][0]),
      fs.readlink(cache._get_named_path('1')))
  self.assertEqual(
      os.path.join(u'..', cache._lru['2'][0]),
      fs.readlink(cache._get_named_path('2')))
  self.assertEqual(
      [u'1', u'2'],
      sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
def test_cleanup_disk(self):
  # Inject an item without a state.json entry; one is lost. Both will be
  # deleted on cleanup.
  self._free_disk = 1003
  cache = self.get_cache(_get_policies(min_free_space=1000))
  h_foo = self._algo('foo').hexdigest()
  self.assertEqual([], sorted(cache._lru._items.iteritems()))
  cache.write(h_foo, ['foo'])
  self.assertEqual([], cache.trim())
  self.assertEqual([h_foo], [i[0] for i in cache._lru._items.iteritems()])

  h_a = self._algo('a').hexdigest()
  local_caching.file_write(os.path.join(cache.cache_dir, h_a), 'a')

  # file_path.remove() explicitly handles the +R bit on Windows.
  file_path.remove(os.path.join(cache.cache_dir, h_foo))

  # Still hasn't realized that the file is missing.
  self.assertEqual([h_foo], [i[0] for i in cache._lru._items.iteritems()])
  self.assertEqual(
      sorted([h_a, cache.STATE_FILE]), sorted(fs.listdir(cache.cache_dir)))
  cache.cleanup()
  self.assertEqual([cache.STATE_FILE], fs.listdir(cache.cache_dir))
def test_cleanup_unexpected(self):
  os.mkdir(self.cache_dir)
  with open(os.path.join(self.cache_dir, u'junk'), 'w') as f:
    f.write('random')
  cache = self.get_cache(_get_policies())
  self.assertEqual(['junk'], fs.listdir(cache.cache_dir))
  self.assertEqual(True, cache.cleanup())
  self.assertEqual([cache.STATE_FILE], fs.listdir(cache.cache_dir))
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Deletes the temporary run directory and uploads results back.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the
          isolated server, if applicable.
    - success: False if something occurred that means that the task must
          forcibly be considered a failure, e.g. zombie processes were left
          behind.
    - stats: uploading stats.
  """
  # Upload out_dir and generate a .isolated file out of this directory. It is
  # only done if files were written in the directory.
  outputs_ref = None
  cold = []
  hot = []
  start = time.time()

  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold = sorted(i.size for i in f_cold)
        hot = sorted(i.size for i in f_hot)
      except isolateserver.Aborted:
        # This happens when a SIGTERM signal was received while uploading data.
        # There are two causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  success = False
  try:
    if (not leak_temp_dir and fs.isdir(out_dir) and
        not file_path.rmtree(out_dir)):
      logging.error('Had difficulties removing out_dir %s', out_dir)
    else:
      success = True
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)

  stats = {
    'duration': time.time() - start,
    'items_cold': base64.b64encode(large.pack(cold)),
    'items_hot': base64.b64encode(large.pack(hot)),
  }
  return outputs_ref, success, stats
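# Hedged usage sketch (not from the original source): one way a task runner
# might consume delete_and_upload() and fold the result into a task summary.
# The helper name `_example_collect_outputs` and the keys of the returned
# dict are illustrative assumptions, not the real task result schema.
def _example_collect_outputs(storage, run_dir):
  outputs_ref, success, stats = delete_and_upload(
      storage, run_dir, leak_temp_dir=False)
  return {
    'outputs_ref': outputs_ref,
    'internal_failure': not success,
    'upload_stats': stats,
  }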
def CMDremap(parser, args):
  """Creates a directory with all the dependencies mapped into it.

  Useful to test manually why a test is failing. The target executable is not
  run.
  """
  add_isolate_options(parser)
  add_outdir_options(parser)
  add_skip_refresh_option(parser)
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported argument: %s' % args)
  cwd = os.getcwd()
  process_isolate_options(parser, options, cwd, require_isolated=False)
  process_outdir_options(parser, options, cwd)
  complete_state = load_complete_state(options, cwd, None, options.skip_refresh)

  file_path.ensure_tree(options.outdir)
  print('Remapping into %s' % options.outdir)
  if fs.listdir(options.outdir):
    raise ExecutionError('Can\'t remap in a non-empty directory')

  create_isolate_tree(
      options.outdir, complete_state.root_dir, complete_state.saved_state.files,
      complete_state.saved_state.relative_cwd,
      complete_state.saved_state.read_only)
  if complete_state.isolated_filepath:
    complete_state.save_files()
  return 0
def CMDremap(parser, args):
  """Creates a directory with all the dependencies mapped into it.

  Useful to test manually why a test is failing. The target executable is not
  run.
  """
  add_isolate_options(parser)
  add_outdir_options(parser)
  add_skip_refresh_option(parser)
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported argument: %s' % args)
  cwd = os.getcwd()
  process_isolate_options(parser, options, cwd, require_isolated=False)
  process_outdir_options(parser, options, cwd)
  complete_state = load_complete_state(options, cwd, None, options.skip_refresh)

  if not fs.isdir(options.outdir):
    fs.makedirs(options.outdir)
  print('Remapping into %s' % options.outdir)
  if fs.listdir(options.outdir):
    raise ExecutionError('Can\'t remap in a non-empty directory')

  create_isolate_tree(
      options.outdir, complete_state.root_dir, complete_state.saved_state.files,
      complete_state.saved_state.relative_cwd,
      complete_state.saved_state.read_only)
  if complete_state.isolated_filepath:
    complete_state.save_files()
  return 0
def test_native_case_alternate_datastream(self):
  # Create the file manually, since tempfile doesn't support ADS.
  tempdir = unicode(tempfile.mkdtemp(prefix=u'trace_inputs'))
  try:
    tempdir = file_path.get_native_path_case(tempdir)
    basename = 'foo.txt'
    filename = basename + ':Zone.Identifier'
    filepath = os.path.join(tempdir, filename)
    open(filepath, 'w').close()
    self.assertEqual(filepath, file_path.get_native_path_case(filepath))

    data_suffix = ':$DATA'
    self.assertEqual(
        filepath + data_suffix,
        file_path.get_native_path_case(filepath + data_suffix))

    open(filepath + '$DATA', 'w').close()
    self.assertEqual(
        filepath + data_suffix,
        file_path.get_native_path_case(filepath + data_suffix))
    # Ensure the ADS weren't created as a separate file. You love NTFS, don't
    # you?
    self.assertEqual([basename], fs.listdir(tempdir))
  finally:
    file_path.rmtree(tempdir)
def test_cleanup_incorrect_link(self):
  cache = self.get_cache(_get_policies())
  self._add_one_item(cache, 1)
  self._add_one_item(cache, 2)
  fs.remove(os.path.join(self.cache_dir, cache.NAMED_DIR, u'1'))
  fs.remove(os.path.join(self.cache_dir, cache.NAMED_DIR, u'2'))
  fs.symlink(
      'invalid_dest', os.path.join(self.cache_dir, cache.NAMED_DIR, u'1'))
  os.mkdir(os.path.join(self.cache_dir, cache.NAMED_DIR, u'2'))

  cache = self.get_cache(_get_policies())
  self.assertEqual(
      ['1', '2'],
      sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
  self.assertEqual(True, cache.cleanup())
  self.assertEqual(
      [], fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR)))
def test_save_disk(self):
  cache = self.get_cache(_get_policies())
  self.assertEqual(
      sorted([cache.STATE_FILE]), sorted(fs.listdir(cache.cache_dir)))

  h = self._add_one_item(cache, 2)
  self.assertEqual(
      sorted([h, cache.STATE_FILE]), sorted(fs.listdir(cache.cache_dir)))
  items = lru.LRUDict.load(os.path.join(cache.cache_dir, cache.STATE_FILE))
  self.assertEqual(0, len(items))

  cache.save()
  self.assertEqual(
      sorted([h, cache.STATE_FILE]), sorted(fs.listdir(cache.cache_dir)))
  items = lru.LRUDict.load(os.path.join(cache.cache_dir, cache.STATE_FILE))
  self.assertEqual(1, len(items))
  self.assertEqual((h, [2, 1000]), items.get_oldest())
def test_existing_cache(self):
  # Ensures that the code does what is expected under normal use.
  dest_dir = os.path.join(self.tempdir, 'dest')
  cache = self.get_cache(_get_policies())
  # Assume test_clean passes.
  a_path = os.path.join(dest_dir, u'a')
  b_path = os.path.join(dest_dir, u'b')

  self.assertEqual(0, cache.install(a_path, u'1'))
  write_file(os.path.join(dest_dir, u'a', u'x'), u'x')
  self.assertEqual(1, cache.uninstall(a_path, u'1'))

  # Test starts here.
  self.assertEqual(1, cache.install(a_path, u'1'))
  self.assertEqual(0, cache.install(b_path, u'2'))
  self.assertEqual({'a', 'b'}, set(fs.listdir(dest_dir)))
  self.assertFalse(cache.available)
  self.assertEqual(
      sorted([cache.NAMED_DIR, cache.STATE_FILE]),
      sorted(fs.listdir(cache.cache_dir)))
  self.assertEqual(
      [], fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR)))

  self.assertEqual('x', read_file(os.path.join(dest_dir, u'a', u'x')))
  write_file(os.path.join(a_path, 'x'), 'x2')
  write_file(os.path.join(b_path, 'y'), 'y')

  self.assertEqual(2, cache.uninstall(a_path, '1'))
  self.assertEqual(1, cache.uninstall(b_path, '2'))

  self.assertEqual(4, len(fs.listdir(cache.cache_dir)))
  path1 = os.path.join(cache.cache_dir, cache._lru['1'][0])
  self.assertEqual('x2', read_file(os.path.join(path1, 'x')))
  path2 = os.path.join(cache.cache_dir, cache._lru['2'][0])
  self.assertEqual('y', read_file(os.path.join(path2, 'y')))
  self.assertEqual(
      os.path.join(u'..', cache._lru['1'][0]),
      fs.readlink(cache._get_named_path('1')))
  self.assertEqual(
      os.path.join(u'..', cache._lru['2'][0]),
      fs.readlink(cache._get_named_path('2')))
  self.assertEqual(
      [u'1', u'2'],
      sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
def test_trim(self):
  cache = self.get_cache(_get_policies(max_items=2))
  item_count = 12
  for i in xrange(item_count):
    self._add_one_item(cache, i + 1)
  self.assertEqual(len(cache), item_count)
  self.assertEqual([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], cache.trim())
  self.assertEqual(len(cache), 2)
  self.assertEqual(
      ['11', '12'],
      sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
def test_cleanup_disk_evict_corrupted_files(self):
  self._free_disk = 1003
  cache = self.get_cache(_get_policies(min_free_space=1000))

  # Add a corrupted item.
  h_a = self._algo(b'a').hexdigest()
  cache.write(h_a, [b'A'])

  h_b = self._algo(b'b').hexdigest()
  cache.write(h_b, [b'b'])

  mtime_a = self._now
  mtime_b = self._now

  def _get_mtime(h):
    if h == h_a:
      return mtime_a
    if h == h_b:
      return mtime_b

  self.mock(cache, '_get_mtime', _get_mtime)

  self.assertEqual([(h_a, (1, mtime_a)), (h_b, (1, mtime_b))],
                   list(cache._lru._items.items()))
  six.assertCountEqual(self, [h_a, h_b, cache.STATE_FILE],
                       fs.listdir(cache.cache_dir))

  # If the mtime is the same as the timestamp in state.json, the verification
  # won't run.
  cache.cleanup()
  six.assertCountEqual(self, [h_a, h_b, cache.STATE_FILE],
                       fs.listdir(cache.cache_dir))

  # If the mtime is after the timestamp in state.json, the verification will
  # run and remove the corrupted file.
  mtime_a += 1
  mtime_b += 1
  self.mock(cache._lru, 'time_fn', lambda: mtime_b)
  cache.cleanup()
  six.assertCountEqual(self, [h_b, cache.STATE_FILE],
                       fs.listdir(cache.cache_dir))
  six.assertCountEqual(self, [(h_b, (1, mtime_b))], cache._lru._items.items())
def test_invalid_state(self):
  file_path.ensure_tree(self.cache_dir())
  statefile = os.path.join(
      self.cache_dir(), local_caching.DiskContentAddressedCache.STATE_FILE)
  with open(statefile, 'w') as f:
    f.write('invalid')
  with open(os.path.join(self.cache_dir(), 'invalid'), 'w') as f:
    f.write('invalid')

  _ = self.get_cache(_get_policies())
  self.assertEqual(fs.listdir(self.cache_dir()), ['state.json'])
def test_cycle_twice(self):
  # Ensure that named symlink works.
  cache = self.get_cache(_get_policies())
  dest_dir = os.path.join(self.tempdir, 'dest')
  self.assertEqual(0, cache.install(dest_dir, u'1'))
  with fs.open(os.path.join(dest_dir, u'hi'), 'wb') as f:
    f.write('hello')
  self.assertEqual(5, cache.uninstall(dest_dir, u'1'))
  self.assertEqual(
      [u'1'], fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR)))
  self.assertEqual(True, cache.cleanup())

  self.assertEqual(5, cache.install(dest_dir, u'1'))
  self.assertEqual(5, cache.uninstall(dest_dir, u'1'))
  self.assertEqual(
      [u'1'], fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR)))
  self.assertEqual(
      [u'hi'],
      fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR, u'1')))
def test_cleanup_unexpected_named(self):
  os.mkdir(self.cache_dir)
  c = local_caching.NamedCache
  os.mkdir(os.path.join(self.cache_dir, c.NAMED_DIR))
  p = os.path.join(self.cache_dir, c.NAMED_DIR, u'junk_file')
  with open(p, 'w') as f:
    f.write('random')
  os.mkdir(os.path.join(self.cache_dir, c.NAMED_DIR, u'junk_dir'))
  fs.symlink(
      'invalid_dest', os.path.join(self.cache_dir, c.NAMED_DIR, u'junk_link'))

  cache = self.get_cache(_get_policies())
  self.assertEqual([cache.NAMED_DIR], fs.listdir(cache.cache_dir))
  self.assertEqual(
      ['junk_dir', 'junk_file', 'junk_link'],
      sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
  self.assertEqual(True, cache.cleanup())
  self.assertEqual(
      [cache.NAMED_DIR, cache.STATE_FILE],
      sorted(fs.listdir(cache.cache_dir)))
  self.assertEqual(
      [], fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR)))
def find_item_native_case(root_path, item):
  """Gets the native path case of a single item based at root_path.

  There is no API to get the native path case of symlinks on OSX. So it needs
  to be done the slow way.
  """
  if item == '..':
    return item

  item = item.lower()
  for element in fs.listdir(root_path):
    if element.lower() == item:
      return element
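# Hedged usage sketch (not part of the original module): resolving the on-disk
# case of every component of a relative path by walking it one item at a time
# with find_item_native_case(). The helper name `_example_native_case_of` and
# its inputs are illustrative assumptions.
def _example_native_case_of(root_path, relpath):
  resolved = []
  current = root_path
  for component in relpath.split(os.path.sep):
    native = find_item_native_case(current, component)
    if native is None:
      # The component does not exist under root_path.
      return None
    resolved.append(native)
    current = os.path.join(current, native)
  return os.path.join(*resolved)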
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Deletes the temporary run directory and uploads results back.

  Returns:
    tuple(outputs_ref, success, cold, hot)
    - outputs_ref: a dict referring to the results archived back to the
          isolated server, if applicable.
    - success: False if something occurred that means that the task must
          forcibly be considered a failure, e.g. zombie processes were left
          behind.
    - cold: list of sizes of cold items; they had to be uploaded.
    - hot: list of sizes of hot items; they didn't have to be uploaded.
  """
  # Upload out_dir and generate a .isolated file out of this directory. It is
  # only done if files were written in the directory.
  outputs_ref = None
  cold = []
  hot = []
  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold = sorted(i.size for i in f_cold)
        hot = sorted(i.size for i in f_hot)
      except isolateserver.Aborted:
        # This happens when a SIGTERM signal was received while uploading data.
        # There are two causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  try:
    if (not leak_temp_dir and fs.isdir(out_dir) and
        not file_path.rmtree(out_dir)):
      logging.error('Had difficulties removing out_dir %s', out_dir)
      return outputs_ref, False, cold, hot
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)
    return outputs_ref, False, cold, hot
  return outputs_ref, True, cold, hot
def test_symlink_absolute(self):
  # A symlink to an absolute path is valid.
  # /dir
  # /dir/file
  # /ld -> /dir
  # /lf -> /ld/file
  dirpath = os.path.join(self.tempdir, 'dir')
  filepath = os.path.join(dirpath, 'file')
  fs.mkdir(dirpath)
  write_content(filepath, b'hello')

  linkfile = os.path.join(self.tempdir, 'lf')
  linkdir = os.path.join(self.tempdir, 'ld')
  dstfile = os.path.join(linkdir, 'file')
  fs.symlink(dstfile, linkfile)
  fs.symlink(dirpath, linkdir)

  self.assertEqual(True, fs.islink(linkfile))
  self.assertEqual(True, fs.islink(linkdir))
  self.assertEqual(dstfile, fs.readlink(linkfile))
  self.assertEqual(dirpath, fs.readlink(linkdir))
  self.assertEqual(['file'], fs.listdir(linkdir))
  # /lf resolves to /dir/file.
  with fs.open(linkfile) as f:
    self.assertEqual('hello', f.read())

  # Ensures that followlinks is respected in walk().
  expected = [
    (self.tempdir, ['dir', 'ld'], ['lf']),
    (dirpath, [], ['file']),
  ]
  actual = [
    (r, sorted(d), sorted(f))
    for r, d, f in sorted(fs.walk(self.tempdir, followlinks=False))
  ]
  self.assertEqual(expected, actual)
  expected = [
    (self.tempdir, ['dir', 'ld'], ['lf']),
    (dirpath, [], ['file']),
    (linkdir, [], ['file']),
  ]
  actual = [
    (r, sorted(d), sorted(f))
    for r, d, f in sorted(fs.walk(self.tempdir, followlinks=True))
  ]
  self.assertEqual(expected, actual)
def cleanup(self):
  """Cleans up the cache directory.

  Ensures there are no unknown files in cache_dir.
  Ensures the read-only bits are set correctly.

  At that point, the cache was already loaded, trimmed to respect cache
  policies.
  """
  with self._lock:
    fs.chmod(self.cache_dir, 0700)
    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(self._lru)
    # It'd be faster if there were a readdir() function.
    for filename in fs.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        fs.chmod(os.path.join(self.cache_dir, filename), 0600)
        continue
      if filename in previous:
        fs.chmod(os.path.join(self.cache_dir, filename), 0400)
        previous.remove(filename)
        continue

      # An untracked file. Delete it.
      logging.warning('Removing unknown file %s from cache', filename)
      p = self._path(filename)
      if fs.isdir(p):
        try:
          file_path.rmtree(p)
        except OSError:
          pass
      else:
        file_path.try_remove(p)
      continue

    if previous:
      # Filter out entries that were not found.
      logging.warning('Removed %d lost files', len(previous))
      for filename in previous:
        self._lru.pop(filename)
      self._save()
def test_native_case_alternate_datastream(self):
  # Create the file manually, since tempfile doesn't support ADS.
  tempdir = unicode(tempfile.mkdtemp(prefix=u"trace_inputs"))
  try:
    tempdir = file_path.get_native_path_case(tempdir)
    basename = "foo.txt"
    filename = basename + ":Zone.Identifier"
    filepath = os.path.join(tempdir, filename)
    open(filepath, "w").close()
    self.assertEqual(filepath, file_path.get_native_path_case(filepath))

    data_suffix = ":$DATA"
    self.assertEqual(
        filepath + data_suffix,
        file_path.get_native_path_case(filepath + data_suffix))

    open(filepath + "$DATA", "w").close()
    self.assertEqual(
        filepath + data_suffix,
        file_path.get_native_path_case(filepath + data_suffix))
    # Ensure the ADS weren't created as a separate file. You love NTFS, don't
    # you?
    self.assertEqual([basename], fs.listdir(tempdir))
  finally:
    file_path.rmtree(tempdir)
def test_load_corrupted_state(self):
  os.mkdir(self.cache_dir)
  c = local_caching.NamedCache
  with open(os.path.join(self.cache_dir, c.STATE_FILE), 'w') as f:
    f.write('}}}}')
  fs.makedirs(os.path.join(self.cache_dir, '1'), 0777)

  cache = self.get_cache(_get_policies())
  self._add_one_item(cache, 1)
  self.assertTrue(
      fs.exists(os.path.join(cache.cache_dir, cache.NAMED_DIR, '1')))
  self.assertTrue(
      fs.islink(os.path.join(cache.cache_dir, cache.NAMED_DIR, '1')))
  self.assertEqual([], cache.trim())
  self.assertTrue(
      fs.exists(os.path.join(cache.cache_dir, cache.NAMED_DIR, '1')))
  self.assertTrue(
      fs.islink(os.path.join(cache.cache_dir, cache.NAMED_DIR, '1')))
  self.assertEqual(True, cache.cleanup())
  self.assertEqual(
      sorted([cache.NAMED_DIR, cache.STATE_FILE, cache._lru[u'1'][0]]),
      sorted(fs.listdir(cache.cache_dir)))
def copy_recursively(src, dst):
  """Efficiently copies a file or directory from src to dst.

  `src` may be a file, directory, or a symlink to a file or directory.
  All symlinks are replaced with their targets, so the resulting directory
  structure under dst will never have any symlinks.

  To increase speed, copy_recursively hardlinks individual files into the
  (newly created) directory structure if possible, unlike Python's
  shutil.copytree().
  """
  orig_src = src
  try:
    # Replace symlinks with their final target.
    while fs.islink(src):
      res = fs.readlink(src)
      src = os.path.join(os.path.dirname(src), res)
    # TODO(sadafm): Explicitly handle cyclic symlinks.

    # Note that fs.isfile (which is a wrapper around os.path.isfile) throws
    # an exception if src does not exist. A warning will be logged in that
    # case.
    if fs.isfile(src):
      file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
      return

    if not fs.exists(dst):
      os.makedirs(dst)

    for child in fs.listdir(src):
      copy_recursively(os.path.join(src, child), os.path.join(dst, child))

  except OSError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Path %s does not exist or %s is a broken symlink',
                      src, orig_src)
    else:
      logging.info("Couldn't collect output file %s: %s", src, e)
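# Hedged usage sketch (illustrative, not from the original source): copying a
# task's declared outputs out of its run directory with copy_recursively().
# The helper name `_example_collect` and the `run_dir`/`out_dir`/`outputs`
# inputs are assumptions supplied by the caller.
def _example_collect(run_dir, out_dir, outputs):
  for rel in outputs:
    # Each output is copied (or hardlinked) to the same relative path under
    # out_dir; missing outputs only produce a warning, per the function above.
    copy_recursively(os.path.join(run_dir, rel), os.path.join(out_dir, rel))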
def test_upgrade(self):
  # Make sure upgrading works. This is temporary as eventually all bots will
  # be updated.
  now = time.time()
  fs.mkdir(self.cache_dir)
  fs.mkdir(os.path.join(self.cache_dir, 'f1'))
  with fs.open(os.path.join(self.cache_dir, 'f1', 'hello'), 'wb') as f:
    f.write('world')
  # v1
  old = {
    'version': 2,
    'items': [
      ['cache1', ['f1', now]],
    ],
  }
  c = local_caching.NamedCache
  with fs.open(os.path.join(self.cache_dir, c.STATE_FILE), 'w') as f:
    json.dump(old, f)
  # It automatically upgrades to v2.
  cache = self.get_cache(_get_policies())
  expected = {u'cache1': ((u'f1', len('world')), now)}
  self.assertEqual(expected, dict(cache._lru._items.iteritems()))
  self.assertEqual(
      [u'f1', cache.STATE_FILE], sorted(fs.listdir(cache.cache_dir)))
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlinks like one created
  with:
    ln -s .. foo
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase
      # 'r'.
      #
      # Note that this is really something deep in OSX because running
      #   ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command
      # 'ls' and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.
      # We have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      pass

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      for filename in fs.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(
                indir, inner_relfile, blacklist, follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
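# Hedged usage sketch (illustrative only): expanding one directory entry and
# one file entry relative to `indir`. Per the function above, directory inputs
# must end with a trailing separator and file inputs must not; passing None as
# the blacklist skips filtering. The entry names 'data/' and 'main.py' and the
# helper name `_example_expand` are assumptions.
def _example_expand(indir):
  files = expand_directory_and_symlink(
      indir, u'data' + os.path.sep, None, True)
  files += expand_directory_and_symlink(indir, u'main.py', None, True)
  return files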
def cleanup(self):
  """Cleans up the cache directory.

  Ensures there are no unknown files in cache_dir.
  Ensures the read-only bits are set correctly.

  At that point, the cache was already loaded, trimmed to respect cache
  policies.
  """
  with self._lock:
    fs.chmod(self.cache_dir, 0o700)
    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(self._lru)
    # It'd be faster if there were a readdir() function.
    for filename in fs.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
        continue
      if filename in previous:
        fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
        previous.remove(filename)
        continue

      # An untracked file. Delete it.
      logging.warning('Removing unknown file %s from cache', filename)
      p = self._path(filename)
      if fs.isdir(p):
        try:
          file_path.rmtree(p)
        except OSError:
          pass
      else:
        file_path.try_remove(p)
      continue

    if previous:
      # Filter out entries that were not found.
      logging.warning('Removed %d lost files', len(previous))
      for filename in previous:
        self._lru.pop(filename)
      self._save()

  # Verify the hash of every single item to detect corruption. The corrupted
  # files will be evicted.
  with self._lock:
    for digest, (_, timestamp) in list(self._lru._items.items()):
      # Verify only if the mtime is greater than the timestamp in state.json,
      # to avoid taking too long.
      if self._get_mtime(digest) <= timestamp:
        continue

      logging.warning('Item has been modified. item: %s', digest)
      if self._is_valid_hash(digest):
        # Update the timestamp in state.json.
        self._lru.touch(digest)
        continue
      # Remove the corrupted file from the LRU and the file system.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)
      logging.error('Deleted corrupted item: %s', digest)
    self._save()
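# Hedged usage sketch (illustrative): the maintenance order implied by the
# docstring above, on an already-constructed DiskContentAddressedCache. The
# helper name `_example_maintenance` is an assumption; trim() and cleanup()
# are the methods shown in this listing.
def _example_maintenance(cache):
  cache.trim()     # Enforce size, item-count and free-disk policies first.
  cache.cleanup()  # Then drop unknown files and evict corrupted entries.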
def cleanup(self):
  """Removes unknown directories.

  Does not recalculate the cache size since it's surprisingly slow on some
  OSes.
  """
  success = True
  with self._lock:
    try:
      actual = set(fs.listdir(self.cache_dir))
      actual.discard(self.NAMED_DIR)
      actual.discard(self.STATE_FILE)
      expected = {v[0]: k for k, v in self._lru.iteritems()}

      # First, handle the actual cache content.
      # Remove missing entries.
      for missing in (set(expected) - actual):
        self._lru.pop(expected[missing])
      # Remove unexpected items.
      for unexpected in (actual - set(expected)):
        try:
          p = os.path.join(self.cache_dir, unexpected)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        except (IOError, OSError) as e:
          logging.error('Failed to remove %s: %s', unexpected, e)
          success = False

      # Second, fix named cache links.
      named = os.path.join(self.cache_dir, self.NAMED_DIR)
      if os.path.isdir(named):
        actual = set(fs.listdir(named))
        expected = set(self._lru)
        # Confirm entries. Do not add missing ones for now.
        for name in expected.intersection(actual):
          p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
          expected_link = os.path.join(self.cache_dir, self._lru[name][0])
          if fs.islink(p):
            if sys.platform == 'win32':
              # TODO(maruel): Implement readlink() on Windows in fs.py, then
              # remove this condition.
              # https://crbug.com/853721
              continue
            link = fs.readlink(p)
            if expected_link == link:
              continue
            logging.warning(
                'Unexpected symlink for cache %s: %s, expected %s',
                name, link, expected_link)
          else:
            logging.warning('Unexpected non symlink for cache %s', name)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        # Remove unexpected items.
        for unexpected in (actual - expected):
          try:
            p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
            if fs.isdir(p):
              file_path.rmtree(p)
            else:
              fs.remove(p)
          except (IOError, OSError) as e:
            logging.error('Failed to remove %s: %s', unexpected, e)
            success = False
    finally:
      self._save()
  return success
def test_policies_active_trimming(self):
  # Start with a larger cache, add many objects.
  # Reload the cache with smaller policies, the cache should be trimmed on
  # load.
  h_a = self._algo('a').hexdigest()
  h_b = self._algo('b').hexdigest()
  h_c = self._algo('c').hexdigest()
  large = 'b' * 99
  h_large = self._algo(large).hexdigest()

  def assertItems(expected):
    actual = [
      (digest, size) for digest, (size, _) in cache._lru._items.iteritems()
    ]
    self.assertEqual(expected, actual)

  self._free_disk = 1101
  cache = self.get_cache(
      _get_policies(max_cache_size=100, max_items=2, min_free_space=1000))
  cache.write(h_a, 'a')
  cache.write(h_large, large)
  # Cache (size and # items) is not enforced while adding items. The rationale
  # is that a task may request more data than the size of the cache policies.
  # As long as there is free space, this is fine.
  cache.write(h_b, 'b')
  assertItems([(h_a, 1), (h_large, len(large)), (h_b, 1)])
  self.assertEqual(h_a, cache._protected)
  self.assertEqual(1000, cache._free_disk)
  # Free disk is enforced, because otherwise we assume the task wouldn't be
  # able to start. In this case, it throws an exception since all items are
  # protected. The item is added since it's detected after the fact.
  with self.assertRaises(local_caching.NoMoreSpace):
    cache.write(h_c, 'c')

  self.assertEqual([1, 99], cache.trim())
  # At this point, h_a and h_large have been evicted by the trim.
  self.assertEqual(
      sorted([unicode(h_b), unicode(h_c), cache.STATE_FILE]),
      sorted(fs.listdir(cache.cache_dir)))

  # Allow 3 items and 101 bytes so h_large is kept.
  cache = self.get_cache(
      _get_policies(
          max_cache_size=101, min_free_space=1000, max_items=3,
          max_age_secs=0))
  cache.write(h_large, large)
  self.assertEqual(3, len(cache))
  self.assertEqual(101, cache.total_size)
  self.assertEqual([], cache.trim())
  self.assertEqual(
      sorted([h_b, h_c, h_large, cache.STATE_FILE]),
      sorted(fs.listdir(cache.cache_dir)))

  # Assert that trimming is done in constructor too.
  cache = self.get_cache(
      _get_policies(
          max_cache_size=100, min_free_space=1000, max_items=2,
          max_age_secs=0))
  assertItems([(h_c, 1), (h_large, len(large))])
  self.assertEqual(None, cache._protected)
  self.assertEqual(1202, cache._free_disk)
  self.assertEqual(2, len(cache))
  self.assertEqual(100, cache.total_size)
  self.assertEqual([], cache.trim())