def test_BadRead(self):
  # Check that reading from a non-existent key fails.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    self.assertEquals(None, storage.GetData('foo'))
def test_UseCachedResultsFalse(self):
  # Check that use_cached_results=False does indeed cause computations
  # to be redone, even when present in the cache.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('UseCachedResultsFalse', work_dir)
    self._tally = 0
    def check_call(cmd, **kwargs):
      subprocess.check_call(cmd, **kwargs)
      self._tally += 1
    o = once.Once(storage=fake_storage.FakeStorage(),
                  use_cached_results=False,
                  check_call=check_call)
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Copy('%(input0)s/in0', '%(output)s/out', cwd=work_dir)])
    o.Run('test', self._input_dirs, self._output_dirs[1],
          [command.Copy('%(input0)s/in0', '%(output)s/out', cwd=work_dir)])
    self.assertEquals(2, self._tally)
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[0]))
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[1]))
def test_HitsCacheSecondTime(self):
  # Test that the computation is not performed on a second instance.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('HitsCacheSecondTime', work_dir)
    self._tally = 0
    def check_call(cmd, **kwargs):
      self._tally += 1
      subprocess.check_call(cmd, **kwargs)
    self._url = None
    def stash_url(urls):
      self._url = urls
    o = once.Once(storage=fake_storage.FakeStorage(),
                  check_call=check_call,
                  print_url=stash_url)
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Copy('%(input0)s/in0', '%(output)s/out', cwd=work_dir)])
    initial_url = self._url
    self._url = None
    o.Run('test', self._input_dirs, self._output_dirs[1],
          [command.Copy('%(input0)s/in0', '%(output)s/out', cwd=work_dir)])
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[0]))
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[1]))
    self.assertEquals(1, self._tally)
    self.assertEquals(initial_url, self._url)
def test_HitsCacheSecondTime(self):
  # Test that the computation is not performed on a second instance.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('HitsCacheSecondTime', work_dir)
    self._tally = 0
    def Copy(subst, src, dst):
      self._tally += 1
      shutil.copyfile(subst.SubstituteAbsPaths(src),
                      subst.SubstituteAbsPaths(dst))
    self._url = None
    def stash_url(urls):
      self._url = urls
    o = once.Once(storage=fake_storage.FakeStorage(),
                  print_url=stash_url,
                  system_summary='test')
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
    initial_url = self._url
    self._url = None
    o.Run('test', self._input_dirs, self._output_dirs[1],
          [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[0]))
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[1]))
    self.assertEquals(1, self._tally)
    self.assertEquals(initial_url, self._url)
def test_HitWrappedStorage(self):
  # Check that if something isn't locally cached, primary storage is hit.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    mem_storage.PutData('hello', 'foo')
    self.assertEquals('hello', storage.GetData('foo'))
def test_WriteRead(self):
  # Check that things written with PutData can be read back.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    storage.PutData('bar', 'foo')
    self.CanBeReadBothWays(storage, 'foo', os.path.join(work_dir, 'out'), 'bar')
def test_AcceptSlashesAndDots(self):
  # Check that keys with slashes and dots are okay.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    storage.PutData('hello', 'this/is/a/cool_test.txt')
    self.assertEquals('hello', storage.GetData('this/is/a/cool_test.txt'))
def test_HitLocalFirst(self):
  # Check that reading hits local storage first.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    storage.PutData('there', 'foo')
    mem_storage.PutData('hello', 'foo')
    self.assertEquals('there', storage.GetData('foo'))
def test_InputUntouched(self):
  # Check that PutDirectory doesn't alter its inputs.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    temp1 = os.path.join(work_dir, 'temp1')
    hashing_tools_test.GenerateTestTree('input_untouched', temp1)
    h1 = hashing_tools.StableHashPath(temp1)
    self._dir_storage.PutDirectory(temp1, 'hello')
    h2 = hashing_tools.StableHashPath(temp1)
    self.assertEqual(h1, h2)
def test_Exists(self):
  # Check that Exists works properly.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    storage.PutData('bar', 'foo')
    self.assertTrue(storage.Exists('foo'))
    self.assertFalse(storage.Exists('bad_foo'))
def test_FirstTime(self):
  # Test that the computation is always performed if the cache is empty.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('FirstTime', work_dir)
    o = once.Once(storage=fake_storage.FakeStorage(), system_summary='test')
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Copy('%(input0)s/in0', '%(output)s/out')])
    self.assertEquals('FirstTimedata0',
                      file_tools.ReadFile(self._output_files[0]))
def test_URLsPropagate(self):
  # Check that consistent non-None URLs come from get and put.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    temp1 = os.path.join(work_dir, 'temp1')
    temp2 = os.path.join(work_dir, 'temp2')
    hashing_tools_test.GenerateTestTree('url_propagate', temp1)
    url1 = self._dir_storage.PutDirectory(temp1, 'me')
    url2 = self._dir_storage.GetDirectory('me', temp2)
    self.assertEqual(url1, url2)
    self.assertNotEqual(None, url1)
def test_WriteRead(self):
  # Check that a directory can be written and then read back.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    temp1 = os.path.join(work_dir, 'temp1')
    temp2 = os.path.join(work_dir, 'temp2')
    hashing_tools_test.GenerateTestTree('write_read', temp1)
    self._dir_storage.PutDirectory(temp1, 'foo')
    self._dir_storage.GetDirectory('foo', temp2)
    self.assertEqual(hashing_tools.StableHashPath(temp1),
                     hashing_tools.StableHashPath(temp2))
def test_Directory(self):
  # Check that identical directory trees hash to the same value.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    a1 = os.path.join(work_dir, 'a1')
    a2 = os.path.join(work_dir, 'a2')
    for path in [a1, a2]:
      GenerateTestTree('gorp', path)
    h1 = hashing_tools.StableHashPath(a1)
    h2 = hashing_tools.StableHashPath(a2)
    self.assertEqual(h1, h2)
    self.assertEqual(40, len(h1))
def test_Mkdir(self):
  # Test that the Mkdir convenience wrapper works.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('Mkdir', work_dir)
    foo = os.path.join(work_dir, 'foo')
    o = once.Once(storage=fake_storage.FakeStorage(),
                  cache_results=False,
                  system_summary='test')
    o.Run('test', self._input_dirs, foo,
          [command.Mkdir('%(output)s/hi')])
    self.assertTrue(os.path.isdir(os.path.join(foo, 'hi')))
def test_File(self):
  # Check that hashing a single file works.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    filename1 = os.path.join(work_dir, 'myfile1')
    filename2 = os.path.join(work_dir, 'myfile2')
    file_tools.WriteFile('booga', filename1)
    file_tools.WriteFile('booga', filename2)
    h1 = hashing_tools.StableHashPath(filename1)
    h2 = hashing_tools.StableHashPath(filename2)
    self.assertEqual(h1, h2)
    self.assertEqual(40, len(h1))
def test_InvalidKey(self):
  # Check that an invalid key raises KeyError.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    bar = os.path.join(work_dir, 'bar_file')
    file_tools.WriteFile('bar', bar)
    self.assertRaises(KeyError, storage.PutData, 'bar', 'foo$')
    self.assertRaises(KeyError, storage.GetData, 'foo^')
    self.assertRaises(KeyError, storage.PutFile, bar, 'foo#')
    self.assertRaises(KeyError, storage.GetFile, 'foo!', 'bar')
def test_WriteOnlyToLocal(self):
  # Check that things written hit local storage, not the network.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    mem_storage = fake_storage.FakeStorage()
    storage = local_storage_cache.LocalStorageCache(
        cache_path=os.path.join(work_dir, 'db'),
        storage=mem_storage)
    storage.PutData('bar', 'foo')
    self.assertEquals(None, mem_storage.GetData('foo'))
    bar = os.path.join(work_dir, 'bar_file')
    file_tools.WriteFile('bar', bar)
    storage.PutFile(bar, 'foo')
    self.assertEquals(None, mem_storage.GetData('foo'))
def test_NumCores(self):
  # Test that the core count is substituted. Since we don't know how many
  # cores the test machine will have, just check that it's a nonzero integer.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('NumCores', work_dir)
    o = once.Once(storage=fake_storage.FakeStorage(), system_summary='test')
    def CheckCores(subst):
      self.assertNotEquals(0, int(subst.Substitute('%(cores)s')))
    o.Run('test', {}, self._output_dirs[0], [command.Runnable(CheckCores)])
def test_CacheResultsFalse(self):
  # Check that setting cache_results=False prevents results from being
  # written to the cache.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('CacheResultsFalse', work_dir)
    storage = fake_storage.FakeStorage()
    o = once.Once(storage=storage, cache_results=False)
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Copy('%(input0)s/in0', '%(output)s/out', cwd=work_dir)])
    self.assertEquals(0, storage.ItemCount())
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[0]))
def test_BadWrite(self):
  def call(cmd):
    # Simulate gsutil failure with a nonzero return code.
    return 1
  storage = gsd_storage.GSDStorage(gsutil=['mygsutil'],
                                   write_bucket='mybucket',
                                   read_buckets=[],
                                   call=call)
  dir_storage = directory_storage.DirectoryStorageAdapter(storage)
  # Check that storage exceptions come through on failure.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    temp1 = os.path.join(work_dir, 'temp1')
    hashing_tools_test.GenerateTestTree('bad_write', temp1)
    self.assertRaises(gsd_storage.GSDStorageError,
                      dir_storage.PutDirectory, temp1, 'bad')
def test_Command(self):
  # Test a plain command.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('Command', work_dir)
    o = once.Once(storage=fake_storage.FakeStorage(), system_summary='test')
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Command([
              sys.executable, '-c',
              'import sys; open(sys.argv[1], "wb").write("hello")',
              '%(output)s/out'])])
    self.assertEquals('hello', file_tools.ReadFile(self._output_files[0]))
def test_FailsWhenWritingFails(self):
  # Check that once doesn't eat the storage layer failures for writes.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('FailsWhenWritingFails', work_dir)
    def call(cmd, **kwargs):
      # Cause gsutil commands to fail.
      return 1
    bad_storage = gsd_storage.GSDStorage(gsutil=['mygsutil'],
                                         write_bucket='mybucket',
                                         read_buckets=[],
                                         call=call)
    o = once.Once(storage=bad_storage, system_summary='test')
    self.assertRaises(gsd_storage.GSDStorageError, o.Run, 'test',
                      self._input_dirs, self._output_dirs[0],
                      [command.Copy('%(input0)s/in0', '%(output)s/out')])
def test_RecomputeHashMatches(self):
  # Test that things don't get stored to the output cache if they exist
  # already.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    # Setup test data in input0, input1 using memory storage.
    self.GenerateTestData('RecomputeHashMatches', work_dir)
    fs = fake_storage.FakeStorage()
    ds = directory_storage.DirectoryStorageAdapter(storage=fs)
    o = once.Once(storage=fs)
    # Run the computation (compute the length of a file) from input0 to
    # output0.
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [self.FileLength('%(input0)s/in0', '%(output)s/out', cwd=work_dir)])
    # Check that 2 writes have occurred. One to write a mapping from in->out,
    # and one for the output data.
    self.assertEquals(2, fs.WriteCount())
    # Run the computation again from input1 to output1.
    # (These should have the same length.)
    o.Run('test', self._input_dirs, self._output_dirs[1],
          [self.FileLength('%(input1)s/in1', '%(output)s/out', cwd=work_dir)])
    # Write count goes up by one as an in->out hash is added,
    # but no new output is stored (as it is the same).
    self.assertEquals(3, fs.WriteCount())
    # Check that the test is still valid:
    #   - in0 and in1 have equal length.
    #   - out0 and out1 have that length in them.
    #   - out0 and out1 agree.
    self.assertEquals(
        str(len(file_tools.ReadFile(self._input_files[0]))),
        file_tools.ReadFile(self._output_files[0]))
    self.assertEquals(
        str(len(file_tools.ReadFile(self._input_files[1]))),
        file_tools.ReadFile(self._output_files[1]))
    self.assertEquals(file_tools.ReadFile(self._output_files[0]),
                      file_tools.ReadFile(self._output_files[1]))
def WriteResultToCache(self, package, build_signature, output):
  """Cache a computed result by key.

  Also prints URLs when appropriate.
  Args:
    package: Package name (for tgz name).
    build_signature: The input hash of the computation.
    output: A path containing the output of the computation.
  """
  if not self._cache_results:
    return
  out_hash = hashing_tools.StableHashPath(output)
  try:
    output_key = self.KeyForOutput(package, out_hash)
    # Try to get an existing copy in a temporary directory.
    wd = working_directory.TemporaryWorkingDirectory()
    with wd as work_dir:
      temp_output = os.path.join(work_dir, 'out')
      url = self._directory_storage.GetDirectory(output_key, temp_output)
      if url is None:
        # Isn't present. Cache the computed result instead.
        url = self._directory_storage.PutDirectory(output, output_key)
        logging.info('Computed fresh result and cached it.')
      else:
        # Cached version is present. Replace the current output with that.
        file_tools.RemoveDirectoryIfPresent(output)
        shutil.move(temp_output, output)
        logging.info('Recomputed result matches cached value, '
                     'using cached value instead.')
    # Upload an entry mapping from computation input to output hash.
    self._storage.PutData(out_hash,
                          self.KeyForBuildSignature(build_signature))
    self.PrintDownloadURL(url)
  except gsd_storage.GSDStorageError:
    logging.info('Failed to cache result.')
    raise
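# The read path is the mirror image of the write above. A rough sketch of what
# ReadMemoizedResultFromCache is expected to do, based only on the storage
# calls made by WriteResultToCache; this is an assumption, not a copy of the
# real implementation:
#
#   def ReadMemoizedResultFromCache(self, package, build_signature, output):
#     # Look up the output hash recorded under this build signature.
#     out_hash = self._storage.GetData(
#         self.KeyForBuildSignature(build_signature))
#     if out_hash is None:
#       return False
#     # Fetch the cached output directory keyed by that hash.
#     file_tools.RemoveDirectoryIfPresent(output)
#     return self._directory_storage.GetDirectory(
#         self.KeyForOutput(package, out_hash), output) is not None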
def test_UnpackCommands(self):
  # Test that unpack commands get run first and hashed_inputs get
  # used when present.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('UnpackCommands', work_dir)
    self._tally = 0
    def check_call(cmd, **kwargs):
      self._tally += 1
      subprocess.check_call(cmd, **kwargs)
    o = once.Once(storage=fake_storage.FakeStorage(), check_call=check_call)
    alt_inputs = {'input0': os.path.join(work_dir, 'alt_input')}
    unpack_commands = [command.Copy('%(input0)s/in0', alt_inputs['input0'])]
    commands = [command.Copy('%(input0)s', '%(output)s/out', cwd=work_dir)]
    o.Run('test', self._input_dirs, self._output_dirs[0],
          commands=commands, unpack_commands=unpack_commands,
          hashed_inputs=alt_inputs)
    o.Run('test', self._input_dirs, self._output_dirs[1],
          commands=commands, unpack_commands=unpack_commands,
          hashed_inputs=alt_inputs)
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[0]))
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[1]))
    self.assertEquals(3, self._tally)
def test_UseCachedResultsFalse(self):
  # Check that use_cached_results=False does indeed cause computations
  # to be redone, even when present in the cache.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    self.GenerateTestData('UseCachedResultsFalse', work_dir)
    self._tally = 0
    def Copy(subst, src, dst):
      self._tally += 1
      shutil.copyfile(subst.SubstituteAbsPaths(src),
                      subst.SubstituteAbsPaths(dst))
    o = once.Once(storage=fake_storage.FakeStorage(),
                  use_cached_results=False,
                  system_summary='test')
    o.Run('test', self._input_dirs, self._output_dirs[0],
          [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
    o.Run('test', self._input_dirs, self._output_dirs[1],
          [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
    self.assertEquals(2, self._tally)
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[0]))
    self.assertEquals(file_tools.ReadFile(self._input_files[0]),
                      file_tools.ReadFile(self._output_files[1]))
def Run(self, package, inputs, output, commands, working_dir=None,
        memoize=True, signature_file=None, subdir=None):
  """Run an operation once, possibly hitting cache.

  Args:
    package: Name of the computation/module.
    inputs: A dict of names mapped to files that are inputs.
    output: An output directory.
    commands: A list of command.Command objects to run.
    working_dir: Working directory to use, or None for a temp dir.
    memoize: Boolean indicating that the result should be memoized.
    signature_file: File to write human readable build signatures to, or None.
    subdir: If not None, use this directory instead of the output dir as the
        substituter's output path. Must be a subdirectory of output.
  """
  if working_dir is None:
    wdm = working_directory.TemporaryWorkingDirectory()
  else:
    wdm = working_directory.FixedWorkingDirectory(working_dir)

  file_tools.MakeDirectoryIfAbsent(output)

  nonpath_subst = {'package': package}

  with wdm as work_dir:
    # Compute the build signature with modified inputs.
    build_signature = self.BuildSignature(package, inputs=inputs,
                                          commands=commands)
    # Optionally write human readable version of signature.
    if signature_file:
      signature_file.write(self.BuildSignature(
          package, inputs=inputs, commands=commands,
          hasher=HumanReadableSignature()))
      signature_file.flush()

    # We're done if it's in the cache.
    if (memoize and
        self.ReadMemoizedResultFromCache(package, build_signature, output)):
      return

    if subdir:
      assert subdir.startswith(output)

    for command in commands:
      paths = inputs.copy()
      paths['output'] = subdir if subdir else output
      nonpath_subst['build_signature'] = build_signature
      subst = substituter.Substituter(work_dir, paths, nonpath_subst)
      command.Invoke(subst)

  if memoize:
    self.WriteResultToCache(package, build_signature, output)
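# A minimal usage sketch for this Run() signature, mirroring the tests above.
# FakeStorage and the 'test' package name are test fixtures; the paths are
# hypothetical placeholders:
#
#   o = once.Once(storage=fake_storage.FakeStorage(), system_summary='test')
#   o.Run('test', {'input0': '/path/to/inputs'}, '/path/to/output',
#         [command.Copy('%(input0)s/in0', '%(output)s/out')])
#
# A second Run() with identical inputs and commands produces the same build
# signature, hits ReadMemoizedResultFromCache, and skips command invocation;
# passing memoize=False both forces the commands to run and skips writing the
# result back to the cache.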
def Run(self, package, inputs, output, commands, unpack_commands=None,
        hashed_inputs=None, working_dir=None):
  """Run an operation once, possibly hitting cache.

  Args:
    package: Name of the computation/module.
    inputs: A dict of names mapped to files that are inputs.
    output: An output directory.
    commands: A list of command.Command objects to run.
    unpack_commands: A list of command.Command objects to run before computing
        the build hash, or None.
    hashed_inputs: An alternate dict of inputs to use for hashing and
        after the packing stage (or None).
    working_dir: Working directory to use, or None for a temp dir.
  """
  if working_dir is None:
    wdm = working_directory.TemporaryWorkingDirectory()
  else:
    wdm = working_directory.FixedWorkingDirectory(working_dir)

  # Cleanup destination.
  file_tools.RemoveDirectoryIfPresent(output)
  os.mkdir(output)

  with wdm as work_dir:
    # Optionally unpack before hashing.
    if unpack_commands is not None:
      for command in unpack_commands:
        command.Invoke(check_call=self._check_call, package=package,
                       cwd=work_dir, inputs=inputs, output=output)

    # Use an alternate input set from here on.
    if hashed_inputs is not None:
      inputs = hashed_inputs

    # Compute the build signature with modified inputs.
    build_signature = self.BuildSignature(package, inputs=inputs,
                                          commands=commands)

    # We're done if it's in the cache.
    if self.ReadMemoizedResultFromCache(package, build_signature, output):
      return

    for command in commands:
      command.Invoke(check_call=self._check_call, package=package,
                     cwd=work_dir, inputs=inputs, output=output,
                     build_signature=build_signature)

  self.WriteResultToCache(package, build_signature, output)
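# A minimal usage sketch for this Run() variant, following test_UnpackCommands
# above. The directory names are hypothetical placeholders:
#
#   alt_inputs = {'input0': '/tmp/alt_input'}
#   o = once.Once(storage=fake_storage.FakeStorage())
#   o.Run('test', {'input0': '/path/to/input0'}, '/path/to/output',
#         commands=[command.Copy('%(input0)s', '%(output)s/out')],
#         unpack_commands=[
#             command.Copy('%(input0)s/in0', alt_inputs['input0'])],
#         hashed_inputs=alt_inputs)
#
# The unpack_commands run on every call because they execute before the build
# signature is computed; only the main commands are skipped on a cache hit,
# and the hit is computed over hashed_inputs when they are supplied. This is
# why test_UnpackCommands expects a tally of 3 across two Run() calls.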
def test_BadRead(self):
  # Check that reading a non-existent key returns None.
  with working_directory.TemporaryWorkingDirectory() as work_dir:
    temp1 = os.path.join(work_dir, 'temp1')
    self.assertEqual(None, self._dir_storage.GetDirectory('foo', temp1))