def test_prepare_cache_dir(self, tmpdir):
    # _prepare_cache_dir creates the configured cache directory.
    manager = CacheManager(str(tmpdir.join("cache")))
    target = tmpdir.join("new_dir")
    manager.cache_dir = str(target)
    manager._prepare_cache_dir()
    assert target.isdir() is True
def test_get_bucket_path(self, tmpdir):
    # a source file's hash maps to a bucket path inside the cache dir.
    manager = CacheManager(str(tmpdir.join("cache")))
    tmpdir.join("src.txt").write("source1\n")
    digest = manager.get_hash(str(tmpdir / "src.txt"))
    expected = tmpdir / "cache" / "73" / "737b337e605199de28b3b64c674f9422"
    assert manager._get_bucket_path(digest) == expected
def test_init_fails_loudly(self, tmpdir):
    # Passing a plain file (instead of a directory) as cache dir
    # must raise an IOError instead of failing silently.
    not_a_dir = tmpdir.join("some_file.txt")
    not_a_dir.write("this-is-not-a-dir")
    with pytest.raises(IOError):
        CacheManager(str(not_a_dir))
def test_prepare_cache_dir_broken(self, tmpdir):
    # preparing the cache dir fails loudly if the path points at a file.
    manager = CacheManager(str(tmpdir))
    tmpdir.join("not-a-dir.txt").write("foo")
    manager.cache_dir = str(tmpdir / "not-a-dir.txt")  # a file, not a dir
    with pytest.raises(IOError):
        manager._prepare_cache_dir()
def test_get_cached_file(self, cache_env):
    # we can get a file cached before.
    cm = CacheManager(str(cache_env / "cache"))
    cache_key = cm.register_doc(
        str(cache_env / "src1.txt"), str(cache_env / "result1.txt"))
    path = cm.get_cached_file(cache_key)
    assert path is not None
    # use a context manager so the handle is closed (original version
    # leaked an open file object)
    with open(path, 'r') as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
def test_get_cached_file_w_key_from_file(self, cache_env):
    # we can get a cached file, stored under a key, which is a file
    cm = CacheManager(str(cache_env / "cache"))
    cache_key = cm.register_doc(
        str(cache_env / "src1.txt"), str(cache_env / "result1.txt"),
        repr_key=StringIO('foo'))
    path = cm.get_cached_file(cache_key)
    assert path is not None
    # use a context manager so the handle is closed (original version
    # leaked an open file object)
    with open(path, 'r') as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
def test_get_cached_file_by_src(self, cache_env):
    # we can get a cached file by source file and options
    cm = CacheManager(str(cache_env / "cache"))
    # register without an explicit cache key
    my_id = cm.register_doc(
        str(cache_env / "src1.txt"), str(cache_env / "result1.txt"))
    path, key = cm.get_cached_file_by_source(str(cache_env / "src1.txt"))
    # use a context manager so the handle is closed (original version
    # leaked an open file object)
    with open(path, "r") as fd:
        assert fd.read() == (cache_env / "result1.txt").read()
    assert key == '737b337e605199de28b3b64c674f9422_1_1'
    assert my_id == key
def test_markerhandling(self, tmpdir):
    # cache keys can be composed and dissolved again.
    manager = CacheManager(str(tmpdir))
    composed = manager._compose_cache_key('somefakedhash', 3)
    assert composed == "somefakedhash_3"
    digest, bucket = manager._dissolve_cache_key("somefakedhash_3")
    assert digest == "somefakedhash"
    assert bucket == "3"
    # malformed or missing keys dissolve to (None, None)
    assert manager._dissolve_cache_key("asd") == (None, None)
    assert manager._dissolve_cache_key(None) == (None, None)
def test_get_hash(self, cache_env, samples_dir):
    # source files hash to stable, distinct digests.
    manager = CacheManager(str(cache_env))
    expectations = [
        (cache_env / "src1.txt", '737b337e605199de28b3b64c674f9422'),
        (cache_env / "src2.txt", 'd5aa51d7fb180729089d2de904f7dffe'),
        (samples_dir / "testdoc1.doc", '443a07e0e92b7dc6b21f8be6a388f05f'),
    ]
    for path, digest in expectations:
        assert manager.get_hash(str(path)) == digest
    # the path argument is mandatory
    with pytest.raises(TypeError):
        manager.get_hash()
def test_get_cached(self):
    # registered docs can be fetched back through the proxy.
    manager = CacheManager(self.cachedir)
    source_result = os.path.join(self.src_dir, 'result.txt')
    with open(source_result, 'w') as stream:
        stream.write('The Result\n')
    key = manager.register_doc(self.src_path, source_result, 'somekey')
    assert key == '2b87e29fca6ee7f1df6c1a76cb58e101_1_1'
    cached = self.proxy.get_cached(key)
    assert cached is not None
    # the cache hands out its own copy, not the registered path ...
    assert cached != source_result
    # ... but with identical content
    assert filecmp.cmp(cached, source_result, shallow=False)
def convert_doc(src_doc, options, cache_dir):
    """Convert `src_doc` according to the other parameters.

    `src_doc` is the path to the source document. `options` is a dict
    of options for processing, passed to the processors. `cache_dir`
    may be ``None`` in which no caching is requested during
    processing.

    Generates a converted representation of `src_doc` by calling
    :class:`ulif.openoffice.processor.MetaProcessor` with `options` as
    parameters.

    Afterwards the conversion result is stored in cache (if
    allowed/possible) for speedup of upcoming requests.

    Returns a triple:

      ``(<PATH>, <CACHE_KEY>, <METADATA>)``

    where ``<PATH>`` is the path to the resulting document,
    ``<CACHE_KEY>`` an identifier (string) to retrieve a generated doc
    from cache on future requests, and ``<METADATA>`` is a dict of
    values returned during request (and set by the document
    processors, notably setting the `error` keyword).

    If errors happen or caching is disabled, ``<CACHE_KEY>`` is
    ``None``.
    """
    result_path = None
    cache_key = None
    # Unique marker derived from the options, so identical sources
    # processed with different options get distinct cache entries.
    repr_key = get_marker(options)
    metadata = dict(error=False)
    # Work on a private copy: the processor consumes (removes) its input.
    input_copy_dir = tempfile.mkdtemp()
    input_copy = os.path.join(input_copy_dir, os.path.basename(src_doc))
    shutil.copy2(src_doc, input_copy)
    try:
        proc = MetaProcessor(options=options)  # Removes original doc
        result_path, metadata = proc.process(input_copy)
    except Exception:
        shutil.rmtree(input_copy_dir)
        # bare `raise` re-raises with the original traceback intact
        # (instead of `raise exc`, which appends an extra frame)
        raise
    error_state = metadata.get('error', False)
    if cache_dir and not error_state and result_path is not None:
        # Cache away generated doc for future requests.
        cache_key = CacheManager(cache_dir).register_doc(
            src_doc, result_path, repr_key)
    return result_path, cache_key, metadata
def test_register_doc(self, cache_env):
    # registering docs yields predictable, distinct markers.
    manager = CacheManager(str(cache_env / "cache"))
    src1 = str(cache_env / "src1.txt")
    src2 = str(cache_env / "src2.txt")
    result1 = str(cache_env / "result1.txt")
    result2 = str(cache_env / "result2.txt")
    # registering the same source/result twice gives the same marker
    assert manager.register_doc(src1, result1) == (
        '737b337e605199de28b3b64c674f9422_1_1')
    assert manager.register_doc(src1, result1) == (
        '737b337e605199de28b3b64c674f9422_1_1')
    # a different result registered under a repr key gets a new slot
    assert manager.register_doc(src1, result2, repr_key="foo") == (
        '737b337e605199de28b3b64c674f9422_1_2')
    assert manager.register_doc(src2, result2, repr_key="foo") == (
        'd5aa51d7fb180729089d2de904f7dffe_1_1')
    # repr_key may also be a file-like object
    assert manager.register_doc(src2, result2, repr_key=StringIO("bar")) == (
        'd5aa51d7fb180729089d2de904f7dffe_1_2')
def test_get_cached_file_by_src_w_key(self, cache_env):
    # lookups by source file honour the repr key.
    manager = CacheManager(str(cache_env / "cache"))
    src = cache_env / "src1.txt"
    result1 = cache_env / "result1.txt"
    result2 = cache_env / "result2.txt"
    id1 = manager.register_doc(str(src), str(result1), 'mykey')
    path1, key1 = manager.get_cached_file_by_source(str(src), 'mykey')
    assert filecmp.cmp(path1, str(result1), shallow=False)
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    assert key1 == id1
    # a key that was never registered yields no cache hit
    path2, key2 = manager.get_cached_file_by_source(str(src), 'otherkey')
    assert path2 is None
    assert key2 is None
    # store and retrieve a second cache file under the other key
    id3 = manager.register_doc(str(src), str(result2), 'otherkey')
    path3, key3 = manager.get_cached_file_by_source(str(src), 'otherkey')
    assert filecmp.cmp(path3, str(result2), shallow=False)
    assert key3 == '737b337e605199de28b3b64c674f9422_1_2'
    assert key3 == id3
def test_keys(self, cache_env):
    # keys() lists every registered cache key.
    manager = CacheManager(str(cache_env / "cache"))
    src1 = str(cache_env / "src1.txt")
    src2 = str(cache_env / "src2.txt")
    result1 = str(cache_env / "result1.txt")
    result2 = str(cache_env / "result2.txt")
    key1 = manager.register_doc(src1, result1, 'foo')
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    assert list(manager.keys()) == ['737b337e605199de28b3b64c674f9422_1_1']
    key2 = manager.register_doc(src1, result2, 'bar')
    assert key2 == '737b337e605199de28b3b64c674f9422_1_2'
    assert sorted(manager.keys()) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
    ]
    key3 = manager.register_doc(src2, result1, 'baz')
    assert key3 == 'd5aa51d7fb180729089d2de904f7dffe_1_1'
    assert sorted(manager.keys()) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
        'd5aa51d7fb180729089d2de904f7dffe_1_1',
    ]
def test_keys_custom_level(self, cache_env):
    # keys() also works with a custom cache level, where entries
    # live at a different location inside the cache dir.
    manager = CacheManager(str(cache_env / "cache"), level=3)
    src1 = str(cache_env / "src1.txt")
    src2 = str(cache_env / "src2.txt")
    result1 = str(cache_env / "result1.txt")
    result2 = str(cache_env / "result2.txt")
    key1 = manager.register_doc(src1, result1, 'foo')
    assert key1 == '737b337e605199de28b3b64c674f9422_1_1'
    assert list(manager.keys()) == ['737b337e605199de28b3b64c674f9422_1_1']
    key2 = manager.register_doc(src1, result2, 'bar')
    assert key2 == '737b337e605199de28b3b64c674f9422_1_2'
    assert sorted(manager.keys()) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
    ]
    key3 = manager.register_doc(src2, result1, 'baz')
    assert key3 == 'd5aa51d7fb180729089d2de904f7dffe_1_1'
    assert sorted(manager.keys()) == [
        '737b337e605199de28b3b64c674f9422_1_1',
        '737b337e605199de28b3b64c674f9422_1_2',
        'd5aa51d7fb180729089d2de904f7dffe_1_1',
    ]
def test_init(self, tmpdir):
    # a freshly created cache manager defaults to nesting level 1.
    manager = CacheManager(str(tmpdir.join("cache")))
    assert manager.level == 1
    assert manager.cache_dir == tmpdir.join("cache")
def test_get_cached_file_by_src_failed(self, cache_env):
    # looking up an uncached source yields (None, None).
    manager = CacheManager(str(cache_env))
    path, key = manager.get_cached_file_by_source(
        str(cache_env / "src1.txt"))
    assert path is None
    assert key is None
def test_init_level(self, tmpdir):
    # the nesting level (depth) is configurable at creation time.
    manager = CacheManager(str(tmpdir.join("cache")), level=3)
    assert manager.level == 3
def test_get_cached_file_invalid_cache_key(self, tmpdir):
    # an invalid/unused cache key yields no cached file.
    manager = CacheManager(str(tmpdir))
    assert manager.get_cached_file("not-existing") is None
def test_init_creates_dir(self, tmpdir):
    # instantiating a manager creates the cache dir if it is missing.
    target = tmpdir / "cache"
    assert target.exists() is False
    CacheManager(str(target))
    assert target.isdir() is True
def test_compose_marker(self, tmpdir):
    # hash digest and bucket marker combine into one cache key.
    manager = CacheManager(str(tmpdir))
    composed = manager._compose_cache_key('some_hash_digest', 'bucket_marker')
    assert composed == 'some_hash_digest_bucket_marker'
def test_prepare_cache_dir_none(self, tmpdir):
    # a cache manager tolerates having no cache dir at all.
    manager = CacheManager(str(tmpdir))
    manager.cache_dir = None
    manager._prepare_cache_dir()
    assert manager.cache_dir is None
def __init__(self, cache_dir=None):
    # Remember the cache location; only build a manager when
    # caching is actually enabled (cache_dir given).
    self.cache_dir = cache_dir
    if cache_dir is None:
        self.cache_manager = None
    else:
        self.cache_manager = CacheManager(cache_dir)
def test_get_cached_file_empty(self, cache_env):
    # asking an empty cache for a file yields `None`.
    manager = CacheManager(str(cache_env / "cache"))
    assert manager.get_cached_file(str(cache_env / "src1.txt")) is None