예제 #1
0
 def test_prepare_cache_dir(self, tmpdir):
     # Pointing an existing manager at a not-yet-existing path and
     # calling `_prepare_cache_dir()` must create that directory.
     target = tmpdir.join("new_dir")
     manager = CacheManager(str(tmpdir.join("cache")))
     manager.cache_dir = str(target)
     manager._prepare_cache_dir()
     assert target.isdir() is True
예제 #2
0
 def test_get_bucket_path(self, tmpdir):
     # A hash value maps to a bucket path below the cache dir; the
     # expected path nests the full hash under a directory named
     # after its first two characters.
     source = tmpdir.join("src.txt")
     manager = CacheManager(str(tmpdir.join("cache")))
     source.write("source1\n")
     digest = manager.get_hash(str(source))
     expected = tmpdir / "cache" / "73" / "737b337e605199de28b3b64c674f9422"
     assert manager._get_bucket_path(digest) == expected
예제 #3
0
 def test_init_fails_loudly(self, tmpdir):
     # Passing a regular file where a cache directory is expected
     # must make the constructor raise IOError.
     not_a_dir = tmpdir.join("some_file.txt")
     not_a_dir.write("this-is-not-a-dir")
     bogus_cache_dir = str(not_a_dir)
     with pytest.raises(IOError):
         CacheManager(bogus_cache_dir)
예제 #4
0
 def test_prepare_cache_dir_broken(self, tmpdir):
     # If the configured cache dir cannot be created (here: a plain
     # file already occupies the path), IOError must be raised.
     blocker = tmpdir.join("not-a-dir.txt")
     manager = CacheManager(str(tmpdir))
     blocker.write("foo")  # a file sitting where a dir is expected
     manager.cache_dir = str(blocker)
     with pytest.raises(IOError):
         manager._prepare_cache_dir()
예제 #5
0
 def test_get_cached_file(self, cache_env):
     # A document registered before can be fetched again by the cache
     # key that `register_doc()` returned.
     cm = CacheManager(str(cache_env / "cache"))
     cache_key = cm.register_doc(str(cache_env / "src1.txt"),
                                 str(cache_env / "result1.txt"))
     path = cm.get_cached_file(cache_key)
     assert path is not None
     # Read via a context manager so the file handle is closed
     # deterministically instead of being left to garbage collection.
     with open(path, 'r') as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
예제 #6
0
 def test_get_cached_file_w_key_from_file(self, cache_env):
     # A cached file can be retrieved when the `repr_key` it was
     # registered under is a file-like object instead of a string.
     cm = CacheManager(str(cache_env / "cache"))
     cache_key = cm.register_doc(str(cache_env / "src1.txt"),
                                 str(cache_env / "result1.txt"),
                                 repr_key=StringIO('foo'))
     path = cm.get_cached_file(cache_key)
     assert path is not None
     # Read via a context manager so the file handle is closed
     # deterministically instead of being left to garbage collection.
     with open(path, 'r') as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
예제 #7
0
 def test_get_cached_file_by_src(self, cache_env):
     # A cached file can be looked up by its source file; the lookup
     # returns both the cached path and the cache key.
     cm = CacheManager(str(cache_env / "cache"))
     # register (and later look up) without an explicit repr key
     my_id = cm.register_doc(str(cache_env / "src1.txt"),
                             str(cache_env / "result1.txt"))
     path, key = cm.get_cached_file_by_source(str(cache_env / "src1.txt"))
     # Read via a context manager so the file handle is closed
     # deterministically instead of being left to garbage collection.
     with open(path, "r") as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
     assert key == '737b337e605199de28b3b64c674f9422_1_1'
     # the key found by source equals the one handed out on registration
     assert my_id == key
예제 #8
0
 def test_markerhandling(self, tmpdir):
     # Cache keys can be composed from hash and bucket marker and be
     # split up again; unusable keys dissolve to `(None, None)`.
     manager = CacheManager(str(tmpdir))
     composed = manager._compose_cache_key('somefakedhash', 3)
     assert composed == "somefakedhash_3"
     digest, marker = manager._dissolve_cache_key("somefakedhash_3")
     assert digest == "somefakedhash"
     assert marker == "3"
     for unusable_key in ("asd", None):
         assert manager._dissolve_cache_key(unusable_key) == (None, None)
예제 #9
0
 def test_get_hash(self, cache_env, samples_dir):
     # Hashes of known source files match their expected digests;
     # calling `get_hash()` without a path is a TypeError.
     manager = CacheManager(str(cache_env))
     digest_src1 = manager.get_hash(str(cache_env / "src1.txt"))
     digest_src2 = manager.get_hash(str(cache_env / "src2.txt"))
     digest_doc = manager.get_hash(str(samples_dir / "testdoc1.doc"))
     computed = [digest_src1, digest_src2, digest_doc]
     expected = [
         '737b337e605199de28b3b64c674f9422',
         'd5aa51d7fb180729089d2de904f7dffe',
         '443a07e0e92b7dc6b21f8be6a388f05f',
     ]
     for actual, wanted in zip(computed, expected):
         assert actual == wanted
     with pytest.raises(TypeError):
         manager.get_hash()
예제 #10
0
 def test_get_cached(self):
     # A doc registered with the cache manager can be fetched through
     # the proxy; the returned file is a copy with identical content.
     manager = CacheManager(self.cachedir)
     expected_path = os.path.join(self.src_dir, 'result.txt')
     with open(expected_path, 'w') as result_file:
         result_file.write('The Result\n')
     cache_key = manager.register_doc(
         self.src_path, expected_path, 'somekey')
     assert cache_key == '2b87e29fca6ee7f1df6c1a76cb58e101_1_1'
     cached_path = self.proxy.get_cached(cache_key)
     assert cached_path is not None
     # not the registered file itself, but byte-wise the same content
     assert cached_path != expected_path
     assert filecmp.cmp(cached_path, expected_path, shallow=False)
예제 #11
0
def convert_doc(src_doc, options, cache_dir):
    """Convert `src_doc` according to the other parameters.

    `src_doc` is the path to the source document. `options` is a dict
    of options for processing, passed to the processors.

    `cache_dir` may be ``None``, in which case no caching is requested
    during processing.

    Generates a converted representation of `src_doc` by calling
    :class:`ulif.openoffice.processor.MetaProcessor` with `options` as
    parameters. Processing happens on a copy of `src_doc` placed in a
    freshly created temporary directory.

    Afterwards the conversion result is stored in cache (if
    allowed/possible) for speedup of upcoming requests.

    Returns a triple:

      ``(<PATH>, <CACHE_KEY>, <METADATA>)``

    where ``<PATH>`` is the path to the resulting document,
    ``<CACHE_KEY>`` an identifier (string) to retrieve a generated doc
    from cache on future requests, and ``<METADATA>`` is a dict of values
    returned during request (and set by the document processors,
    notably setting the `error` keyword).

    If errors happen or caching is disabled, ``<CACHE_KEY>`` is
    ``None``.

    Exceptions raised while copying the source or while processing are
    re-raised unchanged, after the temporary directory holding the
    input copy has been removed.
    """
    cache_key = None
    repr_key = get_marker(options)  # Create unique marker out of options

    # Generate result from a private copy of the source document.
    input_copy_dir = tempfile.mkdtemp()
    try:
        # Copy inside the `try`, so a failing copy does not leave the
        # temporary directory behind.
        input_copy = os.path.join(input_copy_dir, os.path.basename(src_doc))
        shutil.copy2(src_doc, input_copy)
        proc = MetaProcessor(options=options)  # Removes original doc
        result_path, metadata = proc.process(input_copy)
    except Exception:
        # Best-effort cleanup: the directory may already be gone and a
        # cleanup problem must not hide the real error.
        shutil.rmtree(input_copy_dir, ignore_errors=True)
        raise

    error_state = metadata.get('error', False)
    if cache_dir and not error_state and result_path is not None:
        # Cache away generated doc
        cache_key = CacheManager(cache_dir).register_doc(
            src_doc, result_path, repr_key)
    return result_path, cache_key, metadata
예제 #12
0
 def test_register_doc(self, cache_env):
     # Registering docs yields cache keys; the asserted keys show the
     # same source/result pair giving the same key again, while a new
     # `repr_key` for the same source gets the next number.
     manager = CacheManager(str(cache_env / "cache"))
     first_src = str(cache_env / "src1.txt")
     second_src = str(cache_env / "src2.txt")
     first_result = str(cache_env / "result1.txt")
     second_result = str(cache_env / "result2.txt")

     key = manager.register_doc(first_src, first_result)
     assert key == '737b337e605199de28b3b64c674f9422_1_1'

     # registering the very same pair again
     key = manager.register_doc(first_src, first_result)
     assert key == '737b337e605199de28b3b64c674f9422_1_1'

     # same source, other result stored under an explicit repr key
     key = manager.register_doc(first_src, second_result, repr_key="foo")
     assert key == '737b337e605199de28b3b64c674f9422_1_2'

     # a different source
     key = manager.register_doc(second_src, second_result, repr_key="foo")
     assert key == 'd5aa51d7fb180729089d2de904f7dffe_1_1'

     # repr keys may also be file-like objects
     key = manager.register_doc(
         second_src, second_result, repr_key=StringIO("bar"))
     assert key == 'd5aa51d7fb180729089d2de904f7dffe_1_2'
예제 #13
0
 def test_get_cached_file_by_src_w_key(self, cache_env):
     # Cached files can be looked up by source file plus repr key.
     manager = CacheManager(str(cache_env / "cache"))
     source = str(cache_env / "src1.txt")
     first_result = str(cache_env / "result1.txt")
     second_result = str(cache_env / "result2.txt")

     registered_id = manager.register_doc(source, first_result, 'mykey')
     found_path, found_key = manager.get_cached_file_by_source(
         source, 'mykey')
     assert filecmp.cmp(found_path, first_result, shallow=False)
     assert found_key == '737b337e605199de28b3b64c674f9422_1_1'
     assert found_key == registered_id

     # nothing has been stored under this repr key yet
     found_path, found_key = manager.get_cached_file_by_source(
         source, 'otherkey')
     assert found_path is None
     assert found_key is None

     # store and retrieve a second cache file for the same source
     registered_id = manager.register_doc(source, second_result, 'otherkey')
     found_path, found_key = manager.get_cached_file_by_source(
         source, 'otherkey')
     assert filecmp.cmp(found_path, second_result, shallow=False)
     assert found_key == '737b337e605199de28b3b64c674f9422_1_2'
     assert found_key == registered_id
예제 #14
0
 def test_keys(self, cache_env):
     # `keys()` lists the cache keys of everything registered so far.
     manager = CacheManager(str(cache_env / "cache"))
     first_src = str(cache_env / "src1.txt")
     second_src = str(cache_env / "src2.txt")
     first_result = str(cache_env / "result1.txt")
     second_result = str(cache_env / "result2.txt")
     expected_1 = '737b337e605199de28b3b64c674f9422_1_1'
     expected_2 = '737b337e605199de28b3b64c674f9422_1_2'
     expected_3 = 'd5aa51d7fb180729089d2de904f7dffe_1_1'

     registered = manager.register_doc(first_src, first_result, 'foo')
     assert list(manager.keys()) == [expected_1]
     assert registered == expected_1

     registered = manager.register_doc(first_src, second_result, 'bar')
     assert sorted(manager.keys()) == [expected_1, expected_2]
     assert registered == expected_2

     registered = manager.register_doc(second_src, first_result, 'baz')
     assert sorted(manager.keys()) == [expected_1, expected_2, expected_3]
     assert registered == expected_3
예제 #15
0
 def test_keys_custom_level(self, cache_env):
     # `keys()` lists all cache keys also for a manager created with
     # a non-default `level`.
     manager = CacheManager(str(cache_env / "cache"), level=3)
     first_src = str(cache_env / "src1.txt")
     second_src = str(cache_env / "src2.txt")
     first_result = str(cache_env / "result1.txt")
     second_result = str(cache_env / "result2.txt")
     expected_1 = '737b337e605199de28b3b64c674f9422_1_1'
     expected_2 = '737b337e605199de28b3b64c674f9422_1_2'
     expected_3 = 'd5aa51d7fb180729089d2de904f7dffe_1_1'

     registered = manager.register_doc(first_src, first_result, 'foo')
     assert list(manager.keys()) == [expected_1]
     assert registered == expected_1

     registered = manager.register_doc(first_src, second_result, 'bar')
     assert sorted(manager.keys()) == [expected_1, expected_2]
     assert registered == expected_2

     registered = manager.register_doc(second_src, first_result, 'baz')
     assert sorted(manager.keys()) == [expected_1, expected_2, expected_3]
     assert registered == expected_3
예제 #16
0
 def test_init(self, tmpdir):
     # A manager created without an explicit level has level 1 and
     # remembers the cache dir it was given.
     cache_path = tmpdir.join("cache")
     manager = CacheManager(str(cache_path))
     assert manager.level == 1
     assert manager.cache_dir == cache_path
예제 #17
0
 def test_get_cached_file_by_src_failed(self, cache_env):
     # Looking up a source that was never registered yields `None`
     # for both the cached path and the cache key.
     source = str(cache_env / "src1.txt")
     manager = CacheManager(str(cache_env))
     cached_path, cache_key = manager.get_cached_file_by_source(source)
     assert cached_path is None
     assert cache_key is None
예제 #18
0
 def test_init_level(self, tmpdir):
     # The `level` (depth) passed to the constructor is kept.
     cache_path = str(tmpdir.join("cache"))
     manager = CacheManager(cache_path, level=3)
     assert manager.level == 3
예제 #19
0
 def test_get_cached_file_invalid_cache_key(self, tmpdir):
     # Asking for an invalid/unused cache key gives `None`.
     manager = CacheManager(str(tmpdir))
     cached = manager.get_cached_file("not-existing")
     assert cached is None
예제 #20
0
 def test_init_creates_dir(self, tmpdir):
     # Creating a manager creates the cache dir if it is missing.
     target = tmpdir / "cache"
     assert target.exists() is False
     CacheManager(str(target))
     assert target.isdir() is True
예제 #21
0
 def test_compose_marker(self, tmpdir):
     # A cache key is the hash digest and the bucket marker joined
     # by an underscore.
     manager = CacheManager(str(tmpdir))
     composed = manager._compose_cache_key(
         'some_hash_digest', 'bucket_marker')
     assert composed == 'some_hash_digest_bucket_marker'
예제 #22
0
 def test_prepare_cache_dir_none(self, tmpdir):
     # `_prepare_cache_dir()` copes with `cache_dir` being `None`
     # and leaves the attribute untouched.
     manager = CacheManager(str(tmpdir))
     manager.cache_dir = None
     manager._prepare_cache_dir()
     assert manager.cache_dir is None
예제 #23
0
 def __init__(self, cache_dir=None):
     """Store `cache_dir` and set up a cache manager for it.

     Without a cache dir (``None``, the default) `cache_manager`
     stays ``None``.
     """
     self.cache_dir = cache_dir
     self.cache_manager = None
     if self.cache_dir is None:
         return
     self.cache_manager = CacheManager(self.cache_dir)
예제 #24
0
 def test_get_cached_file_empty(self, cache_env):
     # As long as nothing was registered, asking for a cached file
     # gives `None`.
     manager = CacheManager(str(cache_env / "cache"))
     lookup_key = str(cache_env / "src1.txt")
     assert manager.get_cached_file(lookup_key) is None