Пример #1
0
 def test_prepare_cache_dir(self, tmpdir):
     """A not yet existing cache directory is created on request."""
     target = tmpdir.join("new_dir")
     manager = CacheManager(str(tmpdir.join("cache")))
     # Point the manager at a directory that does not exist yet.
     manager.cache_dir = str(target)
     manager._prepare_cache_dir()
     created = target.isdir()
     assert created is True
Пример #2
0
 def test_prepare_cache_dir(self, tmpdir):
     """A not yet existing cache directory is created on request."""
     target = tmpdir.join("new_dir")
     manager = CacheManager(str(tmpdir.join("cache")))
     # Point the manager at a directory that does not exist yet.
     manager.cache_dir = str(target)
     manager._prepare_cache_dir()
     created = target.isdir()
     assert created is True
Пример #3
0
 def test_get_bucket_path(self, tmpdir):
     """A bucket path can be derived from the hash of a source file."""
     manager = CacheManager(str(tmpdir.join("cache")))
     source = tmpdir.join("src.txt")
     source.write("source1\n")
     digest = manager.get_hash(str(source))
     # Expected layout: <cache>/<first two hash chars>/<full hash>
     expected = tmpdir / "cache" / "73" / "737b337e605199de28b3b64c674f9422"
     assert manager._get_bucket_path(digest) == expected
Пример #4
0
 def test_get_cached_file_by_src_failed(self, cache_env):
     """Looking up an uncached source yields `None` for path and key."""
     manager = CacheManager(str(cache_env))
     source = str(cache_env / "src1.txt")
     lookup = manager.get_cached_file_by_source(source)
     cached_path, cache_key = lookup
     assert cached_path is None
     assert cache_key is None
Пример #5
0
 def test_prepare_cache_dir_broken(self, tmpdir):
     """Preparing a cache dir fails with `IOError` if the path is a file."""
     manager = CacheManager(str(tmpdir))
     blocker = tmpdir.join("not-a-dir.txt")
     blocker.write("foo")  # a regular file where a directory is expected
     manager.cache_dir = str(blocker)
     with pytest.raises(IOError):
         manager._prepare_cache_dir()
Пример #6
0
    def test_get_all_sources(self):
        """`getAllSources` returns a generator over the cached sources.

        After two documents are registered two sources are reported.
        Stray files and directories placed in the cache dir must not be
        reported under their names.
        """
        cm = CacheManager(self.workdir)
        result1 = cm.getAllSources()
        self.assertTrue(isinstance(result1, types.GeneratorType))
        self.assertEqual(list(result1), [])

        cm.registerDoc(
            self.src_path1, self.result_path1, suffix=None)
        cm.registerDoc(
            self.src_path2, self.result_path2, suffix='foo')
        result2 = list(cm.getAllSources())
        self.assertEqual(len(result2), 2)

        # A stray file in the cache dir. Written through a context manager
        # so the handle is closed; bytes literal because of binary mode.
        with open(os.path.join(self.workdir, 'crapfile'), 'wb') as fd:
            fd.write(b'crap')
        result3 = list(cm.getAllSources())
        self.assertFalse('crap' in result3)

        # A stray directory in the cache dir.
        os.mkdir(os.path.join(self.workdir, 'crapdir'))
        result4 = list(cm.getAllSources())
        self.assertFalse('crapdir' in result4)

        # A first-level dir containing something that is no hash dir.
        os.makedirs(os.path.join(self.workdir, '66', 'invalid_hashdir'))
        result5 = list(cm.getAllSources())
        self.assertFalse('66' in result5)
Пример #7
0
 def test_prepare_cache_dir_broken(self, tmpdir):
     """Preparing a cache dir fails with `IOError` if the path is a file."""
     manager = CacheManager(str(tmpdir))
     blocker = tmpdir.join("not-a-dir.txt")
     blocker.write("foo")  # a regular file where a directory is expected
     manager.cache_dir = str(blocker)
     with pytest.raises(IOError):
         manager._prepare_cache_dir()
Пример #8
0
 def test_get_bucket_path(self, tmpdir):
     """A bucket path can be derived from the hash of a source file."""
     manager = CacheManager(str(tmpdir.join("cache")))
     source = tmpdir.join("src.txt")
     source.write("source1\n")
     digest = manager.get_hash(str(source))
     # Expected layout: <cache>/<first two hash chars>/<full hash>
     expected = tmpdir / "cache" / "73" / "737b337e605199de28b3b64c674f9422"
     assert manager._get_bucket_path(digest) == expected
Пример #9
0
 def test_get_cached_file(self, cache_env):
     """A document registered in the cache can be fetched by its cache key.

     The cached file must exist and carry the same content as the
     registered result file.
     """
     cm = CacheManager(str(cache_env / "cache"))
     cache_key = cm.register_doc(str(cache_env / "src1.txt"),
                                 str(cache_env / "result1.txt"))
     path = cm.get_cached_file(cache_key)
     assert path is not None
     # Read through a context manager so the handle is closed right away
     # instead of being left to the garbage collector.
     with open(path, 'r') as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
Пример #10
0
 def test_get_cached_file_by_src(self):
     """A cached result can be looked up via its source file alone."""
     manager = CacheManager(self.workdir)
     # Register without an explicit representation key.
     manager.register_doc(self.src_path1, self.result_path1)
     lookup = manager.get_cached_file_by_source(self.src_path1)
     cached_path, cache_key = lookup
     same_content = filecmp.cmp(
         cached_path, self.result_path1, shallow=False)
     assert same_content
     assert cache_key == '737b337e605199de28b3b64c674f9422_1_1'
Пример #11
0
 def test_get_bucket_path_from_path(self):
     """The bucket path of a source ends in `<hash prefix>/<hash>`."""
     manager = CacheManager(self.workdir)
     bucket_path = manager._getBucketPathFromPath(self.src_path1)
     # Computing the path must leave the cache directory empty.
     self.assertEqual(os.listdir(self.workdir), [])
     tail = os.path.join('73', '737b337e605199de28b3b64c674f9422')
     self.assertTrue(bucket_path.endswith(tail))
Пример #12
0
 def test_get_bucket_from_path(self):
     """`getBucketFromPath` gives a `Bucket` and adds one cache dir entry."""
     entries_before = len(os.listdir(self.workdir))
     manager = CacheManager(self.workdir)
     bucket = manager.getBucketFromPath(self.src_path1)
     entries_after = len(os.listdir(self.workdir))
     self.assertTrue(isinstance(bucket, Bucket))
     # Exactly one new entry appeared in the cache directory.
     self.assertTrue(entries_after == entries_before + 1)
Пример #13
0
 def test_get_hash(self):
     """`getHash` returns the known digests and requires a path argument."""
     manager = CacheManager(self.workdir)
     expectations = (
         (self.src_path1, '737b337e605199de28b3b64c674f9422'),
         (self.src_path2, 'd5aa51d7fb180729089d2de904f7dffe'),
     )
     # Compute all digests first, then compare them one by one.
     digests = [manager.getHash(path) for path, _ in expectations]
     for digest, (_, wanted) in zip(digests, expectations):
         self.assertEqual(digest, wanted)
     self.assertRaises(TypeError, manager.getHash)
Пример #14
0
 def register_fakedoc_in_cache(self, src, options):
     """Register a fake conversion result for `src` in the cache.

     Writes a dummy result file into ``self.workdir`` and registers it
     with a cache manager working on ``self.cachedir``. The representation
     key used for registration is derived from `options`.

     Returns the cache key handed back by ``register_doc``.
     """
     cm = CacheManager(self.cachedir)
     fake_result_path = os.path.join(self.workdir, 'result.html')
     # Use a context manager so the file is flushed and closed before
     # the cache manager reads it.
     with open(fake_result_path, 'w') as fd:
         fd.write('A fake result.')
     marker = get_marker(options)
     cache_key = cm.register_doc(src, fake_result_path, repr_key=marker)
     return cache_key
Пример #15
0
 def test_get_cached_file_w_key_from_file(self, cache_env):
     """A doc registered with a file-like `repr_key` can be fetched by key.

     The cached file must exist and carry the same content as the
     registered result file.
     """
     cm = CacheManager(str(cache_env / "cache"))
     cache_key = cm.register_doc(str(cache_env / "src1.txt"),
                                 str(cache_env / "result1.txt"),
                                 repr_key=StringIO('foo'))
     path = cm.get_cached_file(cache_key)
     assert path is not None
     # Read through a context manager so the handle is closed right away.
     with open(path, 'r') as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
Пример #16
0
 def test_get_cached_file(self, cache_env):
     """A document registered in the cache can be fetched by its cache key.

     The cached file must exist and carry the same content as the
     registered result file.
     """
     cm = CacheManager(str(cache_env / "cache"))
     cache_key = cm.register_doc(
         str(cache_env / "src1.txt"),
         str(cache_env / "result1.txt"))
     path = cm.get_cached_file(cache_key)
     assert path is not None
     # Read through a context manager so the handle is closed right away
     # instead of being left to the garbage collector.
     with open(path, 'r') as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
Пример #17
0
 def test_get_cached_file_by_src(self, cache_env):
     """A cached file can be found via its source file, without a key.

     The lookup must return the content of the registered result and the
     same cache key that was handed out on registration.
     """
     cm = CacheManager(str(cache_env / "cache"))
     # without a cache key
     my_id = cm.register_doc(str(cache_env / "src1.txt"),
                             str(cache_env / "result1.txt"))
     path, key = cm.get_cached_file_by_source(str(cache_env / "src1.txt"))
     # Read through a context manager so the handle is closed right away.
     with open(path, "r") as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
     assert key == '737b337e605199de28b3b64c674f9422_1_1'
     assert my_id == key
Пример #18
0
 def test_markerhandling(self, tmpdir):
     """Cache keys can be composed from parts and split up again."""
     manager = CacheManager(str(tmpdir))
     composed = manager._compose_cache_key('somefakedhash', 3)
     assert composed == "somefakedhash_3"
     parts = manager._dissolve_cache_key("somefakedhash_3")
     digest, bucket_id = parts
     assert digest == "somefakedhash"
     assert bucket_id == "3"
     # Keys that cannot be split yield a pair of `None` values.
     nothing = (None, None)
     for broken_key in ("asd", None):
         assert manager._dissolve_cache_key(broken_key) == nothing
Пример #19
0
 def test_get_cached_file_w_key_from_file(self, cache_env):
     """A doc registered with a file-like `repr_key` can be fetched by key.

     The cached file must exist and carry the same content as the
     registered result file.
     """
     cm = CacheManager(str(cache_env / "cache"))
     cache_key = cm.register_doc(
         str(cache_env / "src1.txt"),
         str(cache_env / "result1.txt"),
         repr_key=StringIO('foo'))
     path = cm.get_cached_file(cache_key)
     assert path is not None
     # Read through a context manager so the handle is closed right away.
     with open(path, 'r') as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
Пример #20
0
 def test_get_hash(self, cache_env, samples_dir):
     """Hashes computed for known source files match their known digests."""
     manager = CacheManager(str(cache_env))
     sources = [
         cache_env / "src1.txt",
         cache_env / "src2.txt",
         samples_dir / "testdoc1.doc",
     ]
     digests = [manager.get_hash(str(path)) for path in sources]
     assert digests[0] == '737b337e605199de28b3b64c674f9422'
     assert digests[1] == 'd5aa51d7fb180729089d2de904f7dffe'
     assert digests[2] == '443a07e0e92b7dc6b21f8be6a388f05f'
     # The path argument is mandatory.
     with pytest.raises(TypeError):
         manager.get_hash()
Пример #21
0
 def test_markerhandling(self):
     """Markers can be composed and dissolved; junk dissolves to `None`s."""
     manager = CacheManager(self.workdir)
     composed = manager._composeMarker('somefakedhash', 3)
     self.assertEqual(composed, 'somefakedhash_3')
     digest, bucket_id = manager._dissolveMarker('somefakedhash_3')
     self.assertEqual(digest, 'somefakedhash')
     self.assertEqual(bucket_id, '3')
     # Neither a marker without separator nor a non-string can be split.
     for junk in ('asd', object()):
         self.assertEqual(manager._dissolveMarker(junk), (None, None))
Пример #22
0
 def test_get_cached(self):
     """A registered document can be retrieved via ``self.proxy``.

     The path returned must differ from the registered result file while
     the content of both files is the same.
     """
     cm = CacheManager(self.cachedir)
     fake_result_path = os.path.join(self.src_dir, 'result.txt')
     # Binary mode needs a bytes literal; the context manager makes sure
     # the data is flushed and the handle closed before registration.
     with open(fake_result_path, 'wb') as fd:
         fd.write(b'The Result\n')
     key = cm.register_doc(self.src_path, fake_result_path, 'somekey')
     assert key == '2b87e29fca6ee7f1df6c1a76cb58e101_1_1'
     result_path = self.proxy.get_cached(key)
     assert result_path is not None
     assert result_path != fake_result_path
     assert filecmp.cmp(result_path, fake_result_path, shallow=False)
Пример #23
0
 def test_get_hash(self):
     """`get_hash` digests text and binary sources and requires a path."""
     manager = CacheManager(self.workdir)
     # a binary stream not convertible to utf-8
     binary_src = os.path.join(
         os.path.dirname(__file__), 'input', 'testdoc1.doc')
     expectations = (
         (self.src_path1, '737b337e605199de28b3b64c674f9422'),
         (self.src_path2, 'd5aa51d7fb180729089d2de904f7dffe'),
         (binary_src, '443a07e0e92b7dc6b21f8be6a388f05f'),
     )
     # Compute all digests first, then compare them one by one.
     digests = [manager.get_hash(path) for path, _ in expectations]
     for digest, (_, wanted) in zip(digests, expectations):
         self.assertEqual(digest, wanted)
     self.assertRaises(TypeError, manager.get_hash)
Пример #24
0
 def test_get_cached(self):
     """A registered document can be retrieved via ``self.proxy``."""
     manager = CacheManager(self.cachedir)
     result_src = os.path.join(self.src_dir, 'result.txt')
     with open(result_src, 'w') as out:
         out.write('The Result\n')
     cache_key = manager.register_doc(self.src_path, result_src, 'somekey')
     assert cache_key == '2b87e29fca6ee7f1df6c1a76cb58e101_1_1'
     cached = self.proxy.get_cached(cache_key)
     # The cached copy is a different path with identical content.
     assert cached is not None
     assert cached != result_src
     assert filecmp.cmp(cached, result_src, shallow=False)
Пример #25
0
    def test_get_bucket_path_from_hash(self):
        """Valid hashes map to `<prefix>/<hash>` paths, junk maps to `None`."""
        manager = CacheManager(self.workdir)
        digest = manager.getHash(self.src_path1)
        bucket_path = manager._getBucketPathFromHash(digest)
        # Computing the path must leave the cache directory empty.
        self.assertEqual(os.listdir(self.workdir), [])
        tail = os.path.join('73', '737b337e605199de28b3b64c674f9422')
        self.assertTrue(bucket_path.endswith(tail))

        self.assertEqual(manager._getBucketPathFromHash('nonsense'), None)
Пример #26
0
 def test_get_cached_file_by_src(self, cache_env):
     """A cached file can be found via its source file, without a key.

     The lookup must return the content of the registered result and the
     same cache key that was handed out on registration.
     """
     cm = CacheManager(str(cache_env / "cache"))
     # without a cache key
     my_id = cm.register_doc(
         str(cache_env / "src1.txt"),
         str(cache_env / "result1.txt"))
     path, key = cm.get_cached_file_by_source(
         str(cache_env / "src1.txt"))
     # Read through a context manager so the handle is closed right away.
     with open(path, "r") as fd:
         cached_content = fd.read()
     assert cached_content == (cache_env / "result1.txt").read()
     assert key == '737b337e605199de28b3b64c674f9422_1_1'
     assert my_id == key
Пример #27
0
 def test_register_doc(self):
     """`registerDoc` returns the expected marker for each registration."""
     manager = CacheManager(self.workdir)
     registrations = [
         (self.src_path1, self.result_path1, None),
         (self.src_path1, self.result_path1, None),
         (self.src_path1, self.result_path2, 'foo'),
         (self.src_path2, self.result_path2, 'foo'),
     ]
     # Perform all registrations first, then check the markers in order.
     markers = [
         manager.registerDoc(src, result, suffix=suffix)
         for src, result, suffix in registrations]
     expected = [
         '737b337e605199de28b3b64c674f9422_1',
         '737b337e605199de28b3b64c674f9422_1',
         '737b337e605199de28b3b64c674f9422_1',
         'd5aa51d7fb180729089d2de904f7dffe_1',
     ]
     for marker, wanted in zip(markers, expected):
         self.assertEqual(marker, wanted)
Пример #28
0
 def test_convert_to_pdf_cached_wo_cache_key(self):
     """A cached doc is found on conversion even without a cache key.

     The document is converted once, the cached result is overwritten
     with marker content, and a second conversion of the same document
     must then deliver that marker content under the same cache key.
     """
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     pdf1, cache_key1 = self.doc.convertToPDF()  # store doc in cache
     # modify result to distinguish it from a freshly converted doc
     from ulif.openoffice.cachemanager import CacheManager
     cm = CacheManager(self.workdir)
     cached_path = cm.get_cached_file(cache_key1)
     # Binary mode needs a bytes literal; the context manager makes sure
     # the data is flushed and the handle closed before re-reading.
     with open(cached_path, 'wb') as fd:
         fd.write(b'My Fake Result')
     # now re-get the document. We should get the cached copy
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     pdf2, cache_key2 = self.doc.convertToPDF()
     self.assertEqual(pdf2, 'My Fake Result')
     self.assertEqual(cache_key2, cache_key1)
Пример #29
0
 def test_init_fails_loudly(self, tmpdir):
     """Creating a manager on a regular file raises `IOError`."""
     not_a_dir = tmpdir.join("some_file.txt")
     not_a_dir.write("this-is-not-a-dir")
     bogus_cache_dir = str(not_a_dir)
     # A file where a cache directory is expected must not be accepted.
     with pytest.raises(IOError):
         CacheManager(bogus_cache_dir)
Пример #30
0
    def test_get_cached_file(self):
        """`getCachedFile` only finds docs registered with the same suffix."""
        manager = CacheManager(self.workdir)
        # Nothing registered yet: no hit, and the lookup creates no files.
        self.assertTrue(manager.getCachedFile(self.src_path1) is None)
        self.assertEqual(os.listdir(self.workdir), [])

        # Registered without suffix: only the suffix-less lookup hits.
        manager.registerDoc(self.src_path1, self.result_path1, suffix=None)
        plain = manager.getCachedFile(self.src_path1)
        with_bar = manager.getCachedFile(self.src_path1, suffix='bar')
        with_foo = manager.getCachedFile(self.src_path1, suffix='foo')
        self.assertTrue(plain is not None)
        self.assertTrue(with_bar is None)
        self.assertTrue(with_foo is None)

        # Registered with suffix 'foo': only the 'foo' lookup hits.
        manager.registerDoc(self.src_path2, self.result_path1, suffix='foo')
        plain = manager.getCachedFile(self.src_path2)
        with_bar = manager.getCachedFile(self.src_path2, suffix='bar')
        with_foo = manager.getCachedFile(self.src_path2, suffix='foo')
        self.assertTrue(plain is None)
        self.assertTrue(with_bar is None)
        self.assertTrue(with_foo is not None)
Пример #31
0
def convert_doc(src_doc, options, cache_dir):
    """Convert `src_doc` according to the other parameters.

    `src_doc` is the path to the source document. `options` is a dict
    of options for processing, passed to the processors.

    `cache_dir` may be ``None`` in which case no caching is requested
    during processing.

    Generates a converted representation of `src_doc` by calling
    :class:`ulif.openoffice.processor.MetaProcessor` with `options` as
    parameters. Processing happens on a copy of `src_doc` placed in a
    freshly created temporary directory.

    Afterwards the conversion result is stored in cache (if
    allowed/possible) for speedup of upcoming requests.

    Returns a triple:

      ``(<PATH>, <CACHE_KEY>, <METADATA>)``

    where ``<PATH>`` is the path to the resulting document,
    ``<CACHE_KEY>`` an identifier (string) to retrieve a generated doc
    from cache on future requests, and ``<METADATA>`` is a dict of values
    returned during request (and set by the document processors,
    notably setting the `error` keyword).

    If errors happen or caching is disabled, ``<CACHE_KEY>`` is
    ``None``.

    Exceptions raised while copying the source or while processing are
    propagated unchanged after the temporary directory was removed.
    """
    cache_key = None
    repr_key = get_marker(options)  # Create unique marker out of options

    # Generate result
    input_copy_dir = tempfile.mkdtemp()
    input_copy = os.path.join(input_copy_dir, os.path.basename(src_doc))
    try:
        # Copying happens inside the `try` so a failing copy does not
        # leave the temporary directory behind.
        shutil.copy2(src_doc, input_copy)
        proc = MetaProcessor(options=options)  # Removes original doc
        result_path, metadata = proc.process(input_copy)
    except Exception:
        # Best-effort cleanup: a failing removal must not mask the
        # original error. The bare `raise` keeps the original traceback.
        shutil.rmtree(input_copy_dir, ignore_errors=True)
        raise

    error_state = metadata.get('error', False)
    if cache_dir and not error_state and result_path is not None:
        # Cache away generated doc
        cache_key = CacheManager(cache_dir).register_doc(
            src_doc, result_path, repr_key)
    return result_path, cache_key, metadata
Пример #32
0
 def test_markerhandling(self, tmpdir):
     """Cache keys can be composed from parts and split up again."""
     manager = CacheManager(str(tmpdir))
     composed = manager._compose_cache_key('somefakedhash', 3)
     assert composed == "somefakedhash_3"
     parts = manager._dissolve_cache_key("somefakedhash_3")
     digest, bucket_id = parts
     assert digest == "somefakedhash"
     assert bucket_id == "3"
     # Keys that cannot be split yield a pair of `None` values.
     nothing = (None, None)
     for broken_key in ("asd", None):
         assert manager._dissolve_cache_key(broken_key) == nothing
Пример #33
0
 def test_get_hash(self, cache_env, samples_dir):
     """Hashes computed for known source files match their known digests."""
     manager = CacheManager(str(cache_env))
     sources = [
         cache_env / "src1.txt",
         cache_env / "src2.txt",
         samples_dir / "testdoc1.doc",
     ]
     digests = [manager.get_hash(str(path)) for path in sources]
     assert digests[0] == '737b337e605199de28b3b64c674f9422'
     assert digests[1] == 'd5aa51d7fb180729089d2de904f7dffe'
     assert digests[2] == '443a07e0e92b7dc6b21f8be6a388f05f'
     # The path argument is mandatory.
     with pytest.raises(TypeError):
         manager.get_hash()
Пример #34
0
 def test_get_cached_file_by_src_w_key(self):
     """Lookups by source respect the key a result was registered under."""
     manager = CacheManager(self.workdir)
     src = self.src_path1
     manager.register_doc(src, self.result_path1, 'mykey')
     hit_path, hit_key = manager.get_cached_file_by_source(src, 'mykey')
     assert filecmp.cmp(hit_path, self.result_path1, shallow=False)
     assert hit_key == '737b337e605199de28b3b64c674f9422_1_1'
     # A key nothing was registered under gives no result ...
     miss = manager.get_cached_file_by_source(src, 'otherkey')
     miss_path, miss_key = miss
     assert miss_path is None
     assert miss_key is None
     # ... until a result is registered under that key.
     manager.register_doc(src, self.result_path2, 'otherkey')
     second = manager.get_cached_file_by_source(src, 'otherkey')
     other_path, other_key = second
     assert filecmp.cmp(other_path, self.result_path2, shallow=False)
     assert other_key == '737b337e605199de28b3b64c674f9422_1_2'
Пример #35
0
    def test_get_cached_file_from_marker(self):
        """Only markers of registered docs resolve to a cached file."""
        manager = CacheManager(self.workdir)
        # Lookups done before anything is registered.
        unknown_markers = (
            'not-a-marker',
            '737b337e605199de28b3b64c674f9422_1',
            'not-a-marker-with_underscore',
        )
        misses = [
            manager.getCachedFileFromMarker(marker)
            for marker in unknown_markers]

        plain_marker = manager.registerDoc(
            self.src_path1, self.result_path1, suffix=None)
        plain_hit = manager.getCachedFileFromMarker(plain_marker)

        foo_marker = manager.registerDoc(
            self.src_path1, self.result_path2, suffix='foo')
        foo_hit = manager.getCachedFileFromMarker(foo_marker, suffix='foo')

        for miss in misses:
            self.assertTrue(miss is None)
        self.assertTrue(plain_hit is not None)
        self.assertTrue(foo_hit is not None)
Пример #36
0
    def test_prepare_cache_dir(self):
        """`prepareCacheDir` handles unset, new and broken cache dirs.

        With ``cache_dir`` set to ``None`` the call returns ``None``, a
        missing directory is created, and a regular file in place of the
        directory makes the call raise `IOError`.
        """
        new_cache_dir = os.path.join(self.workdir, 'newcache')
        broken_cache_dir = os.path.join(self.workdir, 'broken')
        # A regular file where a directory is expected. Binary mode needs
        # a bytes literal; the context manager closes the handle.
        with open(broken_cache_dir, 'wb') as fd:
            fd.write(b'broken')
        cm = CacheManager(self.workdir)

        cm.cache_dir = None
        self.assertEqual(cm.prepareCacheDir(), None)

        cm.cache_dir = new_cache_dir
        cm.prepareCacheDir()
        self.assertTrue(os.path.isdir(new_cache_dir))

        cm.cache_dir = broken_cache_dir
        self.assertRaises(IOError, cm.prepareCacheDir)
Пример #37
0
    def test_get_cached_file(self):
        """`get_cached_file` resolves the keys returned by `register_doc`."""
        manager = CacheManager(self.workdir)
        # Nothing cached yet: no hit, and the lookup creates no files.
        self.assertTrue(manager.get_cached_file(self.src_path1) is None)
        self.assertEqual(os.listdir(self.workdir), [])

        plain_key = manager.register_doc(self.src_path1, self.result_path1)
        self.assertTrue(manager.get_cached_file(plain_key) is not None)

        str_key = manager.register_doc(
            self.src_path2, self.result_path1, repr_key='foo')
        self.assertTrue(manager.get_cached_file(str_key) is not None)

        stream_key = manager.register_doc(
            self.src_path2, self.result_path1, repr_key=StringIO('foo'))
        self.assertTrue(manager.get_cached_file(stream_key) is not None)
        # A string and a file-like repr key with equal content give the
        # same cache key.
        self.assertEqual(str_key, stream_key)

        self.assertEqual(manager.get_cached_file('nonsense_really'), None)
Пример #38
0
 def test_register_doc(self, cache_env):
     """Registering docs returns the expected cache key each time."""
     manager = CacheManager(str(cache_env / "cache"))
     src1 = str(cache_env / "src1.txt")
     src2 = str(cache_env / "src2.txt")
     result1 = str(cache_env / "result1.txt")
     result2 = str(cache_env / "result2.txt")
     # (source, result, extra keyword arguments, expected cache key)
     cases = [
         (src1, result1, {},
          '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result1, {},
          '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result2, {"repr_key": "foo"},
          '737b337e605199de28b3b64c674f9422_1_2'),
         (src2, result2, {"repr_key": "foo"},
          'd5aa51d7fb180729089d2de904f7dffe_1_1'),
         (src2, result2, {"repr_key": StringIO("bar")},
          'd5aa51d7fb180729089d2de904f7dffe_1_2'),
     ]
     for src, result, extra, expected in cases:
         marker = manager.register_doc(src, result, **extra)
         assert marker == expected
Пример #39
0
 def test_keys(self, cache_env):
     """`keys()` lists the cache keys of all docs registered so far."""
     manager = CacheManager(str(cache_env / "cache"))
     src1 = str(cache_env / "src1.txt")
     src2 = str(cache_env / "src2.txt")
     result1 = str(cache_env / "result1.txt")
     result2 = str(cache_env / "result2.txt")
     # (source, result, repr key, expected cache key), already in the
     # sort order of the expected keys.
     steps = [
         (src1, result1, 'foo', '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result2, 'bar', '737b337e605199de28b3b64c674f9422_1_2'),
         (src2, result1, 'baz', 'd5aa51d7fb180729089d2de904f7dffe_1_1'),
     ]
     known_keys = []
     for src, result, repr_key, expected in steps:
         new_key = manager.register_doc(src, result, repr_key)
         known_keys.append(expected)
         assert sorted(manager.keys()) == known_keys
         assert new_key == expected
Пример #40
0
 def test_keys_custom_level(self, cache_env):
     """`keys()` also lists all keys when a custom `level` is set."""
     manager = CacheManager(str(cache_env / "cache"), level=3)
     src1 = str(cache_env / "src1.txt")
     src2 = str(cache_env / "src2.txt")
     result1 = str(cache_env / "result1.txt")
     result2 = str(cache_env / "result2.txt")
     # (source, result, repr key, expected cache key), already in the
     # sort order of the expected keys.
     steps = [
         (src1, result1, 'foo', '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result2, 'bar', '737b337e605199de28b3b64c674f9422_1_2'),
         (src2, result1, 'baz', 'd5aa51d7fb180729089d2de904f7dffe_1_1'),
     ]
     known_keys = []
     for src, result, repr_key, expected in steps:
         new_key = manager.register_doc(src, result, repr_key)
         known_keys.append(expected)
         assert sorted(manager.keys()) == known_keys
         assert new_key == expected
Пример #41
0
 def test_register_doc(self, cache_env):
     """Registering docs returns the expected cache key each time."""
     manager = CacheManager(str(cache_env / "cache"))
     src1 = str(cache_env / "src1.txt")
     src2 = str(cache_env / "src2.txt")
     result1 = str(cache_env / "result1.txt")
     result2 = str(cache_env / "result2.txt")
     # (source, result, extra keyword arguments, expected cache key)
     cases = [
         (src1, result1, {},
          '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result1, {},
          '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result2, {"repr_key": "foo"},
          '737b337e605199de28b3b64c674f9422_1_2'),
         (src2, result2, {"repr_key": "foo"},
          'd5aa51d7fb180729089d2de904f7dffe_1_1'),
         (src2, result2, {"repr_key": StringIO("bar")},
          'd5aa51d7fb180729089d2de904f7dffe_1_2'),
     ]
     for src, result, extra, expected in cases:
         marker = manager.register_doc(src, result, **extra)
         assert marker == expected
Пример #42
0
 def test_get_cached_file_by_src_w_key(self, cache_env):
     """Lookups by source respect the key a result was registered under."""
     manager = CacheManager(str(cache_env / "cache"))
     src = str(cache_env / "src1.txt")
     first_result = str(cache_env / "result1.txt")
     second_result = str(cache_env / "result2.txt")
     first_id = manager.register_doc(src, first_result, 'mykey')
     hit_path, hit_key = manager.get_cached_file_by_source(src, 'mykey')
     assert filecmp.cmp(hit_path, first_result, shallow=False)
     assert hit_key == '737b337e605199de28b3b64c674f9422_1_1'
     assert hit_key == first_id
     # Nothing is stored under this key yet.
     miss = manager.get_cached_file_by_source(src, 'otherkey')
     miss_path, miss_key = miss
     assert miss_path is None
     assert miss_key is None
     # Store a second result and retrieve it.
     second_id = manager.register_doc(src, second_result, 'otherkey')
     second = manager.get_cached_file_by_source(src, 'otherkey')
     other_path, other_key = second
     assert filecmp.cmp(other_path, second_result, shallow=False)
     assert other_key == '737b337e605199de28b3b64c674f9422_1_2'
     assert other_key == second_id
Пример #43
0
 def test_register_doc(self):
     """Registering docs with byte-string keys returns byte-string markers."""
     manager = CacheManager(self.workdir)
     src1, src2 = self.src_path1, self.src_path2
     res1, res2 = self.result_path1, self.result_path2
     # All registrations happen first, the comparisons afterwards.
     first = manager.register_doc(src1, res1)
     repeated = manager.register_doc(src1, res1)
     keyed = manager.register_doc(src1, res2, repr_key=b'foo')
     other_src = manager.register_doc(src2, res2, repr_key=b'foo')
     from_stream = manager.register_doc(
         src2, res2, repr_key=StringIO(b'bar'))
     self.assertEqual(first, b'737b337e605199de28b3b64c674f9422_1_1')
     self.assertEqual(repeated, b'737b337e605199de28b3b64c674f9422_1_1')
     self.assertEqual(keyed, b'737b337e605199de28b3b64c674f9422_1_2')
     self.assertEqual(other_src, b'd5aa51d7fb180729089d2de904f7dffe_1_1')
     self.assertEqual(from_stream, b'd5aa51d7fb180729089d2de904f7dffe_1_2')
Пример #44
0
 def test_keys(self, cache_env):
     """`keys()` lists the cache keys of all docs registered so far."""
     manager = CacheManager(str(cache_env / "cache"))
     src1 = str(cache_env / "src1.txt")
     src2 = str(cache_env / "src2.txt")
     result1 = str(cache_env / "result1.txt")
     result2 = str(cache_env / "result2.txt")
     # (source, result, repr key, expected cache key), already in the
     # sort order of the expected keys.
     steps = [
         (src1, result1, 'foo', '737b337e605199de28b3b64c674f9422_1_1'),
         (src1, result2, 'bar', '737b337e605199de28b3b64c674f9422_1_2'),
         (src2, result1, 'baz', 'd5aa51d7fb180729089d2de904f7dffe_1_1'),
     ]
     known_keys = []
     for src, result, repr_key, expected in steps:
         new_key = manager.register_doc(src, result, repr_key)
         known_keys.append(expected)
         assert sorted(manager.keys()) == known_keys
         assert new_key == expected
Пример #45
0
 def test_keys_custom_level(self):
     """`keys()` also lists all keys when a custom `level` is set."""
     manager = CacheManager(self.workdir, level=3)
     # (source, result, repr key, cache key expected to show up), already
     # in the sort order of the expected keys.
     steps = [
         (self.src_path1, self.result_path1, 'foo',
          '737b337e605199de28b3b64c674f9422_1_1'),
         (self.src_path1, self.result_path2, 'bar',
          '737b337e605199de28b3b64c674f9422_1_2'),
         (self.src_path2, self.result_path1, 'baz',
          'd5aa51d7fb180729089d2de904f7dffe_1_1'),
     ]
     known_keys = []
     for src, result, repr_key, expected in steps:
         manager.register_doc(src, result, repr_key)
         known_keys.append(expected)
         self.assertEqual(sorted(manager.keys()), known_keys)
Пример #46
0
 def test_get_cached_file_empty(self, cache_env):
     """Asking an empty cache for a cached file gives `None`."""
     manager = CacheManager(str(cache_env / "cache"))
     requested = str(cache_env / "src1.txt")
     cached = manager.get_cached_file(requested)
     assert cached is None
Пример #47
0
 def test_get_cached_file_invalid_cache_key(self, tmpdir):
     """An invalid or unused cache key gives `None` as cached file."""
     manager = CacheManager(str(tmpdir))
     cached = manager.get_cached_file("not-existing")
     assert cached is None
Пример #48
0
 def test_get_cached_file_by_src_failed(self, cache_env):
     """Looking up an uncached source yields `None` for path and key."""
     manager = CacheManager(str(cache_env))
     source = str(cache_env / "src1.txt")
     lookup = manager.get_cached_file_by_source(source)
     cached_path, cache_key = lookup
     assert cached_path is None
     assert cache_key is None
Пример #49
0
class Client(object):
    """A client to trigger document conversions.

    `cache_dir` is the path of the cache directory to use. If it is
    ``None`` (the default), no cache manager is created and the cache
    lookup methods return empty results.
    """
    def __init__(self, cache_dir=None):
        self.cache_dir = cache_dir
        self.cache_manager = None
        if self.cache_dir is not None:
            self.cache_manager = CacheManager(self.cache_dir)

    def convert(self, src_doc_path, options=None):
        """Convert `src_doc_path` according to `options`.

        `options` is a dict of processing options. ``None`` (the
        default) is treated like an empty dict.

        Calls :func:`convert_doc` internally and returns the result
        given by this function.
        """
        if options is None:
            # A fresh dict per call; a `{}` default would be one object
            # shared by all calls and all instances.
            options = {}
        return convert_doc(src_doc_path, options, self.cache_dir)

    def get_cached(self, cache_key):
        """Get the document from cache stored under `cache_key`.

        Returns ``None`` if no such file can be found or no cache dir
        was set at all.

        .. warning:: The returned path (if any) is part of cache! Do
                     not remove or change the file. Copy it to another
                     location instead.

        .. versionadded:: 1.1

        """
        if self.cache_manager is not None:
            return self.cache_manager.get_cached_file(cache_key)
        return None

    def get_cached_by_source(self, src_doc_path, options=None):
        """Get the document from cache by source doc and options.

        Find a cached document, which was created from the given
        `src_doc_path` and `options`. ``None`` (the default) for
        `options` is treated like an empty dict.

        Returns the path to the document and a cache key you are
        encouraged to use for future access.

        Please note that this method is much more expensive than
        :meth:`get_cached`. Use it only if the `cache_key` returned
        upon registering a doc is absolutely not available any more.

        Returns ``(None, None)`` if no such file can be found or no
        cache dir was set at all.

        .. warning:: The returned path (if any) is part of cache! Do
                     not remove or change the file. Copy it to another
                     location instead.

        .. versionadded:: 1.1

        """
        if options is None:
            # A fresh dict per call instead of a shared mutable default.
            options = {}
        repr_key = get_marker(options)
        if self.cache_manager is not None:
            return self.cache_manager.get_cached_file_by_source(
                src_doc_path, repr_key)
        return None, None
Пример #50
0
class RESTfulDocConverter(object):
    """A WSGI app that caches and converts office documents via LibreOffice.

    It acts as a RESTful document store that supports HTTP actions to
    add/modify/retrieve converted documents.

    Accepted arguments:

    - `cache_dir`:
        Path to a directory, where cached files can be stored. The
        directory is created if it does not exist.

    """
    # cf: https://routes.readthedocs.io/en/latest/restful.html
    #     http://www.ianbicking.org/blog/2010/03/12/a-webob-app-example/
    map = Mapper()
    map.resource('doc', 'docs')

    #: A cache manager instance.
    cache_manager = None
    template_dir = os.path.join(os.path.dirname(__file__), 'templates')

    def __init__(self, cache_dir=None):
        """Store `cache_dir` and set up a cache manager for it.

        Without a `cache_dir` no cache manager is created and
        `cache_manager` stays ``None``.
        """
        self.cache_dir = cache_dir
        self.cache_manager = None
        if self.cache_dir is not None:
            self.cache_manager = CacheManager(self.cache_dir)

    def _url(self, req, *args, **kw):
        """Generate an URL pointing to some REST service.

        `req` is the current request.

        Arguments and keywords are passed on to the generated
        :class:`routes.util.URLGenerator` instance. So you can use it
        like the `url` method described in the `routes` docs, except
        that you have to pass in the `req` parameter first.
        """
        url = URLGenerator(self.map, req.environ)
        return url(*args, **kw)

    @wsgify
    def __call__(self, req):
        """Dispatch `req` to the method named by the matched route.

        Responds with "404 Not Found" if no route matches.
        """
        results = self.map.routematch(environ=req.environ)
        if not results:
            return exc.HTTPNotFound()
        match, route = results
        # the 'action' of the matched route is the name of the
        # handler method on this instance
        return getattr(self, match['action'])(req)

    def index(self, req):
        """Get an index of all docs."""
        # NOTE(review): `mydocs` is not defined in this class;
        # presumably a module-level mapping -- confirm it exists.
        return Response(str(mydocs.keys()))

    def create(self, req):
        """Post a new doc: convert the uploaded file, deliver the result.

        All request parameters except ``CREATE``, ``doc`` and
        ``docid`` are passed to :func:`convert_doc` as options, with
        ``out_fmt`` renamed to ``oocp-out-fmt``. The upload is
        expected in the POST field ``doc``.

        If the conversion yields an id tag, the response is marked
        "201 Created" and its location points to the new resource.
        """
        options = {name: val for name, val in req.params.items()
                   if name not in ('CREATE', 'doc', 'docid')}
        if 'out_fmt' in options:
            options['oocp-out-fmt'] = options.pop('out_fmt')
        if 'CREATE' in req.params:
            # presumably set by the HTML form (see `new`) -- for PDF
            # output a dedicated processor order is requested then
            if options.get('oocp-out-fmt', 'html') == 'pdf':
                options['meta-procord'] = 'unzip,oocp,zip'
        doc = req.POST['doc']
        # write doc to filesystem
        tmp_dir = tempfile.mkdtemp()
        # The filename is supplied by the client: strip any directory
        # part, so the upload cannot be written outside of `tmp_dir`.
        src_path = os.path.join(tmp_dir, os.path.basename(doc.filename))
        with open(src_path, 'wb') as f:
            # copy in chunks to keep memory usage low for large uploads
            for chunk in iter(lambda: doc.file.read(8 * 1024), b''):
                f.write(chunk)
        # do the conversion
        result_path, id_tag, metadata = convert_doc(src_path, options,
                                                    self.cache_dir)
        # deliver the created file
        resp = make_response(result_path)
        if id_tag is not None:
            # we can only signal new resources if cache is enabled
            resp.status = '201 Created'
            resp.location = self._url(req, 'doc', id=id_tag, qualified=True)
        return resp

    def new(self, req):
        """Get a form to create a new doc.

        The form is read from ``form_new.tpl`` in `template_dir` and
        its ``target_url`` placeholder is filled with the URL of the
        docs collection.
        """
        template_path = os.path.join(self.template_dir, 'form_new.tpl')
        # close the template file deterministically
        with open(template_path) as fd:
            template = fd.read()
        template = template.format(target_url=self._url(req, 'docs'))
        return Response(template)

    def update(self, req):
        # put/update an existing doc
        pass  # pragma: no cover

    def delete(self, req):
        # delete a doc
        pass  # pragma: no cover

    def edit(self, req):
        # edit a doc
        pass  # pragma: no cover

    def show(self, req):
        """Show a doc: deliver the cached document named in the URL.

        The last segment of the request path is taken as the doc id.
        Responds with "404 Not Found" if caching is disabled or no
        document is cached under that id.
        """
        if self.cache_manager is None:
            # without a cache there are no stored docs to show
            return exc.HTTPNotFound()
        doc_id = req.path.split('/')[-1]
        result_path = self.cache_manager.get_cached_file(doc_id)
        if result_path is None:
            return exc.HTTPNotFound()
        return make_response(result_path)
Пример #51
0
 def __init__(self, cache_dir=None):
     """Remember `cache_dir` and create a cache manager for it.

     If `cache_dir` is ``None``, `cache_manager` stays ``None``.
     """
     self.cache_dir, self.cache_manager = cache_dir, None
     if cache_dir is None:
         # no cache dir, no cache manager
         return
     self.cache_manager = CacheManager(cache_dir)
Пример #52
0
 def test_compose_marker(self, tmpdir):
     """Cache keys can be composed from a hash digest and a marker."""
     manager = CacheManager(str(tmpdir))
     composed_key = manager._compose_cache_key(
         'some_hash_digest', 'bucket_marker')
     # both parts show up in the key, joined by an underscore
     assert composed_key == 'some_hash_digest_bucket_marker'
Пример #53
0
 def test_init_creates_dir(self, tmpdir):
     """A cache dir is created if necessary."""
     wanted_dir = tmpdir / "cache"
     # precondition: the directory does not exist yet
     assert wanted_dir.exists() is False
     CacheManager(str(wanted_dir))
     # creating the manager was enough to create the directory
     assert wanted_dir.isdir() is True
Пример #54
0
 def test_prepare_cache_dir_none(self, tmpdir):
     """Preparing the cache dir copes with a cache dir of `None`."""
     manager = CacheManager(str(tmpdir))
     # drop the cache dir after construction
     manager.cache_dir = None
     manager._prepare_cache_dir()
     # the unset cache dir is left untouched
     assert manager.cache_dir is None
Пример #55
0
 def test_init(self, tmpdir):
     """A cache manager can be initialized with default depth."""
     cache_path = tmpdir.join("cache")
     manager = CacheManager(str(cache_path))
     # the default level is 1
     assert manager.level == 1
     assert manager.cache_dir == cache_path
Пример #56
0
 def test_init_level(self, tmpdir):
     # we can set a level (depth) when creating cache managers
     cm = CacheManager(str(tmpdir.join("cache")), level=3)
     assert cm.level == 3