예제 #1
0
 def test_argument_error(self, client_env):
     # unusable option values are expected to raise ArgumentParserError:
     # 'foo' is passed as output format and 'foo,bar' as processor order
     bad_options = {
         'oocp-out-fmt': 'foo',
         'meta-procord': 'foo,bar',
     }
     converter = Client()
     with pytest.raises(ArgumentParserError):
         converter.convert(client_env.src_doc, options=bad_options)
예제 #2
0
 def test_convert(self, client_env):
     # converting with defaults: the result path ends in
     # '/sample.html.zip', the file exists, and no cache key is handed out
     converter = Client()
     outcome = converter.convert(client_env.src_doc)
     result_path, cache_key, metadata = outcome
     assert result_path.endswith('/sample.html.zip')
     assert os.path.isfile(result_path)
     assert cache_key is None  # no cache, no cache_key
     expected_meta = {'error': False, 'oocp_status': 0}
     assert metadata == expected_meta
예제 #3
0
 def test_convert(self, client_env):
     # converting with defaults: the result path ends in
     # '/sample.html.zip', the file exists, and no cache key is handed out
     converter = Client()
     outcome = converter.convert(client_env.src_doc)
     result_path, cache_key, metadata = outcome
     assert result_path.endswith('/sample.html.zip')
     assert os.path.isfile(result_path)
     assert cache_key is None  # no cache, no cache_key
     expected_meta = {'error': False, 'oocp_status': 0}
     assert metadata == expected_meta
예제 #4
0
 def test_argument_error(self, client_env):
     # unusable option values are expected to raise ArgumentParserError:
     # 'foo' is passed as output format and 'foo,bar' as processor order
     bad_options = {
         'oocp-out-fmt': 'foo',
         'meta-procord': 'foo,bar',
     }
     converter = Client()
     with pytest.raises(ArgumentParserError):
         converter.convert(client_env.src_doc, options=bad_options)
예제 #5
0
 def test_options(self, client_env):
     # options can be handed to convert(); with the ones used here the
     # result path ends in '/sample.pdf'
     pdf_options = {
         'oocp-out-fmt': 'pdf',
         'meta-procord': 'oocp',
     }
     converter = Client()
     result_path, _key, metadata = converter.convert(
         client_env.src_doc, options=pdf_options)
     assert result_path.endswith('/sample.pdf')
     expected_meta = {'error': False, 'oocp_status': 0}
     assert metadata == expected_meta
예제 #6
0
    def get_cached(self, cache_key):
        """Look up a document in the cache.

        Asks a `Client` created for this object's `cache_dir` for the
        entry stored under `cache_key` and passes its answer on
        unchanged (`None` if there is no such entry).
        """
        return Client(cache_dir=self.cache_dir).get_cached(cache_key)
예제 #7
0
 def test_get_cached(self, client_env):
     # after a conversion with a cache dir set, the cache key returned
     # by convert() can be used to fetch the doc from the cache
     expected_key = '396199333edbf40ad43e62a1c1397793_1_1'
     caching_client = Client(cache_dir=client_env.cache_dir)
     result_path, cache_key, _meta = caching_client.convert(
         client_env.src_doc)
     assert cache_key == expected_key
     cached_path = caching_client.get_cached(cache_key)
     # cached copy and conversion result must match byte by byte
     assert filecmp.cmp(result_path, cached_path, shallow=False)
     assert client_env.cache_dir in cached_path
예제 #8
0
 def test_get_cached(self, client_env):
     # after a conversion with a cache dir set, the cache key returned
     # by convert() can be used to fetch the doc from the cache
     expected_key = '396199333edbf40ad43e62a1c1397793_1_1'
     caching_client = Client(cache_dir=client_env.cache_dir)
     result_path, cache_key, _meta = caching_client.convert(
         client_env.src_doc)
     assert cache_key == expected_key
     cached_path = caching_client.get_cached(cache_key)
     # cached copy and conversion result must match byte by byte
     assert filecmp.cmp(result_path, cached_path, shallow=False)
     assert client_env.cache_dir in cached_path
예제 #9
0
    def get_cached(self, cache_key):
        """Look up a document in the cache.

        Asks a `Client` created for this object's `cache_dir` for the
        entry stored under `cache_key` and passes its answer on
        unchanged (`None` if there is no such entry).
        """
        return Client(cache_dir=self.cache_dir).get_cached(cache_key)
예제 #10
0
 def test_convert(self):
     # converting with defaults: the result path ends in
     # '/sample.html.zip', the file exists, and no cache key is handed out
     converter = Client()
     result_path, cache_key, metadata = converter.convert(self.src_doc)
     # remember the result dir so it can be cleaned up later
     self.resultdir = os.path.dirname(result_path)
     assert result_path.endswith('/sample.html.zip')
     assert os.path.isfile(result_path)
     assert cache_key is None  # no cache, no cache_key
     expected_meta = {'error': False, 'oocp_status': 0}
     assert metadata == expected_meta
예제 #11
0
 def test_options(self, client_env):
     # options can be handed to convert(); with the ones used here the
     # result path ends in '/sample.pdf'
     pdf_options = {
         'oocp-out-fmt': 'pdf',
         'meta-procord': 'oocp',
     }
     converter = Client()
     result_path, _key, metadata = converter.convert(
         client_env.src_doc, options=pdf_options)
     assert result_path.endswith('/sample.pdf')
     expected_meta = {'error': False, 'oocp_status': 0}
     assert metadata == expected_meta
예제 #12
0
 def test_get_cached_by_source(self, client_env):
     # once a doc was converted with a cache dir set, the cached result
     # can also be looked up by passing the source doc again
     expected_key = '396199333edbf40ad43e62a1c1397793_1_1'
     caching_client = Client(cache_dir=client_env.cache_dir)
     result_path, cache_key, _meta = caching_client.convert(
         client_env.src_doc)
     assert cache_key == expected_key
     lookup = caching_client.get_cached_by_source(client_env.src_doc)
     c_path, c_key = lookup
     assert filecmp.cmp(result_path, c_path, shallow=False)
     assert client_env.cache_dir in c_path
     assert c_key == expected_key
예제 #13
0
 def test_get_cached(self):
     # after a conversion with a cache dir set, the cache key returned
     # by convert() can be used to fetch the doc from the cache
     expected_key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
     caching_client = Client(cache_dir=self.cachedir)
     result_path, cache_key, _meta = caching_client.convert(self.src_doc)
     # remember the result dir so it can be cleaned up later
     self.resultdir = os.path.dirname(result_path)
     assert cache_key == expected_key
     cached_path = caching_client.get_cached(cache_key)
     assert filecmp.cmp(result_path, cached_path, shallow=False)
     assert self.cachedir in cached_path
예제 #14
0
 def test_get_cached_by_source(self, client_env):
     # once a doc was converted with a cache dir set, the cached result
     # can also be looked up by passing the source doc again
     expected_key = '396199333edbf40ad43e62a1c1397793_1_1'
     caching_client = Client(cache_dir=client_env.cache_dir)
     result_path, cache_key, _meta = caching_client.convert(
         client_env.src_doc)
     assert cache_key == expected_key
     lookup = caching_client.get_cached_by_source(client_env.src_doc)
     c_path, c_key = lookup
     assert filecmp.cmp(result_path, c_path, shallow=False)
     assert client_env.cache_dir in c_path
     assert c_key == expected_key
예제 #15
0
 def test_options(self):
     # options can be handed to convert(); with the ones used here the
     # result path ends in '/sample.pdf'
     pdf_options = {
         'oocp-out-fmt': 'pdf',
         'meta-procord': 'oocp',
     }
     converter = Client()
     result_path, _key, metadata = converter.convert(
         self.src_doc, options=pdf_options)
     # remember the result dir (presumably for later cleanup)
     self.resultdir = os.path.dirname(result_path)
     assert result_path.endswith('/sample.pdf')
     expected_meta = {'error': False, 'oocp_status': 0}
     assert metadata == expected_meta
예제 #16
0
 def test_get_cached_by_source(self):
     # once a doc was converted with a cache dir set, the cached result
     # can also be looked up by passing the source doc again
     expected_key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
     caching_client = Client(cache_dir=self.cachedir)
     result_path, convert_key, _meta = caching_client.convert(self.src_doc)
     # remember the result dir so it can be cleaned up later
     self.resultdir = os.path.dirname(result_path)
     assert convert_key == expected_key
     cached_path, source_key = caching_client.get_cached_by_source(
         self.src_doc)
     assert filecmp.cmp(result_path, cached_path, shallow=False)
     assert self.cachedir in cached_path
     assert source_key == expected_key
예제 #17
0
 def __init__(self, name, data, cache_dir=None):
     # initialize the transform base first, then let
     # `initialize_tmpdir` handle `data`; it yields the values kept
     # as `tmpdir` and `fullname`
     commandtransform.__init__(self, name)
     tmpdir_and_path = self.initialize_tmpdir(data, filename=self.name())
     self.tmpdir, self.fullname = tmpdir_and_path
     # the client gets the same cache dir that is remembered here
     self.cache_dir = cache_dir
     self.client = Client(cache_dir=cache_dir)
예제 #18
0
 def test_get_cached_no_cache_dir(self):
     # a client created without a cache dir answers cache lookups
     # with None
     some_key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
     cached = Client().get_cached(some_key)
     assert cached is None
예제 #19
0
 def test_get_cached_no_file(self):
     # with a cache dir set but nothing converted before, a lookup by
     # key gives None
     some_key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
     caching_client = Client(cache_dir=self.cachedir)
     cached = caching_client.get_cached(some_key)
     assert cached is None
예제 #20
0
 def test_get_cached_by_source_no_cache_dir(self):
     # without a cache dir, a lookup by source yields neither a path
     # nor a cache key
     lookup = Client().get_cached_by_source(self.src_doc)
     cached_path, cache_key = lookup
     assert cached_path is None
     assert cache_key is None
예제 #21
0
 def test_get_cached_no_file(self, client_env):
     # with a cache dir set but nothing converted before, a lookup by
     # key gives None
     some_key = '396199333edbf40ad43e62a1c1397793_1_1'
     caching_client = Client(cache_dir=client_env.cache_dir)
     cached = caching_client.get_cached(some_key)
     assert cached is None
예제 #22
0
 def test_get_cached_no_manager(self, client_env):
     # a client explicitly created with cache_dir=None (no cache
     # manager) still answers lookups by key, returning None
     some_key = '396199333edbf40ad43e62a1c1397793_1_1'
     uncached_client = Client(cache_dir=None)
     cached = uncached_client.get_cached(some_key)
     assert cached is None
예제 #23
0
 def test_get_cached_no_manager(self, client_env):
     # a client explicitly created with cache_dir=None (no cache
     # manager) still answers lookups by key, returning None
     some_key = '396199333edbf40ad43e62a1c1397793_1_1'
     uncached_client = Client(cache_dir=None)
     cached = uncached_client.get_cached(some_key)
     assert cached is None
예제 #24
0
 def test_get_cached_no_file(self, client_env):
     # with a cache dir set but nothing converted before, a lookup by
     # key gives None
     some_key = '396199333edbf40ad43e62a1c1397793_1_1'
     caching_client = Client(cache_dir=client_env.cache_dir)
     cached = caching_client.get_cached(some_key)
     assert cached is None
예제 #25
0
 def test_get_cached_by_source_no_file(self, client_env):
     # a source doc that was never converted cannot be found in the
     # cache: both path and key come back as None
     caching_client = Client(cache_dir=client_env.cache_dir)
     lookup = caching_client.get_cached_by_source(client_env.src_doc)
     c_path, c_key = lookup
     assert c_path is None
     assert c_key is None
예제 #26
0
 def test_get_cached_by_source_no_file(self):
     # a source doc that was never converted cannot be found in the
     # cache: both path and key come back as None
     caching_client = Client(cache_dir=self.cachedir)
     lookup = caching_client.get_cached_by_source(self.src_doc)
     cached_path, cache_key = lookup
     assert cached_path is None
     assert cache_key is None
예제 #27
0
 def test_get_cached_by_source_no_cache_dir(self, client_env):
     # without a cache dir, a lookup by source yields neither a path
     # nor a cache key
     uncached_client = Client(cache_dir=None)
     lookup = uncached_client.get_cached_by_source(client_env.src_doc)
     c_path, c_key = lookup
     assert c_path is None
     assert c_key is None
예제 #28
0
 def test_get_cached_by_source_no_cache_dir(self, client_env):
     # without a cache dir, a lookup by source yields neither a path
     # nor a cache key
     uncached_client = Client(cache_dir=None)
     lookup = uncached_client.get_cached_by_source(client_env.src_doc)
     c_path, c_key = lookup
     assert c_path is None
     assert c_key is None
예제 #29
0
 def test_get_cached_by_source_no_file(self, client_env):
     # a source doc that was never converted cannot be found in the
     # cache: both path and key come back as None
     caching_client = Client(cache_dir=client_env.cache_dir)
     lookup = caching_client.get_cached_by_source(client_env.src_doc)
     c_path, c_key = lookup
     assert c_path is None
     assert c_key is None
예제 #30
0
class Document(commandtransform):
    """A document that can be converted via ulif.openoffice client.

    `name` - basename of file

    `data` - (binary) data of file, the file contents

    `cache_dir` - optional directory handed to the conversion
    `Client` as its cache dir. With the default (`None`) the client
    is created without a cache dir.
    """
    def __init__(self, name, data, cache_dir=None):
        commandtransform.__init__(self, name)
        name = self.name()
        self.tmpdir, self.fullname = self.initialize_tmpdir(
            data, filename=name)
        self.cache_dir = cache_dir
        self.client = Client(cache_dir=cache_dir)

    def __del__(self):
        """Remove the temporary directory and loop on all base
        destructors.

        This method is protected against diamond inheritance.
        """
        # `tmpdir` does not exist if `__init__` failed before setting
        # it; a destructor must not raise in that case.
        tmpdir = getattr(self, 'tmpdir', None)
        if tmpdir is not None and isdir(tmpdir):
            self.cleanDir(tmpdir)
        seen_bases = set()
        for base in self.__class__.__bases__:
            basekey = str(base)
            if basekey in seen_bases:
                # call each distinct base destructor only once
                continue
            seen_bases.add(basekey)
            if hasattr(base, '__del__'):
                base.__del__(self)

    @classmethod
    def subObjects(cls, path):
        """Overwritten from base.

        Return `path` and a list of basenames of allowed files found
        in `path`. Allowed files are such with filename extension
        '.png', '.jpg', '.gif', '.css'.

        The ``.css`` filename extension is not allowed in the original
        method.

        The returned `path` always carries a trailing path separator.
        """
        filenames = []
        for filename in os.listdir(path):
            # raw string: `\.` must reach the regex engine unchanged
            result = re.match(r"^.+\.(?P<ext>.+)$", filename)
            if result is None:
                continue
            if result.group('ext') in ('png', 'jpg', 'gif', 'css'):
                filenames.append(filename)
        # joining with '' appends the trailing separator
        path = os.path.join(path, '')
        return path, filenames

    def convert(self, cache_key=None):
        """Convert the document to HTML.

        Returns the main document content as string and a cache_key
        for quick later retrieval. Additional documents (images, etc.)
        which are result of the conversion are placed in the `tmpdir`
        of this `Document`.

        If `cache_key` is given (and a `cache_dir` set before) we will
        lookup the cache before performing any real conversion.

        After the call the former `tmpdir` is cleaned and `tmpdir`
        points to the directory holding the result.

        Raises `IOError` if conversion fails.
        """
        name = self.name()
        src_path = os.path.join(self.tmpdir, name)
        # Lookup cached doc by cache key (fast)
        resultpath = self.client.get_cached(cache_key)
        if resultpath is None:
            # Lookup cached doc by source (expensive). This also
            # replaces `cache_key` by whatever the client reports.
            resultpath, cache_key = self.client.get_cached_by_source(
                src_path, OPTIONS_HTML)
        if resultpath is not None:
            # Work on a copy of the cached doc, not on the cache entry
            newdir = copy_to_secure_location(resultpath)
            resultpath = os.path.join(newdir, os.path.basename(resultpath))
        else:
            # Convert to HTML, new doc will be in resultpath
            resultpath, cache_key, metadata = self.client.convert(
                src_path, OPTIONS_HTML)
            if metadata['error']:
                descr = metadata.get('error-descr', 'Descr. not avail.')
                raise IOError('Could not convert: %s [%s]' % (name, descr))
            newdir = os.path.dirname(resultpath)
        # Read via context manager so the file handle is closed reliably
        with open(resultpath, 'r') as fd:
            html = fd.read()
        self.cleanDir(self.tmpdir)
        self.tmpdir = newdir
        return html, cache_key

    def convertToPDF(self, cache_key=None):
        """Convert the document to PDF.

        Returns the generated document contents as string and a cache
        key. The cache_key might be None if no cache_dir was set
        before.

        If `cache_key` is given (and a `cache_dir` set before) we will
        lookup the cache before performing any real conversion.

        Raises `IOError` if conversion fails.
        """
        # NOTE(review): files are read in text mode ('r') as before;
        # on Python 3 binary PDF data would need 'rb' - confirm the
        # targeted Python version before changing the return type.
        pdffilepath = self.client.get_cached(cache_key)
        if pdffilepath is not None:
            with open(pdffilepath, 'r') as fd:
                return fd.read(), cache_key
        name = self.name()
        src_path = os.path.join(self.tmpdir, name)
        pdffilepath, cache_key = self.client.get_cached_by_source(
            src_path, OPTIONS_PDF)
        if pdffilepath is not None:
            with open(pdffilepath, 'r') as fd:
                return fd.read(), cache_key
        pdffilepath, cache_key, metadata = self.client.convert(
            src_path, OPTIONS_PDF)
        if metadata['error']:
            descr = metadata.get('error-descr', 'Descr. not avail.')
            raise IOError('Could not convert: %s [%s]' % (name, descr))
        with open(pdffilepath, 'r') as fd:
            pdf = fd.read()

        # Remove temporary dir...
        # NOTE(review): the previous `self.tmpdir` is replaced here
        # without being cleaned - confirm whether that is intended.
        self.tmpdir = os.path.dirname(pdffilepath)
        self.cleanDir(self.tmpdir)
        return pdf, cache_key
예제 #31
0
 def create_source(self):
     # convert `doc_path` with `transform_options` and remember where
     # the result was stored; cache key and metadata are not used here
     converted = Client().convert(self.doc_path, self.transform_options)
     self.result_path, _key, _meta = converted
     assert self.result_path is not None