def test_argument_error(self, client_env):
    """Invalid option values make `Client.convert` raise ArgumentParserError."""
    # Neither the requested output format nor the listed processors exist.
    bad_options = {'oocp-out-fmt': 'foo', 'meta-procord': 'foo,bar'}
    converter = Client()
    with pytest.raises(ArgumentParserError):
        converter.convert(client_env.src_doc, options=bad_options)
def test_convert(self, client_env):
    """A conversion with default options and no cache dir succeeds."""
    outcome = Client().convert(client_env.src_doc)
    result_path, cache_key, metadata = outcome
    assert result_path.endswith('/sample.html.zip')
    assert os.path.isfile(result_path)
    # Without a cache dir there is no cache key to hand out.
    assert cache_key is None
    assert metadata == {'error': False, 'oocp_status': 0}
def test_options(self, client_env):
    """Options passed to `convert` select the output format."""
    pdf_options = {'oocp-out-fmt': 'pdf', 'meta-procord': 'oocp'}
    converter = Client()
    outcome = converter.convert(client_env.src_doc, options=pdf_options)
    result_path, cache_key, metadata = outcome
    assert result_path.endswith('/sample.pdf')
    assert metadata == {'error': False, 'oocp_status': 0}
def test_get_cached(self, client_env):
    """A converted document can be fetched again via its cache key."""
    cache_dir = client_env.cache_dir
    converter = Client(cache_dir=cache_dir)
    result_path, cache_key, metadata = converter.convert(
        client_env.src_doc)
    assert cache_key == '396199333edbf40ad43e62a1c1397793_1_1'
    from_cache = converter.get_cached(cache_key)
    # The cached copy is byte-identical to the fresh result ...
    assert filecmp.cmp(result_path, from_cache, shallow=False)
    # ... and is located below the cache dir.
    assert cache_dir in from_cache
def test_options(self, client_env):
    """Passing PDF options to `convert` yields a PDF result."""
    converter = Client()
    requested = {'oocp-out-fmt': 'pdf', 'meta-procord': 'oocp'}
    result_path, cache_key, metadata = converter.convert(
        client_env.src_doc,
        options=requested,
    )
    expected_metadata = {'error': False, 'oocp_status': 0}
    assert result_path.endswith('/sample.pdf')
    assert metadata == expected_metadata
def test_convert(self):
    """A conversion with default options and no cache dir succeeds."""
    converter = Client()
    outcome = converter.convert(self.src_doc)
    result_path, cache_key, metadata = outcome
    # Remember the result dir first so it is available for cleanup
    # even if an assertion below fails.
    self.resultdir = os.path.dirname(result_path)
    assert result_path.endswith('/sample.html.zip')
    assert os.path.isfile(result_path)
    # Without a cache dir there is no cache key to hand out.
    assert cache_key is None
    assert metadata == {'error': False, 'oocp_status': 0}
def test_get_cached_by_source(self, client_env):
    """A cached result can be looked up by its source document."""
    expected_key = '396199333edbf40ad43e62a1c1397793_1_1'
    cache_dir = client_env.cache_dir
    converter = Client(cache_dir=cache_dir)
    result_path, cache_key, metadata = converter.convert(
        client_env.src_doc)
    assert cache_key == expected_key
    lookup = converter.get_cached_by_source(client_env.src_doc)
    c_path, c_key = lookup
    assert filecmp.cmp(result_path, c_path, shallow=False)
    assert cache_dir in c_path
    assert c_key == expected_key
def test_options(self):
    """Options passed to `convert` select the output format."""
    pdf_options = {'oocp-out-fmt': 'pdf', 'meta-procord': 'oocp'}
    converter = Client()
    outcome = converter.convert(self.src_doc, options=pdf_options)
    result_path, cache_key, metadata = outcome
    # Store the result dir on the instance before asserting anything.
    self.resultdir = os.path.dirname(result_path)
    assert result_path.endswith('/sample.pdf')
    assert metadata == {'error': False, 'oocp_status': 0}
def test_get_cached(self):
    """A converted document can be fetched again via its cache key."""
    converter = Client(cache_dir=self.cachedir)
    outcome = converter.convert(self.src_doc)
    result_path, cache_key, metadata = outcome
    # Store the result dir on the instance (used for cleanup).
    self.resultdir = os.path.dirname(result_path)
    assert cache_key == '164dfcf01584bd0e3595b62fb53cf12c_1_1'
    from_cache = converter.get_cached(cache_key)
    # Cached copy and fresh result must have identical contents.
    assert filecmp.cmp(result_path, from_cache, shallow=False)
    assert self.cachedir in from_cache
def test_get_cached_by_source(self):
    """A cached result can be looked up by its source document."""
    expected_key = '164dfcf01584bd0e3595b62fb53cf12c_1_1'
    converter = Client(cache_dir=self.cachedir)
    result_path, first_key, metadata = converter.convert(self.src_doc)
    # Store the result dir on the instance (used for cleanup).
    self.resultdir = os.path.dirname(result_path)
    assert first_key == expected_key
    lookup = converter.get_cached_by_source(self.src_doc)
    cached_path, looked_up_key = lookup
    assert filecmp.cmp(result_path, cached_path, shallow=False)
    assert self.cachedir in cached_path
    assert looked_up_key == expected_key
def create_source(self):
    """Convert `self.doc_path` and remember where the result was stored.

    The conversion is run with `self.transform_options`; the resulting
    path is kept in `self.result_path` and must not be `None`.
    """
    # NOTE(review): an earlier comment here spoke of creating an
    # additional CSS file; the code only runs the conversion -- confirm.
    converter = Client()
    outcome = converter.convert(self.doc_path, self.transform_options)
    # Only the path is kept; cache key and metadata are not used here.
    self.result_path, _cache_key, _metadata = outcome
    assert self.result_path is not None
class Document(commandtransform):
    """A document that can be converted via ulif.openoffice client.

    `name` - basename of file
    `data` - (binary) data of file, the file contents
    `cache_dir` - optional cache directory passed on to the client
    """

    def __init__(self, name, data, cache_dir=None):
        commandtransform.__init__(self, name)
        name = self.name()
        self.tmpdir, self.fullname = self.initialize_tmpdir(
            data, filename=name)
        self.cache_dir = cache_dir
        self.client = Client(cache_dir=cache_dir)

    def __del__(self):
        """Remove the temporary directory and loop on all base destructors.

        This method is protected against diamond inheritance. It also
        copes with instances whose `__init__` failed before `tmpdir`
        was set.
        """
        tmpdir = getattr(self, 'tmpdir', None)
        if tmpdir is not None and isdir(tmpdir):
            self.cleanDir(tmpdir)
        basekeys = []
        for base in self.__class__.__bases__:
            basekey = str(base)
            if basekey in basekeys:
                # Each base destructor is called at most once.
                continue
            basekeys.append(basekey)
            if hasattr(base, '__del__'):
                base.__del__(self)

    @classmethod
    def subObjects(cls, path):
        """Overwritten from base.

        Return `path` and a list of basenames of allowed files found
        in `path`.

        Allowed files are such with filename extension '.png', '.jpg',
        '.gif', '.css'. The ``.css`` filename extension is not allowed
        in the original method.

        The returned `path` always ends with a path separator.
        """
        allowed = ('png', 'jpg', 'gif', 'css')
        filenames = []
        for filename in os.listdir(path):
            # Raw string: same pattern as before, minus the invalid escape.
            result = re.match(r"^.+\.(?P<ext>.+)$", filename)
            if result is not None and result.group('ext') in allowed:
                filenames.append(filename)
        path = os.path.join(path, '')
        return path, filenames

    @staticmethod
    def _read_file(path, mode='r'):
        """Return the contents of `path`, closing the file afterwards."""
        with open(path, mode) as fd:
            return fd.read()

    @staticmethod
    def _copy_to_secure(resultpath):
        """Copy a cached result to a secure location.

        Returns a tuple ``(newdir, newpath)``: the directory returned by
        `copy_to_secure_location` and the path of the file in it.
        """
        newdir = copy_to_secure_location(resultpath)
        return newdir, os.path.join(newdir, os.path.basename(resultpath))

    def convert(self, cache_key=None):
        """Convert the document to HTML.

        Returns the main document content as string and a cache_key
        for quick later retrieval.

        Additional documents (images, etc.) which are result of the
        conversion are placed in the `tmpdir` of this `Document`.

        If `cache_key` is given (and a `cache_dir` set before) we will
        lookup the cache before performing any real conversion.

        Raises `IOError` if conversion fails.
        """
        name = self.name()
        src_path = os.path.join(self.tmpdir, name)

        # Lookup cached doc by cache key (fast)
        resultpath = self.client.get_cached(cache_key)
        if resultpath is not None:
            newdir, resultpath = self._copy_to_secure(resultpath)

        if resultpath is None:
            # Lookup cached doc by source (expensive)
            resultpath, cache_key = self.client.get_cached_by_source(
                src_path, OPTIONS_HTML)
            if resultpath is not None:
                newdir, resultpath = self._copy_to_secure(resultpath)

        if resultpath is None:
            # Convert to HTML, new doc will be in resultpath
            resultpath, cache_key, metadata = self.client.convert(
                src_path, OPTIONS_HTML)
            if metadata['error']:
                descr = metadata.get('error-descr', 'Descr. not avail.')
                raise IOError('Could not convert: %s [%s]' % (name, descr))
            newdir = os.path.dirname(resultpath)

        html = self._read_file(resultpath, 'r')
        # The directory holding the result replaces the old tmpdir.
        self.cleanDir(self.tmpdir)
        self.tmpdir = newdir
        return html, cache_key

    def convertToPDF(self, cache_key=None):
        """Convert the document to PDF.

        Returns the generated document contents as string and a cache
        key. The cache_key might be None if no cache_dir was set
        before.

        If `cache_key` is given (and a `cache_dir` set before) we will
        lookup the cache before performing any real conversion.

        The PDF file is read in binary mode, so its bytes are returned
        unmodified.

        Raises `IOError` if conversion fails.
        """
        # Lookup cached doc by cache key (fast)
        pdffilepath = self.client.get_cached(cache_key)
        if pdffilepath is not None:
            return self._read_file(pdffilepath, 'rb'), cache_key

        name = self.name()
        src_path = os.path.join(self.tmpdir, name)

        # Lookup cached doc by source (expensive)
        pdffilepath, cache_key = self.client.get_cached_by_source(
            src_path, OPTIONS_PDF)
        if pdffilepath is not None:
            return self._read_file(pdffilepath, 'rb'), cache_key

        pdffilepath, cache_key, metadata = self.client.convert(
            src_path, OPTIONS_PDF)
        if metadata['error']:
            descr = metadata.get('error-descr', 'Descr. not avail.')
            raise IOError('Could not convert: %s [%s]' % (name, descr))
        pdf = self._read_file(pdffilepath, 'rb')

        # Remove temporary dir...
        # NOTE(review): this rebinds `self.tmpdir` to the result dir before
        # cleaning it, so the previous tmpdir is no longer referenced by
        # this instance -- confirm that this is intended.
        self.tmpdir = os.path.dirname(pdffilepath)
        self.cleanDir(self.tmpdir)
        return pdf, cache_key