def test_process_html_unzipped(self):
    """Run the ``unzip,oocp`` processor order with HTML output.

    The result must be an existing file whose name ends with
    ``sample.html`` and whose content starts with ``<!DOCTYPE``; the
    metadata must report no error and an ``oocp_status`` of 0.
    """
    proc = MetaProcessor(options={
        'oocp-out-fmt': 'html',
        'meta-procord': 'unzip,oocp',
    })
    self.resultpath, metadata = proc.process(self.input)
    assert os.path.isfile(self.resultpath)
    assert metadata['error'] is False and metadata['oocp_status'] == 0
    # Use a context manager so the file handle is closed deterministically
    # instead of lingering until garbage collection.
    with open(self.resultpath, 'r') as result_file:
        content = result_file.read()
    assert content.startswith('<!DOCTYPE')
    assert self.resultpath.endswith('sample.html')
def test_process_with_errors(self):
    """Processing with the ``error`` processor order reports a failure.

    The returned path must be ``None`` and the metadata must consist of
    exactly the error flag and the error description.
    """
    expected_metadata = {
        'error': True,
        'error-descr': 'Intentional error. Please ignore',
    }
    failing_proc = MetaProcessor(options={'meta-procord': 'error'})
    self.resultpath, metadata = failing_proc.process(self.input)
    assert self.resultpath is None
    assert metadata == expected_metadata
def test_process_with_errors(self, workdir):
    """Processing with the ``error`` processor order reports a failure.

    The sample document below `workdir` is processed; the returned path
    must be ``None`` and the metadata must consist of exactly the error
    flag and the error description.
    """
    expected_metadata = {
        'error': True,
        'error-descr': 'Intentional error. Please ignore',
    }
    sample_path = str(workdir / "src" / "sample.txt")
    failing_proc = MetaProcessor(options={'meta-procord': 'error'})
    resultpath, metadata = failing_proc.process(sample_path)
    assert resultpath is None
    assert metadata == expected_metadata
def test_process_with_errors(self, workdir):
    """Check the outcome of a run using the ``error`` processor order.

    No result path may be returned, and the metadata must equal the
    expected error dict (flag plus description).
    """
    source = str(workdir / "src" / "sample.txt")
    options = {'meta-procord': 'error'}
    resultpath, metadata = MetaProcessor(options=options).process(source)
    assert resultpath is None
    assert metadata == {
        'error': True,
        'error-descr': 'Intentional error. Please ignore',
    }
def test_process_xhtml_unzipped(self, workdir):
    """Run the ``unzip,oocp`` processor order with XHTML output.

    The sample document below `workdir` is processed. The result must be
    an existing file whose name ends with ``sample.html`` and whose
    content starts with an XML declaration (``<?xml ``); the metadata
    must report no error and an ``oocp_status`` of 0.
    """
    proc = MetaProcessor(options={
        'oocp-out-fmt': 'xhtml',
        'meta-procord': 'unzip,oocp',
    })
    resultpath, metadata = proc.process(
        str(workdir / "src" / "sample.txt"))
    assert os.path.isfile(resultpath)
    assert metadata['error'] is False and metadata['oocp_status'] == 0
    # Use a context manager so the file handle is closed deterministically
    # instead of lingering until garbage collection.
    with open(resultpath, 'r') as result_file:
        content = result_file.read()
    assert content.startswith('<?xml ')
    assert resultpath.endswith('sample.html')
def test_options_as_strings(self):
    """The options of a processor can be rendered as one flat string.

    The expected value is the concatenation of ``name=value`` entries
    without any separator between them.
    """
    expected = (
        "css_cleaner_minified=Truehtml_cleaner_fix_heading_numbers=True"
        "html_cleaner_fix_image_links=Truehtml_cleaner_fix_sd_fields=True"
        "meta_processor_order=('unzip', 'oocp', 'tidy', 'html_cleaner', "
        "'css_cleaner', 'zip')oocp_hostname=localhost"
        "oocp_output_format=htmloocp_pdf_tagged=False"
        "oocp_pdf_version=Falseoocp_port=2002"
    )
    processor = MetaProcessor(options={'meta.procord': 'oocp, oocp'})
    rendered = processor.get_options_as_string()
    assert rendered == expected
def test_process_html_unzipped(self, workdir):
    """Run the ``unzip,oocp`` processor order with HTML output.

    The sample document below `workdir` is processed. The result must be
    an existing file whose name ends with ``sample.html`` and whose
    content starts with ``<!DOCTYPE``; the metadata must report no error
    and an ``oocp_status`` of 0.
    """
    proc = MetaProcessor(options={
        'oocp-out-fmt': 'html',
        'meta-procord': 'unzip,oocp',
    })
    resultpath, metadata = proc.process(
        str(workdir / "src" / "sample.txt"))
    assert os.path.isfile(resultpath)
    assert metadata['error'] is False and metadata['oocp_status'] == 0
    # Use a context manager so the file handle is closed deterministically
    # instead of lingering until garbage collection.
    with open(resultpath, 'r') as result_file:
        content = result_file.read()
    assert content.startswith('<!DOCTYPE')
    assert resultpath.endswith('sample.html')
def convert_doc(src_doc, options, cache_dir):
    """Convert `src_doc` according to the other parameters.

    `src_doc` is the path to the source document. `options` is a dict
    of options for processing, passed to the processors.

    `cache_dir` may be ``None`` in which case no caching is requested
    during processing.

    Generates a converted representation of `src_doc` by calling
    :class:`ulif.openoffice.processor.MetaProcessor` with `options` as
    parameters.

    Afterwards the conversion result is stored in cache (if
    allowed/possible) for speedup of upcoming requests.

    Returns a triple:

      ``(<PATH>, <CACHE_KEY>, <METADATA>)``

    where ``<PATH>`` is the path to the resulting document,
    ``<CACHE_KEY>`` an identifier (string) to retrieve a generated doc
    from cache on future requests, and ``<METADATA>`` is a dict of values
    returned during request (and set by the document processors,
    notably setting the `error` keyword).

    If errors happen or caching is disabled, ``<CACHE_KEY>`` is
    ``None``.

    Any exception raised while copying or processing the document is
    re-raised unchanged after the temporary working directory has been
    removed.
    """
    result_path = None
    cache_key = None
    repr_key = get_marker(options)  # Create unique marker out of options
    metadata = dict(error=False)

    # Generate result. The processors work on a private copy of the
    # source document placed in a fresh temporary directory.
    input_copy_dir = tempfile.mkdtemp()
    input_copy = os.path.join(input_copy_dir, os.path.basename(src_doc))
    try:
        # Copy inside the ``try`` so that a failing copy does not leave
        # the freshly created temporary directory behind.
        shutil.copy2(src_doc, input_copy)
        proc = MetaProcessor(options=options)  # Removes original doc
        result_path, metadata = proc.process(input_copy)
    except Exception:
        # Best-effort cleanup: ignore removal errors so they cannot mask
        # the original exception, then re-raise it with its traceback.
        shutil.rmtree(input_copy_dir, ignore_errors=True)
        raise

    error_state = metadata.get('error', False)
    if cache_dir and not error_state and result_path is not None:
        # Cache away generated doc
        cache_key = CacheManager(cache_dir).register_doc(
            src_doc, result_path, repr_key)
    return result_path, cache_key, metadata
def test_options_as_strings(self):
    """The options of a processor can be rendered as one flat string.

    The expected value is the concatenation of ``name=value`` entries
    without any separator between them.
    """
    expected = (
        "css_cleaner_minified=True"
        "css_cleaner_prettify_html=False"
        "html_cleaner_fix_heading_numbers=True"
        "html_cleaner_fix_image_links=True"
        "html_cleaner_fix_sd_fields=True"
        "meta_processor_order=('unzip', 'oocp', 'tidy', 'html_cleaner', "
        "'css_cleaner', 'zip')"
        "oocp_hostname=localhost"
        "oocp_output_format=html"
        "oocp_pdf_tagged=False"
        "oocp_pdf_version=False"
        "oocp_port=2002"
    )
    processor = MetaProcessor(options={'meta.procord': 'oocp, oocp'})
    rendered = processor.get_options_as_string()
    assert rendered == expected
def test_ignored_options(self):
    """Keys that are not in the default dict are ignored."""
    unknown_key = 'meta-foo'
    processor = MetaProcessor(options={unknown_key: '12'})
    option_names = processor.options.keys()
    assert unknown_key not in option_names
def test_process_default(self, workdir):
    """Process the sample document with an empty options dict.

    The metadata must report no error and an ``oocp_status`` of 0, and
    the result path must end with ``sample.html.zip``.
    """
    sample_path = str(workdir / "src" / "sample.txt")
    processor = MetaProcessor(options={})
    resultpath, metadata = processor.process(sample_path)
    assert metadata['error'] is False
    assert metadata['oocp_status'] == 0
    assert resultpath.endswith('sample.html.zip')
def test_build_pipeline_empty_elements(self):
    """Empty entries in the processor order do not show up in the pipeline."""
    expected = (OOConvProcessor, OOConvProcessor)
    processor = MetaProcessor(options={'meta-procord': 'oocp,,,oocp'})
    pipeline = processor._build_pipeline()
    assert pipeline == expected
def test_build_pipeline_empty(self):
    """An empty processor order results in an empty pipeline tuple."""
    processor = MetaProcessor(options={'meta-procord': ''})
    pipeline = processor._build_pipeline()
    assert pipeline == tuple()
def test_build_pipeline_single(self):
    """A single processor name yields a one-element pipeline tuple."""
    expected = (OOConvProcessor,)
    processor = MetaProcessor(options={'meta-procord': 'oocp'})
    pipeline = processor._build_pipeline()
    assert pipeline == expected
def test_options_invalid(self):
    """Invalid options must lead to an ``ArgumentParserError``."""
    invalid_options = {'meta-procord': 'oop,nonsense'}
    with pytest.raises(ArgumentParserError):
        MetaProcessor(options=invalid_options)
def test_avail_processors(self):
    """Processors defined via entry points are found.

    The ``oocp`` name must map to `OOConvProcessor` and the mapping of
    available processors must not be empty.
    """
    processor = MetaProcessor(options={'meta-procord': 'oocp, oocp'})
    oocp_class = processor.avail_procs['oocp']
    assert oocp_class is OOConvProcessor
    proc_count = len(processor.avail_procs.items())
    assert proc_count > 0
def test_process_default(self):
    """Process the test input with an empty options dict.

    The metadata must report no error and an ``oocp_status`` of 0, and
    the result path must end with ``sample.html.zip``.
    """
    processor = MetaProcessor(options={})
    self.resultpath, metadata = processor.process(self.input)
    assert metadata['error'] is False
    assert metadata['oocp_status'] == 0
    assert self.resultpath.endswith('sample.html.zip')
def test_build_pipeline_single(self):
    """Building a pipeline from one processor name gives a 1-tuple."""
    options = {'meta-procord': 'oocp'}
    pipeline = MetaProcessor(options=options)._build_pipeline()
    assert pipeline == (OOConvProcessor,)
def test_no_options(self):
    """A processor created without any options still has its defaults."""
    processor = MetaProcessor()
    option_names = processor.options.keys()
    assert 'meta_processor_order' in option_names
def test_process_default(self, workdir):
    """Run the processor with empty options on the sample document.

    Expects error-free metadata with an ``oocp_status`` of 0 and a
    result path ending in ``sample.html.zip``.
    """
    source = workdir / "src" / "sample.txt"
    resultpath, metadata = MetaProcessor(options={}).process(str(source))
    assert metadata['error'] is False
    assert metadata['oocp_status'] == 0
    assert resultpath.endswith('sample.html.zip')
def test_non_meta_options(self):
    """Options not determined for the meta processor are ignored."""
    processor = MetaProcessor(options={'foo.bar': '12'})
    option_names = processor.options.keys()
    assert 'bar' not in option_names
def test_option_set(self):
    """Options available in the defaults dict are respected when set."""
    expected_order = ('oocp', 'oocp')
    processor = MetaProcessor(options={'meta-procord': 'oocp,oocp'})
    actual_order = processor.options['meta_processor_order']
    assert actual_order == expected_order