def test_remove_file_dir_dir(self, tmpdir):
    # A directory handed to `remove_file_dir` is removed itself.
    target_dir = tmpdir.join("sample_dir")
    target_dir.mkdir()
    target_dir.join("sample.txt").write("Hi!")
    # Here the directory (not the file inside it) is the argument.
    remove_file_dir(str(target_dir))
    # The parent survives, the given directory is gone.
    assert tmpdir.exists() is True
    assert target_dir.exists() is False
def test_remove_file_dir_file(self, tmpdir):
    # Removing a file also removes the directory containing it.
    holder = tmpdir.join("sample_dir")
    holder.mkdir()
    victim = holder.join("sample.txt")
    victim.write("Hi!")
    remove_file_dir(str(victim))
    # The parent of the containing directory is left alone.
    assert tmpdir.exists() is True
    assert holder.exists() is False
def _handle_error(self, proc, input, output, metadata):
    """Note an error description and discard `input` and `output`.

    An ``error-descr`` already present in `metadata` is kept;
    otherwise a generic message naming ``proc.prefix`` is stored.
    Afterwards both `input` and `output` are passed to
    `remove_file_dir`.

    Returns the (in-place modified) `metadata`.
    """
    fallback = 'problem while processing %s' % proc.prefix
    description = metadata.get('error-descr', fallback)
    metadata['error-descr'] = description
    for leftover in (input, output):
        remove_file_dir(leftover)
    return metadata
def test_remove_file_dir_dir(self):
    # A directory given as argument is removed; its parent is kept.
    sample_path = os.path.join(self.workdir, 'sampledir')
    sample_file = os.path.join(sample_path, 'sample.txt')
    os.mkdir(sample_path)
    # The file is opened in binary mode, so write bytes, and close the
    # handle before removal so that no open descriptor lingers.
    with open(sample_file, 'wb') as fd:
        fd.write(b'Hi!')
    remove_file_dir(sample_path)
    assert os.path.exists(self.workdir) is True
    assert os.path.exists(sample_path) is False
def test_remove_file_dir_file(self):
    # When we remove a file, the containing dir is removed as well.
    sample_path = os.path.join(self.workdir, 'sampledir')
    sample_file = os.path.join(sample_path, 'sample.txt')
    os.mkdir(sample_path)
    # The file is opened in binary mode, so write bytes, and close the
    # handle before removal so that no open descriptor lingers.
    with open(sample_file, 'wb') as fd:
        fd.write(b'Hi!')
    remove_file_dir(sample_file)
    assert os.path.exists(self.workdir) is True
    assert os.path.exists(sample_path) is False
def process(self, input=None, metadata={'error': False}):
    """Run the configured processors one after another.

    Every processor class delivered by ``self._build_pipeline()`` is
    instantiated with ``self.all_options`` and its `process` method
    is called with the current input (normally a filepath) and the
    current `metadata` dict. The output of one processor becomes the
    input of the next one.

    If a processor sets the ``error`` entry of `metadata` to
    ``True``, the run is aborted: ``self._handle_error`` is called
    and ``None`` is returned as output along with the `metadata`,
    which may contain hints about the reasons.

    If all processors succeed, the output of the last processor is
    returned along with the last `metadata`.

    The set and order of processors depends on the ``procord``
    option. A value like ``oocp,oocp`` means that the ``oocp``
    processor (the :class:`OOConvProcessor`, registered under
    ``oocp`` in `setup.py`) is called two times.

    .. note:: after each processing step the (then old) input is
              removed, if it differs from the output.
    """
    # Work on a copy, so the dict passed in (or the default) is not
    # rebound to whatever the processors return.
    current_meta = metadata.copy()
    current_path = input
    for proc_class in self._build_pipeline():
        worker = proc_class(self.all_options)
        result, current_meta = worker.process(current_path, current_meta)
        if current_meta['error'] is True:
            current_meta = self._handle_error(
                proc_class, current_path, result, current_meta)
            return None, current_meta
        if current_path != result:
            remove_file_dir(current_path)
        current_path = result
    return current_path, current_meta
def process(self, input=None, metadata={'error': False}):
    """Feed `input` through all processors defined in options.

    Each processor class of the pipeline built by
    ``self._build_pipeline()`` is instantiated with
    ``self.all_options``; its `process` method receives the current
    input (normally a filepath) and the `metadata` dict and returns
    a new output and new metadata.

    A processor signals a problem by setting the ``error`` entry of
    `metadata` to ``True``. In that case the whole run is aborted,
    ``self._handle_error`` is called, and ``None`` is returned as
    output together with the `metadata`, maybe containing hints
    about the reasons.

    Without errors the output of the last processor is returned
    along with the last `metadata`.

    Which processors run, and in which order, depends on the
    ``procord`` option. With a value like ``oocp,oocp`` the ``oocp``
    processor (the :class:`OOConvProcessor`, registered under
    ``oocp`` in `setup.py`) is called two times.

    .. note:: the (then old) input is removed after each step, if
              the step produced a different output.
    """
    meta = metadata.copy()  # never hand the caller's dict downstream
    stages = self._build_pipeline()
    source = input
    for stage in stages:
        produced, meta = stage(self.all_options).process(source, meta)
        failed = meta['error'] is True
        if failed:
            meta = self._handle_error(stage, source, produced, meta)
            return None, meta
        if source != produced:
            remove_file_dir(source)
        source = produced
    return source, meta
def process(self, path, metadata):
    """Clean up the HTML document at `path`.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched. Otherwise the file is copied with
    `copy_to_secure_location`, the original `path` is removed, the
    copy is rewritten with the output of `cleanup_html` and image
    files are renamed via ``self.rename_img_files``.

    Returns a tuple (``result_path``, `metadata`); `metadata` is
    passed through unchanged.
    """
    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(copy_to_secure_location(path), basename)
    src_dir = os.path.dirname(src_path)
    remove_file_dir(path)
    # Close the read handle before the same file is reopened for
    # writing below.
    with codecs.open(src_path, 'r', 'utf-8') as fd:
        raw_html = fd.read()
    new_html, img_name_map = cleanup_html(
        raw_html, basename,
        fix_head_nums=self.options['html_cleaner_fix_heading_numbers'],
        fix_img_links=self.options['html_cleaner_fix_image_links'],
        fix_sdfields=self.options['html_cleaner_fix_sd_fields'],
    )
    with codecs.open(src_path, 'wb', 'utf-8') as fd:
        fd.write(new_html)
    # Rename images
    self.rename_img_files(src_dir, img_name_map)
    return src_path, metadata
def process(self, path, metadata):
    """Clean up the HTML document at `path`.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched. Otherwise the file is copied with
    `copy_to_secure_location`, the original `path` is removed, the
    copy is rewritten with the output of `cleanup_html` and image
    files are renamed via ``self.rename_img_files``.

    Returns a tuple (``result_path``, `metadata`); `metadata` is
    passed through unchanged.
    """
    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(
        copy_to_secure_location(path), basename)
    src_dir = os.path.dirname(src_path)
    remove_file_dir(path)
    # Close the read handle before the same file is reopened for
    # writing below.
    with codecs.open(src_path, 'r', 'utf-8') as fd:
        raw_html = fd.read()
    new_html, img_name_map = cleanup_html(
        raw_html, basename,
        fix_head_nums=self.options['html_cleaner_fix_heading_numbers'],
        fix_img_links=self.options['html_cleaner_fix_image_links'],
        fix_sdfields=self.options['html_cleaner_fix_sd_fields'],
    )
    with codecs.open(src_path, 'wb', 'utf-8') as fd:
        fd.write(new_html)
    # Rename images
    self.rename_img_files(src_dir, img_name_map)
    return src_path, metadata
def process(self, path, metadata):
    """Run ``tidy`` over the HTML document at `path`.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched. Otherwise the file is copied with
    `copy_to_secure_location`, the original `path` is removed, the
    copy is rewritten with the output of `rename_sdfield_tags` and
    then modified in place by the external ``tidy`` program.

    Returns a tuple (``result_path``, `metadata`); `metadata` is
    passed through unchanged.
    """
    import subprocess

    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(copy_to_secure_location(path), basename)
    src_dir = os.path.dirname(src_path)
    remove_file_dir(path)
    # Remove <SDFIELD> tags if any
    with open(src_path, 'rb') as fd:
        raw_html = fd.read().decode('utf-8')
    cleaned_html = rename_sdfield_tags(raw_html)
    with open(src_path, 'wb') as fd:
        fd.write(cleaned_html.encode('utf-8'))
    error_file = os.path.join(src_dir, 'tidy-errors')
    # Pass an argument list instead of a shell string, so that paths
    # with spaces or shell metacharacters reach tidy unmangled. The
    # exit status is ignored, as it was before.
    subprocess.call([
        'tidy', '-asxhtml', '-clean', '-indent', '-modify', '-utf8',
        '-f', error_file, src_path])
    os.unlink(error_file)
    return src_path, metadata
def process(self, path, metadata):
    """Run ``tidy`` over the HTML document at `path`.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched. Otherwise the file is copied with
    `copy_to_secure_location`, the original `path` is removed, the
    copy is rewritten with the output of `rename_sdfield_tags` and
    then modified in place by the external ``tidy`` program.

    Returns a tuple (``result_path``, `metadata`); `metadata` is
    passed through unchanged.
    """
    import subprocess

    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(
        copy_to_secure_location(path), basename)
    src_dir = os.path.dirname(src_path)
    remove_file_dir(path)
    # Remove <SDFIELD> tags if any
    with open(src_path, 'rb') as fd:
        raw_html = fd.read().decode('utf-8')
    cleaned_html = rename_sdfield_tags(raw_html)
    with open(src_path, 'wb') as fd:
        fd.write(cleaned_html.encode('utf-8'))
    error_file = os.path.join(src_dir, 'tidy-errors')
    # Pass an argument list instead of a shell string, so that paths
    # with spaces or shell metacharacters reach tidy unmangled. The
    # exit status is ignored, as it was before.
    subprocess.call([
        'tidy', '-asxhtml', '-clean', '-indent', '-modify', '-utf8',
        '-f', error_file, src_path])
    os.unlink(error_file)
    return src_path, metadata
def process(self, path, metadata):
    """Move the CSS of the HTML document at `path` into a file.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched. Otherwise the file is copied with
    `copy_to_secure_location`, the original `path` is removed and
    the copy is rewritten with the HTML returned by `extract_css`.
    The CSS returned by `cleanup_css`, if not ``None``, is written
    to a ``.css`` file next to the HTML file, sharing its basename.

    Returns a tuple (``result_path``, `metadata`); `metadata` is
    passed through unchanged.
    """
    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(
        copy_to_secure_location(path), basename)
    remove_file_dir(path)
    # Close the read handle before the same file is reopened for
    # writing below.
    with open(src_path, 'rb') as fd:
        raw_html = fd.read().decode('utf-8')
    new_html, css = extract_css(
        raw_html, basename,
        prettify_html=self.options['css_cleaner_prettify_html'])
    # Errors reported by the CSS cleanup are not evaluated here.
    css, _errors = cleanup_css(
        css, minified=self.options['css_cleaner_minified'])
    css_file = os.path.splitext(src_path)[0] + '.css'
    if css is not None:
        with open(css_file, 'wb') as fd:
            fd.write(css.encode('utf-8'))
    with open(src_path, 'wb') as fd:
        fd.write(new_html.encode('utf-8'))
    return src_path, metadata
def process(self, path, metadata):
    """Move the CSS of the HTML document at `path` into a file.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched. Otherwise the file is copied with
    `copy_to_secure_location`, the original `path` is removed and
    the copy is rewritten with the HTML returned by `extract_css`.
    The CSS returned by `cleanup_css`, if not ``None``, is written
    to a ``.css`` file next to the HTML file, sharing its basename.

    Returns a tuple (``result_path``, `metadata`); `metadata` is
    passed through unchanged.
    """
    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(copy_to_secure_location(path), basename)
    remove_file_dir(path)
    # Close the read handle before the same file is reopened for
    # writing below.
    with open(src_path, 'rb') as fd:
        raw_html = fd.read().decode('utf-8')
    new_html, css = extract_css(
        raw_html, basename,
        prettify_html=self.options['css_cleaner_prettify_html'])
    # Errors reported by the CSS cleanup are not evaluated here.
    css, _errors = cleanup_css(
        css, minified=self.options['css_cleaner_minified'])
    css_file = os.path.splitext(src_path)[0] + '.css'
    if css is not None:
        with open(css_file, 'wb') as fd:
            fd.write(css.encode('utf-8'))
    with open(src_path, 'wb') as fd:
        fd.write(new_html.encode('utf-8'))
    return src_path, metadata
def process(self, path, metadata):
    """Do PSJ-specific adaptions of generated HTML input.

    `path` gives any (beforehand) generated HTML document. The path
    might be located in a directory with additional files (images,
    etc.) that could also be processed.

    `metadata` is a dictionary of metadata concerning the conversion
    process. It contains at least a key ``error`` with a boolean
    value (should always be `False`, otherwise the document
    conversion failed), and a key ``error-descr`` which contains
    some error message in case of failures.

    The ``error`` and ``error-descr`` should be set when
    unresolvable processing problems occur.

    Files whose extension is not in ``self.supported_extensions``
    are returned untouched.

    Returns a tuple (``result_path``, ``metadata``) with
    ``result_path`` containing the path to the modified document and
    ``metadata`` containing the ``metadata`` dictionary passed in.
    """
    ext = os.path.splitext(path)[1]
    if ext not in self.supported_extensions:
        return path, metadata
    basename = os.path.basename(path)
    src_path = os.path.join(
        copy_to_secure_location(path), basename)
    remove_file_dir(path)
    # All files are handled in `with` blocks, so they are flushed and
    # closed before this method returns.
    with open(src_path, 'r') as fd:
        raw_html = fd.read()
    html = self.fix_html(raw_html)
    # NOTE(review): encoded bytes written to a text-mode file look
    # like Python 2 semantics -- confirm before porting.
    with open(src_path, 'w') as fd:
        fd.write(html.encode('utf-8'))
    src_dir = os.path.dirname(src_path)
    css = self.get_css(src_dir)
    css = self.fix_css(css)
    with open(os.path.join(src_dir, 'psj.css'), 'w') as fd:
        fd.write(css)
    return src_path, metadata
def test_remove_file_dir_none(self):
    # Passing `None` instead of a path is tolerated: the call just
    # returns `None`.
    outcome = remove_file_dir(None)
    assert outcome is None
def test_remove_file_dir_not_existing(self):
    # A path that does not exist is tolerated: the call just returns
    # `None`.
    missing = 'not-existing-path'
    outcome = remove_file_dir(missing)
    assert outcome is None
def test_remove_file_dir_not_existiing(self):
    # A path that does not exist is tolerated: the call just returns
    # `None`.
    missing = 'not-existing-path'
    outcome = remove_file_dir(missing)
    assert outcome is None
def test_remove_file_dir_non_path(self):
    # An argument that is no file path at all is tolerated: the call
    # just returns `None`.
    not_a_path = object()
    outcome = remove_file_dir(not_a_path)
    assert outcome is None
def test_remove_file_dir_none(self):
    # Passing `None` instead of a path is tolerated: the call just
    # returns `None`.
    outcome = remove_file_dir(None)
    assert outcome is None
def test_remove_file_dir_non_path(self):
    # An argument that is no file path at all is tolerated: the call
    # just returns `None`.
    not_a_path = object()
    outcome = remove_file_dir(not_a_path)
    assert outcome is None
def tearDown(self):
    # Clean up after each test: first the working directory, then
    # the result path.
    for attr_name in ('workdir', 'resultpath'):
        remove_file_dir(getattr(self, attr_name))