Exemplo n.º 1
0
    def process(self, path, metadata):
        """Clean up an HTML file with the external ``tidy`` program.

        Files whose extension is not listed in ``self.supported_extensions``
        are passed through untouched. For supported files the work happens
        on the copy produced by ``copy_to_secure_location``: ``<SDFIELD>``
        tags are rewritten via ``rename_sdfield_tags`` and the copy is then
        converted in place to XHTML by ``tidy``.

        :param path: path of the file to process.
        :param metadata: returned unchanged alongside the resulting path.
        :returns: ``(path, metadata)`` for unsupported extensions, otherwise
            ``(src_path, metadata)`` where ``src_path`` is the tidied copy.
        """
        # Imported locally so this method does not depend on a file-level
        # import that may not exist.
        import subprocess

        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # NOTE(review): copy_to_secure_location() is used here as if it
        # returned the directory that now holds the copy -- confirm.
        src_path = os.path.join(copy_to_secure_location(path), basename)
        src_dir = os.path.dirname(src_path)
        # NOTE(review): presumably discards the original file/directory now
        # that a copy exists -- confirm against remove_file_dir().
        remove_file_dir(path)

        # Remove <SDFIELD> tags if any. The file is read inside a ``with``
        # block so the handle is closed before it is reopened for writing.
        with open(src_path, 'rb') as fd:
            cleaned_html = rename_sdfield_tags(fd.read().decode('utf-8'))
        with open(src_path, 'wb') as fd:
            fd.write(cleaned_html.encode('utf-8'))

        error_file = os.path.join(src_dir, 'tidy-errors')
        # Pass an argument list (no shell) so that paths containing spaces
        # or shell metacharacters cannot break or hijack the command.
        # tidy's exit status is deliberately ignored, as before.
        subprocess.call([
            'tidy', '-asxhtml', '-clean', '-indent', '-modify', '-utf8',
            '-f', error_file, src_path])
        os.unlink(error_file)
        return src_path, metadata
Exemplo n.º 2
0
    def process(self, path, metadata):
        """Clean up an HTML file with the external ``tidy`` program.

        Files whose extension is not listed in ``self.supported_extensions``
        are passed through untouched. For supported files the work happens
        on the copy produced by ``copy_to_secure_location``: ``<SDFIELD>``
        tags are rewritten via ``rename_sdfield_tags`` and the copy is then
        converted in place to XHTML by ``tidy``.

        :param path: path of the file to process.
        :param metadata: returned unchanged alongside the resulting path.
        :returns: ``(path, metadata)`` for unsupported extensions, otherwise
            ``(src_path, metadata)`` where ``src_path`` is the tidied copy.
        """
        # Imported locally so this method does not depend on a file-level
        # import that may not exist.
        import subprocess

        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # NOTE(review): copy_to_secure_location() is used here as if it
        # returned the directory that now holds the copy -- confirm.
        src_path = os.path.join(
            copy_to_secure_location(path), basename)
        src_dir = os.path.dirname(src_path)
        # NOTE(review): presumably discards the original file/directory now
        # that a copy exists -- confirm against remove_file_dir().
        remove_file_dir(path)

        # Remove <SDFIELD> tags if any. The file is read inside a ``with``
        # block so the handle is closed before it is reopened for writing.
        with open(src_path, 'rb') as fd:
            cleaned_html = rename_sdfield_tags(fd.read().decode('utf-8'))
        with open(src_path, 'wb') as fd:
            fd.write(cleaned_html.encode('utf-8'))

        error_file = os.path.join(src_dir, 'tidy-errors')
        # Pass an argument list (no shell) so that paths containing spaces
        # or shell metacharacters cannot break or hijack the command.
        # tidy's exit status is deliberately ignored, as before.
        subprocess.call([
            'tidy', '-asxhtml', '-clean', '-indent', '-modify', '-utf8',
            '-f', error_file, src_path])
        os.unlink(error_file)
        return src_path, metadata
Exemplo n.º 3
0
 def test_rename_sdfield_tags_nested(self):
     """An <sdfield> wrapping a child element becomes a classed <span>."""
     source = '<p>Blah<sdfield>12<span>b</span></sdfield></p>'
     wanted = '<p>Blah<span class="sdfield">12<span>b</span></span></p>'
     # The inner <span> must come through unchanged.
     assert rename_sdfield_tags(source) == wanted
Exemplo n.º 4
0
 def test_rename_sdfield_tags_empty(self):
     """Markup containing no <sdfield> tag is returned as-is."""
     source = '<p>Blah</p>'
     # Input and expected output are the same string.
     assert rename_sdfield_tags(source) == '<p>Blah</p>'
Exemplo n.º 5
0
 def test_rename_sdfield_tags_uppercase(self):
     """Upper-case <SDFIELD> tags are renamed; their attributes are kept."""
     source = '<P>Blah<SDFIELD TYPE="PAGE">8</SDFIELD></P>'
     wanted = '<P>Blah<span class="sdfield" TYPE="PAGE">8</span></P>'
     # Surrounding tags and the TYPE attribute keep their original case.
     assert rename_sdfield_tags(source) == wanted
Exemplo n.º 6
0
 def test_rename_sdfield_tags(self):
     """A plain <sdfield> with an attribute turns into a classed <span>."""
     source = '<p>Blah<sdfield type="PAGE">8</sdfield></p>'
     wanted = '<p>Blah<span class="sdfield" type="PAGE">8</span></p>'
     # The existing attribute follows the added class attribute.
     assert rename_sdfield_tags(source) == wanted