def testRaisePI():
    """Check that an exception raised by a PI handler reaches the caller."""

    def rejectingHandler(name, arg):
        # Fail for every processing instruction that is encountered.
        raise KeyError(f'unknown PI: {name}')

    markup = '<p>A processing <?jump> instruction.</p>'
    with raises(KeyError):
        parseHTML(markup, piHandler=rejectingHandler)
def testIgnoreXMLSyntax():
    """Check that a PI closed XML-style ('?>') is still handled.

    The expected output contains only the upper-cased argument text,
    so the trailing question mark must not be part of the argument.
    """

    def upperCaser(name, arg):
        assert name == 'jump'
        return arg.upper()

    source = '<p>A processing <?jump lazy fox?> instruction.</p>'
    expected = ('<p xmlns="http://www.w3.org/1999/xhtml">'
                'A processing LAZY FOX instruction.'
                '</p>')

    tree = parseHTML(source, piHandler=upperCaser)
    assert tree.flattenXML() == expected
def testNoArgPI():
    """Check that a PI without arguments reaches the handler with an
    empty argument string and is replaced by the handler's result.
    """

    def lineBreaker(name, arg):
        assert name == 'jump'
        assert arg == ''
        return xhtml.br

    source = '<p>A processing <?jump> instruction.</p>'
    expected = ('<p xmlns="http://www.w3.org/1999/xhtml">'
                'A processing <br/> instruction.'
                '</p>')

    tree = parseHTML(source, piHandler=lineBreaker)
    assert tree.flattenXML() == expected
def testIgnoreXMLDecl():
    """Check parsing of XML declaration.

    The declaration must be dropped from the output without being
    offered to the processing instruction handler.
    """

    def handler(name, arg):
        # Raise explicitly rather than using 'assert False': assert
        # statements are removed when Python runs with -O, which would
        # turn this guard into a handler that silently returns None.
        raise AssertionError(
            f'handler must not be called for XML declaration: {name!r}'
            )

    parsed = parseHTML(
        '<?xml version="1.0" encoding="UTF-8" ?>'
        '<html><body><p>XHTML document.</p></body></html>',
        piHandler=handler)
    assert parsed.flattenXML() == (
        '<html xmlns="http://www.w3.org/1999/xhtml">'
        '<body><p>XHTML document.</p></body>'
        '</html>')
def testArgPI():
    """Check that the argument text of a PI is passed to the handler
    and that the handler's result replaces the instruction.
    """

    def spanWrapper(name, arg):
        assert name == 'jump'
        return xhtml.span[arg]

    source = '<p>A processing <?jump a little higher> instruction.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'A processing <span>a little higher</span> instruction.'
        '</p>')

    tree = parseHTML(source, piHandler=spanWrapper)
    assert tree.flattenXML() == expected
def postProcess(self) -> Optional[XML]:
    """Return the post-processed form of the previously rendered content.

    The result is cached and reused until the table of contents built
    from the child pages changes. Returns None when there is no module,
    when nothing has been rendered yet, or when parsing the rendered
    output fails; a parse failure is logged and flagged in the errors
    of this object.
    """

    # Build the current table of contents and drop the cached result
    # when it differs from the one the cache was built for.
    # ExtractedInfo has no __eq__ of its own, but as those objects are
    # cached, an identity comparison is sufficient here.
    currentTOC = tuple(
        (childName + '/', childPage.extracted)
        for childName, childPage in self.childPages
        )
    if currentTOC != self.__toc:
        self.__toc = currentTOC
        self.__renderedXML = None

    # Serve from the cache when possible.
    cached = self.__renderedXML
    if cached is not None:
        return cached

    # Without a module or rendered text there is nothing to process.
    if self.module is None:
        return None
    rendered = self.__renderedStr
    if rendered is None:
        return None

    # Python-Markdown works on an ElementTree internally, but inline
    # HTML is only put back after that tree has been serialized, so the
    # complete output is never available as a tree. The serialized text
    # therefore has to be parsed again.
    try:
        processed = parseHTML(rendered, piHandler=self.piHandler)
    except Exception:
        logging.exception('Error post-processing content for %s',
                          self.resource.packageName)
        self.errors |= DocErrors.RENDERING
        return None

    self.__renderedXML = processed
    return processed
def testMultiTopLevel():
    """Check that input with more than one top-level element is parsed
    and that each element is kept in the output.
    """
    source = '<h1>Hello!</h1><h1>Goodbye!</h1>'
    expected = (
        '<h1 xmlns="http://www.w3.org/1999/xhtml">Hello!</h1>'
        '<h1 xmlns="http://www.w3.org/1999/xhtml">Goodbye!</h1>')

    tree = parseHTML(source)
    assert tree.flattenXML() == expected
def testBasic():
    """Check that a single simple element is parsed and flattened."""
    source = '<h1>Hello!</h1>'
    expected = (
        '<h1 xmlns="http://www.w3.org/1999/xhtml">Hello!</h1>')

    tree = parseHTML(source)
    assert tree.flattenXML() == expected
def testIgnorePI():
    """Check that a processing instruction is dropped from the output
    when no handler is passed.
    """
    source = '<p>A processing <?jump> instruction.</p>'
    expected = ('<p xmlns="http://www.w3.org/1999/xhtml">'
                'A processing instruction.'
                '</p>')

    tree = parseHTML(source)
    assert tree.flattenXML() == expected
def testVoid():
    """Check that a void element inside text survives parsing."""
    source = '<p>Text with<br/>a void element.</p>'
    expected = ('<p xmlns="http://www.w3.org/1999/xhtml">'
                'Text with<br/>a void element.'
                '</p>')

    tree = parseHTML(source)
    assert tree.flattenXML() == expected
def testNested():
    """Check that an element nested inside another element is kept."""
    source = '<p>Text with <i>nested</i> tags.</p>'
    expected = ('<p xmlns="http://www.w3.org/1999/xhtml">'
                'Text with <i>nested</i> tags.'
                '</p>')

    tree = parseHTML(source)
    assert tree.flattenXML() == expected