def xhtmlify(html, *args, **kwargs):
    """Call the real ``_xhtmlify`` and sanity-check its output.

    The wrapper asserts three things about the result:

    * it is text (``six.text_type``) exactly when ``html`` is text;
    * once ``<!...>`` declarations are stripped, ``xmlparse`` accepts it;
    * it is idempotent, i.e. feeding the output back through ``_xhtmlify``
      with the same arguments returns it unchanged.

    Args:
        html: The markup to convert, as text or as a byte string.
        *args: Passed through to ``_xhtmlify`` unchanged.
        **kwargs: Passed through to ``_xhtmlify`` unchanged, except for the
            optional ``_wrap`` key, which is removed and handed to
            ``xmlparse`` as ``wrap`` instead.

    Returns:
        The output of ``_xhtmlify(html, *args, **kwargs)``.

    Raises:
        AssertionError: If any of the checks above fails. For a parse
            failure the message is ``(stripped_xhtml, error_text)``; for an
            idempotence failure it is the offending output.
    """
    # ``_wrap`` is consumed here so that it never reaches ``_xhtmlify``.
    _wrap = kwargs.pop("_wrap", None)
    unicode_input = isinstance(html, six.text_type)

    xhtml = _xhtmlify(html, *args, **kwargs)
    assert isinstance(xhtml, six.text_type) == unicode_input

    # Pattern AND replacement must have the same string type as ``xhtml``:
    # on Python 3 ``re.sub`` refuses to mix bytes and str, so a native ""
    # replacement would raise TypeError for byte-string input.
    regex_type = six.u if unicode_input else six.b
    stripped_xhtml = None
    try:
        # ET can't handle <!...>  (the lookahead leaves "<![" sections alone)
        stripped_xhtml = re.sub(
            regex_type(r"(?s)<!(?!\[).*?>"), regex_type(""), xhtml
        )
        xmlparse(stripped_xhtml, wrap=_wrap)
    except Exception as e:
        # Report what was parsed together with the parser's complaint.
        assert False, (stripped_xhtml, str(e))

    assert xhtml == _xhtmlify(xhtml, *args, **kwargs), xhtml
    return xhtml
def test_complex_doctype():
    """A document whose DOCTYPE has an internal subset must pass unchanged.

    The fixture has an XML declaration and a DOCTYPE with an ``<!ATTLIST>``
    inside ``[...]``; ``_xhtmlify`` is expected to return it verbatim.
    """
    # Adjacent literals are joined at compile time; each piece except the
    # last ends with the single space that separates it from the next.
    document = (
        r'<?xml version="1.0" encoding="UTF-8"?> '
        r'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
        r'"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" [ '
        r'<!ATTLIST form autocomplete CDATA #IMPLIED> '
        r']> '
        r'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> '
        r'<head><title /></head><body/></html>'
    )
    converted = _xhtmlify(document)
    assert converted == document