def parse(xml: str) -> Document:
    """Parse an XML string and return the resulting ``Document``.

    Line endings are normalised before parsing: every CR LF pair and every
    remaining lone CR is replaced with a single LF. A ``Document`` is then
    built from the normalised text, its ``parse()`` method is invoked, and
    the document is returned.
    """
    line_feed = "\u000a"

    # CR LF pairs have to be collapsed before lone CRs are handled;
    # otherwise each pair would end up as two line feeds.
    normalised = xml.replace("\u000d\u000a", line_feed).replace("\u000d", line_feed)

    result = Document(normalised)
    result.parse()
    return result
def test_with_xmldeclaration(self):
    """A document with an XML declaration exposes its fields and its root.

    Checks the ``version``, ``encoding`` and ``standalone`` attributes of
    the parsed document, and that ``root`` is an ``Element`` named "root".
    """
    source = "<?xml version='1.0' encoding='utf-8' standalone='yes'?> <root></root>"
    parsed = Document(source)
    parsed.parse()

    # Values taken from the XML declaration.
    self.assertEqual("1.0", parsed.version)
    self.assertEqual("utf-8", parsed.encoding)
    self.assertEqual(True, parsed.standalone)

    # Root element.
    self.assertIsInstance(parsed.root, Element)
    self.assertEqual(parsed.root.name, "root")
def test_without_xmldeclaration(self):
    """A document with no XML declaration still yields a root element.

    Runs once for an open/close tag pair and once for a self-closing tag,
    each in its own sub-test.
    """
    cases = (
        ("Open tag", "<root></root>"),
        ("Closed tag", "<root/>"),
    )
    for label, source in cases:
        with self.subTest(label):
            parsed = Document(source)
            parsed.parse()
            self.assertIsInstance(parsed.root, Element)
            self.assertEqual(parsed.root.name, "root")
def test_with_empty_dtd(self):
    """An empty DOCTYPE between the declaration and the root is accepted.

    Checks the declaration fields on the parsed document and that ``root``
    is an ``Element`` named "root".
    """
    # NOTE(review): the literal is reproduced exactly as it appears in the
    # file, on a single line - confirm whether line breaks were intended.
    source = """ <?xml version='1.0' encoding='utf-8' standalone='yes'?> <!DOCTYPE root []> <root></root> """
    parsed = Document(source)
    parsed.parse()

    # Values taken from the XML declaration.
    self.assertEqual("1.0", parsed.version)
    self.assertEqual("utf-8", parsed.encoding)
    self.assertEqual(True, parsed.standalone)

    # Root element.
    self.assertIsInstance(parsed.root, Element)
    self.assertEqual(parsed.root.name, "root")
def test_with_fluff(self):
    """Comments and processing instructions around the root are tolerated.

    The input places comments and processing instructions both before and
    after the root element. Besides the declaration fields and the root
    element, the test expects exactly three entries in
    ``processing_instructions``.
    """
    # NOTE(review): the literal is reproduced exactly as it appears in the
    # file, on a single line - confirm whether line breaks were intended.
    source = """ <?xml version='1.0' encoding='utf-8' standalone='yes'?> <!DOCTYPE root []> <?Target A rogue processing instruction?> <!-- And even a comment! --> <root></root> <!-- Another comment --> <?Target And more PIs?> <?Target?> """
    parsed = Document(source)
    parsed.parse()

    # Values taken from the XML declaration.
    self.assertEqual("1.0", parsed.version)
    self.assertEqual("utf-8", parsed.encoding)
    self.assertEqual(True, parsed.standalone)

    # Root element.
    self.assertIsInstance(parsed.root, Element)
    self.assertEqual(parsed.root.name, "root")

    # One entry per "<?Target ...?>" in the input.
    self.assertEqual(3, len(parsed.processing_instructions))
def test_no_superfluous_characters(self):
    """Stray text outside the root element makes parsing raise ``XMLError``.

    Each case inserts "some text" at a different position in the document
    and runs in its own sub-test.
    """
    cases = (
        (
            "Before xml declaration",
            "some text<?xml version='1.0'?><root></root>",
        ),
        (
            "Between xml declaration and dtd",
            "<?xml version='1.0'?>some text<!DOCTYPE root []><root></root>",
        ),
        (
            "Between dtd and root element",
            "<?xml version='1.0'?><!DOCTYPE root []>some text<root></root>",
        ),
        (
            "After root element",
            "<?xml version='1.0'?><!DOCTYPE root []><root></root>some text",
        ),
    )
    for label, source in cases:
        with self.subTest(label):
            # Construction stays outside assertRaises: only parse() is
            # expected to raise.
            parsed = Document(source)
            with self.assertRaises(XMLError):
                parsed.parse()
def test_only_one_root_element(self):
    """A second top-level element makes parsing raise ``XMLError``."""
    source = "<?xml version='1.0' encoding='utf-8' standalone='yes'?> <root1></root1> <root2></root2>"
    parsed = Document(source)

    # Only parse() is expected to raise, not the constructor.
    with self.assertRaises(XMLError):
        parsed.parse()