示例#1
0
    def test_invalid_decl_1(self):
        # Assigning a public ID that contains 'ä' must raise ValueError.
        info = etree.Element('test').getroottree().docinfo

        def assign_public_id(public_id):
            info.public_id = public_id

        for rejected in (_str('ä'), _str('qwerty ä asdf')):
            self.assertRaises(ValueError, assign_public_id, rejected)
示例#2
0
    def test_parse_encoding_8bit_explicit(self):
        # Latin-1 encoded markup is parsed with an HTMLParser that is told
        # the encoding explicitly; the text must round-trip unchanged.
        text = _str('Søk på nettet')
        html_latin1 = (_str('<p>%s</p>') % text).encode('iso-8859-1')

        tree = self.etree.parse(BytesIO(html_latin1),
                                self.etree.HTMLParser(encoding="iso-8859-1"))
        p = tree.find("//p")
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(p.text, text)
示例#3
0
文件: test_dtd.py 项目: zym1010/lxml
    def test_invalid_decl_1(self):
        # docinfo.public_id must refuse values containing 'ä'.
        root_tree = etree.Element('test').getroottree()
        info = root_tree.docinfo

        def assign(candidate):
            info.public_id = candidate

        rejected_ids = [_str('ä'), _str('qwerty ä asdf')]
        for candidate in rejected_ids:
            self.assertRaises(ValueError, assign, candidate)
示例#4
0
    def test_parse_encoding_8bit_explicit(self):
        # Latin-1 bytes must decode correctly when the HTML parser is given
        # the encoding explicitly.
        expected = _str('Søk på nettet')
        markup = _str('<p>%s</p>') % expected
        source = BytesIO(markup.encode('iso-8859-1'))
        latin1_parser = self.etree.HTMLParser(encoding="iso-8859-1")

        document = self.etree.parse(source, latin1_parser)
        paragraph = document.find("//p")
        self.assertEqual(paragraph.text, expected)
示例#5
0
    def test_uniname(self):
        # Element() must raise ValueError for each of the tag names below.
        make_element = etree.Element

        def build(tag):
            return make_element(tag)

        for rejected_tag in (':', '0a', _str('\u203f')):
            self.assertRaises(ValueError, build, rejected_tag)
        # should not raise
        build(_str('\u0132'))
示例#6
0
 def test_etree_parse_io_error(self):
     # Parsing a missing file must raise IOError; the second directory
     # name contains characters beyond latin-1.
     prefixes = (_str('Directory'), _str('Каталог'))
     missing = _str('nosuchfile.xml')
     for prefix in prefixes:
         tmpdir = tempfile.mkdtemp(prefix=prefix)
         try:
             self.assertRaises(
                 IOError, self.etree.parse, os.path.join(tmpdir, missing))
         finally:
             # the directory stays empty, so rmdir is sufficient
             os.rmdir(tmpdir)
示例#7
0
 def test_etree_parse_io_error(self):
     # Checks that etree.parse() raises IOError for a file that does not
     # exist, once below an ASCII-named directory and once below a
     # directory whose name contains characters beyond latin-1.
     ascii_prefix = _str('Directory')
     cyrillic_prefix = _str('Каталог')
     target = _str('nosuchfile.xml')

     def check_missing_file(prefix):
         workdir = tempfile.mkdtemp(prefix=prefix)
         try:
             path = os.path.join(workdir, target)
             self.assertRaises(IOError, self.etree.parse, path)
         finally:
             os.rmdir(workdir)

     check_missing_file(ascii_prefix)
     check_missing_file(cyrillic_prefix)
示例#8
0
    def test_parse_encoding_8bit_override(self):
        # The document is encoded as Latin-1 although its <meta> header
        # announces UTF-8.
        text = _str('Søk på nettet')
        wrong_head = _str('''
        <head>
          <meta http-equiv="Content-Type"
                content="text/html; charset=UTF-8" />
        </head>''')
        html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') %
                       (wrong_head, text)).encode('iso-8859-1')

        # Parsing without an explicit parser is expected to fail ...
        self.assertRaises(self.etree.ParseError, self.etree.parse,
                          BytesIO(html_latin1))

        # ... while an explicit parser encoding must yield the right text.
        tree = self.etree.parse(BytesIO(html_latin1),
                                self.etree.HTMLParser(encoding="iso-8859-1"))
        p = tree.find("//p")
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(p.text, text)
示例#9
0
    def test_unicode_text(self):
        # Assigning the values below to Element.text must raise ValueError,
        # for unicode strings as well as for byte strings.
        elem = etree.Element('e')

        def settext(value):
            elem.text = value

        # NOTE(review): the second literal contains '\f' (form feed) followed
        # by 'fff'; it may have been meant as '\uffff' -- confirm before
        # changing it.
        rejected_unicode = (
            _str('ab\ufffe'),
            _str('ö\ffff'),
            _str('\u0123\ud800'),
            _str('x\ud8ff'),
            _str('\U00010000\udfff'),
            _str('abd\x00def'),
        )
        for value in rejected_unicode:
            self.assertRaises(ValueError, settext, value)
        # should not raise
        settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas'))

        # every code point from 0xD800 through 0xDFFF, in any position
        for code_point in range(0xD800, 0xDFFF+1):
            surrogate = _chr(code_point)
            for value in ('abc' + surrogate, surrogate, surrogate + 'abc'):
                self.assertRaises(ValueError, settext, value)

        rejected_bytes = (
            _bytes('\xe4'),
            _bytes('\x80'),
            _bytes('\xff'),
            _bytes('\x08'),
            _bytes('\x19'),
            _bytes('\x20\x00'),
        )
        for value in rejected_bytes:
            self.assertRaises(ValueError, settext, value)
        # should not raise
        settext(_bytes('\x09\x0A\x0D\x20\x60\x7f'))
示例#10
0
    def test_unicode_text(self):
        # Element.text must reject the listed unicode and byte strings with
        # ValueError and accept the two "should not raise" values.
        target = etree.Element('e')

        def settext(content):
            target.text = content

        def expect_rejected(content):
            self.assertRaises(ValueError, settext, content)

        expect_rejected(_str('ab\ufffe'))
        # NOTE(review): '\f' here is a form feed followed by 'fff'; possibly
        # intended as '\uffff' -- confirm before changing it.
        expect_rejected(_str('ö\ffff'))
        expect_rejected(_str('\u0123\ud800'))
        expect_rejected(_str('x\ud8ff'))
        expect_rejected(_str('\U00010000\udfff'))
        expect_rejected(_str('abd\x00def'))
        # should not raise
        settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas'))

        first_code_point, last_code_point = 0xD800, 0xDFFF
        for char_val in range(first_code_point, last_code_point + 1):
            expect_rejected('abc' + _chr(char_val))
            expect_rejected(_chr(char_val))
            expect_rejected(_chr(char_val) + 'abc')

        expect_rejected(_bytes('\xe4'))
        expect_rejected(_bytes('\x80'))
        expect_rejected(_bytes('\xff'))
        expect_rejected(_bytes('\x08'))
        expect_rejected(_bytes('\x19'))
        expect_rejected(_bytes('\x20\x00'))
        # should not raise
        settext(_bytes('\x09\x0A\x0D\x20\x60\x7f'))
示例#11
0
    def test_parse_encoding_8bit_override(self):
        # Latin-1 content whose <meta> header announces UTF-8: parsing
        # without a parser must fail, an explicit parser encoding must work.
        expected = _str('Søk på nettet')
        misleading_head = _str('''
        <head>
          <meta http-equiv="Content-Type"
                content="text/html; charset=UTF-8" />
        </head>''')
        template = _str('<html>%s<body><p>%s</p></body></html>')
        encoded = (template % (misleading_head, expected)).encode('iso-8859-1')

        self.assertRaises(
            self.etree.ParseError, self.etree.parse, BytesIO(encoded))

        source = BytesIO(encoded)
        latin1_parser = self.etree.HTMLParser(encoding="iso-8859-1")
        document = self.etree.parse(source, latin1_parser)
        paragraph = document.find("//p")
        self.assertEqual(paragraph.text, expected)
示例#12
0
文件: test_io.py 项目: saschpe/lxml
 def test_parse_utf8_bom(self):
     # A file starting with a UTF-8 byte order mark must parse by name.
     import os  # local import: only needed for the explicit unlink below

     utext = _str('Søk på nettet')
     uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
     bom = _bytes('\\xEF\\xBB\\xBF').decode(
         "unicode_escape").encode("latin1")
     self.assertEqual(3, len(bom))
     # The file is closed before it is parsed by name: a NamedTemporaryFile
     # that is still open cannot be opened a second time on Windows.
     # delete=False keeps it on disk after close(); it is removed below.
     f = tempfile.NamedTemporaryFile(delete=False)
     try:
         try:
             f.write(bom)
             f.write(uxml.encode("utf-8"))
         finally:
             f.close()
         tree = self.etree.parse(f.name)
     finally:
         os.unlink(f.name)
     self.assertEqual(utext, tree.getroot().text)
示例#13
0
文件: test_io.py 项目: saschpe/lxml
 def test_iterparse_utf8_bom(self):
     # iterparse() must handle a file that starts with a UTF-8 byte order
     # mark.
     import os  # local import: only needed for the explicit unlink below

     utext = _str('Søk på nettet')
     uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
     bom = _bytes('\\xEF\\xBB\\xBF').decode(
         "unicode_escape").encode("latin1")
     self.assertEqual(3, len(bom))
     # The file is closed before it is parsed by name: a NamedTemporaryFile
     # that is still open cannot be opened a second time on Windows.
     # delete=False keeps it on disk after close(); it is removed below.
     f = tempfile.NamedTemporaryFile(delete=False)
     try:
         try:
             f.write(bom)
             f.write(uxml.encode("utf-8"))
         finally:
             f.close()
         elements = [el for _, el in self.etree.iterparse(f.name)]
         self.assertEqual(1, len(elements))
         root = elements[0]
     finally:
         os.unlink(f.name)
     self.assertEqual(utext, root.text)
示例#14
0
 def test_parse_utf8_bom(self):
     # A document preceded by a UTF-8 byte order mark is written to a
     # temporary file and must parse by file name.
     expected = _str('Søk på nettet')
     document = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % expected
     escaped_bom = _bytes('\\xEF\\xBB\\xBF')
     bom = escaped_bom.decode("unicode_escape").encode("latin1")
     self.assertEqual(3, len(bom))
     tmp = tempfile.NamedTemporaryFile(delete=False)
     try:
         try:
             tmp.write(bom)
             tmp.write(document.encode("utf-8"))
         finally:
             # close before parsing by name
             tmp.close()
         parsed = self.etree.parse(tmp.name)
     finally:
         # delete=False: the file has to be removed explicitly
         os.unlink(tmp.name)
     root = parsed.getroot()
     self.assertEqual(expected, root.text)
示例#15
0
 def test_iterparse_utf8_bom(self):
     # iterparse() on a file that starts with a UTF-8 byte order mark must
     # yield exactly one element carrying the original text.
     expected = _str('Søk på nettet')
     document = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % expected
     escaped_bom = _bytes('\\xEF\\xBB\\xBF')
     bom = escaped_bom.decode("unicode_escape").encode("latin1")
     self.assertEqual(3, len(bom))
     tmp = NamedTemporaryFile(delete=False)
     try:
         try:
             tmp.write(bom)
             tmp.write(document.encode("utf-8"))
         finally:
             tmp.close()
         parsed = []
         for _, element in self.etree.iterparse(tmp.name):
             parsed.append(element)
         self.assertEqual(1, len(parsed))
         root = parsed[0]
     finally:
         # delete=False: the file has to be removed explicitly
         os.unlink(tmp.name)
     self.assertEqual(expected, root.text)
示例#16
0
文件: test_io.py 项目: saschpe/lxml
    def test_iterparse_utf16_bom(self):
        # iterparse() must handle a UTF-16 encoded file that starts with a
        # byte order mark.
        import os  # local import: only needed for the explicit unlink below

        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
        boms = _bytes('\\xFE\\xFF \\xFF\\xFE').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(5, len(boms))
        xml = uxml.encode("utf-16")
        # the encoded document must start with one of the two BOMs
        self.assertTrue(xml[:2] in boms, repr(xml[:2]))

        # The file is closed before it is parsed by name: a
        # NamedTemporaryFile that is still open cannot be opened a second
        # time on Windows.  delete=False keeps it on disk after close().
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(xml)
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)
示例#17
0
    def test_iterparse_utf16_bom(self):
        # iterparse() on a UTF-16 file with a byte order mark must yield
        # exactly one element carrying the original text.
        expected = _str('Søk på nettet')
        document = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % expected
        escaped_boms = _bytes('\\xFE\\xFF \\xFF\\xFE')
        boms = escaped_boms.decode("unicode_escape").encode("latin1")
        self.assertEqual(5, len(boms))
        encoded = document.encode("utf-16")
        prefix = encoded[:2]
        self.assertTrue(prefix in boms, repr(prefix))

        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                tmp.write(encoded)
            finally:
                tmp.close()
            parsed = []
            for _, element in self.etree.iterparse(tmp.name):
                parsed.append(element)
            self.assertEqual(1, len(parsed))
            root = parsed[0]
        finally:
            # delete=False: the file has to be removed explicitly
            os.unlink(tmp.name)
        self.assertEqual(expected, root.text)
示例#18
0
    def test_write_compressed_text(self):
        # A tree written with method='text' and compression=9 must gunzip
        # to the concatenated text content.
        etree_mod = self.etree
        text = _str("qwrtioüöä")

        root = etree_mod.Element('root')
        root.text = text
        first = etree_mod.SubElement(root, 'sub')
        first.text = 'TEXT'
        first.tail = 'TAIL'
        second = etree_mod.SubElement(root, 'sub')
        second.text = text

        buf = BytesIO()
        etree_mod.ElementTree(root).write(
            buf, method='text', encoding='utf8', compression=9)
        buf.seek(0)

        gz = gzip.GzipFile(fileobj=buf)
        try:
            raw = gz.read()
        finally:
            gz.close()
        decoded = raw.decode('utf8')
        self.assertEqual(text+'TEXTTAIL'+text, decoded)
示例#19
0
    def test_tree_io_latin1(self):
        # Round trip: serialise as ISO-8859-1, parse the result, serialise
        # again and check that both files are byte-identical.
        Element = self.etree.Element
        ElementTree = self.etree.ElementTree

        element = Element('top')
        element.text = _str("qwrtioüöäßá")
        tree = ElementTree(element)
        self.buildNodes(element, 10, 3)
        # "with" closes each handle even when writing or parsing raises,
        # which the previous open()/close() pairs did not guarantee.
        with open(self.getTestFilePath('testdump.xml'), 'wb') as f:
            tree.write(f, encoding='iso-8859-1')
        with open(self.getTestFilePath('testdump.xml'), 'rb') as f:
            tree = ElementTree(file=f)
        with open(self.getTestFilePath('testdump2.xml'), 'wb') as f:
            tree.write(f, encoding='iso-8859-1')
        with open(self.getTestFilePath('testdump.xml'), 'rb') as f:
            data1 = f.read()
        with open(self.getTestFilePath('testdump2.xml'), 'rb') as f:
            data2 = f.read()
        self.assertEqual(data1, data2)
示例#20
0
    def test_write_compressed_text(self):
        # Writes a small tree as gzip-compressed text and verifies the
        # decompressed output.
        make_element = self.etree.Element
        make_child = self.etree.SubElement
        content = _str("qwrtioüöä")

        top = make_element('root')
        top.text = content
        middle = make_child(top, 'sub')
        middle.text, middle.tail = 'TEXT', 'TAIL'
        make_child(top, 'sub').text = content

        document = self.etree.ElementTree(top)
        stream = BytesIO()
        document.write(stream, method='text', encoding='utf8', compression=9)
        stream.seek(0)

        unzipped = gzip.GzipFile(fileobj=stream)
        try:
            payload = unzipped.read()
        finally:
            unzipped.close()
        expected = content + 'TEXTTAIL' + content
        self.assertEqual(expected, payload.decode('utf8'))
示例#21
0
    def test_tree_io_latin1(self):
        # Round trip: serialise as ISO-8859-1, parse the result, serialise
        # again and check that both files are byte-identical.
        Element = self.etree.Element
        ElementTree = self.etree.ElementTree

        element = Element('top')
        element.text = _str("qwrtioüöäßá")
        tree = ElementTree(element)
        self.buildNodes(element, 10, 3)
        # "with" closes each handle even when writing or parsing raises,
        # which the previous open()/close() pairs did not guarantee.
        with open(self.getTestFilePath('testdump.xml'), 'wb') as f:
            tree.write(f, encoding='iso-8859-1')
        with open(self.getTestFilePath('testdump.xml'), 'rb') as f:
            tree = ElementTree(file=f)
        with open(self.getTestFilePath('testdump2.xml'), 'wb') as f:
            tree.write(f, encoding='iso-8859-1')
        with open(self.getTestFilePath('testdump.xml'), 'rb') as f:
            data1 = f.read()
        with open(self.getTestFilePath('testdump2.xml'), 'rb') as f:
            data2 = f.read()
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(data1, data2)
示例#22
0
 def test_unicode_nstag_invalid(self):
     # sadly, Klingon is not well-formed: a namespaced tag whose local
     # name is invalid_tag must be rejected
     template = _str("{http://abc/}%s")
     rejected_tag = template % invalid_tag
     self.assertRaises(
         ValueError, etree.Element, rejected_tag)
示例#23
0
 def test_unicode_ns_invalid(self):
     # namespace URIs must conform to RFC 3986, so embedding the
     # non-ASCII sample text in the URI has to be rejected
     template = _str("{http://%s/}abc")
     rejected_tag = template % uni
     self.assertRaises(
         ValueError, etree.Element, rejected_tag)
示例#24
0
 def test_unicode_nstag(self):
     # A namespaced tag with a non-ASCII local name must round-trip
     # through Element.tag unchanged.
     tag = _str("{http://abc/}%s") % uni
     el = etree.Element(tag)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(tag, el.tag)
示例#25
0
 def test_unicode_repr4(self):
     # repr() of an Entity with a non-ASCII name must not raise
     entity = etree.Entity(_str('ä'))
     repr(entity)
示例#26
0
 def test_unicode_nstag(self):
     # A tag whose namespace and local name are both the non-ASCII sample
     # text must round-trip through Element.tag unchanged.
     tag = _str("{%s}%s") % (uni, uni)
     el = etree.Element(tag)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(tag, el.tag)
示例#27
0
class HtmlParserTestCase(HelperTestCase):
    """HTML parser test cases
    """
    etree = etree

    # Well-formed reference document and the output expected for it when
    # serialised with pretty_print=True.
    html_str = _bytes(
        "<html><head><title>test</title></head><body><h1>page title</h1></body></html>"
    )
    html_str_pretty = _bytes("""\
<html>
<head><title>test</title></head>
<body><h1>page title</h1></body>
</html>
""")
    # Malformed variant; the tests below expect it to serialise to html_str
    # when parsed by the (recovering) HTML parser.
    broken_html_str = _bytes(
        "<html><head><title>test<body><h1>page title</h3></p></html>")
    uhtml_str = _str(
        "<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>"
    )

    def tearDown(self):
        super(HtmlParserTestCase, self).tearDown()
        # undo any set_default_parser() call made by a test
        self.etree.set_default_parser()

    def test_module_HTML(self):
        element = self.etree.HTML(self.html_str)
        self.assertEqual(self.etree.tostring(element, method="html"),
                         self.html_str)

    def test_module_HTML_unicode(self):
        element = self.etree.HTML(self.uhtml_str)
        self.assertEqual(
            unicode(
                self.etree.tostring(element, method="html", encoding='UTF8'),
                'UTF8'), unicode(self.uhtml_str.encode('UTF8'), 'UTF8'))

    def test_module_HTML_pretty_print(self):
        element = self.etree.HTML(self.html_str)
        self.assertEqual(
            self.etree.tostring(element, method="html", pretty_print=True),
            self.html_str_pretty)

    def test_module_parse_html_error(self):
        parser = self.etree.HTMLParser(recover=False)
        parse = self.etree.parse
        f = BytesIO("<html></body>")
        self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser)

    def test_html_element_name_empty(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement

        el = Element('name')
        self.assertRaises(ValueError, Element, '{}')
        self.assertRaises(ValueError, setattr, el, 'tag', '{}')

        self.assertRaises(ValueError, Element, '{test}')
        self.assertRaises(ValueError, setattr, el, 'tag', '{test}')

    def test_html_element_name_colon(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement

        pname = Element('p:name')
        self.assertEqual(pname.tag, 'p:name')

        pname = Element('{test}p:name')
        self.assertEqual(pname.tag, '{test}p:name')

        pname = Element('name')
        pname.tag = 'p:name'
        self.assertEqual(pname.tag, 'p:name')

    def test_html_element_name_quote(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement

        self.assertRaises(ValueError, Element, 'p"name')
        self.assertRaises(ValueError, Element, "na'me")
        self.assertRaises(ValueError, Element, '{test}"name')
        self.assertRaises(ValueError, Element, "{test}name'")

        el = Element('name')
        self.assertRaises(ValueError, setattr, el, 'tag', "pname'")
        self.assertRaises(ValueError, setattr, el, 'tag', '"pname')
        # the rejected assignments must leave the tag untouched
        self.assertEqual(el.tag, "name")

    def test_html_element_name_space(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement

        self.assertRaises(ValueError, Element, ' name ')
        self.assertRaises(ValueError, Element, 'na me')
        self.assertRaises(ValueError, Element, '{test} name')

        el = Element('name')
        self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
        # the rejected assignment must leave the tag untouched
        self.assertEqual(el.tag, "name")

    def test_html_subelement_name_empty(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement

        SubElement = self.etree.SubElement

        el = Element('name')
        self.assertRaises(ValueError, SubElement, el, '{}')
        self.assertRaises(ValueError, SubElement, el, '{test}')

    def test_html_subelement_name_colon(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement
        SubElement = self.etree.SubElement

        el = Element('name')
        pname = SubElement(el, 'p:name')
        self.assertEqual(pname.tag, 'p:name')

        pname = SubElement(el, '{test}p:name')
        self.assertEqual(pname.tag, '{test}p:name')

    def test_html_subelement_name_quote(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement
        SubElement = self.etree.SubElement

        el = Element('name')
        self.assertRaises(ValueError, SubElement, el, "name'")
        self.assertRaises(ValueError, SubElement, el, 'na"me')
        self.assertRaises(ValueError, SubElement, el, "{test}na'me")
        self.assertRaises(ValueError, SubElement, el, '{test}"name')

    def test_html_subelement_name_space(self):
        parser = self.etree.HTMLParser()
        Element = parser.makeelement
        SubElement = self.etree.SubElement

        el = Element('name')
        self.assertRaises(ValueError, SubElement, el, ' name ')
        self.assertRaises(ValueError, SubElement, el, 'na me')
        self.assertRaises(ValueError, SubElement, el, '{test} name')

    def test_module_parse_html_norecover(self):
        parser = self.etree.HTMLParser(recover=False)
        parse = self.etree.parse
        f = BytesIO(self.broken_html_str)
        self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser)

    def test_parse_encoding_8bit_explicit(self):
        text = _str('Søk på nettet')
        html_latin1 = (_str('<p>%s</p>') % text).encode('iso-8859-1')

        tree = self.etree.parse(BytesIO(html_latin1),
                                self.etree.HTMLParser(encoding="iso-8859-1"))
        p = tree.find("//p")
        self.assertEqual(p.text, text)

    def test_parse_encoding_8bit_override(self):
        # The document is encoded as Latin-1 although its <meta> header
        # announces UTF-8.
        text = _str('Søk på nettet')
        wrong_head = _str('''
        <head>
          <meta http-equiv="Content-Type"
                content="text/html; charset=UTF-8" />
        </head>''')
        html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') %
                       (wrong_head, text)).encode('iso-8859-1')

        self.assertRaises(self.etree.ParseError, self.etree.parse,
                          BytesIO(html_latin1))

        tree = self.etree.parse(BytesIO(html_latin1),
                                self.etree.HTMLParser(encoding="iso-8859-1"))
        p = tree.find("//p")
        self.assertEqual(p.text, text)

    def test_module_HTML_broken(self):
        element = self.etree.HTML(self.broken_html_str)
        self.assertEqual(self.etree.tostring(element, method="html"),
                         self.html_str)

    def test_module_HTML_cdata(self):
        # by default, libxml2 generates CDATA nodes for <script> content
        html = _bytes('<html><head><style>foo</style></head></html>')
        element = self.etree.HTML(html)
        self.assertEqual(element[0][0].text, "foo")

    def test_module_HTML_access(self):
        element = self.etree.HTML(self.html_str)
        self.assertEqual(element[0][0].tag, 'title')

    def test_module_parse_html(self):
        parser = self.etree.HTMLParser()
        # mkstemp() creates the file itself, avoiding the race that the
        # deprecated mktemp() leaves between picking a name and creating it.
        fd, filename = tempfile.mkstemp(suffix=".html")
        os.close(fd)
        try:
            write_to_file(filename, self.html_str, 'wb')
            # "with" closes the handle even when parsing raises
            with open(filename, 'rb') as f:
                tree = self.etree.parse(f, parser)
            self.assertEqual(
                self.etree.tostring(tree.getroot(), method="html"),
                self.html_str)
        finally:
            os.remove(filename)

    def test_module_parse_html_filelike(self):
        parser = self.etree.HTMLParser()
        f = SillyFileLike(self.html_str)
        tree = self.etree.parse(f, parser)
        html = self.etree.tostring(tree.getroot(),
                                   method="html",
                                   encoding='UTF-8')
        self.assertEqual(html, self.html_str)


##     def test_module_parse_html_filelike_unicode(self):
##         parser = self.etree.HTMLParser()
##         f = SillyFileLike(self.uhtml_str)
##         tree = self.etree.parse(f, parser)
##         html = self.etree.tostring(tree.getroot(), encoding='UTF-8')
##         self.assertEqual(unicode(html, 'UTF-8'), self.uhtml_str)

    def test_html_file_error(self):
        parser = self.etree.HTMLParser()
        parse = self.etree.parse
        self.assertRaises(IOError, parse,
                          "__some_hopefully_nonexisting_file__.html", parser)

    def test_default_parser_HTML_broken(self):
        # broken HTML is rejected while the default parser is in effect
        self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse,
                          BytesIO(self.broken_html_str))

        self.etree.set_default_parser(self.etree.HTMLParser())

        tree = self.etree.parse(BytesIO(self.broken_html_str))
        self.assertEqual(self.etree.tostring(tree.getroot(), method="html"),
                         self.html_str)

        # after resetting the default parser the input is rejected again
        self.etree.set_default_parser()

        self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse,
                          BytesIO(self.broken_html_str))

    def test_html_iterparse(self):
        iterparse = self.etree.iterparse
        f = BytesIO(
            '<html><head><title>TITLE</title><body><p>P</p></body></html>')

        iterator = iterparse(f, html=True)
        # no root is available before iteration starts
        self.assertEqual(None, iterator.root)

        events = list(iterator)
        root = iterator.root
        self.assertTrue(root is not None)
        self.assertEqual([('end', root[0][0]), ('end', root[0]),
                          ('end', root[1][0]), ('end', root[1]),
                          ('end', root)], events)

    def test_html_iterparse_file(self):
        iterparse = self.etree.iterparse
        iterator = iterparse(fileInTestDir("css_shakespear.html"), html=True)

        self.assertEqual(None, iterator.root)
        events = list(iterator)
        root = iterator.root
        self.assertTrue(root is not None)
        self.assertEqual(249, len(events))
        # only 'end' events are expected
        self.assertEqual(
            [], [event for (event, element) in events if event != 'end'])

    def test_html_iterparse_start(self):
        iterparse = self.etree.iterparse
        f = BytesIO(
            '<html><head><title>TITLE</title><body><p>P</p></body></html>')

        iterator = iterparse(f, html=True, events=('start', ))
        self.assertEqual(None, iterator.root)

        events = list(iterator)
        root = iterator.root
        self.assertTrue(root is not None)
        self.assertEqual([('start', root), ('start', root[0]),
                          ('start', root[0][0]), ('start', root[1]),
                          ('start', root[1][0])], events)
示例#28
0
 def test_unicode_repr1(self):
     # repr() of an element with a non-ASCII tag
     # must not raise UnicodeEncodeError
     element = etree.Element(_str('å'))
     repr(element)
示例#29
0
 def test_unicode_xml_declared_unknown_fails(self):
     # A unicode document declaring encoding="unknown" must be rejected
     # with XMLSyntaxError.
     declaration = _str('<?xml version="1.0" encoding="unknown"?>')
     body = _str('<p>%s</p>') % uni
     document = declaration + body
     self.assertRaises(etree.XMLSyntaxError, etree.XML, document)
示例#30
0
 def test_unicode_repr2(self):
     # repr() of a Comment with non-ASCII text must not raise
     comment = etree.Comment(_str('ö'))
     repr(comment)
示例#31
0
 def test_unicode_xml(self):
     # Hands a <p> element wrapping the non-ASCII sample text to the
     # shared _assert_unicode check.
     markup = _str('<p>%s</p>') % uni
     self._assert_unicode(markup)
示例#32
0
 def test_unicode_repr2(self):
     # Building the repr of a non-ASCII comment must succeed.
     text = _str('ö')
     node = etree.Comment(text)
     repr(node)
示例#33
0
 def test_unicode_repr1(self):
     tag = _str('å')
     node = etree.Element(tag)
     # must not raise UnicodeEncodeError
     repr(node)
示例#34
0
 def test_unicode_repr3(self):
     # repr() of a processing instruction with non-ASCII target and text
     # must not raise
     target = _str('Å')
     content = _str('\u0131')
     instruction = etree.ProcessingInstruction(target, content)
     repr(instruction)
示例#35
0
 def test_unicode_qname(self):
     # A QName built from non-ASCII namespace and name must expose the
     # "{ns}name" form both as .text and through unicode().
     qname = etree.QName(uni, uni)
     tag = _str("{%s}%s") % (uni, uni)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(qname.text, tag)
     self.assertEqual(unicode(qname), tag)
示例#36
0
 def test_unicode_parse_stringio(self):
     # Parsing unicode markup from a StringIO must preserve the text.
     el = etree.parse(StringIO(_str('<p>%s</p>') % uni)).getroot()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(uni, el.text)
示例#37
0
# -*- coding: utf-8 -*-
import unittest, doctest, sys, os.path

# Make the directory of this module importable so that the
# ``common_imports`` import below can be resolved.
this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
    sys.path.insert(0, this_dir) # needed for Py3

from common_imports import StringIO, etree, SillyFileLike, HelperTestCase
from common_imports import _str, _bytes

# Alias ``unicode`` to ``str`` when the ``unicode`` builtin does not exist.
try:
    unicode
except NameError:
    unicode = str

ascii_uni = _str('a')

klingon = _bytes("\\uF8D2").decode("unicode_escape") # not valid for XML names

# 'test' followed by the klingon character defined above.
invalid_tag = _str("test") + klingon

uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape") # some non-ASCII characters

# Sample document whose title and heading contain escaped non-ASCII
# characters, decoded the same way as the constants above.
uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>"
              ).decode("unicode_escape")

class UnicodeTestCase(HelperTestCase):
    def test_unicode_xml(self):
        # Non-ASCII text must survive parsing from a unicode string.
        tree = etree.XML(_str('<p>%s</p>') % uni)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(uni, tree.text)
示例#38
0
 def test__str(self):
     # test the testing framework, namely _str from common_imports:
     # different escape spellings of one character must compare equal
     equivalent_spellings = (
         ('\x10', '\u0010'),
         ('\x10', '\U00000010'),
         ('\u1234', '\U00001234'),
     )
     for first, second in equivalent_spellings:
         self.assertEqual(_str(first), _str(second))
示例#39
0
 def test_unicode_xml(self):
     # Non-ASCII text must survive parsing from a unicode string.
     tree = etree.XML(_str('<p>%s</p>') % uni)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(uni, tree.text)
示例#40
0
 def test_unicode_repr4(self):
     # Building the repr of an entity with a non-ASCII name must succeed.
     name = _str('ä')
     node = etree.Entity(name)
     repr(node)
示例#41
0
 def test_unicode_xml_broken(self):
     # A unicode string that carries an encoding declaration must be
     # rejected by etree.XML() with ValueError.
     declaration = _str('<?xml version="1.0" encoding="UTF-8"?>')
     body = _str('<p>%s</p>') % uni
     document = declaration + body
     self.assertRaises(ValueError, etree.XML, document)
示例#42
0
 def test_unicode_xml_declared_latin1_works(self):
     # Hands a document with a latin1 encoding declaration to the shared
     # _assert_unicode check.
     declaration = _str('<?xml version="1.0" encoding="latin1"?>')
     body = _str('<p>%s</p>') % uni
     self._assert_unicode(declaration + body)
示例#43
0
 def test_unicode_repr3(self):
     # Building the repr of a non-ASCII processing instruction must succeed.
     node = etree.ProcessingInstruction(
         _str('Å'),
         _str('\u0131'),
     )
     repr(node)
示例#44
0
 def test__str(self):
     # test the testing framework, namely _str from common_imports
     def same(left, right):
         # both spellings must produce the same string
         self.assertEqual(_str(left), _str(right))

     same('\x10', '\u0010')
     same('\x10', '\U00000010')
     same('\u1234', '\U00001234')
示例#45
0
 def test_unicode_nstag(self):
     # A tag whose namespace and local name are both the non-ASCII sample
     # text must round-trip through Element.tag unchanged.
     tag = _str("{%s}%s") % (uni, uni)
     el = etree.Element(tag)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(tag, el.tag)