Пример #1
0
    def test_default_parser_HTML_broken(self):
        """Check that installing an HTML default parser changes ``parse()``.

        The broken HTML fixture must be rejected while the stock default
        parser is active, parse and serialise to ``self.html_str`` once an
        ``HTMLParser`` is installed as the default, and be rejected again
        after the default parser has been reset.
        """
        etree = self.etree
        self.assertRaises(etree.XMLSyntaxError, etree.parse,
                          BytesIO(self.broken_html_str))

        etree.set_default_parser(etree.HTMLParser())
        try:
            tree = etree.parse(BytesIO(self.broken_html_str))
            self.assertEqual(etree.tostring(tree.getroot(), method="html"),
                             self.html_str)
        finally:
            # Always restore the stock default parser: if the check above
            # fails, the HTML parser must not leak into unrelated tests.
            etree.set_default_parser()

        self.assertRaises(etree.XMLSyntaxError, etree.parse,
                          BytesIO(self.broken_html_str))
 def test_void_elements(self):
     """Check that each listed void tag is written as a bare start tag.

     ``self._file`` is replaced with a fresh buffer after every tag.
     """
     void_tags = ("area", "base", "br", "col", "hr", "img", "input", "link",
                  "meta", "param")
     for tag_name in void_tags:
         with etree.htmlfile(self._file) as html_out:
             html_out.write(etree.Element(tag_name))
         self.assertXml('<%s>' % tag_name)
         self._file = BytesIO()
Пример #3
0
    def test_xslt_encoding_override(self):
        tree = self.parse(_bytes('<a><b>\\uF8D2</b><c>\\uF8D2</c></a>'
                                 ).decode("unicode_escape"))
        style = self.parse('''\
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output encoding="UTF-8"/>
  <xsl:template match="/">
    <foo><xsl:value-of select="/a/b/text()" /></foo>
  </xsl:template>
</xsl:stylesheet>''')

        st = etree.XSLT(style)
        res = st(tree)
        expected = _bytes("""\
<?xml version='1.0' encoding='UTF-16'?>\
<foo>\\uF8D2</foo>""").decode("unicode_escape")

        f = BytesIO()
        res.write(f, encoding='UTF-16')
        if is_python3:
            result = str(f.getvalue(), 'UTF-16').replace('\n', '')
        else:
            result = unicode(str(f.getvalue()), 'UTF-16').replace('\n', '')
        self.assertEquals(expected, result)
Пример #4
0
    def test_parse_encoding_8bit_explicit(self):
        """Check that an explicit parser encoding decodes ISO-8859-1 input.

        A paragraph with non-ASCII text is encoded as ISO-8859-1 and parsed
        with ``HTMLParser(encoding="iso-8859-1")``; the paragraph text must
        round-trip unchanged.
        """
        text = _str('Søk på nettet')
        html_latin1 = (_str('<p>%s</p>') % text).encode('iso-8859-1')

        tree = self.etree.parse(BytesIO(html_latin1),
                                self.etree.HTMLParser(encoding="iso-8859-1"))
        # Use the explicit relative form: an absolute "//p" on a tree is
        # rewritten to ".//p" and can trigger a FutureWarning.
        p = tree.find(".//p")
        self.assertEqual(p.text, text)
Пример #5
0
    def test_parse_encoding_8bit_override(self):
        """Check that a parser encoding overrides a wrong ``<meta>`` charset.

        The document declares UTF-8 in its head but is actually encoded as
        ISO-8859-1.  Parsing it with the default parser must raise
        ``ParseError``; parsing it with ``HTMLParser(encoding="iso-8859-1")``
        must succeed and yield the original paragraph text.
        """
        text = _str('Søk på nettet')
        wrong_head = _str('''
        <head>
          <meta http-equiv="Content-Type"
                content="text/html; charset=UTF-8" />
        </head>''')
        html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') %
                       (wrong_head, text)).encode('iso-8859-1')

        self.assertRaises(self.etree.ParseError, self.etree.parse,
                          BytesIO(html_latin1))

        tree = self.etree.parse(BytesIO(html_latin1),
                                self.etree.HTMLParser(encoding="iso-8859-1"))
        # Use the explicit relative form: an absolute "//p" on a tree is
        # rewritten to ".//p" and can trigger a FutureWarning.
        p = tree.find(".//p")
        self.assertEqual(p.text, text)
Пример #6
0
    def test_parser_based_lookup(self):
        """Check that a parser-level class lookup applies only to that parser.

        With ``ParserBasedElementClassLookup`` installed as the element class
        lookup, elements parsed by a parser carrying a custom lookup must be
        ``TestElement`` instances, while elements parsed without that parser
        must not expose ``FIND_ME``.
        """
        class TestElement(etree.ElementBase):
            FIND_ME = "parser_based"

        class MyLookup(etree.CustomElementClassLookup):
            # Hands out TestElement for every lookup request.
            def lookup(self, t, d, ns, name):
                return TestElement

        etree.set_element_class_lookup(etree.ParserBasedElementClassLookup())

        custom_parser = etree.XMLParser()
        custom_parser.set_element_class_lookup(MyLookup())

        # Parsed through the customised parser: root and first child carry
        # the marker attribute.
        custom_root = etree.parse(BytesIO(xml_str), custom_parser).getroot()
        marker = TestElement.FIND_ME
        self.assertEqual(custom_root.FIND_ME, marker)
        self.assertEqual(custom_root[0].FIND_ME, marker)

        # Parsed without passing a parser: no marker attribute.
        plain_root = etree.parse(BytesIO(xml_str)).getroot()
        self.assertFalse(hasattr(plain_root, 'FIND_ME'))
        self.assertFalse(hasattr(plain_root[0], 'FIND_ME'))
Пример #7
0
    def test_html_iterparse_tag(self):
        """Check that ``tag=`` restricts HTML iterparse events to those tags.

        Only 'end' events for ``root[0][0]`` and ``root[1][0]`` are expected,
        and ``iterator.root`` must be ``None`` before iteration starts.
        """
        source = BytesIO(
            '<html><head><title>TITLE</title><body><p>P</p></body></html>')

        context = self.etree.iterparse(source, html=True,
                                       tag=["p", "title"])
        self.assertEqual(None, context.root)

        seen = list(context)
        root = context.root
        self.assertTrue(root is not None)

        expected = [('end', root[0][0]), ('end', root[1][0])]
        self.assertEqual(expected, seen)
Пример #8
0
    def test_html_iterparse_start(self):
        """Check the 'start' events produced by HTML iterparse.

        Events are expected for the root, its first child and grandchild,
        then its second child and grandchild, in that order.
        ``iterator.root`` must be ``None`` before iteration starts.
        """
        iterparse = self.etree.iterparse
        f = BytesIO(
            '<html><head><title>TITLE</title><body><p>P</p></body></html>')

        iterator = iterparse(f, html=True, events=('start', ))
        # assertEqual/assertTrue replace the assertEquals/assert_ aliases,
        # which were removed from unittest in Python 3.12.
        self.assertEqual(None, iterator.root)

        events = list(iterator)
        root = iterator.root
        self.assertTrue(root is not None)
        self.assertEqual([('start', root), ('start', root[0]),
                          ('start', root[0][0]), ('start', root[1]),
                          ('start', root[1][0])], events)
Пример #9
0
    def test_xmlschema_iterparse_fail(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        self.assertRaises(
            etree.XMLSyntaxError, list,
            etree.iterparse(BytesIO('<a><c></c></a>'), schema=schema))
Пример #10
0
    def test_relaxng_stringio(self):
        tree_valid = self.parse('<a><b></b></a>')
        tree_invalid = self.parse('<a><c></c></a>')
        schema_file = BytesIO('''\
<element name="a" xmlns="http://relaxng.org/ns/structure/1.0">
  <zeroOrMore>
     <element name="b">
       <text />
     </element>
  </zeroOrMore>
</element>
''')
        schema = etree.RelaxNG(file=schema_file)
        self.assert_(schema.validate(tree_valid))
        self.assert_(not schema.validate(tree_invalid))
Пример #11
0
 def test_dtd_invalid_duplicate_id(self):
     """Check that a duplicated ID attribute value fails DTD validation.

     Validation must return a false value, populate ``dtd.error_log`` and
     produce at least one log entry whose message mentions ``id1``.
     """
     document = _bytes('''
     <a><b id="id1"/><b id="id2"/><b id="id1"/></a>
     ''')
     dtd_source = _bytes("""
     <!ELEMENT a (b*)>
     <!ATTLIST b
         id ID #REQUIRED
     >
     <!ELEMENT b EMPTY>
     """)
     root = etree.XML(document)
     dtd = etree.DTD(BytesIO(dtd_source))

     self.assertFalse(dtd.validate(root))
     self.assertTrue(dtd.error_log)
     id1_errors = [entry for entry in dtd.error_log
                   if 'id1' in entry.message]
     self.assertTrue(id1_errors)
Пример #12
0
    def test_void_elements(self):
        """Check that void elements are written as a bare start tag.

        Each remaining void tag is written through ``etree.htmlfile`` and the
        output compared with ``<tag>``; ``self._file`` is replaced with a
        fresh buffer after every tag.
        """
        # http://www.w3.org/TR/html5/syntax.html#elements-0
        html5_void = (
            "area", "base", "br", "col", "embed", "hr", "img", "input",
            "keygen", "link", "meta", "param", "source", "track", "wbr"
        )

        # FIXME: These don't get serialized as void elements.
        skipped = ('area', 'embed', 'keygen', 'source', 'track', 'wbr')

        void_elements = set(html5_void).difference(skipped)
        for tag_name in sorted(void_elements):
            with etree.htmlfile(self._file) as html_out:
                html_out.write(etree.Element(tag_name))
            self.assertXml('<%s>' % tag_name)
            self._file = BytesIO()
Пример #13
0
    def test_xml_mode_element_inside_html(self):
        """Pin the current output of nested ``.element()`` calls in htmlfile."""
        # The htmlfile already outputs in xml mode for .element calls. This
        # test actually illustrates a bug
        expected = (
            '<root>'
            # '<foo selected></foo>'  # FIXME: this is the correct output
            # in html mode
            '<foo selected="bar"></foo>'
            '</root>')

        with etree.htmlfile(self._file) as html_out:
            with html_out.element("root"):
                with html_out.element('foo', attrib={'selected': 'bar'}):
                    pass

        self.assertXml(expected)
        self._file = BytesIO()
Пример #14
0
    def test_xmlschema_iterparse_incomplete(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        xml = BytesIO('<a><b></b></a>')
        event, element = next(iter(etree.iterparse(xml, schema=schema)))
        self.assertEqual('end', event)
        self.assertEqual('b', element.tag)
Пример #15
0
    def test_xmlschema_iterparse(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        xml = BytesIO('<a><b></b></a>')
        events = [(event, el.tag)
                  for (event, el) in etree.iterparse(xml, schema=schema)]

        self.assertEquals([('end', 'b'), ('end', 'a')], events)
Пример #16
0
    def test_html_iterparse_broken(self):
        """Check the tree and events HTML iterparse builds from broken markup.

        The resulting tree must be html > (head, body > p > br), and the
        'end' events must arrive for ``root[0][0]``, ``head``, ``br``, ``p``,
        ``body`` and finally the root.
        """
        source = BytesIO('<head><title>TEST></head><p>P<br></div>')

        context = self.etree.iterparse(source, html=True)
        self.assertEqual(None, context.root)

        seen = list(context)
        root = context.root
        self.assertTrue(root is not None)

        # Structure of the parsed tree.
        self.assertEqual('html', root.tag)
        self.assertEqual('head', root[0].tag)
        self.assertEqual('body', root[1].tag)
        self.assertEqual('p', root[1][0].tag)
        self.assertEqual('br', root[1][0][0].tag)

        # Order in which the elements were closed.
        expected = [('end', root[0][0]), ('end', root[0]),
                    ('end', root[1][0][0]), ('end', root[1][0]),
                    ('end', root[1]), ('end', root)]
        self.assertEqual(expected, seen)
Пример #17
0
    def test_xml_mode_write_inside_html(self):
        """Check ``write()`` inside htmlfile with and without ``method='xml'``.

        The same element is written three times (markers 1-3 below): with the
        default method, as XML with ``text`` set to ``None``, and as XML with
        ``text`` set to the empty string.
        """
        foo = etree.Element("foo", attrib={'selected': 'bar'})
        expected = ('<root>'
                    '<foo selected></foo>'  # 1
                    '<foo selected="bar"/>'  # 2
                    '<foo selected="bar"></foo>'  # 3
                    '</root>')

        with etree.htmlfile(self._file) as html_out:
            with html_out.element("root"):
                html_out.write(foo)  # 1

                assert foo.text is None
                html_out.write(foo, method='xml')  # 2

                foo.text = ""
                html_out.write(foo, method='xml')  # 3

        self.assertXml(expected)
        self._file = BytesIO()
Пример #18
0
    def test_xmlschema_stringio(self):
        schema_file = BytesIO('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(file=schema_file)
        parser = etree.XMLParser(schema=schema)

        tree_valid = self.parse('<a><b></b></a>', parser=parser)
        self.assertEqual('a', tree_valid.getroot().tag)

        self.assertRaises(etree.XMLSyntaxError,
                          self.parse, '<a><c></c></a>', parser=parser)
Пример #19
0
    def test_html_iterparse_stop_short(self):
        """Check that an HTML iterparse can be abandoned after two events.

        The 'end' events for ``title`` and ``head`` are pulled, with
        ``root`` still ``None`` after each; the element and the iterator are
        then deleted explicitly.
        """
        source = BytesIO(
            '<html><head><title>TITLE</title><body><p>P</p></body></html>')

        context = self.etree.iterparse(source, html=True)
        self.assertEqual(None, context.root)

        kind, node = next(context)
        self.assertEqual('end', kind)
        self.assertEqual('title', node.tag)
        self.assertEqual(None, context.root)
        del node

        kind, node = next(context)
        self.assertEqual('end', kind)
        self.assertEqual('head', node.tag)
        self.assertEqual(None, context.root)
        # Drop the element before the iterator, as the original test does.
        del node
        del context
Пример #20
0
    def test_write_compressed_text(self):
        """Check gzip-compressed output of ``write(method='text')``.

        The decompressed, UTF-8-decoded output must be the concatenation of
        the root text, the first child's text and tail, and the second
        child's text.
        """
        etree = self.etree
        text = _str("qwrtioüöä")

        root = etree.Element('root')
        root.text = text
        first = etree.SubElement(root, 'sub')
        first.text = 'TEXT'
        first.tail = 'TAIL'
        etree.SubElement(root, 'sub').text = text

        buffer = BytesIO()
        etree.ElementTree(root).write(
            buffer, method='text', encoding='utf8', compression=9)
        buffer.seek(0)

        # The context manager closes the GzipFile even if reading fails.
        with gzip.GzipFile(fileobj=buffer) as unzipped:
            result = unzipped.read().decode('utf8')
        self.assertEqual(text + 'TEXTTAIL' + text, result)
Пример #21
0
class ETreeXMLSchemaResolversTestCase(HelperTestCase):
    """Tests for custom resolvers used while compiling XML Schemas.

    The "internal" schema fixtures import an "external" namespace through a
    ``schemaLocation`` (``XXX.xsd`` / ``YYY.xsd``); the resolvers defined in
    the tests answer those locations with ``resolver_schema_ext``.
    """

    # Internal schema that imports the external namespace from "XXX.xsd".
    resolver_schema_int = BytesIO("""\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:etype="http://codespeak.net/lxml/test/external"
    targetNamespace="http://codespeak.net/lxml/test/internal">
        <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="XXX.xsd" />
        <xsd:element name="a" type="etype:AType"/>
</xsd:schema>""")

    # Same as above, but importing from "YYY.xsd" (used by the nested test).
    resolver_schema_int2 = BytesIO("""\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:etype="http://codespeak.net/lxml/test/external"
    targetNamespace="http://codespeak.net/lxml/test/internal">
        <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="YYY.xsd" />
        <xsd:element name="a" type="etype:AType"/>
</xsd:schema>""")

    # External schema text handed out by the resolvers.
    resolver_schema_ext = """\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    targetNamespace="http://codespeak.net/lxml/test/external">
    <xsd:complexType name="AType">
      <xsd:sequence><xsd:element name="b" type="xsd:string" minOccurs="0" maxOccurs="unbounded" /></xsd:sequence>
    </xsd:complexType>
</xsd:schema>"""

    class simple_resolver(etree.Resolver):
        """Resolver that answers 'XXX.xsd' with a fixed schema string."""

        def __init__(self, schema):
            self.schema = schema

        def resolve(self, url, id, context):
            assert url == 'XXX.xsd'
            return self.resolve_string(self.schema, context)

    # tests:

    def test_xmlschema_resolvers(self):
        """Test that resolvers work with schema."""
        parser = etree.XMLParser()
        parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext))
        schema_doc = etree.parse(self.resolver_schema_int, parser=parser)
        # Building the schema is the check: it must not raise.
        schema = etree.XMLSchema(schema_doc)

    def test_xmlschema_resolvers_root(self):
        """Test that the default resolver will get called if there's no
        specific parser resolver."""
        root_resolver = self.simple_resolver(self.resolver_schema_ext)
        etree.get_default_parser().resolvers.add(root_resolver)
        try:
            schema_doc = etree.parse(self.resolver_schema_int)
            schema = etree.XMLSchema(schema_doc)
        finally:
            # Unregister even on failure so the resolver cannot leak into
            # the default parser used by other tests.
            etree.get_default_parser().resolvers.remove(root_resolver)

    def test_xmlschema_resolvers_noroot(self):
        """Test that the default resolver will not get called when a more
        specific resolver is registered."""
        class res_root(etree.Resolver):
            def resolve(self, url, id, context):
                assert False
                return None

        root_resolver = res_root()
        etree.get_default_parser().resolvers.add(root_resolver)
        try:
            parser = etree.XMLParser()
            parser.resolvers.add(
                self.simple_resolver(self.resolver_schema_ext))

            schema_doc = etree.parse(self.resolver_schema_int, parser=parser)
            schema = etree.XMLSchema(schema_doc)
        finally:
            # Unregister even on failure so the always-failing resolver
            # cannot leak into the default parser used by other tests.
            etree.get_default_parser().resolvers.remove(root_resolver)

    def test_xmlschema_nested_resolvers(self):
        """Test that resolvers work in a nested fashion."""

        resolver_schema = self.resolver_schema_ext

        class res_nested(etree.Resolver):
            """Inner resolver: answers 'YYY.xsd' with the external schema."""

            def __init__(self, ext_schema):
                self.ext_schema = ext_schema

            def resolve(self, url, id, context):
                assert url == 'YYY.xsd'
                return self.resolve_string(self.ext_schema, context)

        class res(etree.Resolver):
            """Outer resolver: compiles a second schema while resolving."""

            def __init__(self, ext_schema_1, ext_schema_2):
                self.ext_schema_1 = ext_schema_1
                self.ext_schema_2 = ext_schema_2

            def resolve(self, url, id, context):
                assert url == 'XXX.xsd'

                # Parse and compile another schema (with its own resolver)
                # from inside this resolve() call, before answering it.
                new_parser = etree.XMLParser()
                new_parser.resolvers.add(res_nested(self.ext_schema_2))
                new_schema_doc = etree.parse(self.ext_schema_1,
                                             parser=new_parser)
                new_schema = etree.XMLSchema(new_schema_doc)

                return self.resolve_string(resolver_schema, context)

        parser = etree.XMLParser()
        parser.resolvers.add(
            res(self.resolver_schema_int2, self.resolver_schema_ext))
        schema_doc = etree.parse(self.resolver_schema_int, parser=parser)
        schema = etree.XMLSchema(schema_doc)
Пример #22
0
 def test_dtd_invalid(self):
     """Check that ``assertValid`` raises ``DocumentInvalid`` on a mismatch.

     The DTD declares ``b`` as EMPTY while the document gives it a child.
     """
     document = etree.XML("<b><a/></b>")
     empty_b_dtd = etree.DTD(BytesIO("<!ELEMENT b EMPTY>"))
     self.assertRaises(etree.DocumentInvalid,
                       empty_b_dtd.assertValid, document)
Пример #23
0
 def test_dtd_assertValid(self):
     """Check that ``assertValid`` does not raise for a matching document."""
     document = etree.XML("<b><a/></b>")
     dtd_source = BytesIO("<!ELEMENT b (a)><!ELEMENT a EMPTY>")
     matching_dtd = etree.DTD(dtd_source)
     matching_dtd.assertValid(document)
Пример #24
0
 def test_dtd_broken(self):
     """Check that a malformed DTD raises ``DTDParseError`` on construction."""
     broken_source = BytesIO("<!ELEMENT b HONKEY>")
     self.assertRaises(etree.DTDParseError, etree.DTD, broken_source)
Пример #25
0
 def test_dtd_stringio(self):
     """Check that a DTD read from a file-like object validates ``<b/>``."""
     dtd_source = BytesIO("<!ELEMENT b EMPTY>")
     document = etree.XML(_bytes("<b/>"))
     empty_b_dtd = etree.DTD(dtd_source)
     self.assertTrue(empty_b_dtd.validate(document))
Пример #26
0
 def test_html_iterparse_broken_no_recover(self):
     """Check that HTML iterparse with ``recover=False`` rejects broken markup.

     Consuming the events must raise ``XMLSyntaxError``.
     """
     source = BytesIO('<p>P<br></div>')
     strict_events = self.etree.iterparse(source, html=True, recover=False)
     # list() drives the iterator; the error surfaces during iteration.
     self.assertRaises(self.etree.XMLSyntaxError, list, strict_events)
Пример #27
0
 def test_module_parse_html_error(self):
     """Check that ``parse()`` with a non-recovering HTML parser raises.

     ``<html></body>`` must produce ``XMLSyntaxError``.
     """
     etree = self.etree
     strict_parser = etree.HTMLParser(recover=False)
     source = BytesIO("<html></body>")
     self.assertRaises(etree.XMLSyntaxError,
                       etree.parse, source, strict_parser)
Пример #28
0
 def run_parse():
     # Parse ``xml`` (taken from the enclosing scope) and record the first
     # and last child of its root in the enclosing ``result`` list.
     parsed_root = self.etree.parse(BytesIO(xml)).getroot()
     for index in (0, -1):
         result.append(parsed_root[index])
Пример #29
0
        expr = etree.ETXPath(_bytes("/a/{http://nsa/\\uf8d2}b").decode("unicode_escape"))
        r = expr(x)
        self.assertEquals(1, len(r))
        self.assertEquals(_bytes('{http://nsa/\\uf8d2}b').decode("unicode_escape"), r[0].tag)

        expr = etree.ETXPath(_bytes("/a/{http://nsb/\\uf8d1}b").decode("unicode_escape"))
        r = expr(x)
        self.assertEquals(1, len(r))
        self.assertEquals(_bytes('{http://nsb/\\uf8d1}b').decode("unicode_escape"), r[0].tag)

# Module-level fixture, parsed once at import time: a <body> holding a
# <tag> with text, a <section> wrapping another <tag>, and two empty <tag>s.
SAMPLE_XML = etree.parse(BytesIO("""
<body>
  <tag>text</tag>
  <section>
    <tag>subtext</tag>
  </section>
  <tag />
  <tag />
</body>
"""))

def tag(elem):
    """Return the ``tag`` attribute of *elem*."""
    element_tag = elem.tag
    return element_tag

def tag_or_value(elem):
    """Return ``elem.tag`` when *elem* has a ``tag`` attribute, else *elem*."""
    try:
        return elem.tag
    except AttributeError:
        # Plain values (strings, numbers, ...) are passed through unchanged.
        return elem

def stringTest(ctxt, s1):
    """Return *s1* prefixed with ``"Hello "``; *ctxt* is not used."""
    greeting = "Hello "
    return greeting + s1

def stringListTest(ctxt, s1):
Пример #30
0
 def test_module_parse_html_norecover(self):
     """Check that the broken HTML fixture fails without recovery.

     Parsing ``self.broken_html_str`` with ``HTMLParser(recover=False)``
     must raise ``XMLSyntaxError``.
     """
     etree = self.etree
     strict_parser = etree.HTMLParser(recover=False)
     source = BytesIO(self.broken_html_str)
     self.assertRaises(etree.XMLSyntaxError,
                       etree.parse, source, strict_parser)