Пример #1
0
 def test_text_node_pos_single_line(self):
     # The second event of this one-line document must be the text node,
     # and it must carry the position tuple (None, 1, 6).
     source = StringIO('<elem>foo bar</elem>')
     text_event = list(XMLParser(source))[1]
     event_kind, payload, position = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual('foo bar', payload)
     self.assertEqual((None, 1, 6), position)
Пример #2
0
 def test_hex_charref(self):
     # The hexadecimal character reference &#x27; is expected to surface
     # as a literal apostrophe between the start and end tag events.
     parsed = list(HTMLParser(StringIO('<span>&#x27;</span>')))
     expected = [
         (Stream.START, ('span', ())),
         (Stream.TEXT, "'"),
         (Stream.END, 'span'),
     ]
     self.assertEqual(3, len(parsed))
     # Only kind and data are compared; the position element is ignored.
     for wanted, event in zip(expected, parsed):
         self.assertEqual(wanted, event[:2])
Пример #3
0
 def test_processing_instruction_trailing_qmark(self):
     # A processing instruction closed with "??>" is expected to keep one
     # question mark as the final character of its data.
     source = StringIO('<?php echo "Foobar" ??>')
     first_event = list(HTMLParser(source))[0]
     event_kind, (pi_target, pi_data), _ = first_event
     self.assertEqual(Stream.PI, event_kind)
     self.assertEqual('php', pi_target)
     self.assertEqual('echo "Foobar" ?', pi_data)
Пример #4
0
def HTML(text, encoding=None):
    """Parse HTML source into a reusable markup stream.

    All events produced by `HTMLParser` are collected into a list before
    being wrapped in a `Stream`, so the result can be iterated over more
    than once:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print((html.select('h1')))
    <h1>Foo</h1>
    >>> print((html.select('h1/text()')))
    Foo

    :param text: the HTML source; a `str` is read through `StringIO`, any
                 other value is handed to `BytesIO`
    :param encoding: the encoding passed to `HTMLParser` for non-`str`
                     input; ignored when `text` is a `str`
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    if not isinstance(text, str):
        source = BytesIO(text)
    else:
        # Unicode text must be parsed with the encoding set to None.
        # Accepting (and dropping) an encoding here makes it easier to
        # write doctests that work in both Python 2.x and 3.x.
        source = StringIO(text)
        encoding = None
    return Stream(list(HTMLParser(source, encoding=encoding)))
Пример #5
0
 def test_undefined_entity_with_dtd(self):
     # Consuming the parser must raise ParseError for the unknown entity
     # &junk;, even though the document declares the XHTML DTD.
     document = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
     <html>&junk;</html>
     """
     parser = XMLParser(StringIO(document))
     with self.assertRaises(ParseError):
         list(parser)
Пример #6
0
 def test_processing_instruction_no_data_1(self):
     # A processing instruction with only a target and trailing whitespace
     # is expected to yield an empty data string.
     source = StringIO(u'<?foo ?>')
     first_event = list(HTMLParser(source))[0]
     event_kind, (pi_target, pi_data), _ = first_event
     self.assertEqual(Stream.PI, event_kind)
     self.assertEqual('foo', pi_target)
     self.assertEqual('', pi_data)
Пример #7
0
 def test_xmldecl_standalone(self):
     # standalone="yes" in the XML declaration is expected to be reported
     # as 1, with no encoding given.
     source = StringIO('<?xml version="1.0" standalone="yes" ?><root />')
     declaration = list(XMLParser(source))[0]
     event_kind, (xml_version, xml_encoding, is_standalone), _ = declaration
     self.assertEqual(Stream.XML_DECL, event_kind)
     self.assertEqual('1.0', xml_version)
     self.assertEqual(None, xml_encoding)
     self.assertEqual(1, is_standalone)
Пример #8
0
 def test_xmldecl_encoding(self):
     # The declared encoding is expected to be reported verbatim, and the
     # missing standalone attribute is expected to be reported as -1.
     source = StringIO('<?xml version="1.0" encoding="utf-8" ?><root />')
     declaration = list(XMLParser(source))[0]
     event_kind, (xml_version, xml_encoding, is_standalone), _ = declaration
     self.assertEqual(Stream.XML_DECL, event_kind)
     self.assertEqual('1.0', xml_version)
     self.assertEqual('utf-8', xml_encoding)
     self.assertEqual(-1, is_standalone)
Пример #9
0
 def test_html_entity_in_attribute(self):
     # &nbsp; inside an attribute value is expected to be resolved to
     # U+00A0 by the XML parser.
     parsed = list(XMLParser(StringIO('<p title="&nbsp;"/>')))

     # First event: the start tag, whose attributes hold the entity.
     event_kind, (_, attributes), _ = parsed[0]
     self.assertEqual(Stream.START, event_kind)
     self.assertEqual('\xa0', attributes.get('title'))

     # Second event: the end tag of the self-closing element.
     event_kind, _, _ = parsed[1]
     self.assertEqual(Stream.END, event_kind)
Пример #10
0
 def test_out_of_order_tags1(self):
     # Closing tags given in the wrong order are expected to be emitted
     # properly nested: </b> before </span>.
     parsed = list(HTMLParser(StringIO('<span><b>Foobar</span></b>')))
     expected = [
         (Stream.START, ('span', ())),
         (Stream.START, ('b', ())),
         (Stream.TEXT, 'Foobar'),
         (Stream.END, 'b'),
         (Stream.END, 'span'),
     ]
     self.assertEqual(5, len(parsed))
     # Only kind and data are compared; the position element is ignored.
     for wanted, event in zip(expected, parsed):
         self.assertEqual(wanted, event[:2])
Пример #11
0
 def test_html_entity_with_dtd(self):
     # With the XHTML DTD declared, &nbsp; is expected to arrive as
     # U+00A0 in the third event of the stream.
     document = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
     <html>&nbsp;</html>
     """
     text_event = list(XMLParser(StringIO(document)))[2]
     event_kind, payload, _ = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual('\xa0', payload)
Пример #12
0
 def test_processing_instruction_no_data_2(self):
     # Processing instructions closed with a bare ">" (no "?") are
     # expected to produce a target and empty data.
     source = StringIO(u'<?experiment>...<?/experiment>')
     parsed = list(HTMLParser(source))

     # Opening pseudo-tag is the first event.
     event_kind, (pi_target, pi_data), _ = parsed[0]
     self.assertEqual(Stream.PI, event_kind)
     self.assertEqual('experiment', pi_target)
     self.assertEqual('', pi_data)

     # Closing pseudo-tag is the third event; only its payload is checked.
     _, (pi_target, pi_data), _ = parsed[2]
     self.assertEqual('/experiment', pi_target)
     self.assertEqual('', pi_data)
Пример #13
0
 def test_element_attribute_order(self):
     # Attributes are expected to be reported in the order in which they
     # appear in the source markup.
     source = StringIO('<elem title="baz" id="foo" class="bar" />')
     start_event = list(XMLParser(source))[0]
     event_kind, payload, _ = start_event
     self.assertEqual(Stream.START, event_kind)
     tag_name, attributes = payload
     self.assertEqual('elem', tag_name)
     in_source_order = [('title', 'baz'), ('id', 'foo'), ('class', 'bar')]
     for index, pair in enumerate(in_source_order):
         self.assertEqual(pair, attributes[index])
Пример #14
0
    def __init__(self, source, filepath=None, filename=None, loader=None,
                 encoding=None, lookup='strict', allow_exec=True):
        """Create a template from a string, a file-like object, or a markup
        stream that has already been parsed.

        :param source: a string, file-like object, or markup stream to read the
                       template from
        :param filepath: the absolute path to the template file; when not
                         given, `filename` is used in its place
        :param filename: the path to the template file relative to the search
                         path
        :param loader: the `TemplateLoader` to use for loading included
                       templates
        :param encoding: the encoding of the `source`
        :param lookup: the variable lookup mechanism; either "strict" (the
                       default), "lenient", or a custom lookup class
        :param allow_exec: whether Python code blocks in templates should be
                           allowed
        :raises TemplateSyntaxError: if parsing the source raises a
                                     `ParseError`

        :note: Changed in 0.5: Added the `allow_exec` argument
        """
        self.filepath = filepath or filename
        self.filename = filename
        self.loader = loader
        self.lookup = lookup
        self.allow_exec = allow_exec
        self._init_filters()
        self._init_loader()
        self._prepared = False

        # Streams and objects exposing `read` are parsed as they are; any
        # other value is wrapped in an in-memory file object first.
        if not (isinstance(source, Stream) or hasattr(source, 'read')):
            wrapper = StringIO if isinstance(source, str) else BytesIO
            source = wrapper(source)

        try:
            self._stream = self._parse(source, encoding)
        except ParseError as err:
            # Re-raise as a template-level error that carries the file path.
            raise TemplateSyntaxError(err.msg, self.filepath, err.lineno,
                                      err.offset)
Пример #15
0
def XML(text):
    """Parse XML source text into a reusable markup stream.

    All events produced by `XMLParser` are collected into a list before
    being wrapped in a `Stream`, so the result can be iterated over more
    than once:

    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
    >>> print(xml)
    <doc><elem>Foo</elem><elem>Bar</elem></doc>
    >>> print(xml.select('elem'))
    <elem>Foo</elem><elem>Bar</elem>
    >>> print(xml.select('elem/text()'))
    FooBar

    :param text: the XML source
    :return: the parsed XML event stream
    :raises ParseError: if the XML text is not well-formed
    """
    source = StringIO(text)
    # Materialise the one-shot parser output so the stream can be replayed.
    events = list(XMLParser(source))
    return Stream(events)
Пример #16
0
def HTML(text, encoding=None):
    """Parse HTML source into a reusable markup stream.

    All events produced by `HTMLParser` are collected into a list before
    being wrapped in a `Stream`, so the result can be iterated over more
    than once:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source; a `str` is read through `StringIO`, any
                 other value is handed to `BytesIO`
    :param encoding: the encoding forwarded to `HTMLParser`
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    # Only the in-memory file type depends on the input; the parser call is
    # the same for text and for bytes.
    make_source = StringIO if isinstance(text, str) else BytesIO
    parser = HTMLParser(make_source(text), encoding=encoding)
    return Stream(list(parser))
Пример #17
0
 def test_render_output_stream_unicode(self):
     # Rendering with encoding=None into a text buffer is expected to
     # return None and write the markup, umlaut included, to the buffer.
     markup = '<li>Über uns</li>'
     buffer = StringIO()
     result = XML(markup).render(encoding=None, out=buffer)
     self.assertEqual(None, result)
     self.assertEqual(markup, buffer.getvalue())
Пример #18
0
 def test_parse_fileobj(self):
     # A template built from a file-like object is expected to substitute
     # both the ${var} and the $var form of the expression.
     template = MarkupTemplate(StringIO('<root> ${var} $var</root>'))
     rendered = str(template.generate(var=42))
     self.assertEqual('<root> 42 42</root>', rendered)
Пример #19
0
 def test_undefined_entity_without_dtd(self):
     # Consuming the parser must raise ParseError for the unknown entity
     # &junk; in a document that declares no DTD.
     parser = XMLParser(StringIO('<html>&junk;</html>'))
     with self.assertRaises(ParseError):
         list(parser)
Пример #20
0
 def test_render_output_stream_unicode(self):
     # render() with encoding=None and an explicit output stream is
     # expected to return None and leave the text in the stream.
     sink = StringIO()
     stream = XML('<li>Über uns</li>')
     returned = stream.render(encoding=None, out=sink)
     self.assertEqual(None, returned)
     written = sink.getvalue()
     self.assertEqual(u'<li>Über uns</li>', written)
Пример #21
0
 def test_html_entity_in_text(self):
     # &nbsp; in element content is expected to be delivered by the HTML
     # parser as a text event holding U+00A0.
     source = StringIO('<p>&nbsp;</p>')
     text_event = list(HTMLParser(source))[1]
     event_kind, payload, _ = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual('\xa0', payload)
Пример #22
0
 def test_html_entity_without_dtd(self):
     # Even without a DTD, the XML parser is expected to resolve the HTML
     # entity &nbsp; to U+00A0.
     source = StringIO('<html>&nbsp;</html>')
     text_event = list(XMLParser(source))[1]
     event_kind, payload, _ = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual('\xa0', payload)
Пример #23
0
 def test_unicode_input(self):
     # A non-ASCII character (U+2013) in text input is expected to pass
     # through the XML parser unchanged.
     source = StringIO('<div>\u2013</div>')
     text_event = list(XMLParser(source))[1]
     event_kind, payload, _ = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual('\u2013', payload)