Пример #1
0
 def __init__(self):
     """Create the underlying sgmlop parser and initialise the base classes.

     A new ``sgmlop.SGMLParser`` is stored on ``self._parser`` and this
     object is registered with it (presumably so that the parser reports
     its events to this object's handler methods -- confirm against the
     sgmlop documentation). Afterwards the mixin state is reset and the
     ``sgmllib.SGMLParser`` initialiser is run on this instance.
     """
     # Aggregated sgmlop parser
     self._parser = sgmlop.SGMLParser()
     self._parser.register(self)
     # Initialize behaviour of both the mixin and SGMLParser
     # NOTE(review): the order of these two calls may matter if the base
     # initialiser itself triggers a reset -- verify before reordering.
     HtmlParserMixin.reset(self)
     sgmllib.SGMLParser.__init__(self)
Пример #2
0
    def reset(self):
        """Reset the parsing state and attach a fresh sgmlop parser.

        Clears the buffered raw data and the tag stack, restores the
        bookkeeping flags to their initial values, then creates a new
        ``sgmlop.SGMLParser`` and registers this object with it.

        After this call ``self.feed`` is bound directly to the new parser's
        ``feed`` method, so data fed to this object goes straight to sgmlop.
        """
        # Indentation uses spaces only: the original mixed a space-indented
        # ``def`` with a tab-indented body, which Python 3 rejects (TabError).
        self.rawdata = ''
        self.stack = []
        self.lasttag = '???'
        self.nomoretags = 0
        self.literal = 0
        # A brand-new parser is created on every reset rather than reused.
        self.parser = sgmlop.SGMLParser()
        # Rebind feed so it always points at the current parser instance.
        self.feed = self.parser.feed
        self.parser.register(self)
Пример #3
0
    def __init__(self):
        """Create an sgmlop-backed parser and initialise the base parser.

        ``sgmlop`` is imported inside the method, so an ``ImportError`` is
        raised here (at construction time) if the extension is unavailable.
        A new ``sgmlop.SGMLParser`` is stored on ``self.parser`` and this
        object is registered with it before the ``HarvestManSimpleParser``
        initialiser runs.
        """
        # This module should be built already!
        import sgmlop

        self.parser = sgmlop.SGMLParser()
        self.parser.register(self)
        HarvestManSimpleParser.__init__(self)
        # Parser type identifier; the meaning of the value 1 is not visible
        # in this block -- TODO confirm against the code that reads ``typ``.
        self.typ = 1
Пример #4
0
def test_sgmlop():
    """Test whether sgmlop is available and working.

    A small HTML sample (malformed in places, presumably on purpose) is fed
    to an ``sgmlop.SGMLParser`` with a handler that records every start tag
    it is told about.

    Returns:
        True if sgmlop could be imported and the handler recorded exactly
        four start tags for the sample; False if sgmlop is not importable
        or a different number of tags was recorded.
    """

    html = """\
    <html><
    title>Test sgmlop</title>
    <body>
    <p>This is a pargraph</p>
    <img src="img.jpg"/>
    <a href="http://www.python.org'>Python</a>
    </body>
    </html>
    """

    # Not present -> not working. Only the import is guarded, and the
    # ``except`` form used here is valid on both Python 2 and Python 3.
    try:
        import sgmlop
    except ImportError:
        return False

    class DummyHandler(object):
        """Records the name of each start tag reported by the parser."""

        def __init__(self):
            # Per-instance list: a class-level list would be shared between
            # calls and make every call after the first miscount the tags.
            self.links = []

        def finish_starttag(self, tag, attrs):
            self.links.append(tag)

    handler = DummyHandler()
    parser = sgmlop.SGMLParser()
    parser.register(handler)
    parser.feed(html)

    # Check if we got all the links...
    return len(handler.links) == 4