Пример #1
0
    def test_single_tag_matcher(self):
        """A single TagElementMatcher reports the h1 and h3 nodes, skipping h2."""
        tree = ElementTree.fromstring('''\
            <html>
                <body>
                    <h1>str1</h1>
                    <h2>str2</h2>
                    <h3>str3</h3>
                </body>
            </html>
        ''')

        finder = ElementFinder()
        tag_matcher = TagElementMatcher(tags=['h1', 'h3'])
        finder.add_element_matcher(tag_matcher)
        results = finder.findElements(tree)

        # The results dict must be keyed by exactly the supplied matchers.
        # The keys are materialised into a list first: on Python 3
        # ``dict.keys()`` is a view object that never compares equal to a list.
        self.assertEqual(list(results.keys()), [tag_matcher])

        # Exactly two elements are expected for this matcher.
        matched = results[tag_matcher]
        self.assertEqual(2, len(matched))

        # First match: the h1 node.
        h1_node = matched[0]
        self.assertEqual(h1_node.tag, 'h1')
        self.assertEqual(h1_node.text, 'str1')

        # Second match: the h3 node (h2 is not in the matcher's tag list).
        h3_node = matched[1]
        self.assertEqual(h3_node.tag, 'h3')
        self.assertEqual(h3_node.text, 'str3')
Пример #2
0
    def test_lxml_ignore_comments(self):
        """An h1 that only appears inside an HTML comment is not reported."""
        tree = lxml.html.document_fromstring('''\
            <html>
                <body>
                    <!--
                    <h1>str1</h1>
                    -->
                </body>
            </html>
        ''')

        finder = ElementFinder()
        tag_matcher = TagElementMatcher(tags=['h1'])
        finder.add_element_matcher(tag_matcher)
        results = finder.findElements(tree)

        # The only h1 in the document is commented out, so the matcher must
        # come back with an empty result list.
        self.assertEqual(0, len(results[tag_matcher]))
Пример #3
0
    def test_single_non_tag_matcher(self):
        """A custom ElementMatcher subclass selects elements by their text."""
        tree = ElementTree.fromstring('''\
            <html>
                <body>
                    <h1>str1</h1>
                    <h1>str2</h1>
                </body>
            </html>
        ''')

        class Matcher(ElementMatcher):
            """Match any element whose text contains ``'str1'``."""

            def doesMatch(self, element):
                # ``element.text`` is None for elements without text content;
                # treat that as "no match" instead of raising TypeError.
                return 'str1' in (element.text or '')

        finder = ElementFinder()
        text_matcher = Matcher()
        finder.add_element_matcher(text_matcher)
        results = finder.findElements(tree)

        # Only the first h1 carries 'str1', so exactly one element is expected.
        matched = results[text_matcher]
        self.assertEqual(1, len(matched))
        self.assertEqual('str1', matched[0].text)
0
    def test_skip_tags_matched_by_ignore_matcher(self):
        """An h1 nested in a div hit by an ignore matcher is not reported."""
        tree = ElementTree.fromstring('''\
            <html>
                <body>
                    <div>
                        <h1>str1</h1>
                    </div>
                </body>
            </html>
        ''')

        finder = ElementFinder()
        tag_matcher = TagElementMatcher(tags=['h1'])
        finder.add_element_matcher(tag_matcher)
        finder.add_ignore_element_matcher(TagElementMatcher(tags=['div']))
        results = finder.findElements(tree)

        # The only h1 sits inside the ignored div, so nothing may be matched.
        self.assertEqual(0, len(results[tag_matcher]))
Пример #5
0
    def test_skip_tags_matched_by_non_tag_ignore_matcher(self):
        """A custom ignore matcher suppresses an element other matchers accept."""
        tree = ElementTree.fromstring('''\
            <html>
                <body>
                    <h1 skip="yes">str1</h1>
                </body>
            </html>
        ''')

        class Matcher(ElementMatcher):
            """Match any element carrying the attribute ``skip="yes"``."""

            def doesMatch(self, element):
                return element.get('skip') == 'yes'

        finder = ElementFinder()
        matcher = TagElementMatcher(tags=['h1'])
        finder.add_element_matcher(matcher)
        ignore_matcher = Matcher()
        finder.add_ignore_element_matcher(ignore_matcher)
        results = finder.findElements(tree)

        # The h1 is flagged skip="yes", so the ignore matcher must win and the
        # tag matcher must end up with no results.
        self.assertEqual(0, len(results[matcher]))