Python HTMLImageLinkExtractor示例

编程语言: Python

命名空间/包名称: scrapy.contrib.linkextractors.image

hotexamples.com的示例: 5

Python HTMLImageLinkExtractor - 已找到5个示例。以下是从开源项目中提取的、最受好评的 scrapy.contrib.linkextractors.image.HTMLImageLinkExtractor 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

HTMLImageLinkExtractor(2)

extract_links(2)

示例#1

显示文件

文件： test_contrib_linkextractors.py 项目： pkufranky/scrapy

    def test_extraction(self):
        """Check the links extracted for different ``locations``/``unique`` settings."""

        # Expected links, named once so each scenario reads as a short list.
        sample_1 = Link(url='http://example.com/sample1.jpg', text=u'sample 1')
        sample_2 = Link(url='http://example.com/sample2.jpg', text=u'sample 2')
        sample_3 = Link(url='http://example.com/sample3.html', text=u'sample 3')
        sample_4 = Link(url='http://example.com/sample4.jpg', text=u'sample 4')
        sample_4_repeat = Link(url='http://example.com/sample4.jpg',
                               text=u'sample 4 repetition')

        # Default options on '//img': only one sample4.jpg link is expected.
        img_extractor = HTMLImageLinkExtractor(locations=('//img', ))
        self.assertEqual(img_extractor.extract_links(self.response),
                         [sample_1, sample_2, sample_4])

        # unique=False: the repeated sample4.jpg link is expected as well.
        repeating_extractor = HTMLImageLinkExtractor(locations=('//img', ),
                                                     unique=False)
        self.assertEqual(repeating_extractor.extract_links(self.response),
                         [sample_1, sample_2, sample_4, sample_4_repeat])

        # Restricting to the wrapper div is expected to give the same links
        # as the default '//img' scenario.
        wrapper_extractor = HTMLImageLinkExtractor(
            locations=('//div[@id="wrapper"]', ))
        self.assertEqual(wrapper_extractor.extract_links(self.response),
                         [sample_1, sample_2, sample_4])

        # Restricting to anchors: sample 2 and the sample3.html link.
        anchor_extractor = HTMLImageLinkExtractor(locations=('//a', ))
        self.assertEqual(anchor_extractor.extract_links(self.response),
                         [sample_2, sample_3])

示例#2

显示文件

文件： test_contrib_linkextractors.py 项目： serkanh/scrapy

    def test_extraction(self):
        """Run the extractor under several configurations and compare the links found."""

        # Each scenario pairs the extractor keyword arguments with the exact
        # list of links expected from ``self.response``; order matters.
        scenarios = [
            (
                {"locations": ("//img",)},
                [
                    Link(url="http://example.com/sample1.jpg", text=u"sample 1"),
                    Link(url="http://example.com/sample2.jpg", text=u"sample 2"),
                    Link(url="http://example.com/sample4.jpg", text=u"sample 4"),
                ],
            ),
            (
                {"locations": ("//img",), "unique": False},
                [
                    Link(url="http://example.com/sample1.jpg", text=u"sample 1"),
                    Link(url="http://example.com/sample2.jpg", text=u"sample 2"),
                    Link(url="http://example.com/sample4.jpg", text=u"sample 4"),
                    Link(url="http://example.com/sample4.jpg", text=u"sample 4 repetition"),
                ],
            ),
            (
                {"locations": ('//div[@id="wrapper"]',)},
                [
                    Link(url="http://example.com/sample1.jpg", text=u"sample 1"),
                    Link(url="http://example.com/sample2.jpg", text=u"sample 2"),
                    Link(url="http://example.com/sample4.jpg", text=u"sample 4"),
                ],
            ),
            (
                {"locations": ("//a",)},
                [
                    Link(url="http://example.com/sample2.jpg", text=u"sample 2"),
                    Link(url="http://example.com/sample3.html", text=u"sample 3"),
                ],
            ),
        ]

        for extractor_kwargs, expected_links in scenarios:
            extractor = HTMLImageLinkExtractor(**extractor_kwargs)
            found_links = extractor.extract_links(self.response)
            self.assertEqual(found_links, expected_links)

示例#3

显示文件

文件： test_contrib_linkextractors.py 项目： richard-ma/CodeReading

    def test_extraction(self):
        """Verify which links are extracted under different extractor options."""

        def extract_with(**options):
            # Build a fresh extractor for the scenario and run it on the
            # test's response.
            extractor = HTMLImageLinkExtractor(**options)
            return extractor.extract_links(self.response)

        # Default options on '//img'.
        expected_default = [
            Link(url='http://example.com/sample1.jpg', text=u'sample 1'),
            Link(url='http://example.com/sample2.jpg', text=u'sample 2'),
            Link(url='http://example.com/sample4.jpg', text=u'sample 4'),
        ]
        self.assertEqual(extract_with(locations=('//img', )), expected_default)

        # unique=False: a second sample4.jpg link is expected at the end.
        expected_with_repeats = [
            Link(url='http://example.com/sample1.jpg', text=u'sample 1'),
            Link(url='http://example.com/sample2.jpg', text=u'sample 2'),
            Link(url='http://example.com/sample4.jpg', text=u'sample 4'),
            Link(url='http://example.com/sample4.jpg',
                 text=u'sample 4 repetition'),
        ]
        self.assertEqual(
            extract_with(locations=('//img', ), unique=False),
            expected_with_repeats)

        # Restricting to the wrapper div.
        expected_in_wrapper = [
            Link(url='http://example.com/sample1.jpg', text=u'sample 1'),
            Link(url='http://example.com/sample2.jpg', text=u'sample 2'),
            Link(url='http://example.com/sample4.jpg', text=u'sample 4'),
        ]
        self.assertEqual(
            extract_with(locations=('//div[@id="wrapper"]', )),
            expected_in_wrapper)

        # Restricting to anchors.
        expected_in_anchors = [
            Link(url='http://example.com/sample2.jpg', text=u'sample 2'),
            Link(url='http://example.com/sample3.html', text=u'sample 3'),
        ]
        self.assertEqual(extract_with(locations=('//a', )), expected_in_anchors)

示例#4

显示文件

文件： test_contrib_linkextractors.py 项目： richard-ma/CodeReading

 def test_urls_type(self):
     """Check that every extracted url is a regular ``str``, not a unicode object."""
     extractor = HTMLImageLinkExtractor()
     # One boolean per extracted link: is its url a ``str`` instance?
     url_checks = [
         isinstance(found.url, str)
         for found in extractor.extract_links(self.response)
     ]
     self.assertTrue(all(url_checks))

示例#5

显示文件

文件： test_contrib_linkextractors.py 项目： pkufranky/scrapy

 def test_urls_type(self):
     """Ensure no extracted link carries a url that is not a ``str`` instance."""
     found_links = HTMLImageLinkExtractor().extract_links(self.response)
     # Passes when no link has a non-str url (including when no links are found).
     has_non_str_url = any(
         not isinstance(entry.url, str) for entry in found_links
     )
     self.assertTrue(not has_non_str_url)