Exemplo n.º 1
0
    def test_nested_replace(self):
        """Check that replace_xpath on nested loaders changes the root loader's output.

        A footer-scoped loader and a link loader nested inside it each replace
        the ``url`` field; the result is always read back via the root loader.
        """
        root = ItemLoader(selector=self.selector)
        footer = root.nested_xpath('//footer')
        footer_link = footer.nested_xpath('a')
        homepage = ['http://www.scrapy.org']

        # Seed the field from the root loader with an absolute path.
        root.add_xpath('url', '//footer/a/@href')
        self.assertEqual(root.get_output_value('url'), homepage)

        # Replace through the footer loader (path relative to //footer).
        footer.replace_xpath('url', 'img/@src')
        self.assertEqual(root.get_output_value('url'), ['/images/logo.png'])

        # Replace again through the innermost loader (path relative to the <a>).
        footer_link.replace_xpath('url', '@href')
        self.assertEqual(root.get_output_value('url'), homepage)
Exemplo n.º 2
0
    def test_nested_ordering(self):
        """Check that values added via root and nested loaders keep call order.

        Additions to ``url`` are interleaved between the root loader and two
        nested loaders; the output read from the root loader must list the
        values in the order the ``add_xpath`` calls were made.
        """
        root = ItemLoader(selector=self.selector)
        footer = root.nested_xpath('//footer')
        footer_link = footer.nested_xpath('a')
        absolute_href = '//footer/a/@href'

        footer.add_xpath('url', 'img/@src')
        root.add_xpath('url', absolute_href)
        footer_link.add_xpath('url', 'text()')
        root.add_xpath('url', absolute_href)

        expected = [
            '/images/logo.png',
            'http://www.scrapy.org',
            'homepage',
            'http://www.scrapy.org',
        ]
        self.assertEqual(root.get_output_value('url'), expected)
Exemplo n.º 3
0
    def test_nested_load_item(self):
        """Check that root and nested loaders populate one shared item.

        Fields are added through the root loader and two levels of nested
        loaders; after ``load_item()`` on the root, the returned item must be
        the same object exposed as ``.item`` on every loader and must contain
        all three fields.
        """
        loader = ItemLoader(selector=self.selector)
        nl1 = loader.nested_xpath('//footer')
        nl2 = nl1.nested_xpath('img')

        loader.add_xpath('name', '//header/div/text()')
        nl1.add_xpath('url', 'a/@href')
        nl2.add_xpath('image', '@src')

        item = loader.load_item()

        # Use assertIs rather than bare ``assert``: it is not stripped under
        # ``python -O`` and matches the assert* style used for the value checks.
        self.assertIs(item, loader.item)
        self.assertIs(item, nl1.item)
        self.assertIs(item, nl2.item)

        self.assertEqual(item['name'], ['marta'])
        self.assertEqual(item['url'], ['http://www.scrapy.org'])
        self.assertEqual(item['image'], ['/images/logo.png'])
Exemplo n.º 4
0
    def __callback(self, response: HtmlResponse):
        """Populate a ``LeroymerlinItem`` from ``response`` and yield it.

        Most fields are extracted with XPath expressions evaluated against the
        response; the link comes from ``response.url`` and the category from
        this object's private ``__category`` attribute.
        """
        name_xpath = "//h1/text()"
        article_xpath = "//span[@slot='article']/text()"
        price_xpath = "//uc-pdp-price-view/span/text()"
        images_xpath = "//uc-pdp-media-carousel//img/@src"
        # Task 2: generic handler for product characteristics - collects the
        # data regardless of its type and amount by taking every <dt>/<dd>
        # text node of the definition list.
        details_xpath = "//dl[@class='def-list']//dt/text() | //dl[@class='def-list']//dd/text()"

        product_loader = ItemLoader(item=LeroymerlinItem(), selector=response)

        product_loader.add_xpath(Fields.name, name_xpath)
        product_loader.add_value(Fields.link, response.url)
        product_loader.add_xpath(Fields.article_number, article_xpath)
        product_loader.add_xpath(Fields.price, price_xpath)
        product_loader.add_xpath(Fields.image_links, images_xpath)
        product_loader.add_value(Fields.image_paths, [])
        product_loader.add_value(Fields.category, self.__category)
        product_loader.add_xpath(Fields.details, details_xpath)

        loaded_item = product_loader.load_item()
        yield loaded_item
Exemplo n.º 5
0
 def parse_application(self, response):
     """Populate a ``TraidingSsgeItem`` from ``response`` and yield it.

     The statement date is taken from the ``'appdate'`` entry of the
     originating request's ``meta`` (a missing entry raises ``KeyError``
     if ``meta`` is a plain dict - TODO confirm), the current URL from
     ``response.url``, and every other field from an XPath expression
     evaluated against the response.

     Yields:
         The item returned by ``loader.load_item()``.
     """
     loader = ItemLoader(item=TraidingSsgeItem(), selector=response)
     # Go through the public add_value() API rather than the private
     # _add_value() helper it delegates to.
     loader.add_value('statement_date', response.request.meta['appdate'])
     loader.add_xpath('new_or_used',
                      "normalize-space(//div[@class='condition']/text())")
     loader.add_xpath('location',
                      "//div[@class='location-time']/div[2]/p/span/text()")
     loader.add_xpath(
         'last_updated',
         'normalize-space(//div[@class="location-time"]/div[2]/descendant::span[2]/text())'
     )
     loader.add_xpath('product',
                      "normalize-space(//h2[@class='main-title']/text())")
     # NOTE(review): the trailing space inside 'market-item-price ' is kept
     # as-is; presumably it mirrors the page's class attribute - confirm.
     loader.add_xpath(
         'price',
         "normalize-space(//div[@class='market-item-price ']/text())")
     loader.add_xpath(
         'currency_symbol',
         "normalize-space(//div[@class='market-item-price ']/span/text())")
     loader.add_xpath(
         'applicant', "normalize-space(//div[@class='author_type']/text())")
     loader.add_value('current_app_url', response.url)
     loader.add_xpath(
         'all_apps_url',
         "//div[@class='author_type']/descendant::span/a/@href")
     loader.add_xpath(
         'agent_or_person',
         "normalize-space((//div[@class='author_type'])[1]/span/a/text())")
     loader.add_xpath(
         'number_of_apps',
         "normalize-space(//div[@class='author_type']/descendant::span[2]/text())"
     )
     loader.add_xpath(
         'product_description',
         "normalize-space(//span[@class='details_text']/text())")
     loader.add_xpath(
         'product_specification',
         "normalize-space(//div[@class='jobs_details']/span/text())")
     loader.add_xpath(
         'product_condition_description',
         "normalize-space(//div[@class='jobs_details'][2]/span[2]/text())")
     loader.add_xpath(
         'seen',
         "normalize-space(//div[@class='article_views']/span/text())")
     loader.add_xpath(
         'app_id',
         "normalize-space(//div[@class='market-item-id']/span/text())")
     loader.add_xpath(
         'phone', "normalize-space(//div[@class='numbers-wrap']/a/@href)")
     yield loader.load_item()