示例#1
0
    def book_parse(self, response: HtmlResponse):
        """Yield one ``BookparserItem`` scraped from a book product page.

        All fields are read with XPath queries. ``authors`` collects every
        matching text node via ``extract()``; the remaining scraped fields
        take the first match via ``extract_first()``. The article text is
        passed through ``self.get_product_id`` before being stored as
        ``product_id``.
        """

        def first_text(query):
            # First match of an XPath query against this response.
            return response.xpath(query).extract_first()

        title = first_text("//div[@id='product-title']/h1/text()")
        page_url = response.url
        author_nodes = response.xpath(
            "//div[@class='product-description']/div[@class='authors']/text()"
        )
        publisher_name = first_text(
            "//div[@class='product-description']/div[@class='publisher']/a/text()"
        )
        amount = first_text(
            "//div[@class='product-description']/div[@class='buying']//span[@class='buying-price-val-number']/text()"
        )
        # The currency label is looked up under the "pricenew" span class,
        # while the amount above uses the plain "price" span class.
        currency_label = first_text(
            "//div[@class='product-description']/div[@class='buying']//span[@class='buying-pricenew-val-currency']/text()"
        )
        rating = first_text("//div[@id='rate']/text()")
        article_text = first_text(
            "//div[@class='product-description']//div[@class='articul']/text()"
        )

        yield BookparserItem(
            name=title,
            link=page_url,
            authors=author_nodes.extract(),
            publisher=publisher_name,
            price=amount,
            currency=currency_label,
            rate=rating,
            product_id=self.get_product_id(article_text),
        )
示例#2
0
    def book_parse(self, response: HtmlResponse):
        """Yield one ``BookparserItem`` for a book product page.

        Title, authors and rating are read with XPath queries; the three
        price variants are read with CSS selectors, one per ``span`` class.
        ``authors`` is the full ``extract()`` list, every other scraped
        field is the ``extract_first()`` result.
        """

        def css_first(selector):
            # First text match for a CSS selector on this response.
            return response.css(selector).extract_first()

        # Descriptive fields of the book.
        title_nodes = response.xpath('//div[@id="product-title"]/h1/text()')
        author_nodes = response.xpath(
            '//div[@class="authors"]/a[@data-event-label="author"]/text()')
        rating_nodes = response.xpath('//div[@id="rate"]/text()')

        # Emit a single item; the price is looked up under three different
        # span classes.
        yield BookparserItem(
            url=response.url,
            title=title_nodes.extract_first(),
            authors=author_nodes.extract(),
            price_normal=css_first('span.buying-price-val-number::text'),
            price_new=css_first('span.buying-pricenew-val-number::text'),
            price_old=css_first('span.buying-priceold-val-number::text'),
            rating=rating_nodes.extract_first(),
        )
示例#3
0
    def book_parse(self, response: HtmlResponse):
        """Yield one ``BookparserItem`` for a book product page.

        Title, rating and both prices are read with CSS selectors; the
        authors are read with an XPath query and kept as a list.
        ``price_new`` is always set to ``None`` by this callback.
        """
        fields = {
            # URL plus the descriptive fields of the book.
            'url': response.url,
            'title': response.css(
                'h1.item-detail__title::text').extract_first(),
            'authors': response.xpath(
                '//div[@class="item-tab__chars-list"]/div[1]/span[2]/a/text()'
            ).extract(),
            # Price: two different tags are queried.
            'price_normal': response.css(
                'div.item-actions__price b::text').extract_first(),
            'price_new': None,
            'price_old': response.css(
                'div.item-actions__price-old::text').extract_first(),
            'rating': response.css(
                'span.rating__rate-value::text').extract_first(),
        }

        # Emit a single item.
        yield BookparserItem(**fields)
示例#4
0
 def book_parser(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     ``author`` is the full ``extract()`` list of matches; the other scraped
     fields hold the ``extract_first()`` result of their query.
     """

     def first(query):
         # First match of an XPath query against this response.
         return response.xpath(query).extract_first()

     author_nodes = response.xpath("//a[@data-event-label='author']/text()")
     yield BookparserItem(
         name=first("//h1/text()"),
         url=response.url,
         author=author_nodes.extract(),
         main_price=first("//span[@class='buying-priceold-val-number']/text()"),
         sel_price=first("//span[@class='buying-pricenew-val-number']/text()"),
         rate=first("//div[@id='rate']/text()"),
     )
示例#5
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     The title is taken from the ``data-name`` attribute of the
     ``product-info`` div rather than from element text. ``authors`` is the
     full list of matches; the other scraped fields keep the first match.
     """
     title_nodes = response.xpath("//div[@id='product-info']/@data-name")
     author_nodes = response.xpath("//div[@class='authors'][1]/a/text()")
     old_price_nodes = response.xpath(
         "//span[@class='buying-priceold-val-number']/text()")
     new_price_nodes = response.xpath(
         "//span[@class='buying-pricenew-val-number']/text()")
     rate_nodes = response.xpath("//div[@id='rate']/text()")

     yield BookparserItem(
         link=response.url,
         title=title_nodes.extract_first(),
         authors=author_nodes.extract(),
         price=old_price_nodes.extract_first(),
         discount_price=new_price_nodes.extract_first(),
         rate=rate_nodes.extract_first(),
     )
 def book_parse(self, response: HtmlResponse):
     """Yield a ``BookparserItem`` holding two raw page fragments.

     ``general`` is the list of every text node under the
     ``item-actions__prices`` div; ``main`` is the text of the first
     ``script`` element following the product-card wrapper div.
     """
     # Price information: every text node inside the prices container.
     price_nodes = response.xpath(
         '//div[@class="item-actions__prices"]//text()')
     # All product information is located in the body of this script.
     script_nodes = response.xpath(
         '//div[@class="item-detail__wrapper js-product-card"]/following::script[1]/text()'
     )
     yield BookparserItem(
         general=price_nodes.extract(),
         main=script_nodes.extract_first(),
     )
 def process_item(self, response=HtmlResponse):
     """Yield a ``BookparserItem`` filled field by field from ``response``.

     ``url`` is the response URL; every other field stores the ``.get()``
     result of its XPath query.
     """
     # NOTE(review): ``response=HtmlResponse`` makes the class itself the
     # default value; an annotation (``response: HtmlResponse``) was
     # presumably intended -- confirm before touching the signature.
     xpath_by_field = (
         ('title', '//h1/text()'),
         ('author', '//div[contains(@class, "authors")]//a//text()'),
         ('price', '//span[contains(@class, "buying-priceold-val-number")]//text()'),
         ('discount_price', '//span[contains(@class, "buying-pricenew-val-number")]//text()'),
         ('rating', '//div[contains(@id, "rate")]//text()'),
     )

     item = BookparserItem()
     item['url'] = response.url
     for field, query in xpath_by_field:
         item[field] = response.xpath(query).get()
     yield item
示例#8
0
 def book_parce(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` for a book product page.

     The ``item-actions__price-old`` text is stored as ``price_basic`` and
     the ``<b>`` text inside ``item-actions__price`` as ``price_discount``.
     The rating is read with a CSS selector, everything else with XPath.
     """

     def first(query):
         # First match of an XPath query against this response.
         return response.xpath(query).extract_first()

     fields = {
         'link': response.url,
         'name': first('//h1/text()'),
         'author': response.xpath(
             '//div[@class="item-tab__chars-item"][1]//span/a/text()'
         ).extract(),
         'price_basic': first(
             '//div[@class="item-actions__price-old"]/text()'),
         'price_discount': first(
             '//div[@class="item-actions__price"]//b/text()'),
         'rating': response.css(
             'div.rating__value-box span.rating__rate-value::text'
         ).extract_first(),
     }
     yield BookparserItem(**fields)
示例#9
0
    def book_parse(self, response: HtmlResponse):
        """Yield one ``BookparserItem`` built from XPath queries.

        The link is taken from the page's ``og:url`` meta tag rather than
        from ``response.url``. ``author`` is the list of all text nodes
        under the first ``authors`` div; the other fields keep the first
        match of their query.
        """

        def first(query):
            # First match of an XPath query against this response.
            return response.xpath(query).extract_first()

        author_nodes = response.xpath('//div[@class="authors"][1]//text()')

        # Hand the collected values over to the item.
        yield BookparserItem(
            link=first("//meta[@property='og:url']/@content"),
            name=first('//h1/text()'),
            author=author_nodes.extract(),
            main_price=first(
                '//span[@class="buying-priceold-val-number"]/text()'),
            sale_price=first(
                '//span[@class="buying-pricenew-val-number"]/text()'),
            rating=first('//div[@id="rate"]/text()'),
        )
示例#10
0
 def book_parce(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` for a book product page.

     The regular (non-discounted) price block is queried first. When it is
     present its value becomes ``price_basic`` and ``price_discount`` is
     ``None``; otherwise the old/new price blocks are queried instead.
     """
     link_book = response.url
     name_book = response.xpath('//h1/text()').extract_first()
     author_book = response.xpath("//div[@class='authors']//a[@data-event-label='author']/text()").extract()
     price = response.xpath('//div[@class="buying-price"]//span[@class="buying-price-val"]//text()').extract_first()
     # Identity comparison is the idiomatic None check (PEP 8).
     if price is not None:
         priceold = price
         pricenew = None
     else:
         # NOTE(review): this branch stores ``extract()`` lists, whereas the
         # branch above stores a single ``extract_first()`` value -- confirm
         # that downstream consumers expect both shapes.
         priceold = response.xpath('//div[@class="buying-priceold"]//span[@class="buying-priceold-val-number"]/text()').extract()
         pricenew = response.xpath('//div[@class="buying-pricenew"]//div[@class="buying-pricenew-val"]//text()').extract()
     rating_book = response.xpath('//div[@id="rate"]/text()').extract_first()
     yield BookparserItem(link=link_book, name=name_book, author=author_book, price_basic=priceold, price_discount=pricenew, rating=rating_book)
示例#11
0
 def process_item(self, response=HtmlResponse):
     """Yield a ``BookparserItem`` filled field by field from ``response``.

     ``url`` is the response URL; every other field stores the ``.get()``
     result of an XPath query that matches on a class-name substring.
     """
     # NOTE(review): ``response=HtmlResponse`` makes the class itself the
     # default value; an annotation (``response: HtmlResponse``) was
     # presumably intended -- confirm before touching the signature.
     xpath_by_field = (
         ('title', '//h1/text()'),
         ('author', '//span[contains(@class, "__chars-value")]//a//text()'),
         ('price', '//div[contains(@class, "__price-old")]//text()'),
         ('discount_price', '//div[contains(@class, "__price")]//b//text()'),
         ('rating', '//div[contains(@class, "__rate-value")]//text()'),
     )

     item = BookparserItem()
     item['url'] = response.url
     for field, query in xpath_by_field:
         item[field] = response.xpath(query).get()
     yield item
示例#12
0
    def book_parse(self, response: HtmlResponse):
        """Yield one ``BookparserItem`` built from XPath queries.

        Every scraped field, including ``authors``, keeps only the first
        match of its query. ``price`` is the ``<b>`` text inside the price
        div, ``currency`` is the first text node directly under that same
        div, and ``product_id`` is the ``data-product`` attribute of the
        add-to-basket link.
        """

        def first(query):
            # First match of an XPath query against this response.
            return response.xpath(query).extract_first()

        fields = {
            'name': first(
                "//div[@class='item-detail__informations-box']/h1[@class='item-detail__title']/text()"),
            'link': response.url,
            'authors': first("//a[contains(@data-link,'author')]/text()"),
            'publisher': first(
                "//div[@class='item-tab']//a[contains(@href,'brand')]/text()"),
            'price': first(
                "//div[@class='item-actions__buttons-box']//div[@class='item-actions__price']/b/text()"),
            'currency': first(
                "//div[@class='item-actions__buttons-box']//div[@class='item-actions__price']/text()"),
            'rate': first(
                "//div[@class='item-detail__information-item']//span[@class='rating__rate-value']/text()"),
            'product_id': first(
                "//a[@class='button _block _fill _d _item js-toggle js-product-card-button js-add2basket']/@data-product"),
        }
        yield BookparserItem(**fields)
示例#13
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` for a book product page.

     Name, author and both prices are read with CSS selectors, the rating
     with an XPath query. Every field keeps only the first match.
     """
     # NOTE(review): the page URL used to be read into an unused local and
     # was never stored on the item; if the item declares a link field,
     # pass ``response.url`` to it here.
     name = response.css('h1::text').extract_first()
     author = response.css(
         'div.authors a.analytics-click-js::text').extract_first()
     price = response.css(
         'div.buying span.buying-priceold-val-number::text').extract_first()
     sale_price = response.css(
         'div.buying span.buying-pricenew-val-number::text').extract_first()
     rating = response.xpath('//div[@id="rate"]/text()').extract_first()
     yield BookparserItem(name=name,
                          author=author,
                          price=price,
                          sale_price=sale_price,
                          rating=rating)
示例#14
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     ``authors`` is the full list of link texts under the ``authors`` div;
     the other scraped fields keep the first match of their query.
     """

     def first(query):
         # First match of an XPath query against this response.
         return response.xpath(query).extract_first()

     author_nodes = response.xpath("//div[@class='authors']/a/text()")
     fields = {
         'name': first("//h1/text()"),
         'url': response.url,
         'authors': author_nodes.extract(),
         'main_price': first(
             "//span[@class='buying-priceold-val-number']/text()"),
         'sale_price': first(
             "//span[@class='buying-pricenew-val-number']/text()"),
         'rating': first("//div[@id='rate']/text()"),
     }
     yield BookparserItem(**fields)
示例#15
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     ``authors`` is the full list of matches; the other scraped fields keep
     the first match. ``price`` comes from the ``item-actions__price-old``
     div and ``discount_price`` from the ``<b>`` inside
     ``item-actions__price``.
     """
     title_nodes = response.xpath("//h1[@class='item-detail__title']/text()")
     # Authors are located through the characteristics row whose key span
     # carries the literal label text, then the links inside that row.
     author_nodes = response.xpath(
         "//span[@class='item-tab__chars-key' and text()='Автор:']/..//a/text()"
     )
     old_price_nodes = response.xpath(
         "//div[@class='item-actions__price-old']/text()")
     current_price_nodes = response.xpath(
         "//div[@class='item-actions__price']/b/text()")
     rate_nodes = response.xpath("//span[@class='rating__rate-value']/text()")

     yield BookparserItem(
         link=response.url,
         title=title_nodes.extract_first(),
         authors=author_nodes.extract(),
         price=old_price_nodes.extract_first(),
         discount_price=current_price_nodes.extract_first(),
         rate=rate_nodes.extract_first(),
     )
示例#16
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` for a book product page.

     The author is read with an XPath query, every other field with a CSS
     selector. Every field keeps only the first match.
     """
     # NOTE(review): the page URL used to be read into an unused local and
     # was never stored on the item; if the item declares a link field,
     # pass ``response.url`` to it here.
     name = response.css('h1::text').extract_first()
     author = response.xpath(
         '//div[@class="item-tab__chars-item"]//a[@class="item-tab__chars-link"]/text()'
     ).extract_first()
     price = response.css(
         'div.item-actions__price-old::text').extract_first()
     sale_price = response.css(
         'div.item-actions__prices div.item-actions__price b::text'
     ).extract_first()
     rating = response.css(
         'div.rating span.rating__rate-value::text').extract_first()
     yield BookparserItem(name=name,
                          author=author,
                          price=price,
                          sale_price=sale_price,
                          rating=rating)
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     Every scraped field keeps only the first match of its query.
     ``bonus_price`` and ``no_bonus_price`` receive the same value: the
     first text node directly under the ``item-actions__price`` div.
     """

     def first(query):
         # First match of an XPath query against this response.
         return response.xpath(query).extract_first()

     # One lookup feeds both the bonus and the no-bonus price fields.
     shared_price = first("//div[@class='item-actions__price']/text()")

     yield BookparserItem(
         name=first("//h1[@class='item-detail__title']/text()"),
         author=first(
             "//a[@class='item-tab__chars-link js-data-link']/text()"),
         book_link=response.url,
         wo_bonus_price=first(
             "//div[@class='item-actions__price-old']/text()"),
         bonus_price=shared_price,
         no_bonus_price=shared_price,
         rate=first("//div[@class='live-lib__rate-value']/text()"),
     )
示例#18
0
    def book_parse(self, response: HtmlResponse):
        """Yield one ``BookparserItem`` built from XPath queries.

        The link is taken from the page's ``og:url`` meta tag rather than
        from ``response.url``. ``author`` is the list of link texts inside
        the first ``item-tab__chars-item`` div; the other fields keep the
        first match of their query.
        """

        def first(query):
            # First match of an XPath query against this response.
            return response.xpath(query).extract_first()

        fields = {
            'link': first("//meta[@property='og:url']/@content"),
            'name': first('//h1/text()'),
            'author': response.xpath(
                '//div[@class="item-tab__chars-item"][1]//a//text()'
            ).extract(),
            'main_price': first(
                '//div[@class="item-actions__price-old"]/text()'),
            'sale_price': first(
                '//div[@class="item-actions__price"]/b/text()'),
            'rating': first('//span[@class="rating__rate-value"]/text()'),
        }

        # Hand the collected values over to the item.
        yield BookparserItem(**fields)
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     Every scraped field keeps only the first match of its query. Three
     price spans are queried separately: ``buying-priceold-val-number``,
     ``buying-pricenew-val-number`` and ``buying-price-val-number``.
     """
     title_nodes = response.xpath("//div[@id='product-title']/h1/text()")
     author_nodes = response.xpath("//div[@class='authors']/a/text()")
     old_price_nodes = response.xpath(
         "//span[@class='buying-priceold-val-number']/text()")
     new_price_nodes = response.xpath(
         "//span[@class='buying-pricenew-val-number']/text()")
     plain_price_nodes = response.xpath(
         "//span[@class='buying-price-val-number']/text()")
     rate_nodes = response.xpath("//div[@id='rate']/text()")

     yield BookparserItem(
         name=title_nodes.extract_first(),
         author=author_nodes.extract_first(),
         book_link=response.url,
         wo_bonus_price=old_price_nodes.extract_first(),
         bonus_price=new_price_nodes.extract_first(),
         no_bonus_price=plain_price_nodes.extract_first(),
         rate=rate_nodes.extract_first(),
     )
示例#20
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     ``authors`` is the full list of matches; the other scraped fields keep
     the first match. ``price`` is the ``<b>`` text inside the
     ``item-actions__price`` div and ``currency`` is the first text node
     directly under that same div.
     """

     def first(query):
         # First match of an XPath query against this response.
         return response.xpath(query).extract_first()

     author_nodes = response.xpath(
         "//a[@class = 'item-tab__chars-link js-data-link']/text()")

     fields = {
         'name': first("//h1/text()"),
         'link': response.url,
         'price': first("//div[@class='item-actions__price']/b/text()"),
         'authors': author_nodes.extract(),
         'base_price': first(
             "//div[@class = 'item-actions__price-old']/text()"),
         'rating': first("//span[@class = 'rating__rate-value']/text()"),
         'currency': first("//div[@class='item-actions__price']/text()"),
     }
     yield BookparserItem(**fields)
示例#21
0
 def book_parse(self, response: HtmlResponse):
     """Yield one ``BookparserItem`` built from XPath queries on ``response``.

     ``authors``, ``publisher`` and ``general`` are stored as full
     ``extract()`` lists; ``title``, ``cover_image``, ``ISBN`` and ``rate``
     keep only the first match. The title comes from a ``data-name``
     attribute and the cover from the image's ``data-src`` attribute.
     """

     def first(query):
         # First match of an XPath query against this response.
         return response.xpath(query).extract_first()

     def every(query):
         # All matches of an XPath query against this response.
         return response.xpath(query).extract()

     heading = first('//div[@id="product-left-column"]//@data-name')
     writers = every('//a[@data-event-label="author"]/text()')
     cover_src = first('//div[@id="product-image"]//img/@data-src')
     publisher_texts = every('//div[@class="publisher"]//text()')
     # Price information: all text nodes of the price blocks in "buying".
     price_texts = every(
         '//div[@class="buying"]/div[contains(@class, "buying-price")]//text()'
     )
     isbn_text = first('//div[@class="isbn"]/text()')
     rating = first('//div[@id="rate"]/text()')

     yield BookparserItem(
         title=heading,
         authors=writers,
         book_url=response.url,
         general=price_texts,
         cover_image=cover_src,
         publisher=publisher_texts,
         ISBN=isbn_text,
         rate=rating,
     )