Пример #1
0
    def book_parse(self, response: HtmlResponse):
        """Build one ``BooksparserItem`` from a book detail page.

        Every scraped field is read from ``response`` with an XPath query.
        ``interpreter`` and ``editor`` are not scraped here and are always
        passed as ``None``; ``source`` is the first entry of
        ``self.allowed_domains``.

        Args:
            response: Downloaded book page to query.

        Yields:
            A single ``BooksparserItem`` filled with the scraped values.
        """

        def first(query):
            # Single-value field: first node matched by the query.
            return response.xpath(query).extract_first()

        def every(query):
            # Multi-value field: every node matched by the query.
            return response.xpath(query).extract()

        # Label-driven queries locate the caption <span>, step up to its
        # parent and read the values nested below it.
        scraped = {
            'name': first('//h1/text()'),
            'authors': every(
                "//span[contains(text(), 'Автор:')]/..//a/text()"),
            'interpreter': None,
            'editor': None,
            'publisher': first(
                "//span[contains(text(), 'Издательство:')]/..//a/text()"),
            'series': first(
                "//span[contains(text(), 'Серия:')]/..//a/text()"),
            'isbn': first(
                "//input[contains(@class, 'isbn__code')]/@value"),
            'pages': first(
                "//span[contains(text(), 'Количество страниц:')]"
                "/..//span[2]/text()"),
            'price': first(
                "//div[@class='item-actions__price']/b/text()"),
            # The description is kept as a list of all text fragments.
            'description': every(
                "//div[contains(@class, 'collapse-panel__panel')]"
                "/div[contains(@class, 'collapse-panel__text')]//text()"),
        }

        yield BooksparserItem(source=self.allowed_domains[0], **scraped)
Пример #2
0
    def parse_book(self, response: HtmlResponse):
        """Scrape a book page into a single ``BooksparserItem``.

        ``price`` is read from the ``buying-priceold-val-number`` span; when
        that query yields ``None`` the ``buying-price-val-number`` span is
        used instead. ``special_price`` comes from the
        ``buying-pricenew-val-number`` span and is passed through as-is.

        Args:
            response: Downloaded book page to query.

        Yields:
            One ``BooksparserItem`` with the page URL, title, authors, both
            prices, rating, genre list and annotation.
        """
        select = response.xpath

        page_url = response.url
        heading = select("//h1/text()").extract_first()
        author_names = select(
            "//a[@data-event-label='author']/text()").extract()
        rating = select("//div[@id='rate']/text()").extract_first()
        genres = select(
            "//div[@id='thermometer-books']/span/a/span/text()").extract()
        summary = select("//h2[1]/../p/text()").extract_first()

        list_price = select(
            "//span[@class='buying-priceold-val-number']/text()"
        ).extract_first()
        if list_price is None:
            # No "old" price element was found: use the plain price span.
            list_price = select(
                "//span[@class='buying-price-val-number']/text()"
            ).extract_first()

        new_price = select(
            "//span[@class='buying-pricenew-val-number']/text()"
        ).extract_first()

        yield BooksparserItem(
            href=page_url,
            title=heading,
            authors=author_names,
            special_price=new_price,
            price=list_price,
            rate=rating,
            genre=genres,
            annotation=summary,
        )
Пример #3
0
 def handle_book_data(self, response: HtmlResponse):
     """Extract book details from ``response`` and yield one item.

     The current price is read from the ``buying-pricenew-val-number`` span,
     falling back to ``buying-price-val-number`` when the first query yields
     ``None``. The initial price is read from ``buying-priceold-val-number``
     and defaults to the current price when that query yields ``None``.

     Args:
         response: Downloaded book page; its originating request supplies
             the link stored on the item.

     Yields:
         One ``BooksparserItem`` with title, prices, link, author and
         rating.
     """

     def text_of(query):
         # First match of the XPath query on this page.
         return response.xpath(query).extract_first()

     title = text_of('//div[@class="prodtitle"]/h1/text()')

     current = text_of('//span[@class="buying-pricenew-val-number"]/text()')
     if current is None:
         current = text_of('//span[@class="buying-price-val-number"]/text()')

     original = text_of('//span[@class="buying-priceold-val-number"]/text()')
     if original is None:
         # No separate old price: both price fields carry the same value.
         original = current

     url = response.request.url
     writer = text_of('//a[@data-event-label="author"]/text()')
     score = text_of('//div[@id="rate"]/text()')

     yield BooksparserItem(
         book_title=title,
         book_price=current,
         book_initial_price=original,
         book_link=url,
         book_author=writer,
         book_rating=score,
     )
Пример #4
0
 def handle_book_data(self, response: HtmlResponse):
     """Extract book details from ``response`` and yield one item.

     The initial price is read from the ``item-actions__price-old`` block
     and defaults to the current price when that query yields ``None``.

     Args:
         response: Downloaded book page; its originating request supplies
             the link stored on the item.

     Yields:
         One ``BooksparserItem`` with title, prices, link, author and
         rating.
     """

     def text_of(query):
         # First match of the XPath query on this page.
         return response.xpath(query).extract_first()

     title = text_of('//h1[@class="item-detail__title"]/text()')
     current = text_of('//div[@class="item-actions__price"]//b/text()')

     original = text_of('//div[@class="item-actions__price-old"]/text()')
     if original is None:
         # No old-price block: both price fields carry the same value.
         original = current

     url = response.request.url
     # The class attribute must match this exact string, both tokens included.
     writer = text_of(
         '//a[@class="item-tab__chars-link js-data-link"]/text()')
     score = text_of('//span[@class="rating__rate-value"]/text()')

     yield BooksparserItem(
         book_title=title,
         book_price=current,
         book_initial_price=original,
         book_link=url,
         book_author=writer,
         book_rating=score,
     )
Пример #5
0
    def book_parse(self, response: HtmlResponse):
        """Scrape a book page into a single ``BooksparserItem``.

        ``basic_price`` is read from ``span.buying-priceold-val-number``.
        When that element is missing, ``span.buying-price-val-number`` is
        tried instead so the item does not end up with no price at all.
        ``discount_price`` is read from ``span.buying-pricenew-val-number``
        and is passed through as-is.

        Args:
            response: Downloaded book page to query.

        Yields:
            One ``BooksparserItem`` with name, link, author, both prices and
            rating.
        """
        name = response.css("h1::text").extract_first()
        link = response.url
        author = response.css(
            "div.authors a.analytics-click-js::text").extract_first()

        basic_price = response.css(
            "span.buying-priceold-val-number::text").extract_first()
        if basic_price is None:
            # Presumably a page without a discount carries only the plain
            # price element; without this fallback both price fields would
            # be None for such a book.
            basic_price = response.css(
                "span.buying-price-val-number::text").extract_first()

        discount_price = response.css(
            "span.buying-pricenew-val-number::text").extract_first()
        rating = response.xpath("//div[@id='rate']/text()").extract_first()

        # The leftover debug ``print()`` that followed the yield was removed:
        # it wrote an empty line to stdout for every scraped book.
        yield BooksparserItem(name=name,
                              link=link,
                              author=author,
                              basic_price=basic_price,
                              discount_price=discount_price,
                              rating=rating)
Пример #6
0
    def parse_book(self, response: HtmlResponse):
        """Scrape a book page into a single ``BooksparserItem``.

        Authors are collected only when the first row of the
        ``item-tab__chars-list`` block is labelled ``'Автор:'``; otherwise
        ``authors`` is ``None``.

        Args:
            response: Downloaded book page to query.

        Yields:
            One ``BooksparserItem`` with the page URL, title, authors, both
            prices, rating, genre list and annotation.
        """
        select = response.xpath

        page_url = response.url
        heading = select("//h1/text()").extract_first()

        # The author links are trusted only if the first characteristics row
        # really is the author row.
        first_label = select(
            "//div[@class='item-tab__chars-list']/div[1]/span/text()"
        ).extract_first()
        author_names = (
            select(
                "//div[@class='item-tab__chars-list']/div[1]//a/text()"
            ).extract()
            if first_label == 'Автор:'
            else None
        )

        rating = select(
            "//div[@class='rating']//span[@class='rating__rate-value']/text()"
        ).extract_first()
        genres = select(
            "//div[@class='breadcrumbs__list']/div/a/text()").extract()
        summary = select("//div[@class='text-block-d']/p/text()").extract()
        old_price = select(
            "//div[@class='item-actions__price-old']/text()"
        ).extract_first()
        current_price = select(
            "//div[@class='item-actions__price']/b/text()"
        ).extract_first()

        yield BooksparserItem(
            href=page_url,
            title=heading,
            authors=author_names,
            special_price=current_price,
            price=old_price,
            rate=rating,
            genre=genres,
            annotation=summary,
        )
Пример #7
0
    def book_parse(self, response: HtmlResponse):
        """Scrape a book page into a single ``BooksparserItem``.

        All fields are read with CSS selectors: ``basic_price`` from
        ``div.item-actions__price-old`` and ``discount_price`` from the
        ``<b>`` inside ``div.item-actions__price``. Both are passed through
        as-is.

        Args:
            response: Downloaded book page to query.

        Yields:
            One ``BooksparserItem`` with name, link, author, both prices and
            rating.
        """
        name = response.css("h1::text").extract_first()
        link = response.url
        # Only the author link inside the currently active tab is read.
        author = response.css(
            "div.js-tab-switcher-item._active a.item-tab__chars-link::text"
        ).extract_first()
        basic_price = response.css(
            "div.item-actions__price-old::text").extract_first()
        discount_price = response.css(
            "div.item-actions__price b::text").extract_first()
        rating = response.css("span.rating__rate-value::text").extract_first()

        # The leftover debug ``print()`` that followed the yield was removed:
        # it wrote an empty line to stdout for every scraped book.
        yield BooksparserItem(name=name,
                              link=link,
                              author=author,
                              basic_price=basic_price,
                              discount_price=discount_price,
                              rating=rating)
Пример #8
0
    def book_parse(self, response: HtmlResponse):
        """Build one ``BooksparserItem`` from a book detail page.

        Every field is read from ``response`` with an XPath query. The
        description is looked up in up to three places, in order, and the
        first query that does not yield ``None`` wins. ``source`` is the
        first entry of ``self.allowed_domains``.

        Args:
            response: Downloaded book page to query.

        Yields:
            A single ``BooksparserItem`` filled with the scraped values.
        """

        def first(query):
            # Single-value field: first node matched by the query.
            return response.xpath(query).extract_first()

        title = first('//h1/text()')
        author_names = response.xpath(
            "//div[contains(text(), 'Автор:')]/a/text()").extract()
        translator = first("//div[contains(text(), 'Переводчик:')]/a/text()")
        book_editor = first("//div[contains(text(), 'Редактор:')]/a/text()")
        publishing_house = first("//div[@class='publisher']/a/text()")
        book_series = first("//div[@class='series']/a/text()")
        isbn_code = first("//div[@class='isbn']/text()")
        page_count = first("//div[@class='pages2']/text()")
        cost = first("//span[@class='buying-pricenew-val-number']/text()")

        # Description sources from most to least specific; later queries run
        # only while the earlier ones have produced nothing.
        description_queries = (
            "//div[@id='fullannotation']/p//text()",
            "//div[@id='product-about']/p//text()",
            "//h2/..//*/p//text()",
        )
        blurb = None
        for query in description_queries:
            blurb = first(query)
            if blurb is not None:
                break

        yield BooksparserItem(
            source=self.allowed_domains[0],
            name=title,
            authors=author_names,
            interpreter=translator,
            editor=book_editor,
            publisher=publishing_house,
            series=book_series,
            isbn=isbn_code,
            pages=page_count,
            price=cost,
            description=blurb,
        )