Пример #1
0
 def parse_src(self, response):
     """Build an article item from *response* and yield it if it has text.

     The body is the concatenation of the text nodes of every
     ``<p class="Normal">`` element, skipping nodes that contain
     "video"/"Video" and nodes that are exactly a newline.
     ``description`` and ``title`` are stored as the lists returned by
     ``extract()``. The item is also kept on ``self.item``.

     Yields:
         The populated item, only when the collected body is non-empty.
     """
     article = Item()
     self.item = article

     paragraphs = response.xpath('//p[@class="Normal"]/text()').extract()
     # Keep a text node only if it is neither a video mention nor a bare
     # line break.
     body = "".join(
         text for text in paragraphs
         if "video" not in text and "Video" not in text and text != "\n"
     )

     article["content"] = body
     article["description"] = response.xpath(
         '//h2[@class="description"]/text()').extract()
     article["title"] = response.xpath(
         '//h1[@class="title_news_detail mb10"]/text()').extract()

     if body:
         yield article
Пример #2
0
 def parse_src(self, response):
     """Build an article item from *response* and yield it if it has text.

     ``time``, ``title`` and ``description`` are stored as the lists
     returned by ``extract()``; ``content`` is the concatenation of the
     text nodes of the ``<p>`` children of ``div#divNewsContent``.
     The item is also kept on ``self.item``.

     Yields:
         The populated item, only when the collected body is non-empty.
     """
     article = Item()
     self.item = article

     # (field, XPath) pairs, in the order the fields are populated.
     list_fields = (
         ("time", '//span[@class="fr fon7 mr2 tt-capitalize"]/text()'),
         ("title", '//h1[@class="fon31 mgb15"]/text()'),
         ("description", '//h2[@class="fon33 mt1 sapo"]/text()'),
     )
     for field, path in list_fields:
         article[field] = response.xpath(path).extract()

     body = "".join(
         response.xpath('//div[@id="divNewsContent"]/p/text()').extract())
     article["content"] = body

     if body:
         yield article
Пример #3
0
 def parse_src(self, response):
     """Build an article item from *response* and yield it if it has text.

     ``time``, ``title`` and ``description`` are stored as the lists
     returned by ``extract()``; ``content`` is the concatenation of the
     text nodes of the ``<p>`` children of the
     ``the-article-body cms-body`` container. The item is also kept on
     ``self.item``.

     Yields:
         The populated item, only when the collected body is non-empty.
     """
     article = Item()
     self.item = article

     # (field, XPath) pairs, in the order the fields are populated.
     list_fields = (
         ("time", '//li[@class="the-article-publish cms-date"]/text()'),
         ("title", '//h1[@class="the-article-title cms-title"]/text()'),
         ("description",
          '//p[@class="the-article-summary cms-desc"]/text()'),
     )
     for field, path in list_fields:
         article[field] = response.xpath(path).extract()

     body_nodes = response.xpath(
         '//div[@class="the-article-body cms-body"]/p/text()').extract()
     body = "".join(body_nodes)
     article["content"] = body

     if body:
         yield article
Пример #4
0
 def parse_src(self, response):
     """Build an article item from *response* and yield it if it has text.

     ``time``, ``title`` and ``description`` are stored as the lists
     returned by ``extract()``; ``content`` is the concatenation of the
     text nodes of the ``<p>`` children of ``div#ArticleContent``.
     The description is taken from the ``<strong>`` text inside those
     same paragraphs. The item is also kept on ``self.item``.

     Yields:
         The populated item, only when the collected body is non-empty.
     """
     article = Item()
     self.item = article

     # (field, XPath) pairs, in the order the fields are populated.
     list_fields = (
         ("time",
          '//div[@class="ArticleDateTime"]/span[@class="ArticleDate"]/text()'),
         ("title",
          '//div[@class="ArticleDetail"]/h1[@class="title"]/text()'),
         ("description", '//div[@id="ArticleContent"]/p/strong/text()'),
     )
     for field, path in list_fields:
         article[field] = response.xpath(path).extract()

     body = "".join(
         response.xpath('//div[@id="ArticleContent"]/p/text()').extract())
     article["content"] = body

     if body:
         yield article
Пример #5
0
    def parse_ad(self, ad_html):
        """Parse one ad's HTML fragment into an item.

        Args:
            ad_html: HTML markup of a single ad, parsed with ``Selector``.

        Returns:
            An item with ``site``, ``source_id``, ``url``, ``title``,
            ``price`` and ``image`` set.

        Raises:
            AttributeError: if the title, price, link or image selector
                matches nothing (``.get()`` returns ``None``, which is
                then stripped).
        """
        ad = Selector(text=ad_html)

        # The ad id is taken as-is; it is the only value that is not stripped.
        ad_id = ad.css("div.ad-options::attr(ad-id)").get()

        link = ad.css("a.adName")
        headline = link.css("::text").get().strip()
        cost = ad.css("span.adPrice::text").get().strip()
        href = link.css("::attr(href)").get().strip()
        full_url = f"{self.base_url}{href}"

        # NOTE(review): the "https:" prefix suggests the src attribute is
        # protocol-relative -- confirm against the site markup.
        img_src = ad.css("div.adImgWrapper img::attr(src)").get().strip()
        img_url = f"https:{img_src}"

        item = Item()
        values = (
            ("site", self.site),
            ("source_id", ad_id),
            ("url", full_url),
            ("title", headline),
            ("price", cost),
            ("image", img_url),
        )
        for field, value in values:
            item[field] = value

        return item
Пример #6
0
    def parse_ad(self, ad_html):
        """Parse one ad's HTML fragment into an item.

        Args:
            ad_html: HTML markup of a single ad, parsed with ``Selector``.

        Returns:
            An item with ``site``, ``source_id``, ``url``, ``title``,
            ``price`` and ``image`` set.

        Raises:
            AttributeError: if the title, link, image or price selector
                matches nothing (``.get()`` returns ``None``, which is
                then stripped).
        """
        ad = Selector(text=ad_html)

        link = ad.css(".offer-title a")
        headline = link.css("::text").get().strip()
        href = link.css("::attr(href)").get().strip()
        full_url = f"{self.base_url}{href}"

        # The id is the middle piece when the URL is split on its last two
        # slashes, i.e. the second-to-last path segment.
        ad_id = full_url.rsplit("/", 2)[1]

        picture = ad.css("picture.advert-picture img::attr(data-src)")
        img_url = picture.get().strip()
        cost = ad.css("p.offer-price span::text").get().strip()

        item = Item()
        values = (
            ("site", self.site),
            ("source_id", ad_id),
            ("url", full_url),
            ("title", headline),
            ("price", cost),
            ("image", img_url),
        )
        for field, value in values:
            item[field] = value

        return item
Пример #7
0
def parse_item(response):
    """Extract a product item from a berrybenka product page.

    Every scraped field is stored as the list returned by ``extract()``.
    When no original (struck-through) price is found, ``original_price``
    falls back to the same list as ``price``. ``image_urls`` is the big
    photo's sources followed by the thumbnail sources.

    Args:
        response: page response exposing ``xpath()`` and ``url``.

    Returns:
        The populated item.
    """

    def grab(path):
        # Run one XPath query and return all matches as a list.
        return response.xpath(path).extract()

    item = Item()
    item['name'] = grab('//div[@class="prod-spec-title"]/h1/text()')
    item['brand'] = grab('//div[@class="prod-spec-title"]/h2/a/text()')
    item['description'] = grab('//p[@id="product_description"]/text()')

    current_price = grab('//div[@class="prod-spec-title"]/p/text()')
    item['price'] = current_price
    item['url'] = response.url

    # No separate original price on the page: reuse the current price.
    listed_price = grab('//div[@class="prod-spec-title"]/p/span/text()')
    item['original_price'] = listed_price or current_price

    main_photos = grab(
        '//div[@class="detail-photo left"]/div[@class="big-photo left"]/a/img/@src'
    )
    thumbnails = grab(
        '//div[@class="detail-photo left"]/div[@class="small-photo left"]/ul/li/a/img/@src'
    )
    item['image_urls'] = main_photos + thumbnails
    item['source'] = 'berrybenka'

    # Sizes are stored as the raw label texts, without further parsing.
    item['sizes'] = grab(
        '//div[@class="filter-size filter-content"]/ul/li/div/label/text()'
    )

    return item
Пример #8
0
def parse_item(response):
    """Extract a product item from a zalora product page.

    Every scraped field is stored as the list returned by ``extract()``,
    except ``image_urls``, which is passed through ``parse_images_urls``.
    When no lowest (discounted) price is found, ``price`` falls back to
    the same list as ``original_price``. ``description`` is queried with
    the same XPath as ``name``.

    Args:
        response: page response exposing ``xpath()`` and ``url``.

    Returns:
        The populated item.
    """

    def grab(path):
        # Run one XPath query and return all matches as a list.
        return response.xpath(path).extract()

    item = Item()
    item['name'] = grab('//div[@class="product__title fsm"]/text()')
    item['brand'] = grab(
        '//div[@class="js-prd-brand product__brand"]/a/text()')
    item['description'] = grab('//div[@class="product__title fsm"]/text()')
    item['url'] = response.url

    listed_price = grab('//span[@id="js-price"]/text()')
    item['original_price'] = listed_price

    # No discount on the page: the selling price is the listed price.
    lowest_price = grab(
        '//span[@class="js-detail_updateSku_lowestPrice"]/text()')
    item['price'] = lowest_price or listed_price

    raw_image_urls = grab(
        '//ul[@class="prd-moreImagesList ui-listItemBorder ui-listLight swiper-wrapper"]/li/a/img/@src'
    )
    item['image_urls'] = parse_images_urls(raw_image_urls)

    item['source'] = 'zalora'

    # Size options that are not disabled; stored as raw option texts.
    size_query = (
        '//option[(contains(@data-attribute,"size")) and not(contains(@disabled, "disabled"))]'
        '/text()'
    )
    item['sizes'] = grab(size_query)

    return item
Пример #9
0
    def parse_ad(self, response):
        """Parse an ad detail page into an item.

        Args:
            response: page response; ``response.meta["ad_id"]`` supplies
                the ad's source id and ``response.url`` its URL.

        Returns:
            An item with ``site``, ``source_id``, ``url``, ``title``,
            ``price`` and ``image`` set.

        Raises:
            AttributeError: if the title, image or price selector matches
                nothing (``.get()`` returns ``None``, which is then
                stripped).
        """
        page = response.css("div.uk-container.body")
        headline = page.css("div.table-cell-left > h1::text").get().strip()
        first_image = page.css(
            "ul#image-gallery li img::attr(src)").get().strip()

        # The price used to be read from div.price-item-discount, falling
        # back to div.price-item; it is now read from span.priceClassified.
        cost = page.css("span.priceClassified::text").get().strip()

        item = Item()
        values = (
            ("site", self.site),
            ("source_id", response.meta["ad_id"]),
            ("url", response.url),
            ("title", headline),
            ("price", cost),
            ("image", first_image),
        )
        for field, value in values:
            item[field] = value

        return item