Exemplo n.º 1
0
 def parse_product(self, response):
     """Yield one product per purchasable row of the page's ``stockMatrix``.

     The fields shared by every option (url, name, sku, category, image and
     brand) are loaded once from the page. Each ``stockMatrix`` row is then
     consumed positionally: one value per ``.skuAttribute`` element found on
     the page (each is appended to the name), followed by the identifier, the
     stock text and the price.

     Rows whose stock text starts with ``'Unavailable'`` are skipped.

     Raises:
         IndexError: if no ``stockMatrix = ...;`` assignment is found in the
             page scripts.
     """
     base_loader = ProductLoader(item=Product(), response=response)
     base_loader.add_xpath('url', '//link[@rel="canonical"]/@href')
     base_loader.add_xpath('name', '//span[@id="productName"]//text()')
     base_loader.add_xpath('sku', '//span[@id="productEAN"]/text()[last()]')
     base_loader.add_xpath('category', '//div[@id="breadcrumb"]/ul/li[position()>1]/a/span/text()')
     base_loader.add_css('image_url', '.productImageItem ::attr(href)')
     brand = response.css('.brand ::text').extract_first()
     # NOTE(review): presumably the page prints the literal text "null" for
     # products without a brand - confirm against a live page.
     if brand != "null":
         base_loader.add_value('brand', brand)
     base_item = base_loader.load_item()

     matrix_re = re.compile('stockMatrix = (.+?);', re.DOTALL)
     matrix_json = response.xpath('//script/text()').re(matrix_re)
     options = json.loads(matrix_json[0])

     # The attribute count belongs to the page, not to a single row, so it is
     # queried once instead of once per option.
     attribute_count = len(response.css('.skuAttribute'))

     for option in options:
         loader = ProductLoader(item=Product(), response=response)
         loader.add_value(None, base_item)
         values = iter(option)
         opt_name = ''
         for _ in range(attribute_count):
             # Builtin next() works on Python 2.6+ and 3; iterator.next()
             # exists on Python 2 only.
             opt_name = next(values)
             loader.add_value('name', opt_name)
         # The last attribute value is used to look up a colour-specific image.
         colour_url = response.xpath('//input[@class="colourImageUrl"][@name="%s"]/@value' % opt_name).extract_first()
         if colour_url:
             loader.replace_value('image_url', 'http://media.littlewoods.com/i/littlewoods/%s?$1064x1416_standard$' % colour_url)
         loader.replace_value('identifier', next(values))
         stock = next(values)
         if stock.startswith('Unavailable'):
             continue
         # 1 when the text does not mention "Out of stock", otherwise 0.
         loader.replace_value('stock', int('Out of stock' not in stock))
         loader.replace_value('price', next(values))
         yield loader.load_item()
Exemplo n.º 2
0
 def parse_product(self, response):
     """Yield a product only for pages whose condition label contains "Used".

     The identifier is the last path segment of the URL. SKU, brand, name and
     price are pulled from inline script text with regular expressions. The
     raw condition text is attached to the item as ``metadata['condition']``.

     Pages with no condition label, or with a label that does not contain
     "Used" after title-casing, yield nothing.
     """
     condition = response.css('.condition span::text').extract_first()
     # extract_first() returns None when the label is absent; treat that like
     # any other non-"Used" page instead of failing on None.title().
     if not condition or 'Used' not in condition.title():
         return

     loader = ProductLoader(response=response, item=Product())
     identifier = response.url.split('/')[-1]
     loader.add_value('identifier', identifier)
     loader.add_xpath('sku', '//script/text()', re='skuCode": *"(.+)?"')
     # The first and last breadcrumb links are left out of the category path.
     categories = response.css('.f-breadcrumb a::text').extract()[1:-1]
     loader.add_xpath('brand',
                      '//script/text()',
                      re='manufacturerName": *"(.+)?"')
     loader.add_value('category', categories)
     loader.add_xpath('name',
                      '//script/text()',
                      re='fullProductName": *"(.+)?"')
     loader.add_xpath('price',
                      '//script/text()',
                      re=r'currentPrice": *([.\d]+)?')
     loader.add_value('url', response.url)
     loader.add_css('image_url', '.f-slideshow img::attr(src)')

     metadata = WexMeta()
     metadata['condition'] = condition
     product = loader.load_item()
     product['metadata'] = metadata
     yield product
Exemplo n.º 3
0
 def parse_product(self, response):
     """Schedule every variant page, then yield the product shown on this page.

     The variant data is an escaped JSON object embedded in a script; it is
     unescaped and decoded. One request per entry of ``data['Variants']`` is
     yielded with this method as callback, followed by the item for the
     currently selected variant.

     Up to three selected variant values are appended to the name and stored
     in the item's ``metadata`` dict under their header.

     Raises:
         IndexError: if the variant JSON, the price or the stock flag cannot
             be found on the page.
     """
     raw = response.xpath('//script/text()').re('{\\\\"Variants.+}')[0]
     data = json.loads(raw.replace('\\"', '"'))
     for variant in data['Variants']:
         url = response.urljoin(variant['ProductPLU'])
         yield Request(make_variant_url(url), self.parse_product)

     loader = ProductLoader(item=Product(), response=response)
     identifier = response.xpath('//input[@id="ProductPLU"]/@value').extract_first()
     loader.add_value('identifier', identifier)
     loader.add_value('sku', identifier)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '(//h1[@itemprop="name"]/text())[1]')

     metadata = {}
     # Keys are Variant1..Variant3; range() replaces the Python-2-only xrange().
     for index in range(1, 4):
         variant_name = data['Variant%dSelected' % index]
         if variant_name and variant_name != 'N/A':
             loader.add_value('name', variant_name)
             metadata[data['Variant%dHeader' % index]] = variant_name
             if 'size' in variant_name.lower():
                 # Drops the first five characters - presumably a "Size "
                 # style prefix; TODO confirm against live data.
                 metadata['size'] = variant_name[5:].strip()

     price = response.css('.price-value .currency::text').extract()
     # The last matching price text is the one used.
     loader.add_value('price', price[-1])
     category = response.css('.breadcrumb a::text').extract()
     loader.add_value('category', category[1:])
     loader.add_css('image_url', '.product-image::attr(src)')
     loader.add_xpath('brand', '//meta[@itemprop="brand"]/@content')
     loader.add_value('shipping_cost', '7.95')
     stock = response.css('.product-stock-widget::attr(ng-init)').re(r'AvailableOnline: (\w+)')[0]
     if stock != 'true':
         loader.add_value('stock', 0)
     item = loader.load_item()
     item['metadata'] = metadata
     yield item
Exemplo n.º 4
0
 def parse_product(self, response):
     """Load the single product described by the page and yield it.

     The identifier comes from the ``ecomm_prodid`` value in the page scripts,
     the name is the whitespace-normalised ``<h1>`` text, and the category
     path is the breadcrumb without its first entry. Stock is set to 0 when
     the default in-stock marker containing "in Stock" is not present.
     """
     loader = ProductLoader(Product(), response=response)

     product_ids = response.xpath('//script/text()').re(
         r'ecomm_prodid: *(\d+),')
     loader.add_value('identifier', product_ids)
     loader.add_value('url', response.url)

     # Join all heading fragments, then collapse every whitespace run.
     heading = ''.join(response.xpath('//h1//text()').extract())
     loader.add_value('name', ' '.join(heading.split()))

     loader.add_css('price', 'span.GBP::attr(content)')
     loader.add_xpath('sku', '//span[@id="js-product-reference"]/@data-ref')

     crumbs = response.xpath(
         '//div[contains(@class, "breadcrumb")]//a/span/text()').extract()
     loader.add_value('category', crumbs[1:])

     zoom_links = response.xpath(
         '//a[@class="product__image__zoom-link"]/@href').extract()
     if zoom_links:
         image_url = response.urljoin(zoom_links[0])
     else:
         image_url = ''
     loader.add_value('image_url', image_url)

     brand_texts = response.xpath(
         '//span[@class="product-content__title--brand"]/text()').extract()
     if brand_texts:
         brand = brand_texts[0].strip()
     else:
         brand = ''
     loader.add_value('brand', brand)

     in_stock_marker = response.xpath(
         '//span[@id="js-product-in-stock-default" and contains(text(), "in Stock")]'
     )
     if not in_stock_marker:
         loader.add_value('stock', 0)

     yield loader.load_item()
Exemplo n.º 5
0
 def parse_product(self, response):
     """Load the single product described by the page and yield it.

     The base product code is used for both identifier and SKU. The category
     path is the stripped breadcrumb without its last link and without the
     first "Designer" entry. Stock is set to 0 only when stock level codes
     for this product were found and none of them is exactly "inStock".
     """
     product_code = response.css(
         'input.baseProductCode::attr(value)').extract_first()
     title = response.css(
         'h1.pdp-headline span.pdp-description::text').extract_first()

     loader = ProductLoader(Product(), response=response)
     loader.add_value('identifier', product_code)
     loader.add_value('sku', product_code)
     loader.add_value('url', response.url)
     loader.add_value('name', title)
     loader.add_css('price', 'p.pdp-price::text')

     crumbs = [
         crumb.strip()
         for crumb in response.css('div#breadcrumb a::text').extract()[:-1]
     ]
     try:
         crumbs.remove('Designer')
     except ValueError:
         # No "Designer" entry in the breadcrumb; nothing to drop.
         pass
     loader.add_value('category', crumbs)

     main_image = response.xpath('//@data-main-img-url').extract_first()
     if main_image:
         loader.add_value('image_url', response.urljoin(main_image))

     loader.add_value(
         'brand', response.css('h1.pdp-headline a::text').extract_first())

     stock_codes = response.xpath('//@data-stl-json').re(
         '%s.+?stockLevelCode":"(.+?)"' % product_code)
     if stock_codes and not any(code == 'inStock' for code in stock_codes):
         loader.add_value('stock', 0)

     yield loader.load_item()
Exemplo n.º 6
0
 def parse_product(self, response):
     """Yield one product per row of the page's product table.

     Shared fields (url, name, category, image, stock) are loaded once from
     the page. Each table row then overrides identifier, SKU and price, and
     appends its own name unless that text is already part of the page name.

     A row identifier is ``<sku>-<md5 of page name + row name>``.

     Raises:
         IndexError: if a row has no parenthesised product code.
     """
     loader = ProductLoader(Product(), response=response)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
     sku = response.xpath('//div[@itemprop="description"]/div/div[last()]/text()').extract_first()
     loader.add_value('identifier', sku)
     loader.add_value('sku', sku)
     category = response.css('.breadcrumbs a::text').extract()[1:]
     # The last breadcrumb entry is plain text rather than a link.
     category += response.css('.breadcrumbs li:last-of-type::text').extract()
     loader.add_value('category', category)
     image_url = response.css('img.gallery-main-image::attr(src)').extract_first()
     if image_url:
         loader.add_value('image_url', response.urljoin(image_url))
     if not response.css('.in-stock'):
         loader.add_value('stock', 0)
     item = loader.load_item()

     for option in response.css('table.product-table tbody tr'):
         loader = ProductLoader(Product(), selector=option)
         loader.add_value(None, item)
         sku = option.css('span.product-code::text').re(r'\((.+)\)')[0]
         name = option.css('span.product-name::text').extract_first()
         # md5 needs bytes. Passing text fails on non-ASCII names (implicit
         # ASCII encode on Python 2, TypeError on Python 3). UTF-8 gives the
         # same digest as before for pure-ASCII names, so existing
         # identifiers are unchanged.
         digest = hashlib.md5((item['name'] + name).encode('utf-8')).hexdigest()
         loader.replace_value('identifier', '-'.join((sku, digest)))
         loader.replace_value('sku', sku)
         # NOTE(review): the RRP element is added first and then replaced by
         # the row's own price text. Whether the RRP survives as a fallback
         # when that text is missing depends on ProductLoader.replace_value -
         # confirm before removing either call.
         loader.add_css('price', 'span.product-price-rrp')
         price = option.css('td.product-price').xpath('text()[last()]').extract_first()
         loader.replace_value('price', price)
         if name not in item['name']:
             loader.add_value('name', name)
         yield loader.load_item()
         
Exemplo n.º 7
0
    def parse_product(self, response):
        """Yield the product only if it matches the searched SKU and brand.

        ``response.meta`` must provide ``brand`` (value stored on the item),
        ``brands`` (accepted upper-cased brand names) and ``sku`` (the
        searched part number, compared against the upper-cased page value).

        Nothing is yielded when the page has no part number, the part number
        differs from the searched one, the specification table has no Brand
        row, or the page brand is not among the accepted brands.

        Items priced below 50 get a shipping cost of 5.
        """
        brand = response.meta['brand']
        brands = response.meta['brands']
        sku_searched = response.meta['sku']

        sku = response.css('.part-number strong::text').extract_first()
        if not sku or sku.strip().upper() != sku_searched:
            return

        product_brand = response.xpath(
            '//tr[th[contains(text(), "Brand")]]/td[contains(@class, "data")]/text()'
        ).extract_first()
        # A page without a Brand row cannot be matched to an accepted brand;
        # skip it instead of raising IndexError on an empty result.
        if product_brand is None or product_brand.upper().strip() not in brands:
            return

        # The loader is only built once the page is known to be relevant.
        loader = ProductLoader(Product(), response=response)
        loader.add_value('identifier', sku)
        loader.add_value('url', response.url)
        loader.add_css('name', '.product-name .h1::text')
        loader.add_xpath(
            'price', '//span[contains(@id, "price-excluding-tax")]/text()')
        loader.add_value('sku', sku)
        category = response.css('.breadcrumbs a::text').extract()[1:]
        loader.add_value('category', category)
        loader.add_css('image_url', 'img#image-main::attr(src)')
        loader.add_value('brand', brand)
        if response.css('.availability .out-of-stock'):
            loader.add_value('stock', 0)
        item = loader.load_item()
        if item['price'] < 50:
            item['shipping_cost'] = 5
        yield item
Exemplo n.º 8
0
 def parse_product(self, response):
     """Yield the product, or a form request that continues in parse_shipping.

     SKU and brand come from ``response.meta``. The stock level is the
     ``product_avail`` number found in the quantity script. When that value
     is ``'0'`` the item is yielded directly; otherwise the page's
     ``orderform`` is submitted with the item carried in the request meta and
     ``self.parse_shipping`` as callback.

     Raises:
         IndexError: if no ``product_avail`` value is found.
     """
     loader = ProductLoader(Product(), response=response)
     loader.add_xpath('identifier', '//input[@name="productid"]/@value')
     loader.add_value('url', response.url)
     loader.add_css('name', '.descr::text')
     loader.add_css('price', 'span.currency::text')
     loader.add_value('sku', response.meta['sku'])

     thumbnail = response.css(
         'img#product_thumbnail::attr(src)').extract_first()
     if thumbnail:
         loader.add_value('image_url', response.urljoin(thumbnail))

     loader.add_value('brand', response.meta['brand'])

     stock_level = response.css('.quantity script::text').re(
         r'product_avail = (\d+);')[0]
     loader.add_value('stock', stock_level)
     product = loader.load_item()

     if stock_level != '0':
         # Each product gets its own cookie jar, keyed by its identifier.
         request_meta = {
             'cookiejar': product['identifier'],
             'item': Product(product),
         }
         yield FormRequest.from_response(response,
                                         formname='orderform',
                                         meta=request_meta,
                                         cookies=self.cookies,
                                         callback=self.parse_shipping,
                                         dont_filter=True)
     else:
         yield product
Exemplo n.º 9
0
    def parse_product(self, response):
        """Yield one product per data row of the summary table.

        The product name and SKU come from the structured data returned by
        ``SpiderSchema(response).get_product()``. Every table row that has no
        ``<th>`` cell supplies the option label, the identifier (first token
        of the row's ``class`` attribute), the price (``data-price``) and the
        stock state (stock is 0 unless the class contains "instock").
        """
        schema_product = SpiderSchema(response).get_product()

        rows = response.xpath(
            '//div[@class="summary-container"]/table//tr[not(th)]')
        for row in rows:
            loader = ProductLoader(item=Product(), response=response)

            label = row.xpath(
                './/td[contains(@class,"optionscol")]/text()')[0].extract()
            full_name = u'{} - {}'.format(schema_product['name'], label)
            row_identifier = row.xpath('@class')[0].extract().split(' ')[0]
            row_price = row.xpath('@data-price').extract()

            loader.add_value('name', full_name)
            loader.add_value('url', response.url)
            loader.add_value('sku', schema_product['sku'])
            loader.add_value('identifier', row_identifier)

            # Prefer the structured-data image; fall back to the meta tag.
            if 'image' not in schema_product:
                loader.add_xpath('image_url',
                                 '//meta[@itemprop="og:image"]/@content')
            else:
                loader.add_value('image_url', schema_product['image'])

            in_stock = row.xpath('@class').re('instock')
            if not in_stock:
                loader.add_value('stock', 0)

            loader.add_value('price', row_price)
            loader.add_css('category',
                           'div.product_meta span.posted_in a::text')

            yield loader.load_item()
Exemplo n.º 10
0
    def parse_product(self, response):
        """Yield one product per option row, or a zero-priced base product.

        The first four-digit run in the URL is used as base identifier and
        SKU. When the page has option rows, each row overrides identifier,
        SKU, price and name of the base item. When it has none, the base item
        is yielded with ``price`` set to 0.

        Raises:
            AttributeError: if the URL contains no four-digit run.
        """
        base_loader = ProductLoader(Product(), response=response)
        code = re.search(r'\d\d\d\d', response.url).group(0)
        base_loader.add_value('identifier', code)
        base_loader.add_value('sku', code)
        base_loader.add_value('url', response.url)
        base_loader.add_xpath('name', '//header[@class="prodCat"]/h1/text()')

        # Linked breadcrumb entries without the first, plus the text of the
        # last list element.
        crumbs = response.css('.bread li a::text').extract()[1:]
        crumbs.extend(response.css('.bread li:last-child::text').extract())
        base_loader.add_value('category', crumbs)

        image_href = response.css('.detimg a::attr(href)').extract_first()
        if image_href:
            base_loader.add_value('image_url', response.urljoin(image_href))
        base_item = base_loader.load_item()

        rows = response.css('.tbl').xpath('.//*[@class="tr"]')
        for row in rows:
            row_loader = ProductLoader(Product(), selector=row)
            row_loader.add_value(None, base_item)
            row_code = row.xpath('.//input/@name').extract_first()
            row_loader.replace_value('identifier', row_code)
            row_loader.replace_value('sku', row_code)
            # The ".pr-now" price is loaded first, then the plain cell text.
            row_loader.replace_css('price', '.tc-price .pr-now::text')
            row_loader.add_css('price', '.tc-price::text')
            row_loader.replace_css('name', '.tc-title::text')
            yield row_loader.load_item()

        if not rows:
            base_item['price'] = 0
            yield base_item
Exemplo n.º 11
0
    def parse_product(self, response):
        """Yield the displayed product and one AJAX request per variant.

        The identifier is ``<item id>-<product id>`` taken from two hidden
        inputs. The item is yielded only if its identifier is not already in
        ``self.identifiers``. Afterwards, every combination of the values of
        the page's option selects is requested from the
        ``ajax.get_exact_product.php`` endpoint with ``self.parse_option`` as
        callback; the request meta carries ``sku`` (list of order-code parts)
        and ``attributes`` (select names) on top of the response meta.

        Pages without an item or product id are logged and skipped.
        """
        item_identifier = response.xpath(
            '//input[@id="item_details_item_id"]/@value').extract_first()
        product_id = response.xpath(
            '//input[@id="item_details_product_id"]/@value').extract_first()
        if not item_identifier or product_id is None:
            self.logger.warning('No identifier on %s' % response.url)
            # Without both ids the identifier cannot be built; previously
            # execution continued and failed on None + '-'.
            return
        identifier = item_identifier + '-' + product_id

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('category', response.meta['category'])
        loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
        option_name = response.css('.label-select-container').xpath(
            './/option[@selected]/text()').extract()
        loader.add_value('name', option_name)
        loader.add_value('identifier', identifier)
        loader.add_value('url', response.url)
        loader.add_xpath('price', '//meta[@itemprop="price"]/@content')
        # The order code is the element's own text plus the text of its spans.
        sku = [
            response.css('.order-code').xpath(
                'text()').extract_first().strip()
        ]
        sku.extend(response.css('.order-code span::text').extract())
        loader.add_value('sku', ' '.join(sku))
        loader.add_xpath('image_url', '//img[@id="imageMain"]/@src')
        loader.add_css('brand', '.sku_kc_brand_id_ ::text')
        if loader.get_output_value('price') < 50:
            loader.add_value('shipping_cost', '2.99')
        stock = response.xpath(
            '//meta[@itemprop="availability"]/@content').extract_first()
        stock = stock.replace(' ', '').lower()
        if stock not in self.instock:
            loader.add_value('stock', 0)
            if stock not in self.outofstock:
                self.logger.warning('Undefined stock status for %s' %
                                    response.url)
        item = loader.load_item()
        if item['identifier'] not in self.identifiers:
            self.identifiers.add(item['identifier'])
            yield item

        attributes = []
        options = []
        for attribute in response.css('.label-select-container select'):
            attribute_name = attribute.xpath('@id').extract_first()
            # Select ids carry the item id as suffix; strip it for the query.
            attributes.append(
                attribute_name.replace('_%s' % item_identifier, ''))
            options.append(attribute.xpath('option/@value').extract())

        # Work on a copy so the meta of the current response is not mutated.
        option_meta = dict(response.meta)
        option_meta['sku'] = sku
        option_meta['attributes'] = attributes

        for variant in itertools.product(*options):
            url = 'http://www.kiddicare.com/ajax.get_exact_product.php?instart_disable_injection=true&item_id=%s' % item_identifier
            for n, option in enumerate(variant):
                url += '&attributes[%s]=%s' % (attributes[n], option)
            # A literal "+" in an option value must not be read as a space.
            url = url.replace('+', '%2B')
            yield Request(url, self.parse_option, meta=option_meta)
Exemplo n.º 12
0
    def parse_product(self, response):
        """Yield the base product and one product per JavaScript variant.

        Name data comes from ``SpiderSchema(response).get_product()``; if that
        call fails the page is logged and skipped, and a result without a
        ``name`` key is skipped silently. Variants are decoded from the first
        body line containing ``variants:``. Each variant copies the base
        product and overrides name, sku, identifier and price.

        Products priced below 45 get a shipping cost of 3.5.

        Raises:
            IndexError: if the offer price element is missing.
        """
        try:
            pdata = SpiderSchema(response).get_product()
        except Exception:
            # Exception rather than a bare except, so that interpreter-level
            # signals (KeyboardInterrupt, SystemExit, GeneratorExit) are not
            # swallowed.
            self.logger.error('No structured product data on %s' %response.url)
            return

        options = None
        js_line = next(
            (line for line in response.body.split('\n') if 'variants:' in line),
            '')
        if js_line:
            raw_variants = re.search(r'variants:(.*};)?', js_line).group(1)
            if raw_variants:
                # Drop the trailing "};" before decoding the object literal.
                options = demjson.decode(raw_variants[:-2].strip())
            else:
                # The optional group did not match: the line has no "};"
                # terminator. Keep the base product instead of failing on
                # None[:-2].
                self.logger.warning(
                    'Could not parse variants on %s' % response.url)

        product_loader = ProductLoader(item=Product(), response=response)
        product_loader.add_css('sku', 'span.pd_productVariant::text')
        product_loader.add_xpath('identifier', '//input[@name="productId"]/@value')
        product_loader.add_value('url', response.url)
        try:
            product_loader.add_value('name', pdata['name'])
        except KeyError:
            return
        category = response.xpath('//*[@id="breadcrumb"]//a/text()').extract()[1:-1]
        product_loader.add_value('category', category)
        img = response.xpath('//meta[@property="og:image"]/@content').extract()
        if img:
            product_loader.add_value('image_url', response.urljoin(img.pop()))
        price = response.xpath('//p[@class="productOfferPrice"]/text()').extract()[0]
        product_loader.add_value('price', price)
        if product_loader.get_output_value('price') < 45:
            product_loader.add_value('shipping_cost', '3.5')
        brand = response.xpath('//*[@id="brandHeader"]/a/@href').extract()
        if brand:
            # Strip the "/en/" prefix and the last character of the link.
            brand = brand[0].replace('/en/', '')[:-1]
            if '/' not in brand:
                product_loader.add_value('brand', brand)
        stock = response.xpath('//link[@itemprop="availability"]/@href').extract_first()
        if stock != 'http://schema.org/InStock':
            product_loader.add_value('stock', 0)
        product = product_loader.load_item()

        yield product

        if options:
            for key, val in options.items():
                option_product = Product(product)
                option_product['name'] = product['name'] + ' ' + key.replace('_', ' ')
                option_product['sku'] = val['productCode']
                option_product['identifier'] = val['variantId']
                option_product['price'] = extract_price(val['nowPrice'])
                yield option_product
Exemplo n.º 13
0
    def parse_product(self, response):
        """Yield the page's product, retrying pages that have no title.

        When the product title is missing, the same URL is requested again
        (unfiltered) with an incremented ``retry`` counter in the meta; after
        10 retries the page is given up. Pages without a price are recorded
        in ``self.errors`` instead of yielding an item. The brand is taken
        from ``response.meta`` (empty string when absent).

        Stock availability is not reported by this callback.
        """
        try:
            name = response.css(
                '.content-fiche-produit h1::text').extract_first().strip()
        except AttributeError:
            # extract_first() returned None, i.e. the title is missing.
            retry = int(response.meta.get('retry', 0))
            if retry < 10:
                new_meta = response.meta.copy()
                new_meta['retry'] = retry + 1
                yield Request(response.url,
                              meta=new_meta,
                              callback=self.parse_product,
                              dont_filter=True)
            return

        base_url = get_base_url(response)

        # The category is the second-to-last breadcrumb link. A breadcrumb
        # with fewer than two links has none (previously an IndexError).
        breadcrumb = response.css('#breadcrumb a::text').extract()
        category = breadcrumb[-2] if len(breadcrumb) >= 2 else ""

        sku = response.css('.content-fiche-produit p::text').re(
            u'Référence (\d+)')
        pid = response.css('.content-fiche-produit p::text').re(u'Ref (\d+)')
        price = response.css('.new-price ::text').extract_first()

        if not price:
            self.errors.append("No price set for url: '%s'" %
                               urljoin(base_url, response.url))
            return

        loader = ProductLoader(response=response, item=Product())
        loader.add_value('url', urljoin(base_url, response.url))
        loader.add_value('name', name)
        loader.add_css('image_url', '#image ::attr(src)')
        loader.add_value('price', extract_price2uk(price))
        loader.add_value('category', category)
        loader.add_value('sku', sku)
        loader.add_value('identifier', pid)
        loader.add_value('brand', response.meta.get("brand", ""))
        yield loader.load_item()
Exemplo n.º 14
0
    def parse_product(self, response):
        """Yield the product, or an add-to-cart request when no price shows.

        The price is built by joining all digit runs of the price element
        with dots. The identifier is ``<product id>-<price id>`` from two
        hidden inputs. Shipping is 9.9 when the loaded price is below 200,
        otherwise 0. If no price text was found, an add-to-cart URL is
        requested with ``self.parse_price_from_cart`` as callback and the
        product carried in the request meta.

        Raises:
            IndexError: if the product id or price id input is missing (or
                the store id input, when the cart request is needed).
        """
        loader = ProductLoader(item=Product(), response=response)

        loader.add_css('name', '.pro-des::text')

        digit_runs = response.xpath(
            '//div[@class="price-strike"]/div/span//text()').re(r'\d+')
        price = '.'.join(digit_runs)
        loader.add_value('price', price)

        loader.add_xpath('sku', '//div[@class="short-desc"]/span//text()')

        productid = response.xpath(
            '//input[@id="selectedProductIdd"]/@value').extract()[0]
        priceid = response.xpath('//input[@id="priceId"]/@value').extract()[0]
        loader.add_value('identifier', '-'.join((productid, priceid)))

        # Breadcrumb text without the first entry and without ">" separators.
        loader.add_xpath(
            'category',
            "//div[@class='bread']//li[position() > 1]//text()[not(contains(., '>'))]"
        )

        loader.add_xpath('image_url', "//meta[@property='og:image']/@content")
        loader.add_value('url', response.url)
        loader.add_xpath('brand', '//div[@class="added-item"]/h2/text()')

        if loader.get_output_value('price') < 200:
            shipping_cost = 9.9
        else:
            shipping_cost = 0
        loader.add_value('shipping_cost', shipping_cost)
        product = loader.load_item()

        if price:
            yield product
            return

        # No price on the page: ask the cart endpoint for it.
        storeid = response.xpath(
            '//input[@id="storeId"]/@value').extract()[0]
        cart_url = 'http://www.courts.com.sg/home/addtocart.html?isAdd=true&newProduct=true&productId=%s&selectedCurrency=SGD&quantity=1&cartId=na&addQuantity=true&newQuantity=1&shippingOption=&shippingCity=&deliveryOption=&shippingDate=&cityId=&title=&inventorysensible=yes&priceId=%s&storeId=%s'
        cart_meta = {
            'product': Product(product),
            'dont_merge_cookies': True
        }
        yield Request(cart_url % (productid, priceid, storeid),
                      callback=self.parse_price_from_cart,
                      meta=cart_meta)
Exemplo n.º 15
0
    def parse_product(self, response):
        """Request every variant of the product, then yield the shown one.

        The variant form's inputs are collected into form data. For every
        variant element that form is re-submitted with the variant's value,
        with this method as callback. If the page's SKU differs from the
        ``skuId`` URL parameter, the canonical ``skuId`` URL is requested
        instead of yielding an item. An item is only yielded when a price was
        loaded.

        "Page not found" URLs and pages without variant form inputs yield
        nothing.
        """
        if response.url.endswith('page-not-found.page'):
            return
        formdata = {}
        for inp in response.xpath('//form[@id="variant-form"]//input'):
            formdata[inp.xpath('@name').extract_first()] = inp.xpath(
                '@value').extract_first()
        if not formdata:
            self.logger.warning('No data on %s' % response.url)
            return
        # Inputs without a name attribute end up under the None key. Use
        # pop() with a default: "del formdata[None]" raised KeyError whenever
        # every input had a name.
        formdata.pop(None, None)

        for option in response.css('.vContainer .variantDataElement'):
            formdata[option.xpath('@name').extract_first()] = option.xpath(
                '@data-variant-value').extract_first()
            yield FormRequest.from_response(
                response,
                formxpath='//form[@id="variant-form"]',
                formdata=formdata,
                callback=self.parse_product)

        loader = ProductLoader(item=Product(), response=response)
        sku = response.xpath('//input[@id="skuIdVal"]/@value').extract_first()
        if sku != url_query_parameter(response.url, 'skuId'):
            # Re-request the page under the URL that names this SKU.
            url = add_or_replace_parameter(url_query_cleaner(response.url),
                                           'skuId', sku)
            yield Request(url, self.parse_product)
            return
        loader.add_value('identifier', sku)
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//h1[@id="productLabel"]//text()')
        loader.add_css('price', '.current-price ::text')
        loader.add_value('sku', sku)
        category = response.xpath(
            '//div[@id="breadcrumb"]//li//span[@itemprop="title"]/text()'
        ).extract()
        # Up to three levels, excluding the last breadcrumb entry.
        loader.add_value('category', category[-4:-1])
        image_url = response.xpath(
            '//img[@itemprop="image"]/@src').extract_first()
        if image_url:
            loader.add_value('image_url', response.urljoin(image_url))
        loader.add_xpath(
            'brand', '//div[@itemprop="brand"]//span[@itemprop="name"]/text()')
        loader.add_value('shipping_cost', 3)
        # Stock is 0 when no ".add-to-basket" element is present.
        if not response.css('.add-to-basket'):
            loader.add_value('stock', 0)
        if loader.get_output_value('price'):
            yield loader.load_item()
Exemplo n.º 16
0
    def parse_product(self, response):
        """Yield the product, expanded into one item per option combination.

        Error pages (URL containing ``aspxerrorpath``) are re-requested from
        the first URL of the redirect chain. Stock is 0 unless the title-cased
        stock text contains "Order Now"; a "Discontinued" stock text adds a
        ``Discontinued?`` metadata entry. With option selects on the page, one
        item per combination of their values (excluding "Select") is yielded,
        each with the option texts appended to the name and the option values
        appended to the identifier and SKU.
        """
        if 'aspxerrorpath' in response.url:
            original_url = response.request.meta['redirect_urls'][0]
            yield Request(original_url, self.parse_product, dont_filter=True)
            return

        base_loader = ProductLoader(Product(), response=response)
        product_ref = response.xpath('//@data-feefo-vendor-ref').extract_first()
        base_loader.add_value('identifier', product_ref)
        base_loader.add_value('url', response.url)
        base_loader.add_css('name', 'header.page-title h1::text')
        base_loader.add_css('price', 'header.product-sidebar__price h2::text')
        base_loader.add_value('sku', product_ref)
        crumbs = response.css('.breadcrumb a::text').extract()
        base_loader.add_value('category', crumbs[1:-1])
        main_image = response.css(
            '.product-gallery__main-image img::attr(src)').extract_first()
        if main_image:
            base_loader.add_value('image_url', response.urljoin(main_image))

        stock_text = response.css('.product-sidebar__stock::text').extract_first()
        stock_title = stock_text.title()
        if 'Order Now' not in stock_title:
            base_loader.add_value('stock', 0)
        base_item = base_loader.load_item()
        if 'Discontinued' in stock_title:
            base_item['metadata'] = {"Discontinued?": "Yes"}

        selects = response.css('.product-sidebar select')
        if not selects:
            yield base_item
            return

        # One list of <option> selectors per select, placeholder excluded.
        choices = [
            select.xpath('option[@value!="Select"]') for select in selects
        ]
        for combination in itertools.product(*choices):
            loader = ProductLoader(Product(), response=response)
            loader.add_value(None, base_item)
            combined_id = base_item['identifier']
            for choice in combination:
                loader.add_value('name', choice.xpath('text()').extract())
                combined_id += '-' + choice.xpath('@value').extract_first()
            loader.replace_value('identifier', combined_id)
            loader.replace_value('sku', combined_id)
            variant_item = loader.load_item()
            variant_item['metadata'] = base_item.get('metadata', {})
            yield variant_item
Exemplo n.º 17
0
    def parse_product(self, response):
        """Load the product and request its first page of reviews.

        The item is not yielded here: it travels in the meta of a request to
        the Reevoo reviews page for the product id, handled by
        ``self.parse_review_page``. The item metadata starts with an empty
        ``reviews`` list and, when a promo text is present, ``promotions``.

        Pages without a ``pid`` input are logged and skipped.
        """
        # Check the identifier first so no loader work is done for pages that
        # are going to be skipped.
        identifier = response.xpath('//input[@name="pid"]/@value').extract()
        if not identifier:
            log.msg('PRODUCT WHIOUT IDENTIFIER: ' + response.url)
            return

        loader = ProductLoader(item=Product(), response=response)
        loader.add_xpath('name', '//input[@id="product-name"]/@value')
        loader.add_value('url', response.url)
        loader.add_css('brand', 'span.b-brand_title::text')
        # The first two breadcrumb links are not part of the category path.
        categories = response.css('div.b-breadcrumbs a::text').extract()[2:]
        loader.add_value('category', categories)

        loader.add_xpath('sku', '//meta[@itemprop="model"]/@content')
        loader.add_value('identifier', identifier[0])

        # Prefer the image_src link, fall back to the itemprop image meta tag.
        image_url = response.xpath('//link[@rel="image_src"]/@href').extract(
        ) or response.xpath('//meta[@itemprop="image"]/@content').extract()
        if image_url:
            loader.add_value('image_url', image_url[0])

        price = response.xpath('//meta[@itemprop="price"]/@content').extract()
        loader.add_value('price', price)

        out_of_stock = response.css('div.b-availability').xpath(
            './/span[@data-availability="NOT_AVAILABLE"]')
        if out_of_stock:
            loader.add_value('stock', '0')

        product = loader.load_item()

        promo = response.xpath(
            '//div[@class="b-product_promo"]/div/span/text()').extract()

        metadata = ToyMonitorMeta()
        metadata['reviews'] = []
        if promo:
            metadata['promotions'] = promo[0].strip()
        product['metadata'] = metadata

        reviews_url = 'http://mark.reevoo.com/reevoomark/en-GB/product.html?page=1&sku=%s&tab=reviews&trkref=MOT'

        yield Request(reviews_url % identifier[0],
                      callback=self.parse_review_page,
                      meta={'product': product})
Exemplo n.º 18
0
    def parse_products(self, response):
        """Yield one reviews request per product tile of a listing page.

        Every tile is loaded into a product item (the brand is prepended to
        the name when the name starts with a lower-case letter). The item is
        carried in the meta of a Bazaarvoice reviews request, together with
        ``offset`` 0, handled by ``self.parse_reviews``. Tiles whose image URL
        contains "3for2" get a "3 for 2" promotion in their metadata.

        Raises:
            IndexError: if the product URL has no ``/<digits>.prd`` part.
        """
        crumbs = response.xpath(
            '//div[@id="breadcrumb"]//span[@itemprop="name"]/text()').extract()
        category = crumbs[2:]

        for tile in response.css('.productList .product'):
            loader = ProductLoader(item=Product(), selector=tile)
            loader.add_xpath('identifier', '@id', re='product-(.+)')
            loader.add_xpath('url', './/@href')

            brand = tile.xpath('.//h3/em/text()').extract_first()
            name = tile.xpath('.//h3/span/text()').extract_first()
            # A lower-case first letter means the name continues the brand.
            if name[0].islower():
                loader.add_value('name', brand)
            loader.add_value('name', name)

            loader.add_css('price', '.productPrice dd:last-child::text')
            loader.add_xpath('sku', '@id', re='product-(.+)')
            loader.add_value('category', category)
            loader.add_css('image_url', '.productMainImage img::attr(src)')
            image_url = loader.get_output_value('image_url')
            promotion = '3 for 2' if image_url and '3for2' in image_url else None
            loader.add_value('brand', brand)
            loader.add_value('shipping_cost', '3.99')

            stock_text = tile.css('.productStock dd').extract_first().title()
            if not ('In Stock' in stock_text or 'Low Stock' in stock_text):
                loader.add_value('stock', 0)
            item = loader.load_item()

            metadata = ToyMonitorMeta()
            metadata['reviews'] = []
            if promotion:
                metadata['promotions'] = promotion
            item['metadata'] = metadata

            prod_id = re.findall(r"/(\d+).prd", item['url'])[0]
            reviews_url = "http://api.bazaarvoice.com/data/batch.json?passkey=35w0b6mavcfmefkhv3fccjwcc&apiversion=5.5&displaycode=17045-en_gb&resource.q0=reviews&filter.q0=isratingsonly%3Aeq%3Afalse&filter.q0=productid%3Aeq%3A" + prod_id + "&filter.q0=contentlocale%3Aeq%3Aen_GB%2Cen_IE%2Cen_US&sort.q0=isfeatured%3Adesc&stats.q0=reviews&filteredstats.q0=reviews&include.q0=authors%2Cproducts%2Ccomments&filter_reviews.q0=contentlocale%3Aeq%3Aen_GB%2Cen_IE%2Cen_US&filter_reviewcomments.q0=contentlocale%3Aeq%3Aen_GB%2Cen_IE%2Cen_US&filter_comments.q0=contentlocale%3Aeq%3Aen_GB%2Cen_IE%2Cen_US&limit.q0=100&offset.q0=0&limit_comments.q0=3&callback=bv_1111_57043"

            review_meta = {
                'product': item,
                'offset': 0
            }
            yield Request(reviews_url,
                          meta=review_meta,
                          callback=self.parse_reviews)
Exemplo n.º 19
0
 def parse_product(self, response):
     """Scrape a product page and yield one item per option combination.

     A base item is built from the page. The rows of
     ``table.variabletable`` whose first cell text is listed in
     ``self.options_to_extract`` supply option lists; every combination of
     one option per row becomes its own item, with the option values
     appended to the identifier/sku, the option labels added to the name and
     any "(Add ...)" surcharge added to the price. If there is no
     combination at all, the base item is yielded unchanged.
     """
     loader = ProductLoader(Product(), response=response)
     loader.add_value('url', response.url)
     category = response.css('div.treemenu a::text').extract()[1:]
     loader.add_value('category', category)
     loader.add_css('image_url', 'div#mainimage_holder img::attr(data-zoom-image)')
     identifier = response.xpath('//input[@name="fproduct_id"]/@value').extract_first()
     loader.add_value('identifier', identifier)
     loader.add_value('sku', identifier)
     loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
     loader.add_css('price', 'li.shelfBnormalprice::text')
     if loader.get_output_value('price') < 100:
         loader.add_value('shipping_cost', 10)
     item = loader.load_item()

     attributes = response.css('table.variabletable tr')
     attributes = [attr for attr in attributes if attr.xpath('td[1]/text()').extract_first() in self.options_to_extract]
     options = [
         attr.xpath('td/select/option[not(contains(.,"Please Select"))]')
         for attr in attributes
     ]
     # itertools.product() returns a lazy iterator, which is always truthy.
     # Materialise it so the "no variants" fallback can actually trigger
     # (e.g. when one of the selects has no usable option).
     variants = list(itertools.product(*options))
     if not variants:
         yield item
         return

     for variant in variants:
         loader = ProductLoader(Product(), response=response)
         loader.add_value(None, item)
         identifier = item['identifier']
         price = item['price']
         for option in variant:
             identifier += '-' + option.xpath('@value').extract_first()
             # Option text is split on "(Add": the left part is the label,
             # the right part (if any) carries the surcharge.
             name_and_price = option.xpath('text()').extract_first().split('(Add')
             loader.add_value('name', name_and_price[0])
             if len(name_and_price) > 1:
                 price += extract_price(name_and_price[1])
         loader.replace_value('identifier', identifier)
         loader.replace_value('sku', identifier)
         loader.replace_value('price', price)
         if price >= 100:
             loader.replace_value('shipping_cost', 0)
         yield loader.load_item()
Exemplo n.º 20
0
 def parse_product(self, response):
     """Scrape a product page, retrying the request if the id is missing.

     When ``span#thisstkcode`` yields no identifier, the same request is
     re-issued (unfiltered) with an incremented ``retries`` counter in its
     meta. Once the counter exceeds 9 the page is given up on with a
     warning. No item is produced for a page without an identifier.
     """
     loader = ProductLoader(Product(), response=response)
     identifier = response.css('span#thisstkcode::text').extract_first()
     if not identifier:
         retries = response.meta.get('retries', 0)
         if retries > 9:
             self.logger.warning('No identifier found on %s' % response.url)
             return
         self.logger.debug('Retry %s to get identifier' % response.url)
         # Copy the meta so the counter is set on the new request only.
         meta = dict(response.meta)
         meta['retries'] = retries + 1
         # dont_filter must be a keyword argument: the retry targets a URL
         # that has already been requested.
         yield response.request.replace(dont_filter=True, meta=meta)
         return
     loader.add_value('identifier', identifier)
     loader.add_value('sku', identifier)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '//h1/text()')
     price = response.css('span.prodPrice').xpath(
         './/span[@itemprop="price"]/text()').extract_first()
     loader.add_value('price', price)
     category = response.css('.breadcrumbs span::text').extract()[1:]
     loader.add_value('category', category)
     loader.add_css('image_url', '.main-product-photo::attr(href)')
     loader.add_css('brand', 'span#thisbrand::text')
     loader.add_css('stock', 'input#data-stock-qty::attr(value)')
     yield loader.load_item()
Exemplo n.º 21
0
    def parse_product(self, response):
        """Scrape a product page, visiting its option pages first if needed.

        If the page has a ``.pg_select`` chooser with no selected option, a
        request back into this callback is yielded for every ``data-href``
        found in it and nothing else is scraped. Otherwise a single product
        item is yielded.
        """
        selects = response.css('.pg_select')
        if selects and not selects.xpath('option[@selected]'):
            for href in selects.xpath('.//@data-href').extract():
                option_url = response.urljoin(url_query_cleaner(href))
                yield Request(option_url, self.parse_product)
            return

        loader = ProductLoader(Product(), response=response)
        sku = response.xpath(
            '//div[@id="content"]//input[@name="sku"]/@value').extract_first()
        # The sku doubles as the identifier.
        for field in ('identifier', 'sku'):
            loader.add_value(field, sku)
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//strong[@itemprop="name"]/text()')
        # Price selectors are added in this order.
        price_selectors = (
            'div.show h5 ::text',
            '.nowPrice ::text',
            '.typicalPrice h5 ::text',
        )
        for price_css in price_selectors:
            loader.add_css('price', price_css)

        category_paths = response.xpath(
            '//input[@name="productDetailsDTO"]/@value').re(
                '"category":"(.+?)"')
        if category_paths:
            loader.add_value('category', category_paths[0].split('/'))

        first_image = response.css(
            'ul#galleryImages a::attr(href)').extract_first()
        if first_image:
            loader.add_value('image_url', response.urljoin(first_image))

        loader.add_xpath(
            'brand',
            '//span[@itemprop="brand"]//span[@itemprop="name"]/text()')

        out_of_stock = response.css('div#content p.oos')
        if out_of_stock:
            loader.add_value('stock', 0)
        yield loader.load_item()
Exemplo n.º 22
0
    def parse_product(self, response):
        """Scrape a product page and request its Bazaarvoice reviews.

        The item is built from the page and given an empty ``reviews`` list
        in its metadata. When a review product id is found in the page text
        a request to the reviews endpoint is yielded with the item in
        ``meta`` for ``self.parse_review_page``; when none is found the item
        is yielded as is.
        """
        loader = ProductLoader(Product(), response=response)
        identifier = response.xpath(
            '//input[@name="productId"]/@value').extract_first()
        if not identifier:
            # No productId input on the page: the item is flagged with zero
            # stock and the id is searched for in the page text instead.
            loader.add_value('stock', 0)
            identifier = response.xpath('//text()').re('productId=(.+?)&')
        loader.add_value('identifier', identifier)
        loader.add_value('url', url_query_cleaner(response.url))
        loader.add_css('name', 'div.productTitleDescriptionContainer h1::text')
        loader.add_css('price', 'p.pricePerUnit::text')
        loader.add_css('sku', 'p.itemCode::text', re='Item code:(.+)')
        category = response.xpath(
            '//ul[@id="breadcrumbNavList"]//a/span/text()').extract()
        if 'Home' in category:
            category.remove('Home')
        loader.add_value('category', category)
        image_url = response.css(
            'img#productImageID::attr(src)').extract_first()
        if image_url:
            loader.add_value('image_url', response.urljoin(image_url))
        item = loader.load_item()
        item['metadata'] = {'reviews': []}

        review_id = response.xpath('//text()').re_first("productId: '(.+?)'")
        if not review_id:
            # Without an id the reviews URL would contain "None"; emit the
            # item without reviews rather than issuing a bogus request.
            yield item
            return
        reviews_url = 'http://sainsburysgrocery.ugc.bazaarvoice.com/8076-en_gb/%s/reviews.djs?format=embeddedhtml' % review_id
        yield Request(reviews_url,
                      callback=self.parse_review_page,
                      meta={'item': item})
Exemplo n.º 23
0
 def parse_product(self, response):
     """Scrape a product page and yield a single item.

     URLs containing ``contact-lenses`` are delegated to
     ``self.parse_lenses``. Otherwise the item is built from the page, with
     stock set to 0 when the order block links to an "OutOfStock" href.
     """
     if 'contact-lenses' in response.url:
         for item in self.parse_lenses(response):
             yield item
         return
     loader = ProductLoader(item=Product(), response=response)
     loader.add_xpath('identifier', '//input[@name="SKU"]/@value')
     loader.add_xpath('url', '//link[@rel="canonical"]/@href')
     loader.add_xpath('name',
                      '//ul[@id="Brand"]/li[position()>1]//text()',
                      re='.+')
     loader.add_css('price', '.itemPrice ::text')
     loader.add_xpath('sku', '//span[@itemprop="sku"]/text()')
     # Drop the first and last breadcrumb entries.
     category = response.css('.breadcrumb span::text').extract()
     loader.add_value('category', category[1:-1])
     image_url = response.css('.currentImage ::attr(src)').extract_first()
     # Joining a missing URL resolves to the base URL, which would store
     # the page address as the image; only record an image that was found.
     if image_url:
         loader.add_value('image_url', response.urljoin(image_url))
     loader.add_xpath('brand', '//ul[@id="Brand"]/li[2]/strong/text()')
     if response.xpath(
             '//div[@id="Order"]//link/@href[contains(., "OutOfStock")]'):
         loader.add_value('stock', 0)
     yield loader.load_item()
Exemplo n.º 24
0
 def parse_product(self, response):
     """Scrape a product page and yield a single item.

     Responses whose URL contains ``login.cfm`` are ignored. The identifier
     is the MD5 hex digest of the last path segment of the URL; price, sku
     and availability are pulled out of the page's script text.
     """
     if 'login.cfm' in response.url:
         return

     url_tail = response.url.split('/')[-1]
     loader = ProductLoader(Product(), response=response)
     loader.add_value('identifier', hashlib.md5(url_tail).hexdigest())
     loader.add_value('url', response.url)
     loader.add_css('name', 'h1.content-title::text')
     loader.add_xpath('price', '//script/text()', re='price": "(.+)"')
     loader.add_xpath('sku', '//script/text()', re='sku": "(.+)"')

     # Drop the first and last breadcrumb links.
     breadcrumb_links = response.xpath(
         '//ul[@id="breadcrumbs"][1]//a/text()').extract()
     loader.add_value('category', breadcrumb_links[1:-1])

     image_src = response.css(
         'div.product-detail-feature-img img::attr(src)').extract_first()
     if image_src:
         loader.add_value('image_url', response.urljoin(image_src))

     loader.add_xpath('brand', '//meta[@property="og:brand"]/@content')

     availability = response.xpath('//script/text()').re(
         'availability": "(.+)"')
     # Stock is only zeroed when an availability value exists and it is not
     # exactly "In stock".
     if availability:
         if availability[0] != 'In stock':
             loader.add_value('stock', 0)
     yield loader.load_item()
Exemplo n.º 25
0
    def parse_product(self, response):
        """Scrape a product page and yield one item per variation.

        The identifier is taken from the ``product_id`` input, then the
        ``add-to-cart`` input; if neither is present the item gets stock 0
        and the id is read from the schema.org Product element. When the
        page carries a ``data-product_variations`` attribute, its JSON is
        decoded and one item is yielded per entry; otherwise the base item
        is yielded.
        """
        loader = ProductLoader(Product(), response=response)

        identifier = response.xpath(
            '//input[@name="product_id"]/@value').extract_first()
        if not identifier:
            identifier = response.xpath(
                '//input[@name="add-to-cart"]/@value').extract_first()
        if not identifier:
            loader.add_value('stock', 0)
            identifier = response.xpath(
                '//div[@itemtype="http://schema.org/Product"]/@id').re_first(
                    'product-(\d+)')

        loader.add_value('identifier', identifier)
        loader.add_css('sku', 'span.sku::text')
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
        loader.add_css('price', '.product-price-exvat span.amount::text')
        loader.add_css('price', '.product-price span.amount::text')

        first_category = response.xpath(
            '//span[@class="posted_in"][contains(., "Categories:")]/a/text()'
        ).extract_first()
        loader.add_value('category', first_category)

        loader.add_css('image_url',
                       'div.single-product-main-image a::attr(href)')

        first_brand = response.xpath(
            '//span[@class="posted_in"][contains(., "Brands:")]/a/text()'
        ).extract_first()
        loader.add_value('brand', first_brand)

        base_item = loader.load_item()

        raw_variations = response.xpath(
            '//@data-product_variations').extract_first()
        if not raw_variations:
            yield base_item
            return

        for variation in json.loads(raw_variations):
            # Start every variation from a copy of the base item's fields.
            variant_loader = ProductLoader(Product(), response=response)
            variant_loader.add_value(None, base_item)
            variant_loader.replace_value('identifier',
                                         variation['variation_id'])
            variant_loader.replace_value('sku', variation['sku'])
            variant_loader.replace_value('price', variation['display_price'])
            image_link = variation['image_link']
            if image_link:
                variant_loader.replace_value('image_url', image_link)
            # Attribute values are appended to the base name values.
            variant_loader.add_value('name',
                                     variation['attributes'].values())
            yield variant_loader.load_item()
Exemplo n.º 26
0
 def parse_simple_product(self, response):
     """Scrape the page into a single product item and yield it.

     The value of the ``product`` input is used for both the identifier and
     the sku; the first breadcrumb link is left out of the category.
     """
     product_id_xpath = '//input[@name="product"]/@value'
     breadcrumbs = response.css('div.breadcrumbs a::text').extract()

     loader = ProductLoader(Product(), response=response)
     loader.add_xpath('identifier', product_id_xpath)
     loader.add_value('url', response.url)
     loader.add_css('name', 'div.product-name h1::text')
     loader.add_css('price', 'li.bigPrice span.price::text')
     loader.add_xpath('sku', product_id_xpath)
     loader.add_value('category', breadcrumbs[1:])
     loader.add_css('image_url', 'img#image::attr(src)')
     yield loader.load_item()
Exemplo n.º 27
0
 def parse_lenses(self, response):
     """Scrape a lenses page into a single product item and yield it.

     The identifier (also used as sku) is the value of the ``id`` input,
     suffixed with ``-<id_tipo>`` when an ``id_tipo`` input has a value.
     """
     base_id = response.xpath(
         '//input[@name="id"]/@value').extract_first()
     type_id = response.xpath(
         '//input[@name="id_tipo"]/@value').extract_first()
     if type_id:
         identifier = base_id + ('-' + type_id)
     else:
         identifier = base_id

     loader = ProductLoader(item=Product(), response=response)
     loader.add_value('url', response.url)
     loader.add_css('name', '.nombre ::text')
     loader.add_xpath('price', '//*[@itemprop="price"]/text()')
     loader.add_css('category', '.breadcrumb a::text')
     loader.add_css('image_url', '.pag_producto img::attr(src)')
     loader.add_xpath('brand', '//meta[@itemprop="brand"]/@content')
     for field in ('identifier', 'sku'):
         loader.add_value(field, identifier)
     yield loader.load_item()
Exemplo n.º 28
0
 def parse_product(self, response):
     """Scrape a product page and yield one item per configurable option.

     Base fields come from the ``.nosto_product`` block. The list price is
     stored as the item price; when the sales price differs it is recorded
     in ``metadata['SalesPrice']``. If the page embeds a ``Product.Config``
     JSON with exactly one attribute, one item is yielded per option of
     that attribute, with the option's price deltas, label and first
     product id applied. Pages with more than one attribute are logged and
     skipped.
     """
     loader = ProductLoader(item=Product(), response=response)
     css = '.nosto_product .%s ::text'
     loader.add_css('identifier', css % 'product_id')
     loader.add_css('sku', css % 'product_id')
     for field in ('url', 'name', 'image_url', 'brand'):
         loader.add_css(field, css % field)
     list_price = response.css(css % 'list_price').extract_first()
     sales_price = response.css(css % 'price').extract_first()
     loader.add_value('price', list_price)
     if 'InStock' not in response.css(css % 'availability').extract_first():
         loader.add_value('stock', 0)
     category = response.css(css % 'category').extract_first()
     # Only the last segment of the category path is kept.
     loader.add_value('category', category.split('/')[-1])
     options_data = response.xpath('//script/text()').re(
         'Product.Config.({.+})')
     if not options_data:
         item = loader.load_item()
         if sales_price != list_price:
             item['metadata'] = {'SalesPrice': Decimal(sales_price)}
         yield item
         return
     options_data = json.loads(options_data[0])
     if len(options_data['attributes']) > 1:
         self.log('More than one options attributes found on %s' %
                  response.url)
         return
     price = loader.get_output_value('price')
     name = loader.get_output_value('name')
     sales_price = Decimal(sales_price)
     base_item = loader.load_item()
     # list(...) keeps the "first attribute" lookup working whether
     # values() returns a list or a view.
     options = list(options_data['attributes'].values())[0]['options']
     for option in options:
         product_id = option['products'][0]
         option_list_price = price + Decimal(option['oldPrice'])
         new_price = sales_price + Decimal(option['price'])
         # A fresh loader (and therefore a fresh item) per option: reusing
         # one loader would hand out the same item object every time, so
         # later options would overwrite earlier ones and a SalesPrice
         # set for one option would leak into the following ones.
         option_loader = ProductLoader(item=Product(), response=response)
         option_loader.add_value(None, base_item)
         option_loader.replace_value('price', option_list_price)
         option_loader.replace_value('name', name + ' ' + option['label'])
         option_loader.replace_value('identifier', product_id)
         option_loader.replace_value('sku', product_id)
         option_images = response.xpath(
             '//li[@id="simple-product-image-%s"]/a/@href' %
             product_id).extract()
         # Keep the base image when the option has no image of its own.
         if option_images:
             option_loader.replace_value('image_url', option_images)
         item = option_loader.load_item()
         if option_list_price != new_price:
             item['metadata'] = {'SalesPrice': new_price}
         yield item
Exemplo n.º 29
0
 def parse_product(self, response):
     """Scrape a product page and yield a single item.

     The identifier is the run of digits preceding ``_BQ`` in the URL. The
     category is the fixed value 'Bedroom' followed by the last breadcrumb
     link, and a shipping cost of 5 is added when the price is below 50.
     """
     loader = ProductLoader(Product(), response=response)
     url_match = re.search('(\d+)_BQ', response.url)
     loader.add_value('identifier', url_match.group(1))
     loader.add_value('url', response.url)
     loader.add_css('name', '.product-summary h1.product-title::text')
     loader.add_css('price', '.product-price::attr(content)')
     loader.add_css('sku', 'dl.product-code dd::text')

     loader.add_value('category', 'Bedroom')
     breadcrumb_links = response.css('.breadcrumb').xpath(
         './/li/a/text()').extract()
     loader.add_value('category', breadcrumb_links[-1])

     image_src = response.css('.main-img img::attr(src)').extract_first()
     if image_src:
         loader.add_value('image_url', response.urljoin(image_src))

     loader.add_xpath('brand',
                      '//th[text()="Brand"]/following-sibling::td/text()')

     current_price = loader.get_output_value('price')
     if current_price < 50:
         loader.add_value('shipping_cost', 5)
     yield loader.load_item()
Exemplo n.º 30
0
    def parse_product_options_config(self, response):
        """Scrape a product page and yield one item per configured product.

        A base item is built from the page. If the script text contains a
        ``Product.Config`` JSON object, every distinct product id listed
        under its attribute options becomes its own item: the id is
        appended to the base identifier/sku, the labels of the options that
        list it are added to the name, and their prices are added to the
        base price. Without such a config the base item is yielded.
        """
        config_json = response.xpath('//script/text()').re_first(
            'Product.Config.*?({.+})')

        product_id_xpath = '//input[@name="product"]/@value'
        loader = ProductLoader(Product(), response=response)
        loader.add_xpath('identifier', product_id_xpath)
        loader.add_value('url', response.url)
        loader.add_css('name', 'div.product-name h1::text')
        loader.add_css('price', 'li.bigPrice span.price::text')
        loader.add_xpath('sku', product_id_xpath)
        breadcrumbs = response.css('div.breadcrumbs a::text').extract()
        loader.add_value('category', breadcrumbs[1:])
        loader.add_css('image_url', 'img#image::attr(src)')
        base_item = loader.load_item()

        if not config_json:
            yield base_item
            return

        config = json.loads(config_json)
        attributes = sorted(config['attributes'].values())
        # Flatten all options once, in attribute order.
        all_options = [
            option for attr in attributes for option in attr['options']
        ]
        product_ids = set(itertools.chain.from_iterable(
            option['products'] for option in all_options))

        for product_id in product_ids:
            variant_loader = ProductLoader(Product(), response=response)
            variant_loader.add_value(None, base_item)
            variant_id = base_item['identifier'] + '-' + product_id
            variant_loader.replace_value('identifier', variant_id)
            variant_loader.replace_value('sku', variant_id)

            total_price = base_item['price']
            for option in all_options:
                if product_id not in option['products']:
                    continue
                variant_loader.add_value('name', option['label'])
                total_price += Decimal(option['price'])
            variant_loader.replace_value('price', total_price)
            yield variant_loader.load_item()