Exemplo n.º 1
0
    def parse_products(self, response):
        """Parse a product-listing page and yield its products.

        Yields a ``Request`` for the "next" pagination link when one is
        present (same callback, same request meta), then one loaded item per
        entry of the products grid. Every item receives a ``SonaeMeta`` whose
        promotion dates are derived from ``response.meta['promo']`` and from
        the entry stored in ``self.products_meta`` for the same identifier.

        :param response: response of the listing page.
        :returns: generator of pagination requests and product items.
        """
        next_page = response.xpath('//div[@class="pages"]//a[@class="next"]/@href').extract()
        if next_page:
            yield Request(response.urljoin(next_page[0]), meta=response.meta, callback=self.parse_products)

        # The promo flag travels in the request meta, so it is identical for
        # every product found on this page.
        promo = response.meta.get('promo', False)

        products = response.xpath('//ul[contains(@class, "products-grid")]/li[contains(@class, "item")]')
        for product_xs in products:
            name = product_xs.xpath('div[@class="product-info"]/h2[@class="product-name"]/a/text()').extract()[0]
            url = product_xs.xpath('div[@class="product-info"]/h2[@class="product-name"]/a/@href').extract()[0]
            identifier = product_xs.xpath('div[@class="product-info"]/div[@class="actions"]//*[contains(@id, "product-price-")]/@id').re(r'(\d+)')[0]
            price = ''.join(product_xs.xpath('div[@class="product-info"]/div[@class="actions"]//*[contains(@id, "product-price-")]/span/text()').re(r'[\d\.,]+'))
            brand = product_xs.xpath('div[@class="product-info"]/p[@class="product-brand"]/img/@title').extract()
            image_url = product_xs.xpath('.//img[contains(@id, "product-collection-image-")]/@src').extract()
            out_stock = bool(product_xs.xpath('.//i[contains(@class, "icon-stock-outs")]').extract())

            # '0' is the "no SKU" sentinel; an explicit emptiness check
            # replaces the former bare ``except`` that hid unrelated errors.
            sku_texts = product_xs.xpath('div[@class="product-info"]/div[@class="product-sku"]/text()').extract()
            sku = sku_texts[0].strip() if sku_texts else '0'

            loader = ProductLoader(item=Product(), response=response)
            if image_url:
                loader.add_value('image_url', response.urljoin(image_url[0]))
            loader.add_value('url', url)
            loader.add_value('name', name)
            loader.add_value('identifier', identifier)
            loader.add_value('price', extract_price_eu(price))
            loader.add_value('brand', brand)
            if sku != '0':
                loader.add_value('sku', sku)
            if out_stock:
                loader.add_value('stock', 0)

            product = loader.load_item()

            product['metadata'] = SonaeMeta()

            if identifier in self.products_meta:
                prev_meta = self.products_meta[identifier]
                # A previously stored SKU/category overrides the scraped one.
                if prev_meta['sku']:
                    product['sku'] = prev_meta['sku']
                product['category'] = prev_meta['category']
            else:
                prev_meta = {}
            promo_start = prev_meta.get('promo_start')
            promo_end = prev_meta.get('promo_end')

            # Read the clock once so the date and the timestamp cannot
            # disagree when the crawl runs across midnight.
            now = datetime.datetime.now()
            today = now.strftime('%Y-%m-%d')

            product['metadata']['extraction_timestamp'] = now.strftime('%Y-%m-%d %H:%M')
            if promo:
                # Keep the stored start date only while that promotion is
                # still open (no end date recorded); otherwise start today.
                product['metadata']['promo_start'] = promo_start if promo_start and not promo_end else today
                product['metadata']['promo_end'] = ''
            elif promo_start:
                # No longer on promotion: close it today unless already closed.
                product['metadata']['promo_start'] = promo_start
                product['metadata']['promo_end'] = today if not promo_end else promo_end

            self._update_product_meta(product)
            self.collected_ids.add(product['identifier'])

            yield product
Exemplo n.º 2
0
    def extract_product(self, hxs):
        """Build a product item from one listing entry.

        The identifier is the leading, dash-separated token of the fourth
        ``/``-separated segment of the product link. Brand and category come
        from ``self.products``; promotion dates are derived from the presence
        of a ``promo-box`` element and the row stored in ``self.meta_df``.

        :param hxs: selector scoped to a single listing entry.
        :returns: the loaded item with ``metadata`` attached, or ``None`` when
            the entry has no usable product link or its identifier is not in
            ``self.products``.
        """
        hrefs = hxs.xpath('.//a[@class="product_img_link"]/@href').extract()
        if not hrefs:
            # No link means no identifier: skip the entry like an unknown one.
            return
        url = hrefs[0].split('?')[0]
        url_parts = url.split('/')
        if len(url_parts) < 4:
            return
        identifier = url_parts[3].split('-')[0]
        if identifier not in self.products:
            return

        price = hxs.xpath('.//span[@itemprop="price"]/text()').extract_first('0')
        price = price.replace(' ', '').replace(',', '.')

        if self.meta_df is not None and not self.meta_df.empty and identifier in self.meta_df.index:
            prev_meta = self.meta_df.loc[identifier]
        else:
            prev_meta = {}
        promo = hxs.xpath('.//span[@class="promo-box"]')
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')

        # Read the clock once so the date and the timestamp always agree.
        now = datetime.datetime.now()
        today = now.strftime('%Y-%m-%d')

        metadata = SonaeMeta()
        metadata['extraction_timestamp'] = now.strftime('%Y-%m-%d %H:%M')
        if promo:
            # Keep the stored start date only while that promotion is open.
            metadata['promo_start'] = promo_start if promo_start and not promo_end else today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = today if not promo_end else promo_end

        loader = ProductLoader(Product(), selector=hxs)
        loader.add_xpath('name', './/span[@class="list-name"]/text()')
        loader.add_value('identifier', identifier)
        loader.add_value('url', url)
        loader.add_value('price', price)

        # The trailing URL token is used as SKU only when it is an integer
        # longer than 10 characters; anything else yields an empty SKU.
        sku = url.split('-')[-1].replace('.html', '')
        try:
            int(sku)
        except ValueError:
            sku = ''
        else:
            if len(sku) <= 10:
                sku = ''

        loader.add_value('sku', sku)
        loader.add_xpath('image_url',
                         './/a[@class="product_img_link"]/img/@src')
        stock = hxs.xpath(
            './/span[@class="avail-label"]/text()').extract_first()
        if not stock:
            loader.add_value('stock', 0)
        loader.add_value('brand', self.products[identifier]['brand'])
        loader.add_value('category', self.products[identifier]['category'])
        item = loader.load_item()
        item['metadata'] = metadata
        return item
Exemplo n.º 3
0
    def parse_product(self, response):
        """Parse a product detail page and yield the product item.

        The identifier is taken from the ``/id/<digits>`` part of the URL.
        The item receives a ``SonaeMeta`` whose promotion dates are derived
        from ``response.meta['promo']`` and from the entry stored in
        ``self.products_meta`` for the same identifier.

        :param response: response of the product page.
        :returns: generator yielding a single product item.
        """
        name = response.xpath('//div[@class="product-name"]/h1/text()').extract()[0]
        url = response.url
        identifier = re.findall(r'/id/(\d+)', response.url)[0]
        price = ''.join(re.findall(r'[\d\.,]+',
                                   response.xpath('//div[@class="product-essential"]//div[contains(@class, '
                                                  '"add-to-cart-wrapper")]//*[contains(@id, "product-price-")]/span/text()')
                                   .extract()[0]))
        brand = response.xpath('.//div[@class="product-brand"]//img/@title').extract()
        image_url = response.xpath('//img[@id="image-main"]/@src').extract()
        out_stock = bool(response.xpath('.//span[@class="shipping-run"]/i[contains(@class, "icon-stock-outs")]').extract())

        # An explicit emptiness check replaces the former bare ``except``,
        # which also swallowed unrelated errors.
        sku_texts = response.xpath('//div[@class="product-sku"]//text()').extract()
        sku = sku_texts[0].strip() if sku_texts else ''

        loader = ProductLoader(item=Product(), response=response)
        if image_url:
            loader.add_value('image_url', response.urljoin(image_url[0]))
        loader.add_value('url', url)
        loader.add_value('name', name)
        loader.add_value('identifier', identifier)
        loader.add_value('price', extract_price_eu(price))
        loader.add_value('brand', brand)
        if sku:
            loader.add_value('sku', sku)
        if out_stock:
            loader.add_value('stock', 0)

        product = loader.load_item()

        product['metadata'] = SonaeMeta()

        if identifier in self.products_meta:
            prev_meta = self.products_meta[identifier]
            # A previously stored SKU/category overrides the scraped one.
            if prev_meta['sku']:
                product['sku'] = prev_meta['sku']
            product['category'] = prev_meta['category']
        else:
            prev_meta = {}
        promo = response.meta.get('promo', False)
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')

        # Read the clock once so the date and the timestamp cannot disagree
        # when the crawl runs across midnight.
        now = datetime.datetime.now()
        today = now.strftime('%Y-%m-%d')

        product['metadata']['extraction_timestamp'] = now.strftime('%Y-%m-%d %H:%M')
        if promo:
            # Keep the stored start date only while that promotion is open.
            product['metadata']['promo_start'] = promo_start if promo_start and not promo_end else today
            product['metadata']['promo_end'] = ''
        elif promo_start:
            # No longer on promotion: close it today unless already closed.
            product['metadata']['promo_start'] = promo_start
            product['metadata']['promo_end'] = today if not promo_end else promo_end

        self._update_product_meta(product)
        self.collected_ids.add(product['identifier'])

        yield product
Exemplo n.º 4
0
    def get_product_from_cache(self, response, product_data):
        """Assemble a product from cached attributes plus fresh offer data.

        Static attributes (brand, sku, image, name, category) are read from
        ``self.products``; url, price, shipping and promotion information are
        read from ``product_data``. Promotion dates are derived from the
        promotion price and the entry stored in ``self.metadata_``.

        :param response: response handed to the item loader.
        :param product_data: mapping with the keys ``identifier``,
            ``shipping``, ``url``, ``price``, ``exclusive_online`` and
            ``promotion_price``.
        :returns: the product with its ``metadata`` filled in.
        """
        identifier = product_data['identifier']
        cached = self.products[identifier]

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', identifier)
        loader.add_value('brand', cached['brand'].decode('utf-8'))
        loader.add_value('sku', cached['sku'].decode('utf-8'))
        loader.add_value('image_url', cached['image_url'])
        loader.add_value('name', cached['name'])
        # The cached category is a single ' > '-separated breadcrumb string.
        loader.add_value('category', cached['category'].split(' > '))
        loader.add_value('dealer', 'Fnac')

        shipping = product_data['shipping']
        if shipping:
            loader.add_value('shipping_cost', shipping)

        loader.add_value('url', product_data['url'])
        loader.add_value('price', str(product_data['price']).replace('.', ','))

        product = Product(loader.load_item())

        metadata = SonaeMeta()
        metadata['delivery_24_48'] = 'Yes'
        if product_data['exclusive_online']:
            metadata['exclusive_online'] = 'Yes'

        promotion_price = product_data['promotion_price']
        if promotion_price:
            metadata['promotion_price'] = str(promotion_price).replace(',', '.')

        previous = self.metadata_[identifier] if identifier in self.metadata_ else {}
        promo_start = previous.get('promo_start')
        promo_end = previous.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        metadata['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if promotion_price:
            # Reuse the stored start date only while that promotion is open.
            if promo_start and not promo_end:
                metadata['promo_start'] = promo_start
            else:
                metadata['promo_start'] = today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = promo_end if promo_end else today

        product['metadata'] = metadata
        return product
Exemplo n.º 5
0
    def parse_product(self, response):
        """Scrape a product page and yield the resulting item.

        Price, identifier, name, image, breadcrumb category and availability
        are read from the page; the page identifier is also used as SKU.
        Promotion dates are derived from a visible, non-empty
        ``reduction_amount_display`` element and the row stored in
        ``self.meta_df`` for the identifier.

        :param response: response of the product page.
        :returns: generator yielding a single product item.
        """
        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)
        loader = ProductLoader(item=Product(), selector=hxs)

        price_texts = hxs.select('//*[@id="our_price_display"]/text()').extract()
        loader.add_value('price', extract_price(price_texts[0]))

        identifier = hxs.select('//*[@id="product_page_product_id"]/@value').extract()[0]
        loader.add_value('identifier', identifier)

        loader.add_value('name', hxs.select('//h1[@itemprop="name"]/text()').extract()[0])
        loader.add_value('sku', identifier)

        image_srcs = hxs.select('//*[@id="bigpic"]/@src').extract()
        if image_srcs:
            loader.add_value('image_url', urljoin_rfc(base_url, image_srcs[0]))

        breadcrumb = hxs.select('//div[@class="breadcrumb clearfix"]//a[not(@class)]/text()').extract()
        loader.add_value('category', breadcrumb)
        loader.add_value('url', response.url)

        availability = hxs.select('//*[@id="availability_value"]/text()').extract()
        if availability and availability[0] == u'Este produto não se encontra em stock':
            loader.add_value('stock', 0)

        product = loader.load_item()
        metadata = SonaeMeta()

        has_previous = (self.meta_df is not None
                        and not self.meta_df.empty
                        and identifier in self.meta_df.index)
        previous = self.meta_df.loc[identifier] if has_previous else {}

        on_promo = response.xpath(
            '//p[@id="reduction_amount" and not(contains(@style,"display:none"))]'
            '/span[@id="reduction_amount_display" and text()!=""]')
        promo_start = previous.get('promo_start')
        promo_end = previous.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        metadata['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if on_promo:
            # Reuse the stored start date only while that promotion is open.
            if promo_start and not promo_end:
                metadata['promo_start'] = promo_start
            else:
                metadata['promo_start'] = today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = promo_end if promo_end else today

        product['metadata'] = metadata
        yield product
Exemplo n.º 6
0
    def parse_product(self, response):
        """Scrape a product page whose promotion dates are printed on the page.

        Responses that are not product pages (no ``<body id="product">``) are
        ignored. Promotion start/end are only recorded when the page shows
        exactly two dates in ``%d-%m-%Y`` format.

        :param response: response of the product page; ``meta['category']``
            is used as the leading category.
        :returns: generator yielding at most one product item.
        """
        is_product_page = (response.xpath('//body[@id="product"]')
                           or 'body id="product"' in response.body)
        if not is_product_page:
            return

        date_texts = response.xpath(
            '//div[@class="pl_promoinfo_product_promo"]/span[@class="date"]/text()'
        ).extract()
        promo_start = promo_end = None
        try:
            # Both a malformed date and a count other than two raise
            # ValueError, leaving the two dates unset.
            promo_start, promo_end = [
                datetime.datetime.strptime(text, '%d-%m-%Y') for text in date_texts
            ]
        except ValueError:
            pass

        loader = ProductLoader(item=Product(), response=response)
        loader.add_xpath('identifier', '//input[@id="product_page_product_id"]/@value')
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//h1[@itemprop="name"]/text()')

        price_text = response.xpath('//span[@id="our_price_display"]/text()').extract_first()
        loader.add_value('price', price_text.replace(' ', ''))

        loader.add_xpath('sku', '//span[@itemprop="sku"]/text()')
        loader.add_xpath('sku', '//script/text()', re="productReference='(.+?)'")

        breadcrumb = response.css('.navigation_page ::attr(title)').extract()
        main_category = response.meta.get('category')
        # Prepend the crawl category unless the breadcrumb already starts with it.
        starts_with_main = bool(breadcrumb) and breadcrumb[0].strip() == main_category
        if not starts_with_main:
            breadcrumb = [main_category] + breadcrumb
        loader.add_value('category', breadcrumb)

        loader.add_xpath('image_url', '//img[@id="bigpic"]/@src')
        loader.add_xpath('brand', '//a[@itemprop="brand"]/span/text()')
        if not response.css('.primary_block .avail3'):
            loader.add_value('stock', 0)

        metadata = SonaeMeta()
        if promo_start and promo_end:
            metadata['promo_start'] = promo_start.strftime('%Y-%m-%d')
            metadata['promo_end'] = promo_end.strftime('%Y-%m-%d')
        metadata['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

        item = loader.load_item()
        item['metadata'] = metadata
        yield item
Exemplo n.º 7
0
    def parse_product(self, response):
        """Scrape a product page from its inline ``dataLayer`` JSON object.

        Name, price, brand, identifier and reference code come from the first
        object of the ``dataLayer`` array embedded in a script tag; EAN, image
        and delivery window come from the page markup. When the identifier is
        present in ``self.products`` its stored ' > '-separated category path
        replaces the page breadcrumbs.

        :param response: response of the product page.
        :returns: generator yielding a single product item.
        """
        # Raw string: '\[' is an invalid escape in a normal string literal.
        data_layer = response.xpath("//script[contains(text(), 'var dataLayer')]/text()").re(r'dataLayer = \[(.*)\];')[0]
        data_layer = json.loads(data_layer)

        name = data_layer['productName']
        price = data_layer['productPrice']
        brand = data_layer['productBrand']
        categories = response.css('.breadcrumbs').xpath('li/a/text()').extract()
        ean = response.css('.w-product-details').xpath(".//li[span[contains(text(), 'EAN')]]/span[@class='details-value']").xpath('text()').extract_first()
        image_url = response.xpath("//img[@id='product-main-image']/@src").extract_first()
        identifier = data_layer['productId']

        ref_code = data_layer['productSKU']

        two_four_days = bool(response.css('.w-product__availability').xpath('.//p[contains(text(), "2 a 4")]'))

        loader = ProductLoader(item=Product(), response=response)

        if image_url:
            loader.add_value('image_url', response.urljoin(image_url))
        loader.add_value('url', response.url)
        loader.add_value('name', name)
        loader.add_value('price', price)
        loader.add_value('brand', brand)

        # The EAN is stored as SKU; the site's own SKU goes to ref_code below.
        loader.add_value('sku', ean)
        loader.add_value('identifier', identifier)
        if identifier in self.products:
            # Membership was just checked, so index directly.
            categories = self.products[identifier].split(' > ')
        for category in categories:
            loader.add_value('category', category.strip())
        product = loader.load_item()

        product['metadata'] = SonaeMeta()
        product['metadata']['exclusive_online'] = 'No'
        if two_four_days:
            product['metadata']['delivery_48_96'] = 'Yes'
        else:
            product['metadata']['delivery_96_more'] = 'Yes'

        if ref_code:
            product['metadata']['ref_code'] = ref_code

        yield product
Exemplo n.º 8
0
    def parse_items(self, response):
        """Scrape a product page and yield the product item.

        The identifier (MPN) and SKU (EAN) are read from the attributes of
        the flixfacts loader script tag. At most three breadcrumb entries,
        skipping the first one, are used as categories.

        :param response: response of the product page.
        :returns: generator yielding a single product item.
        """
        hxs = HtmlXPathSelector(response=response)
        description_field = hxs.select("//script[@src = 'http://media.flixfacts.com/js/loader.js']").extract()[0]
        name = hxs.select("//span[@itemprop = 'name']/text()").extract()[0].encode('ascii', 'ignore')
        price = hxs.select("//meta[@property = 'og:price:amount']/@content").extract()[0]
        identifier = re.findall(r'data-flix-mpn="(.+?)"', description_field)[0]

        # The EAN is optional: fall back to an empty SKU when it is absent.
        ean_matches = re.findall(r'data-flix-ean="(\d*)"', description_field)
        sku = ean_matches[0] if ean_matches else ""

        categories = hxs.select("//div[@class = 'breadcrumb']/a/span/text()").extract()[1:4]
        brand = hxs.select("//span[@itemprop = 'brand']/text()").extract()[0]
        stock_html = hxs.select("//section[@class = 'col3']").extract()[0]
        stock = 0 if 'Out of stock' in stock_html else 1

        image_srcs = hxs.select("//div[@id = 'currentView']//img[@itemprop = 'image']/@src").extract()
        image_url = image_srcs[0] if image_srcs else ""

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('image_url', image_url)
        loader.add_value('url', response.url)
        loader.add_value('name', name)
        loader.add_value('price', price)
        loader.add_value('stock', stock)

        # Add each breadcrumb once; the loop used to add the whole list on
        # every iteration, repeating every category len(categories) times.
        for category in categories:
            loader.add_value('category', category)

        loader.add_value('brand', brand)
        loader.add_value('sku', sku)
        loader.add_value('identifier', identifier)
        product = loader.load_item()

        product['metadata'] = SonaeMeta()
        if hxs.select('//span[@class="unavailable" and contains(text()[2], "Collect in store")]'):
            product['metadata']['exclusive_online'] = 'Yes'

        yield product
Exemplo n.º 9
0
    def parse_offers_static_page(self, response):
        """Yield one product per new marketplace offer listed on the page.

        The first row of the ``#colsMP`` table is skipped. Rows for Fnac's
        own offer, rows whose status does not contain "novo", rows without a
        price and rows without a dealer are skipped with a log message.
        Shared product attributes are copied from
        ``response.meta['product_info']``.

        :param response: response of the offers page.
        :returns: generator of product items, one per accepted offer row.
        """
        product_info = response.meta['product_info']
        exclusive_online = bool(response.meta.get('exclusive_online'))

        # Normalise the identifier so it carries exactly one 'fcom' prefix.
        base_identifier = product_info['base_identifier'].replace('mp', '')
        if 'fcom' not in base_identifier:
            base_identifier = 'fcom' + base_identifier
        self.seen.add(base_identifier.replace('fcom', ''))

        offer_rows = response.css('#colsMP tr')[1:]
        for row in offer_rows:
            if row.css('.fnacView'):
                self.log('Skipping Fnac direct product')
                continue

            status = row.css('td.gras').xpath('./text()').extract()
            if status and 'novo' not in status[0].lower():
                self.log('Skipping used product')
                continue

            price_texts = row.css('.userPrice').xpath('./text()').extract()
            if not price_texts:
                self.log('Price not found')
                continue
            price = price_texts[0].replace(u'\xa0', '').strip()

            promotion_price = row.css('.oldPrice').xpath('./text()').extract()
            if promotion_price:
                promotion_price = extract_price_eu(promotion_price[0].replace(u'\xa0', '').strip())

            shipping_cost = row.css('.noir').xpath('./text()').extract()
            if shipping_cost:
                shipping_cost = extract_price_eu(shipping_cost[0].strip())

            dealer = row.css('.bleu_MP')
            if not dealer:
                self.log('Dealer not found')
                continue
            dealer_id = dealer.xpath('./a/@href').extract()[0].split('/')[-1]
            dealer_name = dealer.xpath('./a/strong/text()').extract()[0].strip()

            # One item per (product, dealer) pair.
            identifier = self.get_identifier(base_identifier + '-' + dealer_id)

            loader = ProductLoader(item=Product(), selector=row)
            loader.add_value('identifier', identifier)
            loader.add_value('dealer', dealer_name)
            for field in ('name', 'category', 'brand', 'url', 'image_url', 'sku'):
                loader.add_value(field, product_info[field])
            loader.add_value('price', price)
            if shipping_cost:
                loader.add_value('shipping_cost', shipping_cost)
            product = loader.load_item()

            metadata = SonaeMeta()
            if exclusive_online:
                metadata['exclusive_online'] = 'Yes'
            metadata['delivery_24_48'] = 'Yes'
            if promotion_price:
                metadata['promotion_price'] = str(promotion_price)
            product['metadata'] = metadata

            previous = self.metadata_[identifier] if identifier in self.metadata_ else {}
            promo_start = previous.get('promo_start')
            promo_end = previous.get('promo_end')
            today = datetime.datetime.now().strftime('%Y-%m-%d')

            product['metadata']['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
            if promotion_price:
                # Reuse the stored start date only while that promotion is open.
                if promo_start and not promo_end:
                    product['metadata']['promo_start'] = promo_start
                else:
                    product['metadata']['promo_start'] = today
                product['metadata']['promo_end'] = ''
            elif promo_start:
                product['metadata']['promo_start'] = promo_start
                product['metadata']['promo_end'] = promo_end if promo_end else today

            yield product
Exemplo n.º 10
0
    def parse_product(self, response):
        """Scrape a product page, following its variant links.

        Every option of the variant ``<select>`` is requested with this same
        callback. The page is dropped when no ``productId`` input is found.
        When the page exposes a shipment product id, the item is handed to
        ``self.parse_shipping`` through the request meta instead of being
        yielded directly.

        :param response: response of the product page.
        :returns: generator of variant requests and then either a shipping
            request or the product item.
        """
        base_url = get_base_url(response)
        hxs = HtmlXPathSelector(response)

        variant_urls = hxs.select(
            '//div[@class="stretch clearfix box"]/select/option/@value').extract()
        for variant_url in variant_urls:
            yield Request(urljoin_rfc(base_url, variant_url),
                          callback=self.parse_product,
                          meta={'dont_redirect': True})

        loader = ProductLoader(item=Product(), selector=hxs)
        loader.add_value(
            'name',
            hxs.select('//*[@id="centerC"]/h1/span[@itemprop="name"]/text()').extract()[0])

        identifier = hxs.select(
            '//div[@class="pd-container-right"]//form[@class="addBasketItem"]//input[@name="productId"]/@value'
        ).extract()
        if not identifier:
            return
        product_id = identifier[0]
        loader.add_value('identifier', product_id)
        loader.add_value('url', response.url)

        price_texts = hxs.select('//noscript/span/text()').extract()
        if price_texts:
            loader.add_value('price', extract_price(price_texts[0]))
        else:
            loader.add_value('price', '0')

        stock_texts = hxs.select('//*[@id="first3"]/p/span/text()').extract()
        stock = stock_texts[0] if stock_texts else ''

        # The first breadcrumb entry is skipped.
        for category in hxs.select('//*[@id="infoblock"]/div/a/text()').extract()[1:]:
            loader.add_value('category', category)

        brand_alts = hxs.select('//div[@class="pd-brand box"]/a/img/@alt').extract()
        loader.add_value('brand', brand_alts[0] if brand_alts else '')

        image_srcs = hxs.select('//*[@id="showPic"]/@src').extract()
        loader.add_value(
            'image_url',
            urljoin_rfc(base_url, image_srcs[0]) if image_srcs else '')

        product = loader.load_item()

        promotion_price = hxs.select(
            u'//p[contains(text(), "Preço Regular")]/strike/text()').re(r'[\d,.]+')

        metadata = SonaeMeta()
        metadata['exclusive_online'] = 'No'
        if promotion_price:
            # Drop thousands separators, then use a dot as decimal mark.
            metadata['promotion_price'] = promotion_price[0].replace('.', '').replace(',', '.')
        metadata['stock'] = stock

        has_previous = (self.meta_df is not None
                        and not self.meta_df.empty
                        and product_id in self.meta_df.index)
        previous = self.meta_df.loc[product_id] if has_previous else {}
        promo_start = previous.get('promo_start')
        promo_end = previous.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        metadata['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if promotion_price:
            # Reuse the stored start date only while that promotion is open.
            if promo_start and not promo_end:
                metadata['promo_start'] = promo_start
            else:
                metadata['promo_start'] = today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = promo_end if promo_end else today

        product['metadata'] = metadata

        shipping_pid = hxs.select('//span[@id="shipmentDetails"]/@data-productid').extract()
        if not shipping_pid:
            yield product
        else:
            shipping_url = 'https://www.redcoon.pt/req/ajax/mod/ShopShipment/pid/' + shipping_pid[0]
            yield Request(shipping_url,
                          headers={'X-Requested-With': 'XMLHttpRequest'},
                          callback=self.parse_shipping,
                          meta={'product': product})
0
    def parse_product(self, response):
        """Scrape a product page; also follow product links of search results.

        The response URL is removed from ``self.old_urls`` when present. Any
        product links found in a ``searchItem`` block are requested with this
        same callback. Nothing is yielded for the page itself when it has no
        product name. A shipping cost of ``'3.00'`` is added when the loaded
        price is non-zero and not above 48.99.

        :param response: response of a product or search-result page.
        :returns: generator of follow-up requests and at most one item.
        """
        if response.url in self.old_urls:
            self.old_urls.remove(response.url)

        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        products = hxs.select(
            '//div[@id="searchItem"]//h3[@id="skuName"]/a/@href').extract()
        for url in products:
            yield Request(urljoin_rfc(base_url, url),
                          callback=self.parse_product,
                          meta=response.meta)

        name = hxs.select("//h1[@itemprop='name']/text()").extract()
        name = name[0] if name else ''
        if not name:
            return

        loader = ProductLoader(item=Product(), response=response)

        identifier = hxs.select(
            '//div[@itemprop="productID"]/text()').extract()
        # The identifier is the last space-separated token of the text.
        identifier = identifier[0].strip().split(
            ' ')[-1].strip() if identifier else ''
        sku = identifier

        stock = hxs.select("//link[@itemprop='availability']/@href").extract()
        stock = stock[0] if stock else ''
        stock = 0 if 'OutOfStock' in stock else 1

        # De-duplicate breadcrumbs while keeping their page order; a plain
        # set() would return the category path in arbitrary order.
        categories = []
        seen_categories = set()
        for category in hxs.select(
                '//div[@id="skuBreadCrumbs"]//span[@itemprop="title"]/text()'
        ).extract():
            if category not in seen_categories:
                seen_categories.add(category)
                categories.append(category)

        image_url = hxs.select('//img[@id="SkuPageMainImg"]/@src').extract()
        image_url = urljoin_rfc(base_url, image_url[0]) if image_url else ''

        # Raw string: '\[' and '\]' are invalid escapes in a normal literal.
        brand = re.findall(r'brandName":\["(.+?)"\]', response.body)
        brand = brand[0] if brand else ''

        price = hxs.select('//span[@itemprop="price"]/text()').extract()
        # Drop thousands separators, then use a dot as decimal mark.
        price = price[0].replace('.', '').replace(
            ',', '.').strip() if price else '0.00'

        loader.add_value('price', price)

        price = loader.get_output_value('price')
        if price:
            # Compare Decimal with Decimal: a float literal is only an
            # approximation of 48.99, which makes the boundary fragile.
            if Decimal(price) <= Decimal('48.99'):
                loader.add_value('shipping_cost', '3.00')

        loader.add_value('stock', stock)
        loader.add_value('brand', brand.decode('utf-8'))
        loader.add_value('url', response.url)
        loader.add_value('image_url', image_url)
        loader.add_value('identifier', identifier)
        loader.add_value('sku', sku)
        loader.add_value('name', name)
        for category in categories:
            loader.add_value('category', category)

        product = loader.load_item()

        metadata = SonaeMeta()
        promotional_price = hxs.select('//div[@id="SkuSaveStory"]//span[contains(@class, "strike") '
                                       'and contains(@class, "darkGray")]/text()') \
            .re(r'[\d,.]+')
        if promotional_price:
            metadata['promotion_price'] = promotional_price[0].replace(
                '.', '').replace(',', '.')

        if self.meta_df is not None and not self.meta_df.empty and identifier in self.meta_df.index:
            prev_meta = self.meta_df.loc[identifier]
        else:
            prev_meta = {}
        promo = promotional_price
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')

        # Read the clock once so the date and the timestamp always agree.
        now = datetime.datetime.now()
        today = now.strftime('%Y-%m-%d')

        metadata['extraction_timestamp'] = now.strftime('%Y-%m-%d %H:%M')
        if promo:
            # Keep the stored start date only while that promotion is open.
            metadata['promo_start'] = promo_start if promo_start and not promo_end else today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = today if not promo_end else promo_end

        metadata['delivery_24'] = 'Yes'
        product['metadata'] = metadata
        yield product
Exemplo n.º 12
0
    def parse_product(self, response):
        """Scrape a product page and yield the item with a ``-new`` identifier.

        The page's ``product_id`` suffixed with ``-new`` is used both as the
        item identifier and as the lookup key in ``self.meta_df``. Promotion
        dates are derived from ``response.meta['promo']`` and the stored row.

        :param response: response of the product page.
        :returns: generator yielding a single product item.
        """
        loader = ProductLoader(item=Product(), response=response)

        price_text = response.xpath(
            '//div[@class="product-info"]//span[@class="price-fixed"]/text()').extract()[0]
        loader.add_value('price', extract_price(price_text))

        page_id = response.xpath('//input[@name="product_id"]/@value').extract()[0]
        lookup_id = page_id + '-new'
        loader.add_value('identifier', lookup_id)

        loader.add_value(
            'name',
            response.xpath('//div[@class="product-info"]//h1/text()').extract_first())

        sku_texts = response.xpath(
            '//div[@class="description"]'
            '/span[contains(text(), "digo do produto")]'
            '/following-sibling::text()[1]').extract()
        loader.add_value('sku', sku_texts[0].strip())

        brand_texts = response.xpath(
            '//div[@class="description"]'
            '/span[contains(text(), "Fabricantes")]'
            '/following-sibling::a[1]/text()').extract()
        loader.add_value('brand', brand_texts[0].strip() if brand_texts else '')

        availability = response.xpath(
            '//div[@class="description"]'
            '/span[contains(text(), "Disponibilidade")]'
            '/following-sibling::text()[1]').extract()[0].strip()
        # Any availability text other than the expected one counts as no stock.
        if u'Dispon\xedvel para Encomenda' not in availability:
            loader.add_value('stock', 0)

        image_links = response.xpath(
            '//div[@class="product-info"]//div[contains(@class, "image")]/a/@href'
        ).extract()
        if image_links:
            loader.add_value('image_url', image_links[0])

        # The first and last breadcrumb entries are left out.
        breadcrumb = response.xpath('//div[@class="breadcrumb"]/a/text()').extract()[1:-1]
        loader.add_value('category', breadcrumb)
        loader.add_value('url', response.url)

        product = loader.load_item()
        product['metadata'] = SonaeMeta()

        has_previous = (self.meta_df is not None
                        and not self.meta_df.empty
                        and lookup_id in self.meta_df.index)
        previous = self.meta_df.loc[lookup_id] if has_previous else {}

        on_promo = response.meta.get('promo', False)
        promo_start = previous.get('promo_start')
        promo_end = previous.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        product['metadata']['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if on_promo:
            # Reuse the stored start date only while that promotion is open.
            if promo_start and not promo_end:
                product['metadata']['promo_start'] = promo_start
            else:
                product['metadata']['promo_start'] = today
            product['metadata']['promo_end'] = ''
        elif promo_start:
            product['metadata']['promo_start'] = promo_start
            product['metadata']['promo_end'] = promo_end if promo_end else today

        yield product
Exemplo n.º 13
0
    def parse_product(self, response):
        """Scrape one product page and yield the item with promo metadata.

        The campaign price is preferred; the regular price row is the
        fallback. Price, identifier and name are mandatory (``IndexError`` if
        absent); SKU, image, brand and availability are optional.

        Promotion dates are carried over from the previous crawl's row in
        ``self.meta_df`` (indexed by product identifier) and updated depending
        on whether a discount block is present on the page.
        """
        hxs = HtmlXPathSelector(response)
        product_loader = ProductLoader(item=Product(), selector=hxs)

        price = response.xpath(
            u'//tr[th[contains(text(), "Preço Campanha")]]/td/span/text()'
        ).extract()
        if not price:
            price = response.xpath(
                u'//tr[th[contains(text(), "Preço")]]/td/span/text()'
            ).extract()
        product_loader.add_value('price', extract_price(price[0]))

        identifier = response.xpath(
            '//input[@name="products_id"]/@value').extract()[0]
        product_loader.add_value('identifier', identifier)

        name = response.xpath(
            '//div[@id="my_header"]//h2/text()').extract()[0].strip()
        product_loader.add_value('name', name)

        sku = response.xpath('//span[@class="smallText"]/text()').re(
            r'EAN\[(.*)\]')
        product_loader.add_value('sku', sku[0] if sku else '')

        image_url = response.xpath('//a[@rel="fancybox"]/img/@src').extract()
        if image_url:
            product_loader.add_value('image_url',
                                     response.urljoin(image_url[0]))

        # Only the last three header links are used as the category path.
        category = response.xpath(
            '//div[@id="my_header"]//a/text()').extract()[-3:]
        product_loader.add_value('category', category)
        product_loader.add_value('url', response.url)
        product_loader.add_xpath('brand', '//div/@data-product-manufacture')

        metadata = SonaeMeta()
        promo = response.xpath(
            '//div[@class="discount_block"]/span[@class="discount_block_text" and text()]'
        )

        if self.meta_df is not None and not self.meta_df.empty and identifier in self.meta_df.index:
            prev_meta = self.meta_df.loc[identifier]
        else:
            prev_meta = {}
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        metadata['extraction_timestamp'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M')
        if promo:
            # Keep the earlier start date only while that promo is still open.
            metadata[
                'promo_start'] = promo_start if promo_start and not promo_end else today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = today if not promo_end else promo_end

        # A missing or empty availability attribute means "out of stock".
        # Previously the value was indexed twice, so a missing attribute
        # raised IndexError before the fallback could run. The check is still
        # on the first character of the value, as before.
        availability = response.xpath(
            '//div/@data-product-availability').extract_first()
        in_stock = bool(availability) and availability[0] == '1'
        if not in_stock:
            product_loader.add_value('stock', 0)

        product = product_loader.load_item()
        product['metadata'] = metadata
        yield product
Exemplo n.º 14
0
    def parse_products(self, response):
        """Parse one page of the JSON product API and request the next page.

        Yields one item per product that has a price, then a request for the
        next offset (current offset + 12) with the same ``u_id``/``u_cat``.
        An empty product list ends the pagination: nothing is yielded.
        """
        payload = json.loads(response.body)
        products = payload['response']['products']
        if not products:
            return

        u_id = response.meta['u_id']
        u_cat = response.meta['u_cat']
        offset = response.meta['offset']

        for product in products:
            product_loader = ProductLoader(item=Product(),
                                           response=response)
            if not product['price']:
                continue

            product_loader.add_value('identifier', product['id'])
            product_loader.add_value('name', product['title'])
            product_loader.add_value('sku', product['id'])

            # Convert "1 234,56" style amounts to "1234.56".
            raw_price = product['price']['value']
            price = raw_price.replace(' ', '').replace('.', '').replace(
                ',', '.')
            product_loader.add_value('price', price)

            image_source = product['featured_image']['source']
            product_loader.add_value('image_url',
                                     response.urljoin(image_source))
            product_loader.add_value('url', product['url'])
            product_loader.add_value('brand', product['brand']['name'])
            if product['variants'][0]['inventory_quantity'] == '0':
                product_loader.add_value('stock', 0)
            product_loader.add_value('category', product['category'])

            # Both flags are derived from the product's tag titles.
            tag_titles = [tag['title'] for tag in product['tags']]
            promo = any([u'promo' in title.lower() for title in tag_titles])
            exclusive_online = any([
                u"PromoçãoOnline" in title.title().replace(' ', '')
                for title in tag_titles
            ])

            metadata = SonaeMeta()
            meta_df = self.meta_df
            prev_meta = {}
            if meta_df is not None and not meta_df.empty and product[
                    'id'] in meta_df.index:
                prev_meta = meta_df.loc[product['id']]

            promo_start = prev_meta.get('promo_start')
            promo_end = prev_meta.get('promo_end')
            today = datetime.datetime.now().strftime('%Y-%m-%d')
            metadata['extraction_timestamp'] = datetime.datetime.now(
            ).strftime('%Y-%m-%d %H:%M')
            if promo:
                # Keep the earlier start only while that promo is still open.
                still_running = promo_start and not promo_end
                metadata['promo_start'] = promo_start if still_running else today
                metadata['promo_end'] = ''
            elif promo_start:
                metadata['promo_start'] = promo_start
                metadata['promo_end'] = promo_end if promo_end else today
            if exclusive_online:
                metadata['exclusive_online'] = 'Yes'

            item = product_loader.load_item()
            item['metadata'] = metadata
            yield item

        next_offset = offset + 12
        next_url = ('http://www.phonehouse.pt/api.php/getProducts/' + u_id +
                    '/' + u_cat + '/' + str(next_offset))
        yield scrapy.Request(next_url,
                             callback=self.parse_products,
                             meta={
                                 'u_id': u_id,
                                 'u_cat': u_cat,
                                 'offset': next_offset
                             })
Exemplo n.º 15
0
    def parse_product(self, response):
        """Scrape one product page and yield the item with promo metadata.

        A body containing ``SQLSTATE`` is treated as a failed page: the
        request is retried through ``self._retry`` when that returns a
        request, otherwise the failure is logged, and nothing else is yielded.

        Promotion dates are carried over from the previous crawl's row in
        ``self.meta_df`` and updated depending on whether a visible reduction
        amount is shown on the page.
        """
        if 'SQLSTATE' in response.body:
            retry_request = self._retry(response)
            if not retry_request:
                self.log('Error parsing {}'.format(response.url))
                return
            yield retry_request
            return

        loader = ProductLoader(item=Product(), response=response)

        identifier = response.xpath(
            '//*[@id="product_page_product_id"]/@value').extract_first()

        loader.add_value(
            'name',
            response.xpath('//h1[@itemprop="name"]/text()').extract_first())
        loader.add_value('identifier', identifier)
        loader.add_value(
            'sku',
            response.xpath(
                '//label[text()="EAN "]/../span/text()').extract_first())
        loader.add_value(
            'brand',
            response.xpath(
                '//label[text()="Fabricante "]/../span/text()'
            ).extract_first())
        loader.add_value(
            'category',
            response.xpath(
                '//div[@class="breadcrumb clearfix"]//a/text()').extract())
        loader.add_value('url', response.url)

        picture = response.xpath('//*[@id="bigpic"]/@src').extract_first()
        # Inline data-URI placeholders are not real product images.
        if picture and not picture.strip().startswith('data:image'):
            loader.add_value('image_url', response.urljoin(picture))

        # A missing price node falls back to '0'.
        raw_price = response.xpath(
            '//*[@id="our_price_display"]/text()').extract_first('0')
        loader.add_value('price', extract_price(raw_price.replace(' ', '')))

        availability = response.xpath(
            '//span[@class="avail-label"]/text()').extract_first()
        if not availability:
            loader.add_value('stock', 0)

        item = loader.load_item()

        meta_df = self.meta_df
        prev_meta = {}
        if meta_df is not None and not meta_df.empty and identifier in meta_df.index:
            prev_meta = meta_df.loc[identifier]

        promo = response.xpath(
            '//p[@id="reduction_amount" and not(contains(@style,"display:none"))]'
            '/span[@id="reduction_amount_display" and text()!=""]')
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        metadata = SonaeMeta()
        metadata['extraction_timestamp'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M')
        if promo:
            # Keep the earlier start date only while that promo is still open.
            still_running = promo_start and not promo_end
            metadata['promo_start'] = promo_start if still_running else today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = promo_end if promo_end else today

        item['metadata'] = metadata
        yield item
Exemplo n.º 16
0
    def parse_product(self, response):
        """Scrape one product page and yield the item with promo metadata.

        The regular price node is preferred and the campaign price node is
        the fallback; a page with neither raises ``IndexError``. The
        identifier is taken from the ``idprod=`` part of the URL.

        Metadata carries the old (struck-through) price when present, the
        ``exclusive_online`` flag passed through ``response.meta``, and the
        promotion dates tracked across crawls via ``self.meta_df``.
        """
        base_url = get_base_url(response)
        hxs = HtmlXPathSelector(response=response)

        name = hxs.select('//div[@class="prod-nome"]/text()').extract()
        price_nodes = (
            hxs.select('//div[@class="prod-price "]/text()').extract()
            or hxs.select(
                '//div[@class="prod-price campanha"]/text()').extract())
        price = price_nodes[0]

        brand = ''
        # The first breadcrumb entry is skipped.
        categories = hxs.select(
            '//div[@id="breadcrumb"]/ul/li/a/text()').extract()[1:]

        loader = ProductLoader(item=Product(), response=response)

        images = hxs.select('//div[@id="prod-imagem"]/img/@src').extract()
        if images:
            image_url = urljoin_rfc(base_url, images[0])
        else:
            image_url = ''
        loader.add_value('image_url', image_url)
        loader.add_value('url', response.url)
        loader.add_value('name', name)
        loader.add_value('price', extract_price_eu(price))
        loader.add_value('brand', brand)
        for category in categories:
            loader.add_value('category', category)
        loader.add_value(
            'sku',
            hxs.select('//script[@data-flix-ean]/@data-flix-ean').extract())
        identifier = re.findall('idprod=(.*)', response.url)[0]
        loader.add_value('identifier', identifier)

        product = loader.load_item()

        metadata = SonaeMeta()

        old_price = hxs.select(
            '//div[@class="prod-price-old"]/del/text()').re(r'[\d,.]+')
        if old_price:
            # European number format -> plain decimal string.
            metadata['promotion_price'] = old_price[0].replace(
                '.', '').replace(',', '.')

        if response.meta.get('exclusive_online', 'No') == 'Yes':
            metadata['exclusive_online'] = 'Yes'

        meta_df = self.meta_df
        prev_meta = {}
        if meta_df is not None and not meta_df.empty and identifier in meta_df.index:
            prev_meta = meta_df.loc[identifier]

        promo = hxs.xpath(
            '//div[@id="prod-data"]//div[@class="prod-price campanha"]')
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        metadata['extraction_timestamp'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M')
        if promo:
            # Keep the earlier start date only while that promo is still open.
            still_running = promo_start and not promo_end
            metadata['promo_start'] = promo_start if still_running else today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = promo_end if promo_end else today

        product['metadata'] = metadata
        yield product
Exemplo n.º 17
0
    def parse_product(self, response):
        """Scrape one product page and yield the item with promo metadata.

        Name, price and identifier are mandatory (``IndexError`` if absent).
        The identifier doubles as the SKU. Stock is reported as 0 only when
        the "Indisponível" button is present.

        Promotion dates are carried over from the previous crawl's row in
        ``self.meta_df`` and updated depending on whether an old price is
        shown on the page.

        NOTE: a disabled weight-based shipping-cost calculation used to live
        in this method as a string literal; it was dead code and has been
        removed. No ``shipping_cost`` is set here.
        """
        loader = ProductLoader(item=Product(), response=response)
        metadata = SonaeMeta()

        loader.add_xpath(
            'image_url',
            '//img[contains(@class, "product-detail-img-main")]/@src')
        loader.add_value('url', response.url)

        name = response.xpath('//h1/text()').extract()[0].strip()
        loader.add_value('name', name)

        # Collapse all whitespace inside the price text before parsing it.
        price = ''.join(
            response.xpath('//span[@class="item-price"]/text()').extract()
            [0].strip().split())
        loader.add_value('price', extract_price(price))

        out_of_stock = response.xpath(
            u'//div[@class="product-btns-panel"]/button[contains(text(), "Indisponível")]'
        )
        if out_of_stock:
            loader.add_value('stock', 0)

        # The first breadcrumb entry is skipped.
        categories = response.xpath(
            '//ol[@class="breadcrumb"]/li/a/text()').extract()[1:]
        for category in categories:
            loader.add_value('category', category)

        brand = response.xpath('//div[h1]/h3/text()').extract()
        if brand:
            loader.add_value('brand', brand[0])

        identifier = response.xpath('//input[@name="Id"]/@value').extract()[0]
        loader.add_value('identifier', identifier)
        loader.add_value('sku', identifier)

        if self.meta_df is not None and not self.meta_df.empty and identifier in self.meta_df.index:
            prev_meta = self.meta_df.loc[identifier]
        else:
            prev_meta = {}
        promo = response.xpath(
            '//span[@class="item-old-price"]/span[@class="item-old-price"]/text()'
        )
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        metadata['extraction_timestamp'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M')
        if promo:
            # Keep the earlier start date only while that promo is still open.
            metadata['promo_start'] = promo_start if promo_start and not promo_end else today
            metadata['promo_end'] = ''
        elif promo_start:
            metadata['promo_start'] = promo_start
            metadata['promo_end'] = today if not promo_end else promo_end

        item = loader.load_item()
        item['metadata'] = metadata
        yield item
Exemplo n.º 18
0
    def parse_item(self, response):
        """Yield one product per Pixmania-sold offer on a product page.

        URLs ending in ``/index.html`` are skipped. Product-level fields
        (name, SKU, brand, identifier, categories, image) are read once, then
        every ``merchant product`` block is inspected; offers whose seller
        list does not contain ``'Pixmania'`` are ignored. An offer with no
        seller shown is attributed to Pixmania.

        The SKU comes from ``response.meta['sku']``, else from ``self.skus``
        keyed by ``response.meta['url']``, else from the page's ``prdref``
        value (``IndexError`` if that is missing too). The identifier input
        is mandatory as well.
        """
        self.log('Parsing {} in parse_item'.format(response.url))
        if response.url.endswith("/index.html"):
            return

        # Re-decode the body, dropping bytes that are not valid UTF-8.
        response = response.replace(
            body=response.body.decode('utf-8', errors='ignore'))

        hxs = HtmlXPathSelector(response=response)

        description_field = hxs.select(
            '''//script[contains(text(), '"prdref"')]''').extract()
        description_field = description_field[0] if description_field else ''

        # Prefer the name embedded in the tracking script; fall back to the
        # itemprop markup. An explicit emptiness check replaces the former
        # bare ``except:``, which could hide unrelated errors.
        script_names = re.findall(r'"prdname"\,"(.+?)"', description_field)
        if script_names:
            name = script_names[0]
        else:
            name = hxs.select("//span[@itemprop = 'name']/text()").extract()[0]

        sku = response.meta.get('sku', None)
        if not sku:
            url_orig = response.meta['url']
            if url_orig in self.skus:
                sku = self.skus[url_orig]
            else:
                sku = re.findall(r'"prdref"\,"(.+?)"', description_field)[0]

        brand = re.findall(r'"prdparam-brand"\,"(.+?)"', description_field)

        identifier = hxs.select(
            "//input[@name = 'sProductId']/@value").extract()[0]

        categories = hxs.select(
            '//div[@class="breadcrumb"]//span[@itemprop="title"]/text()'
        ).extract()
        # Drop blanks and the "home" crumb; keep at most three levels.
        categories = [
            c for c in categories if c.strip() and c.strip().lower() != 'home'
        ][:3]

        images = hxs.select(
            "//article[@class='product cancelOverfProduct col9']//img/@src"
        ).extract()
        image_url = images[0] if images else ""

        for dealer in hxs.select('//div[@class="merchant product"]'):
            seller = dealer.select(
                './/p[@class="soldby"]/strong/a//text()').extract()
            if not seller:
                seller = ['Pixmania']
            # Filter first so no work is done for third-party offers.
            if 'Pixmania' not in seller:
                continue

            stock = dealer.select('.//span[@class="available"]')
            price = dealer.select(
                './/span[@class="currentPrice"]//text()').extract()
            price = ''.join(price).replace(',', '.')
            shipping = dealer.select(
                './/div[@class="productPrices"]/span/text()').extract()
            shipping = shipping[-1].replace(',', '.') if shipping else '0'

            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('image_url', image_url)
            loader.add_value('url', response.url)
            loader.add_value('name', name)
            loader.add_value('price', price)
            if not stock:
                loader.add_value('stock', 0)
            loader.add_value('category', categories)
            if brand:
                loader.add_value('brand', brand[0])
            loader.add_value('shipping_cost', shipping)
            loader.add_value('identifier',
                             identifier + '-' + seller[0].lower())
            loader.add_value('dealer', seller)
            loader.add_value('sku', sku)

            product = loader.load_item()

            metadata = SonaeMeta()
            metadata['exclusive_online'] = 'Yes'

            delivery = dealer.re(r'([\d-]+?) dias')
            if delivery:
                # For a range such as "2-3" only the lower bound is used.
                delivery_days = int(delivery[0].split('-')[0])
                if delivery_days == 1:
                    metadata['delivery_24'] = 'Yes'
                elif delivery_days == 2:
                    metadata['delivery_24_48'] = 'Yes'
                elif delivery_days < 5:
                    metadata['delivery_48_96'] = 'Yes'
                else:
                    metadata['delivery_96_more'] = 'Yes'

            previous_price = dealer.select(
                './/span[@class="previousPrice"]/del/text()').re(r'[\d,.]+')
            if previous_price:
                # European number format -> plain decimal string.
                metadata['promotion_price'] = previous_price[0].replace(
                    '.', '').replace(',', '.')

            product['metadata'] = metadata
            yield product
Exemplo n.º 19
0
    def parse_product(self, response):
        """Scrape one product page and yield the Fnac offer with metadata.

        Most fields come from the ``tc_vars`` tracking script. The product is
        skipped (logged, nothing yielded) when no Fnac price is found and the
        identifier is not already in ``self.products``; a known product
        without a price is yielded with stock 0.

        Raises:
            ValueError: if the ``tc_vars`` script, the product id or the
                product name cannot be found.
            IndexError: if the breadcrumb has fewer than two title entries.
        """
        self.log("[[TEST]] parse_product")
        description_field = response.xpath(
            "//script[contains(text(), 'tc_vars')]/text()").extract_first()
        if not description_field:
            raise ValueError("Could not find description field: {}".format(
                response.url))

        # Check the match list before indexing, so a missing id raises the
        # intended ValueError rather than a bare IndexError.
        m = re.findall(r'tc_vars\["product_id"\]\s*=\s*"([^"]*)"',
                       description_field)
        if not m or not m[0]:
            raise ValueError("Identifier not found: {}".format(response.url))
        identifier = 'fcom' + m[0]

        # NOTE(review): the XPath is absolute ("//"), so it matches anywhere
        # in the document, not only under .ProductSummary-title — confirm
        # that this is intended.
        name = response.css('.ProductSummary-title').xpath(
            "//*[@itemprop='name']/text()").extract_first()
        # extract_first() returns None when nothing matches; guard before
        # strip() so the ValueError below is reachable.
        name = name.strip() if name else ''
        if not name:
            raise ValueError("Name not found: {}".format(response.url))
        subname = response.css('.ProductSummary-subTitle').xpath(
            "span[a]/preceding-sibling::span/text()").extract_first()
        if subname:
            name = ' '.join([name, subname])

        m = re.findall(r'tc_vars\["product_EAN"\]\s*=\s*"(\d*)"',
                       description_field)
        sku = m[0] if m else ''

        # ``price`` ends up as an empty selector list (no Fnac tab), None
        # (tab without a number) or the matched digits/commas string.
        price = response.xpath(
            '//*[@class="ProductSellers-tabControlText" and contains(text(), "Fnac")]//text()'
        )
        if price:
            price_text = ' '.join(price.extract()).replace(u'\xa0', '')
            price_match = re.search(r'([\d,]+)', price_text,
                                    re.MULTILINE | re.DOTALL)
            price = price_match.group(1) if price_match else None
            self.log(price)

        if not price and identifier not in self.products:
            self.log('Price not found {}'.format(response.url))
            return

        stock = 1 if price else 0

        breadcrumb_titles = response.css('.Breadcrumb-list').css(
            '.Breadcrumb-item').css('[itemprop=title]')
        # The second breadcrumb entry is required; the third is optional.
        category_01 = breadcrumb_titles[1].xpath('text()').extract_first()
        if len(breadcrumb_titles) > 2:
            category_02 = breadcrumb_titles[2].xpath(
                'text()').extract_first()
        else:
            category_02 = ''

        m = re.findall(r'tc_vars\["product_trademark"\]\s*=\s*"([^"]*)"',
                       description_field)
        brand = m[0] if m else ''

        # '' when there is no delivery block, None when it holds no number.
        shipping = ''
        shipping_nodes = response.css('.Delivery').xpath('.//text()').extract()
        if shipping_nodes:
            shipping_match = re.search(r'([\d,]+)',
                                       ''.join(shipping_nodes).strip())
            shipping = (extract_price_eu(shipping_match.group(1))
                        if shipping_match else None)

        # The picture is optional, like the SKU and brand above; a missing
        # entry no longer aborts the whole product with IndexError.
        m = re.findall(r'tc_vars\["product_picture_url"\]\s*=\s*"([^"]*)"',
                       description_field)
        image_url = m[0] if m else ''

        loader = ProductLoader(item=Product(), response=response)
        self.seen.add(identifier)
        loader.add_value('identifier', identifier)
        loader.add_value('name', name)
        loader.add_value('url', response.url)
        loader.add_value('sku', sku)
        loader.add_value('price', price)
        if not stock:
            loader.add_value('stock', stock)
        loader.add_value('category', category_01)
        loader.add_value('category', category_02)
        loader.add_value('brand', brand)
        loader.add_value('shipping_cost', shipping)
        loader.add_value('image_url', image_url)
        loader.add_value('dealer', 'Fnac')

        product = loader.load_item()

        product['metadata'] = SonaeMeta()
        product['metadata']['delivery_24_48'] = 'Yes'
        if response.meta.get('exclusive_online'):
            product['metadata']['exclusive_online'] = 'Yes'

        promotion_price = response.css('.ProductPriceBox').css(
            '.oldPrice').xpath("text()").extract_first()
        if promotion_price:
            # Strip non-breaking spaces, the euro sign and plain spaces.
            promotion_price = promotion_price.strip().replace(
                u'\xa0', '').replace(u'\u20ac', '').replace(' ', '')
            product['metadata']['promotion_price'] = str(
                extract_price_eu(promotion_price))

        if identifier in self.metadata_:
            prev_meta = self.metadata_[identifier]
        else:
            prev_meta = {}
        # An old price on the page is what marks the product as on promotion.
        promo = promotion_price
        promo_start = prev_meta.get('promo_start')
        promo_end = prev_meta.get('promo_end')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        product['metadata']['extraction_timestamp'] = datetime.datetime.now(
        ).strftime('%Y-%m-%d %H:%M')
        if promo:
            # Keep the earlier start date only while that promo is still open.
            product['metadata'][
                'promo_start'] = promo_start if promo_start and not promo_end else today
            product['metadata']['promo_end'] = ''
        elif promo_start:
            product['metadata']['promo_start'] = promo_start
            product['metadata'][
                'promo_end'] = today if not promo_end else promo_end

        yield product