def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.othoba.com' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. If the page has no price node a
    warning is logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['domain_name'] = 'www.othoba.com'
    product['url'] = response.url
    product['title'] = xpath(
        '//h2/span[@itemprop="name"]/text()').extract_first()

    # The first breadcrumb entry is dropped; the rest are the categories.
    breadcrumbs = xpath('//ol[@class="breadcrumb"]//li/a/text()').extract()
    product['categories'] = breadcrumbs[1:]
    product['currency'] = 'BDT'

    price_text = xpath('//span[@itemprop="price"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces and thousands separators so float() accepts the text.
    for token in (' ', ','):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    thumb_links = xpath(
        '//div[@style="margin-top: 10px;text-align: left;"]//div/a/@href'
    ).extract()
    main_links = xpath('//a[@itemprop="image"]/@href').extract()
    product['images'] = thumb_links + main_links
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.priyoshop.com' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. A missing title is stored as
    ``None``; a missing price logs a warning and yields nothing.
    """
    import logging

    xpath = response.selector.xpath

    # Ecommerce_product_items is the class name declared in items.py.
    product = items.Ecommerce_product_items()
    product['domain_name'] = 'www.priyoshop.com'
    product['url'] = response.url

    raw_title = xpath(
        '//div[@class="product-name"]/h1[@itemprop="name"]/text()'
    ).extract_first()
    # The first 33 characters of the text node are discarded -- presumably
    # leading whitespace in the site's markup; TODO confirm. Slicing None
    # used to raise TypeError, so a missing title is now kept as None.
    product['title'] = raw_title[33:] if raw_title is not None else None

    # The first breadcrumb entry is dropped; the rest are the categories.
    breadcrumbs = xpath(
        '//div[@class="breadcrumb"]/ul//li/span/a/span/text()').extract()
    product['categories'] = breadcrumbs[1:]
    product['currency'] = 'BDT'

    price_text = xpath('//span[@itemprop="price"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces, thousands separators and the 'Tk' marker before float().
    for token in (' ', ',', 'Tk'):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    thumb_links = xpath('//div[@class="picture-thumbs"]//a/@href').extract()
    main_images = xpath('//img[@itemprop="image"]/@src').extract()
    product['images'] = thumb_links + main_images
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.pickaboo.com' detail page.

    Breadcrumb categories, title, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. The regular price is preferred and
    the special price is the fallback; if neither exists a warning is
    logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['domain_name'] = 'www.pickaboo.com'
    product['url'] = response.url

    # The first breadcrumb entry is dropped; the rest are the categories.
    breadcrumbs = xpath(
        '//div[@class="breadcrumbs"]/ul//li/a/span/text()').extract()
    product['categories'] = breadcrumbs[1:]
    product['title'] = xpath('//h1[@itemprop="name"]/text()').extract_first()
    product['currency'] = 'BDT'

    price_text = xpath(
        '//span[@class="regular-price"]//span[@class="price"]/text()'
    ).extract_first()
    if price_text is None:
        price_text = xpath(
            '//p[@class="special-price"]//span[@class="price"]/text()'
        ).extract_first()
    if price_text is None:
        # Both lookups can miss; calling .replace() on None used to raise
        # AttributeError (also yielding no item). Report the real cause.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    for token in (' ', ','):
        price_text = price_text.replace(token, '')
    # Drop the first remaining character -- presumably a currency symbol;
    # TODO confirm against the live markup.
    price_text = price_text[1:]
    product['price'] = float(price_text)

    gallery_images = xpath(
        '//a[@class="magnify-zoom-gallery"]/img/@src').extract()
    main_images = xpath('//img[@id="magnify-small"]/@src').extract()
    product['images'] = gallery_images + main_images
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.bagdoom.com' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. The special price is preferred and
    the first generic price span is the fallback; if neither exists a
    warning is logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['domain_name'] = 'www.bagdoom.com'
    product['url'] = response.url
    product['title'] = xpath(
        '//div[@class="product-name"]/h1/text()').extract_first()

    # The first breadcrumb entry is dropped; the rest are the categories.
    breadcrumbs = xpath(
        '//div[@class="breadcrumbs"]/ul//li/a/text()').extract()
    product['categories'] = breadcrumbs[1:]
    product['currency'] = 'BDT'

    price_text = xpath(
        '//p [@class="special-price"]/span [@class="price"]/text()'
    ).extract_first()
    if price_text is None:
        price_text = xpath('//span[@class="price"]/text()').extract_first()
    if price_text is None:
        # Both lookups can miss; calling .replace() on None used to raise
        # AttributeError (also yielding no item). Report the real cause.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces, thousands separators and the 'Tk.' substring.
    for token in (' ', ',', 'Tk.'):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    product['images'] = xpath(
        '//div[@class="more-views"]/ul//li/a/@href').extract()
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from an 'easyshoppingbd.com' detail page.

    Title, categories, price and image URLs are read from ``response``
    with XPath and stored on a fresh ``items.Ecommerce_product_items``.
    If the page has no price node a warning is logged and nothing is
    yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['domain_name'] = 'easyshoppingbd.com'
    product['url'] = response.url
    product['title'] = xpath('//h1[@itemprop="name"]/text()').extract_first()
    # Every matched title span is kept here (no leading entry is dropped).
    product['categories'] = xpath(
        '//span[@itemprop="title"]/text()').extract()
    product['currency'] = 'BDT'

    price_text = xpath(
        '//span[@id="our_price_display"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    for token in (' ', ',', 'Tk.'):
        price_text = price_text.replace(token, '')
    # Drop the first remaining character -- presumably a currency symbol;
    # TODO confirm against the live markup.
    price_text = price_text[1:]
    product['price'] = float(price_text)

    thumb_links = xpath('//ul[@id="thumbs_list_frame"]//li/a/@href').extract()
    main_images = xpath(
        '//img[@id="bigpic" and @itemprop="image"]/@src').extract()
    product['images'] = thumb_links + main_images
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.daraz.com.bd' detail page.

    Title, breadcrumb categories, price and thumbnail image URLs are read
    from ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. If the page has no price node a
    warning is logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['url'] = response.url
    product['domain_name'] = "www.daraz.com.bd"
    product['title'] = xpath(
        '//div[@class="details -validate-size"]/span/h1[@class="title"]/ text()'
    ).extract_first()

    # Both the first and the last breadcrumb entries are dropped.
    breadcrumbs = xpath(
        '//nav[@class="osh-breadcrumb"]/ul//li/a/text()').extract()
    product['categories'] = breadcrumbs[1:-1]
    product['currency'] = 'BDT'

    price_text = xpath('//span[@dir="ltr"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces and thousands separators so float() accepts the text.
    for token in (' ', ','):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    # Image list taken from the thumbnail slider. A single image was
    # previously read from
    # //div[@class="product-preview"]/img/@data-zoom instead.
    # Example extracted values:
    #   'https://bd.daraz.io/EtGrdRLf6i5cz3R7boLgMrApIF4=/fit-in/680x680/filters:fill(white)/product/27/1733/1.jpg?6043'
    #   'https://bd.daraz.io/zGz4XrWyA2iB8ZpGhYLM7Wilt0I=/fit-in/680x680/filters:fill(white)/product/27/1733/2.jpg?6043'
    product['images'] = xpath(
        '//div[@class="thumbs-wrapper"]/div[@id="thumbs-slide"]//a/@href'
    ).extract()
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'dam.com.bd' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. If the page has no price node a
    warning is logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['domain_name'] = 'dam.com.bd'
    product['url'] = response.url
    product['title'] = xpath('//h2[@itemprop="name"]/text()').extract_first()

    # Both the first and the last breadcrumb entries are dropped.
    breadcrumbs = xpath(
        '//div[@id="breadcrumbs"]/ul//li/a/span/text()').extract()
    product['categories'] = breadcrumbs[1:-1]
    product['currency'] = 'BDT'

    price_text = xpath('//span[@itemprop="lowPrice"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces, thousands separators and the 'Tk.' substring.
    for token in (' ', ',', 'Tk.'):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    gallery_images = xpath(
        '//div[@class="clearfix padder-v-sm"]//img/@src').extract()
    main_images = xpath('//img[@itemprop="image"]/@src').extract()
    product['images'] = gallery_images + main_images
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.nanarokom.com' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. The price inside ``<ins>`` is
    preferred and the plain price span is the fallback; if neither exists
    a warning is logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['domain_name'] = 'www.nanarokom.com'
    product['url'] = response.url
    product['title'] = xpath(
        '//h1[@class="product_title entry-title"]/text()').extract_first()

    # Both the first and the last breadcrumb entries are dropped.
    breadcrumbs = xpath(
        '//nav[@class="woocommerce-breadcrumb"]//a/text()').extract()
    product['categories'] = breadcrumbs[1:-1]
    product['currency'] = 'BDT'

    price_text = xpath(
        '//p[@class="price"]/ins/span[@class="woocommerce-Price-amount amount"]/text()'
    ).extract_first()
    if price_text is None:
        price_text = xpath(
            '//p[@class="price"]/span[@class="woocommerce-Price-amount amount"]/text()'
        ).extract_first()
    if price_text is None:
        # Both lookups can miss; calling .replace() on None used to raise
        # AttributeError (also yielding no item). Report the real cause.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces and thousands separators so float() accepts the text.
    for token in (' ', ','):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    gallery_links = xpath(
        '//figure[@class="woocommerce-product-gallery__wrapper"]//div//a/@href'
    ).extract()
    main_links = xpath('//a[@itemprop="image"]/@href').extract()
    product['images'] = gallery_links + main_links
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.rokomari.com' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. If the page has no price node a
    warning is logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['url'] = response.url
    product['domain_name'] = "www.rokomari.com"
    product['title'] = xpath(
        '//div[@class="buyArea"]/h2/text()').extract_first()

    # The first breadcrumb entry is dropped; the rest are the categories.
    breadcrumbs = xpath('//ol[@class="breadcrumb"]//li/a/text()').extract()
    product['categories'] = breadcrumbs[1:]
    product['currency'] = 'BDT'

    # Raw price text looks like 'Tk. 284'.
    price_text = xpath('//span[@class="mainPrice"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces, thousands separators and both currency markers.
    for token in (' ', ',', 'Tk.', 'BDT'):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    # The first image of the book image area is skipped.
    cover_images = xpath('//div[@class="bookImgArea"]//img/@src').extract()[1:]
    hover_images = xpath(
        '//ul[@class="list-unstyled"]//li/@hovermax').extract()
    product['images'] = cover_images + hover_images
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'www.kiksha.com' detail page.

    Title, price and image URLs are read from ``response`` with XPath and
    stored on a fresh ``items.Ecommerce_product_items`` (no categories are
    set by this parser). The special price is preferred and the first
    generic price span is the fallback. A missing title is stored as
    ``None``; a missing price logs a warning and yields nothing.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['url'] = response.url
    product['domain_name'] = "www.kiksha.com"

    raw_title = xpath('//h1[@itemprop="name"]/text()').extract_first()
    # The first 65 characters of the text node are discarded -- presumably
    # leading whitespace in the site's markup; TODO confirm. len()/slicing
    # on None used to raise TypeError, so a missing title is kept as None.
    product['title'] = raw_title[65:] if raw_title is not None else None
    product['currency'] = 'BDT'

    price_text = xpath(
        '//p[@class="special-price"]//span[@class="price"]/text()'
    ).extract_first()
    if price_text is None:
        price_text = xpath('//span[@class="price"]/text()').extract_first()
    if price_text is None:
        # Both lookups can miss; calling .replace() on None used to raise
        # AttributeError (also yielding no item). Report the real cause.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces, thousands separators and the 'BDT' marker.
    for token in (' ', ',', 'BDT'):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    thumb_images = xpath('//a[@class="thumb-link"]/img/@src').extract()
    main_images = xpath('//img[@id="image-main"]/@src').extract()
    product['images'] = thumb_images + main_images
    product['last_updated'] = datetime.datetime.now()
    yield product
def parse_product_detail(self, response):
    """Yield one product item scraped from a 'kartbd.com' detail page.

    Title, breadcrumb categories, price and image URLs are read from
    ``response`` with XPath and stored on a fresh
    ``items.Ecommerce_product_items``. Image paths are prefixed with
    'http://kartbd.com'. If the page has no price node a warning is
    logged and nothing is yielded.
    """
    import logging

    xpath = response.selector.xpath

    product = items.Ecommerce_product_items()
    product['url'] = response.url
    product['domain_name'] = "kartbd.com"
    product['title'] = xpath(
        '//div[@class="col-md-5 col-sm-6 product-info"]/h2[@class="title"]/text()'
    ).extract_first()

    # The first breadcrumb entry is dropped; the rest are the categories.
    breadcrumbs = xpath('//nav[@class="breadcrumb"]/ul//li/a/text()').extract()
    product['categories'] = breadcrumbs[1:]
    product['currency'] = 'BDT'

    # Raw price text looks like '\n tk1,490.00'.
    price_text = xpath('//div[@class="col-xs-7"]/text()').extract_first()
    if price_text is None:
        # extract_first() returns None when nothing matches; calling
        # .replace() on it used to raise AttributeError (also yielding
        # no item). Report the real cause instead.
        logging.getLogger(__name__).warning(
            'price not found on %s; skipping product', response.url)
        return
    # Strip spaces, thousands separators, the 'tk' marker and newlines.
    for token in (' ', ',', 'tk', '\n'):
        price_text = price_text.replace(token, '')
    product['price'] = float(price_text)

    # Extracted image paths are concatenated onto the site root.
    site_root = 'http://kartbd.com'
    full_size = [
        site_root + path
        for path in xpath('//a[@class="f-box"]/@href').extract()
    ]
    thumbnails = [
        site_root + path
        for path in xpath('//div[@id="thumbRails"]//div/a/img/@src').extract()
    ]
    product['images'] = full_size + thumbnails
    product['last_updated'] = datetime.datetime.now()
    yield product