def get_prices(self, hxs):
    """Read the current and the previous price from the page.

    The current price comes from the text of ``div.op`` and the previous
    price from the text of ``span.lp``; the first text node of each is
    passed through ``basic.get_price``.

    Returns a ``(price, old_price)`` tuple. ``price`` is a one-element
    list. ``old_price`` is a one-element list when ``span.lp`` has text,
    otherwise the empty extraction result is returned unchanged.

    Raises IndexError when ``div.op`` yields no text.
    """
    current_texts = hxs.select('//div[@class="op"]/text()').extract()
    price = [basic.get_price(current_texts[0])]

    previous_texts = hxs.select('//span[@class="lp"]/text()').extract()
    if previous_texts:
        old_price = [basic.get_price(previous_texts[0])]
    else:
        # Nothing extracted: hand back the (empty) result as it is.
        old_price = previous_texts
    return price, old_price
def get_oldies(self, hxs):
    """Read the "save" amount and the old price from the page.

    Both values are taken from the first text node of ``span.save`` and
    ``span.oldPrice`` respectively and passed through ``basic.get_price``.

    Returns a ``([save], [old])`` tuple of one-element lists. If either
    lookup or conversion fails, both values fall back to ``None``, i.e.
    ``([None], [None])`` is returned.
    """
    try:
        save = hxs.select('//span[@class="save"]/text()').extract()[0]
        old = hxs.select('//span[@class="oldPrice"]/text()').extract()[0]
        save = basic.get_price(save)
        old = basic.get_price(old)
    except Exception:
        # Best-effort lookup: a missing element (IndexError on [0]) or a
        # failed conversion means "no old price". Catching Exception rather
        # than using a bare except lets KeyboardInterrupt and SystemExit
        # propagate.
        save = None
        old = None
    return [save], [old]
def parse(self, response):
    """Build a BootsItem from one product page and pass it to the XML writer.

    Increments ``self.counter`` and reports progress through
    ``basic.print_status``, fills the item from the ``get_*`` helper
    methods and a few direct XPath lookups, then takes one of two paths:

    * variants found (colour variants first, size variants as fallback
      when no single size was found): ``self.xml.create_xml(item)`` is
      called and the variants are written through
      ``create_color_variants`` / ``create_size_variants``;
    * no variants: price, points price and collect points are read from
      the page, stored on the item, and ``self.xml.create_xml(item)`` is
      called.

    Returns the item. ``item['image_urls']`` is set only after
    ``create_xml`` has been called.
    """
    self.counter += 1
    basic.print_status(self.counter, self.total)

    hxs = HtmlXPathSelector(response)
    item = BootsItem()

    (item['product_id'], item['store_id'],
     item['lang_id'], item['catalog_id']) = self.get_ids(hxs)
    item['name'] = self.get_name(hxs)
    (item['short_description'], sponsored, description_parts, stock_text,
     item['ingredients'], patient_info_url,
     item['offer'], item['promotion']) = self.get_description(hxs)
    item['rating'] = self.get_rating(hxs)
    size, price_per_size = self.get_size(hxs)
    item['normal_image_url'], image_urls = self.get_images(hxs)
    brand, brand_image_url = self.get_brand(hxs)
    item['save_money'], item['old_price'] = self.get_oldies(hxs)

    # Each description part gets its own numbered field, starting at 1.
    for number, part in enumerate(description_parts, 1):
        item['description_%d' % number] = [basic.cdata(part)]

    if sponsored is not None:
        item['sponsored'] = sponsored

    if stock_text == "In stock":
        item['in_stock'] = ["IN_STOCK"]
    else:
        item['in_stock'] = ["NOT_IN_STOCK"]

    item['order_id'] = hxs.select(
        '//input[@name="orderId"]/@value').extract()
    item['cat_entry_id'] = hxs.select(
        '//input[@name="catEntryId"]/@value').extract()
    item['calculation_usage_id'] = hxs.select(
        '//input[@name="calculationUsageId"]/@value').extract()

    if brand_image_url is not None:
        item['brand'] = brand
        stored_name = self.get_image_sha1(brand_image_url)
        item['brand_image_url'] = [
            "43662980-f344-11e1-a21f-0800200c9a66/full/" + stored_name
        ]
        image_urls.append(brand_image_url)

    if patient_info_url is not None:
        item['patient_information_url'] = [basic.cdata(patient_info_url)]

    (prices, point_prices, collect_points,
     colors, color_image_urls, variant_ids) = self.get_color_variants(hxs)

    if size is not None:
        item['size'] = size
        item['price_per_size'] = price_per_size
    elif variant_ids is None:
        # No single size and no colour variants: try size variants instead.
        (prices, point_prices, collect_points,
         sizes, variant_ids) = self.get_size_variants(hxs)

    if color_image_urls is not None:
        image_urls.extend(color_image_urls)

    if variant_ids is None:
        # Single-price product: read the price fields straight off the page.
        price_text = hxs.select('//p[@class="price"]/text()').extract()[0]
        points_text = hxs.select(
            '//span[@class="pointsPrice"]/text()').extract()[0]
        collect_text = hxs.select(
            '//p[@class="collectPoints"]/text()').extract()[0]
        collect_points = [basic.get_price(collect_text)]
        item['price'] = [basic.get_price(price_text)]
        item['points_price'] = [basic.get_price(points_text)]
        item['collect_points'] = collect_points
        self.xml.create_xml(item)
    else:
        self.xml.create_xml(item)
        if colors is not None:
            self.create_color_variants(prices, point_prices, colors,
                                       color_image_urls, variant_ids,
                                       collect_points, item['product_id'])
        else:
            # `sizes` is bound here: variant_ids can only be non-None with
            # colors None when get_size_variants() ran above.
            self.create_size_variants(prices, point_prices, sizes,
                                      variant_ids, collect_points,
                                      item['product_id'])

    item['image_urls'] = image_urls
    return item
def get_size_variants(self, hxs):
    """Collect per-size variant data from the page's inline script.

    The first ``productCode:"..."`` match found in the ``<script>``
    elements is split on commas and read in strides of seven fields,
    starting at index 7 (the first seven fields are skipped). Within each
    stride, offset 1 is the collect-points value, offset 2 the price and
    offset 4 the variant id; each is passed through ``basic.get_price``.
    The points price is the price times 100, truncated to an int and
    rendered as a string.

    Size labels come from the options of ``select#size_x``, dropping the
    first option.

    Returns ``(prices, point_prices, collect_points, sizes, variant_ids)``,
    or five ``None`` values when the script match cannot be obtained.

    Raises IndexError if the split data ends in the middle of a stride.
    """
    try:
        # Raw string with the same pattern value as before; avoids the
        # invalid "\d" escape in a normal string literal.
        variants = hxs.select('//script').re(
            r'productCode:".*\d"')[0].split(",")
    except Exception:
        # Best-effort: no match (IndexError on [0]) means the product has
        # no size variants. Exception instead of a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        print("No size variants found")
        return None, None, None, None, None

    sizes = hxs.select('//select[@id="size_x"]//option/text()').extract()[1:]

    collect_points = []
    prices = []
    point_prices = []
    variant_ids = []
    for start in range(7, len(variants), 7):
        price = basic.get_price(variants[start + 2])
        prices.append(price)
        point_prices.append(str(int(float(price) * 100)))
        variant_ids.append(basic.get_price(variants[start + 4]))
        collect_points.append(basic.get_price(variants[start + 1]))
    return prices, point_prices, collect_points, sizes, variant_ids
def get_color_variants(self, hxs):
    """Collect per-colour variant data from the page's inline script.

    The first ``productCode:"..."`` match found in the ``<script>``
    elements is split on commas and read in strides of eight fields,
    starting at index 0. Within each stride, offset 0 is the variant id,
    offset 2 the price and offset 5 the collect-points value; each is
    passed through ``basic.get_price``. The points price is the price
    times 100, truncated to an int and rendered as a string.

    Colour names and colour image URLs are read from the label ``span``
    text and ``img`` ``src`` attributes inside the
    ``div.gp_80-20a.column`` fieldset.

    Returns ``(prices, point_prices, collect_points, colors,
    color_image_urls, variant_ids)``, or six ``None`` values when any step
    fails (including a stride that runs past the end of the data).
    """
    try:
        # Raw string with the same pattern value as before; avoids the
        # invalid "\d" escape in a normal string literal.
        variants = hxs.select('//script').re(
            r'productCode:".*\d"')[0].split(",")
        colors = hxs.select(
            '//div[@class="gp_80-20a column"]//div[@class="innerColumn"]'
            '//fieldset//div//label//span/text()').extract()
        color_image_urls = hxs.select(
            '//div[@class="gp_80-20a column"]//div[@class="innerColumn"]'
            '//fieldset//div//label//img//@src').extract()

        collect_points = []
        prices = []
        point_prices = []
        variant_ids = []
        for start in range(0, len(variants), 8):
            price = basic.get_price(variants[start + 2])
            prices.append(price)
            point_prices.append(str(int(float(price) * 100)))
            variant_ids.append(basic.get_price(variants[start]))
            collect_points.append(basic.get_price(variants[start + 5]))
        return (prices, point_prices, collect_points,
                colors, color_image_urls, variant_ids)
    except Exception:
        # Best-effort: the whole extraction is guarded, so a missing script
        # match or a truncated record both mean "no colour variants".
        # Exception instead of a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        print("No color variants found")
        return None, None, None, None, None, None