Exemplo n.º 1
0
 def get_prices(self, hxs):
     """Return ``(price, old_price)`` scraped from the page.

     ``price`` is a one-element list holding the first text node of the
     ``div`` with class ``op`` after it has been passed through
     ``basic.get_price``.  ``old_price`` is built the same way from the
     ``span`` with class ``lp``; when that span has no text the empty
     extraction result is returned unchanged.

     Raises IndexError when the price ``div`` yields no text.
     """
     current_texts = hxs.select('//div[@class="op"]/text()').extract()
     current = [basic.get_price(current_texts[0])]

     previous = hxs.select('//span[@class="lp"]/text()').extract()
     if not previous:
         # Nothing to convert: hand the empty result back as-is.
         return current, previous
     return current, [basic.get_price(previous[0])]
Exemplo n.º 2
0
 def get_prices(self, hxs):
     """Scrape the price and the old price, each wrapped in a list.

     The price comes from the first text node under ``div.op`` and is
     required: an IndexError is raised when it is missing.  The old price
     comes from ``span.lp`` and is optional: if no text is found, the
     empty extraction result is returned in its place.  Found values are
     passed through ``basic.get_price``.
     """
     price_texts = hxs.select('//div[@class="op"]/text()').extract()
     price = [basic.get_price(price_texts[0])]

     old_texts = hxs.select('//span[@class="lp"]/text()').extract()
     old_price = [basic.get_price(old_texts[0])] if old_texts else old_texts
     return price, old_price
Exemplo n.º 3
0
 def get_oldies(self, hxs):
     """Return the saving and the old price from the page, each in a list.

     Both values are read from the first text node of their element
     (``span.save`` and ``span.oldPrice``) and passed through
     ``basic.get_price``.  This is a best-effort lookup: if either value
     is missing or cannot be converted, BOTH fall back to ``None`` and the
     result is ``([None], [None])``.

     Returns:
         A ``(save, old)`` tuple of one-element lists.
     """
     try:
         save = hxs.select('//span[@class="save"]/text()').extract()[0]
         old = hxs.select('//span[@class="oldPrice"]/text()').extract()[0]
         save = basic.get_price(save)
         old = basic.get_price(old)
     except Exception:
         # Deliberately broad so any scraping/conversion failure means
         # "no old price", but no longer a bare ``except:`` -- that also
         # swallowed KeyboardInterrupt and SystemExit.
         save = None
         old = None
     return [save], [old]
Exemplo n.º 4
0
 def parse(self, response):
     """Scrape one product page into a ``BootsItem`` and write its XML.

     Bumps the progress counter, collects the product fields through the
     ``get_*`` helpers, then takes one of two paths:

     * variants found (colour or size): the item XML is written first and
       the variants are emitted through ``create_color_variants`` or
       ``create_size_variants``;
     * no variants: price, points price and collect points are read
       straight from the page, stored on the item, and the XML is written.

     ``image_urls`` is attached to the item only after the XML has been
     written.  Returns the populated item.
     """
     self.counter += 1
     basic.print_status(self.counter, self.total)

     hxs = HtmlXPathSelector(response)
     item = BootsItem()

     # Basic product fields, gathered in the same order as the helpers run.
     (item['product_id'], item['store_id'],
      item['lang_id'], item['catalog_id']) = self.get_ids(hxs)
     item['name'] = self.get_name(hxs)
     (item['short_description'], sponsored, description, in_stock,
      item['ingredients'], patient_information_url,
      item['offer'], item['promotion']) = self.get_description(hxs)
     item['rating'] = self.get_rating(hxs)
     size, price_per_size = self.get_size(hxs)
     item['normal_image_url'], image_urls = self.get_images(hxs)
     brand, brand_image_url = self.get_brand(hxs)
     item['save_money'], item['old_price'] = self.get_oldies(hxs)

     # Each description part gets its own numbered field, starting at 1.
     for position, text in enumerate(description, 1):
         item['description_%d' % position] = [basic.cdata(text)]

     if sponsored is not None:
         item['sponsored'] = sponsored

     # The ordering form fields are only collected for in-stock products.
     if in_stock == "In stock":
         item['in_stock'] = ["IN_STOCK"]
         item['order_id'] = hxs.select('//input[@name="orderId"]/@value').extract()
         item['cat_entry_id'] = hxs.select('//input[@name="catEntryId"]/@value').extract()
         item['calculation_usage_id'] = hxs.select('//input[@name="calculationUsageId"]/@value').extract()
     else:
         item['in_stock'] = ["NOT_IN_STOCK"]

     if brand_image_url is not None:
         item['brand'] = brand
         brand_image_name = self.get_image_sha1(brand_image_url)
         item['brand_image_url'] = ["43662980-f344-11e1-a21f-0800200c9a66/full/" + brand_image_name]
         image_urls.append(brand_image_url)

     if patient_information_url is not None:
         item['patient_information_url'] = [basic.cdata(patient_information_url)]

     (prices, point_prices, collect_points,
      colors, color_image_urls, variant_ids) = self.get_color_variants(hxs)

     if size is not None:
         item['size'] = size
         item['price_per_size'] = price_per_size
     elif variant_ids is None:
         # No fixed size and no colour variants: try size variants instead.
         (prices, point_prices, collect_points,
          sizes, variant_ids) = self.get_size_variants(hxs)

     if color_image_urls is not None:
         image_urls.extend(color_image_urls)

     if variant_ids is None:
         # Plain product: read its single price block directly.
         price_text = hxs.select('//p[@class="price"]/text()').extract()[0]
         points_text = hxs.select('//span[@class="pointsPrice"]/text()').extract()[0]
         collect_text = hxs.select('//p[@class="collectPoints"]/text()').extract()[0]
         single_collect_points = [basic.get_price(collect_text)]
         item['price'] = [basic.get_price(price_text)]
         item['points_price'] = [basic.get_price(points_text)]
         item['collect_points'] = single_collect_points
         self.xml.create_xml(item)
     else:
         # Product with variants: write the parent item, then each variant.
         self.xml.create_xml(item)
         if colors is not None:
             self.create_color_variants(prices, point_prices, colors, color_image_urls, variant_ids, collect_points, item['product_id'])
         else:
             self.create_size_variants(prices, point_prices, sizes, variant_ids, collect_points, item['product_id'])

     item['image_urls'] = image_urls
     return item
Exemplo n.º 5
0
 def get_size_variants(self, hxs):
     """Extract per-size variant data from the page's inline script.

     The first ``productCode:"...<digit>"`` match found in a ``<script>``
     element is split on commas.  Fields are then read in steps of 7,
     starting at index 7 (the first group of 7 is skipped, and so is the
     first ``<option>`` of the ``size_x`` select).  For each step ``i``:

     * ``variants[i + 2]`` -> price
     * ``variants[i + 4]`` -> variant id
     * ``variants[i + 1]`` -> collect points

     all passed through ``basic.get_price``.  The points price is the
     price multiplied by 100, truncated to an int and stored as a string.

     Returns:
         ``(prices, point_prices, collect_points, sizes, variant_ids)``,
         or five ``None`` values when no script match is found.
     """
     try:
         # Raw string: same pattern as before, without relying on Python
         # leaving the unknown escape ``\d`` untouched.
         variants = hxs.select('//script').re(r'productCode:".*\d"')[0].split(",")
     except Exception:
         # Broad on purpose (any lookup failure means "no variants"), but
         # not a bare ``except:`` so Ctrl-C / SystemExit still propagate.
         print("No size variants found")
         return None, None, None, None, None

     # Drop the first option of the size drop-down.
     sizes = hxs.select('//select[@id="size_x"]//option/text()').extract()[1:]
     collect_points = []
     prices = []
     point_prices = []
     variant_ids = []
     for i in range(7, len(variants), 7):
         price = basic.get_price(variants[i + 2])
         prices.append(price)
         point_prices.append(str(int(float(price) * 100)))
         variant_ids.append(basic.get_price(variants[i + 4]))
         collect_points.append(basic.get_price(variants[i + 1]))
     return prices, point_prices, collect_points, sizes, variant_ids
Exemplo n.º 6
0
 def get_color_variants(self, hxs):
     """Extract per-colour variant data from the page.

     The first ``productCode:"...<digit>"`` match found in a ``<script>``
     element is split on commas and read in steps of 8, starting at
     index 0.  For each step ``i``:

     * ``variants[i + 2]`` -> price
     * ``variants[i]``     -> variant id
     * ``variants[i + 5]`` -> collect points

     all passed through ``basic.get_price``.  The points price is the
     price multiplied by 100, truncated to an int and stored as a string.
     Colour names and swatch image URLs are taken from the labels inside
     the ``gp_80-20a column`` fieldset.

     Returns:
         ``(prices, point_prices, collect_points, colors,
         color_image_urls, variant_ids)``, or six ``None`` values when
         anything in the extraction fails.
     """
     try:
         # Raw string: same pattern as before, without relying on Python
         # leaving the unknown escape ``\d`` untouched.
         variants = hxs.select('//script').re(r'productCode:".*\d"')[0].split(",")
         colors = hxs.select('//div[@class="gp_80-20a column"]//div[@class="innerColumn"]//fieldset//div//label//span/text()').extract()
         color_image_urls = hxs.select('//div[@class="gp_80-20a column"]//div[@class="innerColumn"]//fieldset//div//label//img//@src').extract()
         collect_points = []
         prices = []
         point_prices = []
         variant_ids = []
         for i in range(0, len(variants), 8):
             price = basic.get_price(variants[i + 2])
             prices.append(price)
             point_prices.append(str(int(float(price) * 100)))
             variant_ids.append(basic.get_price(variants[i]))
             collect_points.append(basic.get_price(variants[i + 5]))
         return prices, point_prices, collect_points, colors, color_image_urls, variant_ids
     except Exception:
         # The whole extraction stays inside the try: any failure (missing
         # script, short field list, non-numeric price) yields the all-None
         # result.  ``Exception`` instead of a bare ``except:`` lets
         # Ctrl-C / SystemExit propagate.
         print("No color variants found")
         return None, None, None, None, None, None