def parse_shop_look(self, hxs):
    """Write XML records for a shop-look page.

    One record is written for the look itself (ensemble id, image, name
    and fixed flag fields), followed by one record per product node found
    under ``//div[@id="cat-ens-prod-item"]``. Every record is handed to
    ``xml.create_xml``; nothing is returned.

    Product ids are placeholders: the look uses ``DUMMIE1`` and each
    product uses ``DUMMIE1_<n>`` with ``n`` counting from 1.
    """
    product_nodes = hxs.select('//div[@id="cat-ens-prod-item"]')

    # Original note: do this with actual id (the DUMMIE1 ids are placeholders).
    look = ExpressItem()
    page_source = "".join(hxs.extract())
    look_id = basic.get_middle_text(page_source, "ensembleId: '", "',")
    look_name = basic.clean_string_field(
        hxs.select('//div[@id="cat-ens-prod-con"]/h1/text()').extract())

    look['ensemble_id'] = look_id
    look['normal_image_url'] = self.shl_get_image(hxs)
    look['product_id'] = ["DUMMIE1"]
    look['shop_look'] = ['True']
    look['normal'] = ['False']
    look['shop_line'] = ['False']
    look['in_stock'] = ['IN_STOCK']
    look['name'] = look_name
    xml.create_xml(look)
    look.clear()

    for position, node in enumerate(product_nodes, 1):
        child = ExpressItem()
        child['master_product_id'] = ['DUMMIE1']
        child['product_id'] = ["DUMMIE1_" + str(position)]
        child['name'], child['price'], child['style'] = self.shl_basic_info(node)
        node_source = node.extract()
        variants_json = self.shl_create_variants(self.get_variants(node_source))
        child['variants'] = basic.cdata_field([variants_json])
        child['colors'] = basic.cdata_field(self.shl_get_swatches(node))
        xml.create_xml(child)
def create_child_products(self, main_id, ids, sizes, prices, images_grouped):
    """Create child products for every color and every size of a master product.

    For each color key in ``ids`` one color-level record is written whose
    master is ``main_id``; then one size-level record is written per entry
    of ``ids[color]`` whose master is that color-level record.

    Arguments:
        main_id: product id of the master product.
        ids, sizes, prices: dicts keyed by color name whose values are
            parallel lists, e.g. ``{'black': ['32854', '32855']}``.
            ``sizes`` may be empty, in which case no ``size`` field is set.
        images_grouped: dict of images grouped by color; when falsy no
            ``normal_image_url`` is set on the color-level records.

    Generated ids append ``_a``, ``_b``, ... (``chr(index + 97)``) to the
    parent id, so suffixes stop being letters past 26 children.
    """
    item = ExpressItem()
    for color_index, color in enumerate(ids):
        color_id = main_id + "_" + chr(color_index + 97)
        item['product_id'] = [color_id]
        item['master_product_id'] = [main_id]
        item['color'] = [color]
        if images_grouped:
            images = self.get_absolute_url(images_grouped[color])
            item['normal_image_url'] = basic.cdata_field(
                self.map_url_to_server(images, main_id))
        self.xml.create_xml(item)
        item.clear()

        for size_index, option_id in enumerate(ids[color]):
            item['product_id'] = [color_id + "_" + chr(size_index + 97)]
            item['master_product_id'] = [color_id]
            if sizes:
                item['size'] = [sizes[color][size_index]]
            item['size_option_id'] = [option_id]
            item['price'] = [prices[color][size_index]]
            self.xml.create_xml(item)
            # Clear after every emit: otherwise size, size_option_id and
            # price of the last size variant would leak into the next
            # color-level record, which never sets those fields itself.
            item.clear()
def get_description(self, hxs):
    """Return ``(description, features)`` taken from the product info fieldset.

    The description is the fourth text node (index 3) directly under
    ``//div[@id="FieldsetProductInfo"]``, wrapped with ``basic.cdata`` and
    returned as a one-element list. Features are the first ``<ul>`` under
    the same div, cut to 2000 characters and passed through
    ``basic.cdata_field``; when no ``<ul>`` is found the empty extraction
    result is passed through instead.

    Indexing fails if the fieldset yields fewer than four text nodes.
    """
    info_texts = hxs.select('//div[@id="FieldsetProductInfo"]/text()').extract()
    description = info_texts[3]
    feature_lists = hxs.select('//div[@id="FieldsetProductInfo"]/ul').extract()
    if feature_lists:
        first_list = feature_lists[0]
        feature_lists = [first_list[:2000]]
    wrapped_description = [basic.cdata(description)]
    return wrapped_description, basic.cdata_field(feature_lists)
def get_basic_info(self, hxs):
    """Return ``(name, shown_with)`` for a product page.

    ``name`` is the text of ``//div[@id="product_name"]`` passed through
    ``basic.cdata_field``. ``shown_with`` is the first
    ``//div[@id="shown_with_container"]`` match wrapped with ``basic.cdata``
    in a one-element list. Either value is the empty extraction result
    when its node is missing.
    """
    name_nodes = hxs.select('//div[@id="product_name"]/text()').extract()
    name = basic.cdata_field(name_nodes) if name_nodes else name_nodes

    shown_nodes = hxs.select('//div[@id="shown_with_container"]').extract()
    if not shown_nodes:
        return name, shown_nodes
    return name, [basic.cdata(shown_nodes[0])]
def get_basic_info(self, hxs):
    """Return ``(description, promo_text)`` for a product page.

    The description is the first ``//li[@class="cat-pro-desc"]`` match,
    cleaned with ``basic.clean_string`` and wrapped with ``basic.cdata`` in
    a one-element list; indexing fails when there is no such node.

    The promo text is read from the text of
    ``//span[@class="cat-pro-promo-text"]``, falling back to its ``<font>``
    children. It is passed through ``basic.cdata_field`` when found and is
    the empty extraction result otherwise.
    """
    desc_nodes = hxs.select('//li[@class="cat-pro-desc"]').extract()
    cleaned = basic.clean_string(desc_nodes[0])
    description = [basic.cdata(cleaned)]

    promo = hxs.select('//span[@class="cat-pro-promo-text"]/text()').extract()
    if not promo:
        # Some pages put the promo text inside a <font> tag instead.
        promo = hxs.select('//span[@class="cat-pro-promo-text"]/font').extract()
    if promo:
        promo = basic.cdata_field(promo)
    return description, promo
def parse_for_shop_look(self, hxs, id, product_id, page, images_grouped, product_url, index):
    """Special parse function for products that belong to a shop look or line.

    It stores the same information as a normal product but in a different
    format: colors and variants are serialized as JSON strings, and
    ``master_product_id`` refers to ``id``, the shop look/line id. The
    finished item is passed to ``self.xml.create_xml``; nothing is returned.

    TO DO: see if there is need to specially handle the case for not
    available.
    """
    record = ExpressItem()
    record['master_product_id'] = [id]
    record['product_id'] = [id + "_" + product_id]
    if self.ordered:
        record['order_index'] = [self.order_list[index]]
    record['style'] = [product_id]
    record['product_page'] = [product_url]
    record['category_id'], record['subcategory_id'] = self.get_categories(hxs)
    record['add_to_cart_id'] = self.get_add_to_cart_id(hxs)

    # Build the colors JSON: one object per color with its swatch and images.
    # The last two values returned by get_swatch_images are not used here.
    color_names, swatch_urls, _swatch_names, _swatch_jsons = self.get_swatch_images(hxs)
    color_entries = []
    for position, color in enumerate(color_names):
        entry = {'name': color, 'swatch_url': swatch_urls[position],
                 'image_url': self.get_absolute_url(images_grouped[color])}
        color_entries.append(simplejson.dumps(entry))
    record['colors'] = basic.cdata_field(color_entries)

    record['price'], record['discount_price'] = self.get_product_prices(hxs)
    record['description'], record['promo_text'] = self.get_basic_info(hxs)
    record['name'] = self.get_name(hxs)

    # Build the variants JSON: one object per color with prices, ids and sizes.
    ids, sizes, prices = self.get_variants(page)
    variant_entries = []
    for color in ids:
        entry = {'color': color, 'prices': prices[color], 'ids': ids[color]}
        try:
            entry['sizes'] = sizes[color]
        except StandardError:
            # Sizes are optional; the variant is still written without them.
            print("This product has no sizes")
        variant_entries.append(simplejson.dumps(entry))
    record['variants'] = basic.cdata_field(variant_entries)

    self.xml.create_xml(record)
def get_price(self, hxs):
    """Return ``(price, discount)`` for a product page.

    The regular price is read from the first of three candidate locations
    that yields any text. It is cleaned with ``basic.clean_string``, runs of
    spaces are collapsed, the ``Price:`` / ``Prix:`` labels are removed, and
    the stripped result is wrapped with ``basic.cdata`` in a one-element
    list. Indexing fails when none of the locations yields text.

    The discount is the text of the ``pc-salePrice`` span, passed through
    ``basic.cdata_field`` when present and left as the empty extraction
    result otherwise.
    """
    candidate_xpaths = (
        '//span[@id="divUnitPrice"]/text()',
        '//div[@id="product_price"]/span[1]/text()',
        '//div[@id="product_price"]/text()',
    )
    for xpath in candidate_xpaths:
        raw_price = hxs.select(xpath).extract()
        if raw_price:
            break

    sale = hxs.select('//div[@id="product_price"]/span[@class="pc-salePrice"]/text()').extract()

    text = basic.clean_string(raw_price[0])
    text = re.sub(" +", " ", text)
    text = text.replace("Price:", "").replace("Prix:", "")
    wrapped_price = basic.cdata(text.strip())

    if sale:
        sale = basic.cdata_field(sale)
    return [wrapped_price], sale
def get_description(self, hxs):
    """Return the four description parts of a product page as a tuple.

    In order: the heading paragraphs under ``div#description``, the
    ``p.description`` details, the ``<ul>`` lists under ``div.specs``, and
    the paragraph text under ``div.callToAction``. Each part is the
    extraction result passed through ``basic.cdata_field``.
    """
    part_xpaths = (
        '//div[@id="description"]/p',
        '//p[@class="description"]',
        '//div[@class="specs"]/ul',
        '//div[@class="callToAction"]/p/text()',
    )
    # Extract every part before wrapping any of them.
    extracted = [hxs.select(xpath).extract() for xpath in part_xpaths]
    return tuple(basic.cdata_field(part) for part in extracted)
def get_description(self, hxs):
    """Return ``(description, features)`` read from ``div#FieldsetProductInfo``.

    ``description`` is a one-element list holding the div's fourth direct
    text node (index 3) wrapped with ``basic.cdata``; indexing fails when
    fewer than four text nodes are extracted. ``features`` is the first
    ``<ul>`` of the div truncated to 2000 characters (or the empty
    extraction result when there is none), passed through
    ``basic.cdata_field``.
    """
    fieldset = '//div[@id="FieldsetProductInfo"]'
    description = hxs.select(fieldset + '/text()').extract()[3]
    feature_markup = hxs.select(fieldset + '/ul').extract()
    if feature_markup:
        # Keep only the first list and cap its length.
        feature_markup = [feature_markup[0][:2000]]
    return [basic.cdata(description)], basic.cdata_field(feature_markup)