def get_image_names(self, page):
    """Build a dict of color names for the color swatches on the page.

    The page is cut on every ``new DropDownInfo`` marker. For each chunk
    after the first, the key is the first match between ``('`` and ``'``
    and the value is the third match between ``'`` and ``')``.
    """
    chunks = page.split("new DropDownInfo")
    names = {}
    for chunk in chunks[1:]:
        # Value is looked up before the key, matching assignment order.
        label = basic.get_middle_text(chunk, "'", "')")[2]
        key = basic.get_middle_text(chunk, "('", "'")[0]
        names[key] = label
    return names
def we_also_recommend(self, id, main_id):
    """Fetch and parse the "we also recommend" products for a product.

    Requests the res-x.com recommendation endpoint for product ``id`` and
    parses every ``certonaRecBoxes`` chunk of the response.

    Returns a tuple ``(jsons, images)``: ``jsons`` is the result of
    ``self.make_json`` for the parsed ids, names, prices, server image
    paths and product urls; ``images`` is the list of image urls as found
    in the response.
    """
    import urllib2

    request_url = (
        "http://www.res-x.com/ws/r2/Resonance.aspx?appid=kennethcole01&t"
        + "k=154212870918247&ss=525178103419747&sg=1&pg=897706724574618&b"
        + "x=true&vr=2.67&sc=product_rr&ev=product&ei=" + id + "&cu=&ct=k"
        + "ennethcolec01&no=3&cb=r1eh&clk=&cv1=" + id + "&cv23=63&ur=http%"
        + "3A//www.kennethcole.com/product/index.jsp%3FproductId%3D3" + id
        + "&plk=&rf="
    )
    # Close the connection explicitly instead of leaking the handle.
    response = urllib2.urlopen(request_url)
    try:
        page = response.read()
    finally:
        response.close()

    images = []
    ids = []
    names = []
    prices = []
    urls = []
    # parsing data got from the upper url about we also recommend products
    for chunk in page.split("certonaRecBoxes")[1:]:
        # Distinct local names: the ``id`` parameter is no longer clobbered.
        rec_id = [basic.get_middle_text(chunk, "d=", '\\"')[0]]
        image = basic.get_middle_text(chunk, 'src=\\"', '\\"')[0]
        name = basic.get_middle_text(chunk, 'alt=\\"', '\\"')
        price = basic.get_middle_text(chunk, '<br>', '</a>')
        product_url = "http://www.kennethcole.com/product/index.jsp?productId=" + rec_id[0]
        urls.append(product_url)
        ids.append(rec_id)
        names.append(name)
        prices.append(price)
        images.append(image)
    jsons = self.make_json(ids, names, prices, self.get_image_server_path(images, main_id), urls)
    return jsons, images
def get_reviews(self, page):
    """Get the average product rating.

    Looks up the review id in ``page``, downloads the bazaarvoice reviews
    script for it and reads the ``alt`` text of the rating image.

    Returns a one-element list holding the rating string (like
    ``4.6 of 5``), or an empty list when no rating block is found.
    """
    review_id = self.get_review_id(page)
    url = "http://partylite.ugc.bazaarvoice.com/8504-en_us/" + review_id + "/reviews.djs?format=embeddedhtml"
    url = url.replace(" ", "")
    # Close the connection explicitly instead of leaking the handle.
    response = urllib2.urlopen(url)
    try:
        body = response.read()
    finally:
        response.close()
    # The payload is escaped javascript, hence the literal backslashes.
    # '<\\/div>' is the same string the old '<\/div>' produced, without
    # relying on an invalid escape sequence.
    rating_blocks = basic.get_middle_text(body, '<div class=\\"BVRRRatingNormalImage\\">', '<\\/div>')
    if not rating_blocks:
        return []
    rating = basic.get_middle_text(rating_blocks[0], 'alt=\\"', '\\"')[0]
    return [rating]
def parse_jsons(self, jsons, color_names):
    """Download every json url and collect the image names it lists.

    ``jsons`` and ``color_names`` are indexed in parallel: the images found
    at ``jsons[i]`` are grouped under ``color_names[i]``.

    Returns a tuple ``(images, images_grouped)``: all images passed through
    ``self.get_absolute_url``, and the per-color grouping built with
    ``basic.add_to_dict`` (used later when creating child products in xml).
    """
    images = []
    images_grouped = {}
    for index, json_url in enumerate(jsons):
        # Close each connection; the name ``payload`` avoids shadowing the
        # ``json`` module.
        response = urllib2.urlopen(json_url)
        try:
            payload = response.read()
        finally:
            response.close()
        first_image = basic.get_middle_text(payload, '"expressfashion/', ";")
        rest_of_images = basic.get_middle_text(payload, ',expressfashion/', ";")
        color_images = first_image + rest_of_images
        images_grouped = basic.add_to_dict(images_grouped, color_names[index], color_images)
        images += color_images
    return self.get_absolute_url(images), images_grouped
def get_swatch_image_name(self, image_sites):
    """Return, for each swatch image url, the text between ``fashion/`` and ``_s``."""
    return [
        basic.get_middle_text(site, "fashion/", "_s")[0]
        for site in image_sites
    ]
def create_subproducts(self, page):
    """Read the color variants of a product from the page javascript.

    Returns a list of dicts, one per ``ProductGroupProduct(...)`` entry
    found between ``var largeImages = new Array();`` and
    ``colorDropdownArray``. Returns an empty list (after printing a
    message) when the page has no ``largeImages`` section.
    """
    try:
        script = page.split("var largeImages = new Array();")[1]
    except IndexError:
        print("This product has no images")
        return []

    script = script.split("colorDropdownArray")[0]
    entries = basic.get_middle_text(script, "ProductGroupProduct(", ");")
    image_names = self.get_image_names(page)
    image_base = "http://qa.partylite.biz/imaging/resize?fileName=/productcatalog/production"

    color_products = []
    for entry in entries:
        attributes = entry.split("',")
        normal_image_url = image_base + self.custom_clean_string(attributes[26], True)
        description = basic.cdata(self.custom_clean_string(attributes[27]))
        color_id = self.custom_clean_string(attributes[7], True)
        swatch_color = basic.cdata(self.custom_clean_string(attributes[9]).replace(" ", ""))
        name = basic.cdata(image_names[color_id])
        add_to_cart_id = self.custom_clean_string(attributes[0], True).replace(" ", "")
        price = self.custom_clean_string(attributes[10], True)
        color_products.append({
            'normal_image_url': normal_image_url,
            'description': description,
            'color_id': color_id,
            'swatch_color': swatch_color,
            'name': name,
            'add_to_cart_id': add_to_cart_id,
            'price': price,
        })
    return color_products
def parse_shop_look(self, hxs):
    """Write xml for a shop-the-look page.

    One item is created for the ensemble itself, then one item per
    ``cat-ens-prod-item`` element, each handed to ``xml.create_xml``.
    """
    products = hxs.select('//div[@id="cat-ens-prod-item"]')
    # do this with actual id
    item = ExpressItem()
    whole_page = "".join(hxs.extract())
    ensemble_id = basic.get_middle_text(whole_page, "ensembleId: '", "',")
    name = basic.clean_string_field(
        hxs.select('//div[@id="cat-ens-prod-con"]/h1/text()').extract()
    )
    item['ensemble_id'] = ensemble_id
    item['normal_image_url'] = self.shl_get_image(hxs)
    item['product_id'] = ["DUMMIE1"]
    item['shop_look'] = ['True']
    item['normal'] = ['False']
    item['shop_line'] = ['False']
    item['in_stock'] = ['IN_STOCK']
    item['name'] = name
    xml.create_xml(item)
    item.clear()

    # Child products are numbered from 1 under the dummy master id.
    for number, product in enumerate(products, 1):
        item = ExpressItem()
        item['master_product_id'] = ['DUMMIE1']
        item['product_id'] = ["DUMMIE1_" + str(number)]
        item['name'], item['price'], item['style'] = self.shl_basic_info(product)
        page = product.extract()
        item['variants'] = basic.cdata_field([self.shl_create_variants(self.get_variants(page))])
        item['colors'] = basic.cdata_field(self.shl_get_swatches(product))
        xml.create_xml(item)
def get_rating(self, hxs):
    """Read the rating from the second paragraph of the ``Customerssay`` block.

    Returns ``(rating, texts)``: ``texts`` is the extracted paragraph text
    list and ``rating`` the matches between ``Rating:`` and ``out`` in the
    first text with spaces removed. ``rating`` is an empty list when the
    paragraph is missing.
    """
    texts = hxs.select('//div[@id="Customerssay"]/p[2]/text()').extract()
    if not texts:
        return [], texts
    compact = texts[0].replace(" ", "")
    return basic.get_middle_text(compact, "Rating:", "out"), texts
def get_colors(self, page, color_names):
    """Get color information with images from the javascript on the page.

    Returns ``(colors_json, image_urls)``: a list of cdata-wrapped json
    strings, one per color name (each with the color name under ``cname``
    and the server path of its image under ``enh``), and the list of
    original image urls that can be used for download later.
    """
    script = basic.get_middle_text(page, 'var imageMap_0 = new Array();', '</script>')[0]
    raw_colors = basic.get_middle_text(script, '] = ', ';')
    colors_json = []
    image_urls = []
    for index, cname in enumerate(color_names):
        color = simplejson.loads(burton.replace_color_json(raw_colors[index]))
        color['cname'] = cname
        color.pop('reg')
        original_url = color['enh']
        image_urls.append(original_url)
        color['enh'] = self.get_server_path(original_url)
        colors_json.append(basic.cdata(simplejson.dumps(color)))
    return colors_json, image_urls
def get_extra_images(self, hxs):
    """Return the additional image names listed in the ``AddImg`` script.

    The first double-quoted string of the first script is split on commas.
    Returns an empty list when the page has no such script.
    """
    scripts = hxs.select('//div[@id="AddImg"]/script/text()').extract()
    if not scripts:
        return []
    quoted = basic.get_middle_text(scripts[0], '"', '"')
    return quoted[0].split(",")
def get_more_images(self, page):
    """Return absolute urls of the images in the ``moreImages`` script array.

    The host depends on ``self.production``. Returns an empty list (after
    printing a message) when the page has no ``moreImages`` section.
    """
    try:
        script = basic.get_middle_text(page, "var moreImages", "var numberOfImages")[0]
    except IndexError:
        print("This product has no images.")
        return []

    entries = basic.get_middle_text(script, "moreImages[", "';")
    # return cdata here if needed to go with absolute links
    return [
        ("http://www.partylite.biz" if self.production else "http://qa.partylite.biz")
        + entry.split("= '")[1]
        for entry in entries
    ]
def get_vars(self, response, hxs):
    """Collect the ASP.NET form fields needed to post back to the product page.

    Values are read twice: from the page already held by ``hxs`` and from a
    fresh ``requests.get`` of ``response.url`` sent with fixed browser
    headers. The script-manager hidden field is downloaded from the last
    script url found between ``__VIEWSTATE`` and ``__PREVIOUSPAGE``.

    Returns an 8-tuple: viewstate, eventvalidation, previous page (always
    ``""``), hidden field (always ``""``) from ``hxs``, followed by
    viewstate, eventvalidation, previous page and hidden field from the
    fresh download.
    """
    extract = basic.get_middle_text
    headers1 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1',
        'Host': 'www.sportmann.no',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Connection': 'keep-alive',
        'Referer': '/product.aspx?productid=613232',
        'Cookie': 'ASP.NET_SessionId=lurvsvrn3jxsfd45cedmsv45; Besok=922884e3-e9cb-4b69-b8c8-215f3cc988a9; __utma=184084580.1353376623.1312483243.1312483243.1312483243.1; __utmb=184084580.9.10.1312483243; __utmc=184084580; __utmz=184084580.1312483243.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
    }

    # Fields from the page that was already downloaded.
    page = " ".join(hxs.select('//html').extract())
    viewst = extract(page, 'id="__VIEWSTATE" value="', '"')
    eventval = extract(page, 'id="__EVENTVALIDATION" value="', '"')
    prevpage = [""]
    hidden_field = [""]

    # Same fields from a fresh download of the page.
    page_one = requests.get(response.url, headers=headers1).content
    viewst_page = extract(page_one, 'id="__VIEWSTATE" value="', '"')
    eventval_page = extract(page_one, 'id="__EVENTVALIDATION" value="', '"')
    prevpage_page = extract(page_one, 'id="__PREVIOUSPAGE" value="', '"')

    # Last script tag between the two hidden inputs holds the url to fetch.
    after_viewstate = page_one.split('id="__VIEWSTATE"')[1]
    before_previous = after_viewstate.split('id="__PREVIOUSPAGE"')[0]
    last_script = before_previous.split('<script sr')[-1]
    script_src = extract(last_script, 'c="', '"')[0].replace('amp;', '')
    hidden_url = "http://www.sportmann.no" + script_src
    response_hidden = urllib2.urlopen(urllib2.Request(hidden_url))
    hidden_field_page = extract(
        response_hidden.read(),
        "ctl00_ScriptManager1_HiddenField').value += '", "';")

    return (
        viewst[0],
        eventval[0],
        prevpage[0],
        hidden_field[0],
        viewst_page[0],
        eventval_page[0],
        prevpage_page[0],
        hidden_field_page[0],
    )
def get_colors(self, hxs, page, main_id):
    """Create one xml sub-product per color found in the ``displays`` script.

    For every color entry the image urls are collected, an item is filled
    and written with ``self.xml.create_xml``, and its images are exported.
    Afterwards ``self.get_sizes`` is called with all collected color ids.

    Returns the list of all image urls found, or ``404`` when the page has
    no ``displays[0]`` section.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost -- confirm against the original file.
    """
    item = KennethItem()
    try:
        tmp = page.split('displays[0]')[1]
    except IndexError:
        print "This product is not available"
        return 404
    script = tmp.split('</script>')[0]
    # Each color entry ends with "};"; the last piece is the remainder.
    displays = script.split("};")
    # NOTE(review): ``counter`` is declared global but not used in this block.
    global counter
    ids = []
    images = []
    color_ids = []
    sizes_script = self.get_sizes_part_page(page)
    # Maps color id -> position of the color, as a string.
    color_internal_code = {}
    for x in range(0, len(displays) - 1):
        id = basic.get_middle_text(displays[x], 'colorId: "', '"')
        ids.append(id[0])
        # Number of "Reg" occurrences bounds the vw<N>Reg keys to probe.
        reg = displays[x].count("Reg")
        images_in = []
        for i in range(1, reg + 1):
            image = basic.get_middle_text(displays[x], "vw" + str(i) + 'Reg: "', '"')
            if len(image) == 0:
                # Retry without the space after the colon.
                image = basic.get_middle_text(displays[x], "vw" + str(i) + 'Reg:"', '"')
            if (len(image) > 0):
                if (image[0] != "null"):
                    images_in.append(image[0])
        if not images_in:
            # Fall back to the product image input on the page.
            images_in = hxs.select('//input[@name="productImage"]/@value').extract()
        color_ids.append(str(main_id) + "_" + str(x))
        item['product_id'] = [str(main_id) + "_" + str(x)]
        item['color_option_id'] = id
        item['master_product_id'] = [main_id]
        item['normal_image_url'] = self.get_image_server_path(images_in, main_id)
        item['thumb_image_url'] = self.get_image_server_path_thumb(images_in, main_id)
        item['in_stock'] = ["NOT_IN_STOCK"]
        item['color'] = self.get_color_name(sizes_script, id[0])
        color_internal_code[id[0]] = str(x)
        self.xml.create_xml(item)
        images += images_in
        self.export(item['normal_image_url'], item['product_id'], "productImage")
    self.get_sizes(sizes_script, ids, main_id, color_internal_code)
    return images
def get_extra_images(self, hxs):
    """Return the comma-separated image names from the ``AddImg`` script.

    Only the first double-quoted string of the first script is used; an
    empty list is returned when no script is present.
    """
    additional = hxs.select('//div[@id="AddImg"]/script/text()').extract()
    if additional:
        first_quoted = basic.get_middle_text(additional[0], '"', '"')[0]
        return first_quoted.split(",")
    return []
def get_imagesets(self, hxs):
    """Return the first image set named in the page javascript.

    Used when the product has no color: only the first entry of the
    comma-separated ``imagesets`` value is kept, since there is just one
    color (no_color) to associate it with. The result is a one-element list.
    """
    page = hxs.extract()
    print(len(page))
    found = basic.get_middle_text(page, 'imagesets = "', '"; //Change')
    first_set = found[0].split(',')[0]
    return [first_set]
def get_variants(self, page):
    """Read the per-color size data from the ``skuSizeColorObj`` javascript.

    Returns ``(sizes, image_urls, color_names)``: cdata-wrapped json strings
    for each color (with ``swatchURL`` replaced by its server path), the
    original swatch urls, and the ``ColorDesc`` values.
    """
    script = basic.get_middle_text(page, 'var skuSizeColorObj = new Array();', '</script>')[0]
    sizes, image_urls, color_names = [], [], []
    for chunk in script.split('skuSizeColorObj')[1:]:
        raw = basic.get_middle_text(chunk, '= ', ';')
        # delete swatch image as it obviously won't be needed
        data = simplejson.loads(burton.replace_for_json(raw[0]))
        image_urls.append(data['swatchURL'])
        color_names.append(data['ColorDesc'])
        data['swatchURL'] = self.get_server_path(data['swatchURL'])
        sizes.append(basic.cdata(simplejson.dumps(data)))
    return sizes, image_urls, color_names
def get_variants(self, page):
    """Read product variants from the javascript on the page.

    Returns three dicts ``(ids, sizes, prices)`` keyed by color; each value
    is the collection built by ``basic.add_to_dict`` for that color.
    Variants without a color are stored under ``no_color``.
    """
    section = page.split("// Load the product variants")[1]
    section = section.split("// Set the field to update with the product variant")[0]
    ids, sizes, prices = {}, {}, {}
    for variant in section.split("// Create the variant")[1:]:
        found_color = basic.get_middle_text(variant, "Color','", "')")
        color = found_color[0] if found_color else "no_color"
        ids = basic.add_to_dict(ids, color, basic.get_middle_text(variant, "setId('", "')")[0])
        # Size is optional for a variant.
        if "Size','" in variant:
            sizes = basic.add_to_dict(sizes, color, basic.get_middle_text(variant, "Size','", "')")[0])
        prices = basic.add_to_dict(prices, color, basic.get_middle_text(variant, 'numericPrice="', '"')[0])
    return ids, sizes, prices
def get_vars(self, response, hxs):
    """Gather the ASP.NET hidden form values for a product page.

    Reads ``__VIEWSTATE`` and ``__EVENTVALIDATION`` from the page held by
    ``hxs``, then the same fields plus ``__PREVIOUSPAGE`` from a fresh
    ``requests.get`` of ``response.url``, and finally downloads the
    script-manager hidden field from the last script url found between
    the ``__VIEWSTATE`` and ``__PREVIOUSPAGE`` inputs.

    Returns an 8-tuple: the four values for the ``hxs`` page (the last two
    are always ``""``) followed by the four values for the fresh download.
    """
    viewstate_open = 'id="__VIEWSTATE" value="'
    eventval_open = 'id="__EVENTVALIDATION" value="'
    headers1 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",
        "Host": "www.sportmann.no",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-us,en;q=0.5",
        "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        "Connection": "keep-alive",
        "Referer": "/product.aspx?productid=613232",
        "Cookie": "ASP.NET_SessionId=lurvsvrn3jxsfd45cedmsv45; Besok=922884e3-e9cb-4b69-b8c8-215f3cc988a9; __utma=184084580.1353376623.1312483243.1312483243.1312483243.1; __utmb=184084580.9.10.1312483243; __utmc=184084580; __utmz=184084580.1312483243.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
    }

    html_parts = hxs.select("//html").extract()
    page = " ".join(html_parts)
    viewst = basic.get_middle_text(page, viewstate_open, '"')
    eventval = basic.get_middle_text(page, eventval_open, '"')

    fresh = requests.get(response.url, headers=headers1)
    page_one = fresh.content
    viewst_page = basic.get_middle_text(page_one, viewstate_open, '"')
    eventval_page = basic.get_middle_text(page_one, eventval_open, '"')
    prevpage_page = basic.get_middle_text(page_one, 'id="__PREVIOUSPAGE" value="', '"')

    # Narrow down to the markup between the two hidden inputs, then take
    # the src of the last script tag in it.
    between = page_one.split('id="__VIEWSTATE"')[1].split('id="__PREVIOUSPAGE"')[0]
    script_tags = between.split("<script sr")
    src_matches = basic.get_middle_text(script_tags[len(script_tags) - 1], 'c="', '"')
    hidden_url = "http://www.sportmann.no" + src_matches[0].replace("amp;", "")

    request_hidden = urllib2.Request(hidden_url)
    hidden_body = urllib2.urlopen(request_hidden).read()
    hidden_field_page = basic.get_middle_text(
        hidden_body, "ctl00_ScriptManager1_HiddenField').value += '", "';"
    )

    # Previous page and hidden field are not read from the hxs page.
    prevpage = ""
    hidden_field = ""
    return (
        viewst[0],
        eventval[0],
        prevpage,
        hidden_field,
        viewst_page[0],
        eventval_page[0],
        prevpage_page[0],
        hidden_field_page[0],
    )
def get_sizes(self, page, ids, main_id, color_internal_code):
    """Collect sku, size, price and availability per color id and write them out.

    ``page`` is split on ``};``; every piece except the last is matched on
    its ``cId`` against ``ids``. The collected dicts are passed to
    ``self.create_subproducts_xml``.

    Returns ``(main_id, colors_name, sizes, skus, inStocks, prices)``;
    ``colors_name`` is returned as an empty dict.
    """
    skus = {}
    colors_name = {}
    inStocks = {}
    sizes = {}
    prices = {}
    for option in page.split("};")[:-1]:
        option_color = basic.get_middle_text(option, 'cId: "', '"')
        for color_id in ids:
            if option_color[0] != color_id:
                continue
            raw_sku = basic.get_middle_text(option, 'sku: ', ',s')
            # Keep only the digits of the sku.
            skus = self.add_to_dict(skus, color_id, re.sub("[^0-9]", "", raw_sku[0]))
            size = basic.get_middle_text(option, 'sDesc: "', '"')
            sizes = self.add_to_dict(sizes, color_id, size[0])
            raw_price = basic.get_middle_text(option, 'price: "', '"')
            cleaned_price = self.clean_price(raw_price[0])
            prices = self.add_to_dict(prices, color_id, cleaned_price[0])
            available = basic.get_middle_text(option, 'avail: "', '"')
            inStocks = self.add_to_dict(inStocks, color_id, available[0])
    self.create_subproducts_xml(main_id, color_internal_code, colors_name, sizes, skus, inStocks, prices)
    return main_id, colors_name, sizes, skus, inStocks, prices
def get_images(self, hxs):
    """Return absolute urls of the images in the ``gallery_demo_unstyled`` block.

    Every ``src`` value found between the gallery marker and the
    ``right_container`` div is prefixed with the site host.
    """
    page = " ".join(hxs.select('//html').extract())
    after_gallery = page.split('class="gallery_demo_unstyled"')[1]
    gallery = after_gallery.split('<div class="right_container">')[0]
    sources = basic.get_middle_text(gallery, 'src="', '"')
    return ["http://www.sportmann.no" + src for src in sources]
def get_images(self, hxs):
    """Collect the gallery image urls of the page as absolute urls.

    The gallery is the markup between ``class="gallery_demo_unstyled"`` and
    the ``right_container`` div; each ``src`` in it gets the host prepended.
    """
    host = "http://www.sportmann.no"
    html = " ".join(hxs.select("//html").extract())
    pieces = html.split('class="gallery_demo_unstyled"')
    gallery_markup = pieces[1].split('<div class="right_container">')[0]
    images = []
    for relative in basic.get_middle_text(gallery_markup, 'src="', '"'):
        images.append(host + relative)
    return images
def get_basic_info(self, hxs):
    """Read name, descriptions, prices and article number from the page.

    Returns ``(name, short_desc, description, old_price, price, id, sku)``.
    ``description`` is a one-element list with the tag-stripped, cdata
    wrapped description. ``old_price`` is an empty list when the page has
    no old price. ``price`` comes from ``nowprice`` or, failing that,
    ``normalprice``. ``sku`` is the full list of article number matches and
    ``id`` a one-element list with the first of them.
    """
    def price_field(texts):
        # Keep what follows the first colon, without "Kr" and spaces.
        amount = " ".join(texts).split(':')[1].replace('Kr', '')
        return [amount.replace(" ", "")]

    name = hxs.select('//div[@id="fragment-1"]/h2/text()').extract()
    short_desc = hxs.select('//div[@class="description2"]/text()').extract()
    raw_description = hxs.select('//div[@id="fragment-1"]/div[@class="description"]').extract()
    description = [basic.cdata(sportman.delete_tags(re, raw_description[0]))]

    old_price = hxs.select('//span[@class="oldprice"]/text()').extract()
    if old_price != []:
        old_price = price_field(old_price)

    price = hxs.select('//span[@class="nowprice"]/text()').extract()
    if price == []:
        price = hxs.select('//span[@class="normalprice"]/text()').extract()
    price = price_field(price)

    article = " ".join(hxs.select('//div[@class="articlenumber"]').extract())
    article = article.replace(u"\xa0", "")
    sku = basic.get_middle_text(article, 'Art.nr.', '</div>')
    id = [sku[0]]
    return name, short_desc, description, old_price, price, id, sku
def get_basic_info(self, hxs):
    """Extract the basic product fields from the page.

    Returns ``(name, short_desc, description, old_price, price, id, sku)``:
    ``name`` and ``short_desc`` are the raw extracted text lists,
    ``description`` is ``[cdata]`` of the tag-stripped description,
    ``old_price`` and ``price`` are one-element lists with the text after
    the colon minus ``Kr`` and spaces (``old_price`` stays an empty list if
    absent), ``sku`` holds all ``Art.nr.`` matches and ``id`` the first one
    wrapped in a list.
    """
    name = hxs.select('//div[@id="fragment-1"]/h2/text()').extract()
    short_desc = hxs.select('//div[@class="description2"]/text()').extract()

    description_nodes = hxs.select('//div[@id="fragment-1"]/div[@class="description"]').extract()
    stripped = sportman.delete_tags(re, description_nodes[0])
    description = [basic.cdata(stripped)]

    old_price = hxs.select('//span[@class="oldprice"]/text()').extract()
    if old_price:
        old_text = " ".join(old_price).split(":")[1]
        old_price = [old_text.replace("Kr", "").replace(" ", "")]

    # Prefer the discounted price; fall back to the normal one.
    price_texts = hxs.select('//span[@class="nowprice"]/text()').extract()
    if not price_texts:
        price_texts = hxs.select('//span[@class="normalprice"]/text()').extract()
    price_text = " ".join(price_texts).split(":")[1]
    price = [price_text.replace("Kr", "").replace(" ", "")]

    article_html = " ".join(hxs.select('//div[@class="articlenumber"]').extract())
    sku = basic.get_middle_text(article_html.replace(u"\xa0", ""), "Art.nr.", "</div>")
    id = [sku[0]]
    return name, short_desc, description, old_price, price, id, sku
def get_variants(self, hxs, response):
    """Build cdata-wrapped json strings describing color/size variants.

    Color and size are each read either from plain text (no ``<select``
    inside the block) or from the options of a select box. When the size
    text is empty, ``self.get_data`` is called per color value and the
    sizes are parsed from each returned page.

    Returns a list of cdata-wrapped json strings, each with the keys
    ``color``, ``color_value``, ``size`` and ``size_value``.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost -- confirm against the original file.
    """
    page = hxs.select("//html").extract()
    page = " ".join(page)
    # The same dict object is refilled and serialised for every variant.
    dict_one = {}
    test_one = []
    temp = page.split('<div class="color">')
    temp = temp[1].split("</div>")
    # More than one piece means the color block contains a select box.
    temp = temp[0].split("<select name")
    # Only the last four values (from the fresh download) are used below.
    viewstate, eventvalidation, previouspage, hiddenfield, view_page, even_page, pre_page, hidd_page = self.get_vars(
        response, hxs
    )
    if len(temp) == 1:
        # Single color given as plain text plus a hidden input value.
        color = hxs.select('//div[@class="color"]/text()').extract()
        value = hxs.select('//input[@id="ctl00_ContentPlaceHolder1_Variant1Hidden"]/@value').extract()
        color[0] = color[0].replace(" ", "")
        color = basic.clean_string(color[0])
        value = value[0]
    else:
        # Colors listed as options after the "farge" placeholder option.
        test_color = basic.get_middle_text(temp[1], "farge</option>", "</select>")
        color = basic.get_middle_text(test_color[0], '">', "</option>")
        value = basic.get_middle_text(test_color[0], 'value="', '">')
        for i in range(0, len(color)):
            color[i] = color[i].replace(" ", "")
    size_temp = page.split('<div class="size">')
    size_temp = size_temp[1].split("</div>")
    size_temp = size_temp[0].split("<select name")
    if len(size_temp) == 1:
        size = hxs.select('//div[@class="size"]/text()').extract()
        size = basic.clean_string(size[0])
        size = [size.replace(" ", "")]
        size_val = hxs.select('//input[@id="ctl00_ContentPlaceHolder1_Variant2Hidden"]/@value').extract()
        if size[0] == "":
            # No size on the page: ask for the sizes of every color value.
            # NOTE(review): in the single-color branch above ``value`` is
            # one element of the extracted list, so this loop would index
            # into it item by item -- confirm intended.
            for i in range(len(value)):
                resp_page = self.get_data(response, hidd_page, view_page, pre_page, even_page, value[i])
                a_page = resp_page.split('<div class="siz')
                a_page = a_page[1].split("</select>")
                if len(a_page) == 1:
                    # No select box in the returned size block.
                    size = basic.get_middle_text(a_page[0], 'e">', '<input type="hidden"')
                    size_val = basic.get_middle_text(a_page[0], 'value="', '"')
                    size_val = size_val[0]
                    size_val = [size_val]
                else:
                    a_page = basic.get_middle_text(a_page[0], "se</option>", "</select>")
                    size = basic.get_middle_text(a_page[0], '">', "</option>")
                    size_val = basic.get_middle_text(a_page[0], 'value="', '">')
                dict_one["color"] = color[i]
                dict_one["color_value"] = value[i]
                dict_one["size_value"] = size_val
                for x in range(0, len(size)):
                    size[x] = basic.clean_string(size[x])
                    size[x] = size[x].replace(" ", "")
                dict_one["size"] = size
                test_one.append(basic.cdata(json.dumps(dict_one)))
        else:
            dict_one["color"] = color
            dict_one["color_value"] = value
            dict_one["size"] = size
            dict_one["size_value"] = size_val
            # This branch serialises with simplejson, the others with json.
            test_one.append(basic.cdata(simplejson.dumps(dict_one)))
    else:
        # Sizes listed as options of a select box on the page itself.
        test_size = basic.get_middle_text(size_temp[1], "se</option>", "</select>")
        size = basic.get_middle_text(test_size[0], '">', "</option>")
        size_val = basic.get_middle_text(test_size[0], 'value="', '">')
        for x in range(0, len(size)):
            size[x] = basic.clean_string(size[x])
            size[x] = size[x].replace(" ", "")
        dict_one["color"] = color
        dict_one["color_value"] = value
        dict_one["size"] = size
        dict_one["size_value"] = size_val
        test_one.append(basic.cdata(json.dumps(dict_one)))
    return test_one
def get_variants(self, hxs, response):
    """Build cdata-wrapped json strings describing color/size variants.

    Color and size are each read either from plain text (no ``<select``
    inside the block) or from the options of a select box. When the size
    text is empty, ``self.get_data`` is called per color value and the
    sizes are parsed from each returned page.

    Returns a list of cdata-wrapped json strings, each with the keys
    ``color``, ``color_value``, ``size`` and ``size_value``.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost -- confirm against the original file.
    """
    page = hxs.select('//html').extract()
    page = " ".join(page)
    # The same dict object is refilled and serialised for every variant.
    dict_one = {}
    test_one = []
    temp = page.split('<div class="color">')
    temp = temp[1].split('</div>')
    # More than one piece means the color block contains a select box.
    temp = temp[0].split('<select name')
    # Only the last four values (from the fresh download) are used below.
    viewstate, eventvalidation, previouspage, hiddenfield, view_page, even_page, pre_page, hidd_page = self.get_vars(
        response, hxs)
    if (len(temp) == 1):
        # Single color given as plain text plus a hidden input value.
        color = hxs.select('//div[@class="color"]/text()').extract()
        value = hxs.select(
            '//input[@id="ctl00_ContentPlaceHolder1_Variant1Hidden"]/@value'
        ).extract()
        color[0] = color[0].replace(" ", "")
        color = basic.clean_string(color[0])
        value = value[0]
    else:
        # Colors listed as options after the "farge" placeholder option.
        test_color = basic.get_middle_text(temp[1], 'farge</option>', '</select>')
        color = basic.get_middle_text(test_color[0], '">', '</option>')
        value = basic.get_middle_text(test_color[0], 'value="', '">')
        for i in range(0, len(color)):
            color[i] = color[i].replace(" ", "")
    size_temp = page.split('<div class="size">')
    size_temp = size_temp[1].split('</div>')
    size_temp = size_temp[0].split('<select name')
    if (len(size_temp) == 1):
        size = hxs.select('//div[@class="size"]/text()').extract()
        size = basic.clean_string(size[0])
        size = [size.replace(" ", "")]
        size_val = hxs.select(
            '//input[@id="ctl00_ContentPlaceHolder1_Variant2Hidden"]/@value'
        ).extract()
        if size[0] == "":
            # No size on the page: ask for the sizes of every color value.
            # NOTE(review): in the single-color branch above ``value`` is
            # one element of the extracted list, so this loop would index
            # into it item by item -- confirm intended.
            for i in range(len(value)):
                resp_page = self.get_data(response, hidd_page, view_page, pre_page, even_page, value[i])
                a_page = resp_page.split('<div class="siz')
                a_page = a_page[1].split('</select>')
                if len(a_page) == 1:
                    # No select box in the returned size block.
                    size = basic.get_middle_text(a_page[0], 'e">', '<input type="hidden"')
                    size_val = basic.get_middle_text(
                        a_page[0], 'value="', '"')
                    size_val = size_val[0]
                    size_val = [size_val]
                else:
                    a_page = basic.get_middle_text(a_page[0], 'se</option>', '</select>')
                    size = basic.get_middle_text(a_page[0], '">', '</option>')
                    size_val = basic.get_middle_text(
                        a_page[0], 'value="', '">')
                dict_one["color"] = color[i]
                dict_one["color_value"] = value[i]
                dict_one["size_value"] = size_val
                for x in range(0, len(size)):
                    size[x] = basic.clean_string(size[x])
                    size[x] = size[x].replace(" ", "")
                dict_one["size"] = size
                test_one.append(basic.cdata(json.dumps(dict_one)))
        else:
            dict_one["color"] = color
            dict_one["color_value"] = value
            dict_one['size'] = size
            dict_one['size_value'] = size_val
            # This branch serialises with simplejson, the others with json.
            test_one.append(basic.cdata(simplejson.dumps(dict_one)))
    else:
        # Sizes listed as options of a select box on the page itself.
        test_size = basic.get_middle_text(size_temp[1], 'se</option>', '</select>')
        size = basic.get_middle_text(test_size[0], '">', '</option>')
        size_val = basic.get_middle_text(test_size[0], 'value="', '">')
        for x in range(0, len(size)):
            size[x] = basic.clean_string(size[x])
            size[x] = size[x].replace(" ", "")
        dict_one["color"] = color
        dict_one["color_value"] = value
        dict_one['size'] = size
        dict_one['size_value'] = size_val
        test_one.append(basic.cdata(json.dumps(dict_one)))
    return test_one
def get_product_id(self, hxs):
    """Return the product id found after ``productid","`` in the first ``wrap`` script."""
    scripts = hxs.select('//div[@id="wrap"]/script/text()').extract()
    matches = basic.get_middle_text(scripts[0], 'productid","', '"')
    return matches[0]
def get_all_sizes(self, page):
    """Return all sizes from the ``distsizeobj`` script array.

    The result is a one-element list holding the cdata-wrapped json dump
    of the size list.
    """
    found = basic.get_middle_text(page, 'var distsizeobj=new Array();', 'var indexcolor=0;')
    script = found[0]
    all_sizes = basic.get_middle_text(script, ']="', '";')
    sizes_json = simplejson.dumps(all_sizes)
    return [basic.cdata(sizes_json)]
def get_add_to_cart_id(self, page):
    """Return the first ``addItemToCart(`` arguments from the order-started script branch.

    Only the text between ``if(isOrderStarted){`` and ``}else`` is searched;
    the list of matches is returned as is.
    """
    order_branch = basic.get_middle_text(page, "if(isOrderStarted){", "}else")[0]
    return basic.get_middle_text(order_branch, "addItemToCart(", ",")
def get_review_id(self, page):
    """Return the first ``productId`` value in the page, used by the reviews javascript."""
    matches = basic.get_middle_text(page, 'productId: "', '"')
    return matches[0]
def shl_get_image(self, hxs):
    """Return a one-element list with the scene7 image url of the first image set."""
    url_template = "http://t.express.com/com/scene7/s7d5/=/is/image/expressfashion/%s/i81"
    imagesets = basic.get_middle_text(hxs.extract(), 'imagesets = "', '";')
    return [url_template % imagesets[0]]
def get_all_sizes(self, page):
    """Collect every size listed in the ``distsizeobj`` javascript array.

    Returns a one-element list with the sizes dumped to json and wrapped
    in cdata.
    """
    script = basic.get_middle_text(
        page, 'var distsizeobj=new Array();', 'var indexcolor=0;'
    )[0]
    return [
        basic.cdata(
            simplejson.dumps(basic.get_middle_text(script, ']="', '";'))
        )
    ]