def parse(self, sync, *args, **kwargs): file = self.config['URL'] if '_local_' in kwargs: file = 'feeds/data/%s.%s' % (self.config['ACCOUNT'], self.config['TYPE'].lower()) else: # get the file and save it path = '%s/%s-%d.%s' % (settings.FEEDS_ROOT, self.config['ACCOUNT'], sync.id, self.config['TYPE'].lower()) urllib.urlretrieve(self.config['URL'], path) file = path # clean the file import fileinput for line in fileinput.input(file, inplace=1): keys_with_spaces = ['Product Name', 'List price', 'Sell price', 'All terms', 'Stock Level', 'Publisher Reference', 'Post date', 'Updated date'] for key in keys_with_spaces: line = line.replace(key, key.replace(' ','_')) print line, ack_brand = Brand.objects.get(id=19) tinkle_brand = Brand.objects.get(id=28) karadi_brand = Brand.objects.get(id=29) books_and_comics = Category.objects.get(name='Books & Comics') xmldoc = etree.parse(file) products = [] for product in xmldoc.xpath("/xml/node"): data = dict(cleaned_data=self.get_default_cleaned_data()) no_html_fields = ['List_price','Sell_price', 'Product_Name','Description','Path'] # single value fields single_value_fields = ['ISBN','Product_Name','Description','List_price','Sell_price', 'international','Path','Nid','Published'] for field in single_value_fields: data[field] = '' node = product.xpath(field.split(' ')[0]) if node and len(node) > 0 and node[0].text: data[field] = node[0].text if field in no_html_fields: data[field] = htmlutils.to_text(node[0].text) if data.get('Published','').lower() == 'no': continue if 'subscription' in data.get('Product_Name','').lower() and 'tinkle' in data.get('Product_Name','').lower(): print 'adding tinkle subscription' else: print 'skipping other ack products' continue # ignore blaclists if self.is_blacklisted_sku(data['Nid']): continue # create cleaned data data['cleaned_data']['brand'] = tinkle_brand data['cleaned_data']['category'] = books_and_comics data['cleaned_data']['sku'] = data['Nid'] data['cleaned_data']['model'] = data['ISBN'] or 
'' data['cleaned_data']['title'] = data['Product_Name'] data['cleaned_data']['image_url'] = [self.get_image_url(data['Path'])] data['cleaned_data']['shipping_duration'] = '4-6 Weeks' data['cleaned_data']['offer_price'] = Decimal(self.get_text(data['Sell_price']).replace(',','').replace('Rs','')) data['cleaned_data']['list_price'] = Decimal(self.get_text(data['List_price']).replace('Rs','').replace(',','')) data['cleaned_data']['description'] = data['Description'] data['cleaned_data']['availability'] = AvailabilityMap.objects.get( applies_to = 'account', account = self.config['ACCOUNT']).availability products.append(data) print len(products) return products
def parse(self, sync, *args, **kwargs):
    """Parse the account's product XML feed into a list of product dicts.

    Downloads the feed to FEEDS_ROOT (or reads a local copy under
    ``feeds/data/`` when ``_local_`` is in kwargs), extracts one dict per
    ``/root/products`` node, drops blacklisted brands/SKUs/categories and
    fills in the ``cleaned_data`` sub-dict for import.

    :param sync: sync record; ``sync.id`` names the downloaded feed file
    :returns: list of product dicts
    """
    # renamed from `file` to avoid shadowing the builtin
    feed_file = self.config['URL']
    if '_local_' in kwargs:
        feed_file = 'feeds/data/%s.%s' % (self.config['ACCOUNT'],
                                          self.config['TYPE'].lower())
    else:
        # get the file and save it
        path = '%s/%s-%d.%s' % (settings.FEEDS_ROOT, self.config['ACCOUNT'],
                                sync.id, self.config['TYPE'].lower())
        urllib.urlretrieve(self.config['URL'], path)
        feed_file = path

    xmldoc = etree.parse(feed_file)
    products = []
    for product in xmldoc.xpath("/root/products"):
        data = dict(cleaned_data=self.get_default_cleaned_data())
        no_html_fields = ['Brand_Name', 'categories_name']
        # single value fields
        single_value_fields = ['SKU', 'Title', 'Features', 'Specification',
                               'Overview', 'Brand_Name', 'Image_URL',
                               'categories_name', 'Warranty_Period',
                               'Offer_Price', 'MRP', 'Shipping_Duration']
        for field in single_value_fields:
            data[field] = ''
            node = product.xpath(field)
            # BUGFIX: an empty element (<MRP/>) has text None, which used
            # to be stored and later crashed str ops like MRP.replace().
            # Guard on node[0].text so missing values stay ''.
            if node and len(node) > 0 and node[0].text:
                data[field] = node[0].text
                if field in no_html_fields:
                    data[field] = htmlutils.to_text(node[0].text)

        # ignore blacklists
        if self.is_blacklisted_brand(data['Brand_Name']):
            continue
        if self.is_blacklisted_sku(data['SKU']):
            continue
        if self.is_blacklisted_category(data['categories_name']):
            continue

        # create cleaned data
        # mappings fetched once and reused (previously looked up twice)
        brand_mapping = self.get_brand_mapping(data['Brand_Name'])
        category_mapping = self.get_category_mapping(data['categories_name'])
        data['cleaned_data']['brand_mapping'] = brand_mapping
        data['cleaned_data']['category_mapping'] = category_mapping
        data['cleaned_data']['sku'] = data['SKU']
        data['cleaned_data']['brand'] = brand_mapping.mapped_to
        data['cleaned_data']['category'] = category_mapping.mapped_to
        data['cleaned_data']['model'] = self.get_model_name(data['SKU'])
        data['cleaned_data']['title'] = data['Title']
        data['cleaned_data']['image_url'] = [data['Image_URL']]
        data['cleaned_data']['shipping_duration'] = (
            data['Shipping_Duration'] or '8-10 Working Days')
        data['cleaned_data']['offer_price'] = Decimal(data['Offer_Price'])
        # MRP like "0", "0.00" or "" is treated as absent -> fall back to
        # the offer price (stripping '.' and '0' leaves an empty string)
        if data['MRP'].replace('.', '').replace('0', ''):
            data['cleaned_data']['list_price'] = Decimal(data['MRP'])
        else:
            data['cleaned_data']['list_price'] = Decimal(data['Offer_Price'])
        data['cleaned_data']['description'] = (
            'Overview\n\n%sFeatures\n\n%sSpecs\n\n%s' % (
                striptags(data['Overview']),
                striptags(data['Features']),
                striptags(data['Specification'])))
        data['cleaned_data']['availability'] = AvailabilityMap.objects.get(
            applies_to='account',
            account=self.config['ACCOUNT']).availability
        products.append(data)
    return products
def parse(self, sync, *args, **kwargs):
    """Parse the account's product XML feed into a list of product dicts.

    Fetches the feed (or a local copy under ``feeds/data/`` when
    ``_local_`` is in kwargs), walks every ``/root/products`` node,
    filters blacklisted brands/SKUs/categories, and builds the
    ``cleaned_data`` sub-dict each importer expects.

    :param sync: sync record; ``sync.id`` names the downloaded feed file
    :returns: list of product dicts
    """
    # local renamed from `file` so the builtin is not shadowed
    feed_path = self.config['URL']
    if '_local_' in kwargs:
        feed_path = 'feeds/data/%s.%s' % (self.config['ACCOUNT'],
                                          self.config['TYPE'].lower())
    else:
        # get the file and save it
        path = '%s/%s-%d.%s' % (settings.FEEDS_ROOT, self.config['ACCOUNT'],
                                sync.id, self.config['TYPE'].lower())
        urllib.urlretrieve(self.config['URL'], path)
        feed_path = path

    xmldoc = etree.parse(feed_path)
    products = []
    for product in xmldoc.xpath("/root/products"):
        data = dict(cleaned_data=self.get_default_cleaned_data())
        no_html_fields = ['Brand_Name', 'categories_name']
        # single value fields
        single_value_fields = [
            'SKU', 'Title', 'Features', 'Specification', 'Overview',
            'Brand_Name', 'Image_URL', 'categories_name', 'Warranty_Period',
            'Offer_Price', 'MRP', 'Shipping_Duration'
        ]
        for field in single_value_fields:
            data[field] = ''
            node = product.xpath(field)
            # BUGFIX: empty elements carry text=None; storing None used to
            # crash later string ops (e.g. data['MRP'].replace). Require
            # non-empty text so absent values remain ''.
            if node and len(node) > 0 and node[0].text:
                data[field] = node[0].text
                if field in no_html_fields:
                    data[field] = htmlutils.to_text(node[0].text)

        # ignore blacklists
        if self.is_blacklisted_brand(data['Brand_Name']):
            continue
        if self.is_blacklisted_sku(data['SKU']):
            continue
        if self.is_blacklisted_category(data['categories_name']):
            continue

        # create cleaned data; look each mapping up once instead of twice
        brand_map = self.get_brand_mapping(data['Brand_Name'])
        category_map = self.get_category_mapping(data['categories_name'])
        data['cleaned_data']['brand_mapping'] = brand_map
        data['cleaned_data']['category_mapping'] = category_map
        data['cleaned_data']['sku'] = data['SKU']
        data['cleaned_data']['brand'] = brand_map.mapped_to
        data['cleaned_data']['category'] = category_map.mapped_to
        data['cleaned_data']['model'] = self.get_model_name(data['SKU'])
        data['cleaned_data']['title'] = data['Title']
        data['cleaned_data']['image_url'] = [data['Image_URL']]
        data['cleaned_data']['shipping_duration'] = data[
            'Shipping_Duration'] or '8-10 Working Days'
        data['cleaned_data']['offer_price'] = Decimal(data['Offer_Price'])
        # treat "0", "0.00" or "" MRP as missing and fall back to the
        # offer price (stripping '.' and '0' leaves an empty string)
        if data['MRP'].replace('.', '').replace('0', ''):
            data['cleaned_data']['list_price'] = Decimal(data['MRP'])
        else:
            data['cleaned_data']['list_price'] = Decimal(
                data['Offer_Price'])
        data['cleaned_data'][
            'description'] = 'Overview\n\n%sFeatures\n\n%sSpecs\n\n%s' % (
                striptags(data['Overview']), striptags(
                    data['Features']), striptags(data['Specification']))
        data['cleaned_data']['availability'] = AvailabilityMap.objects.get(
            applies_to='account',
            account=self.config['ACCOUNT']).availability
        products.append(data)
    return products
def parse(self, sync, *args, **kwargs): file = self.config["URL"] if "_local_" in kwargs: file = "feeds/data/%s.%s" % (self.config["ACCOUNT"], self.config["TYPE"].lower()) else: # get the file and save it path = "%s/%s-%d.%s" % (settings.FEEDS_ROOT, self.config["ACCOUNT"], sync.id, self.config["TYPE"].lower()) urllib.urlretrieve(self.config["URL"], path) file = path # clean the file import fileinput for line in fileinput.input(file, inplace=1): keys_with_spaces = [ "Product Name", "List price", "Sell price", "All terms", "Stock Level", "Publisher Reference", "Post date", "Updated date", ] for key in keys_with_spaces: line = line.replace(key, key.replace(" ", "_")) print line, ack_brand = Brand.objects.get(id=19) tinkle_brand = Brand.objects.get(id=28) karadi_brand = Brand.objects.get(id=29) books_and_comics = Category.objects.get(name="Books & Comics") xmldoc = etree.parse(file) products = [] for product in xmldoc.xpath("/xml/node"): data = dict(cleaned_data=self.get_default_cleaned_data()) data["cleaned_data"]["status"] = "active" no_html_fields = ["List_price", "Sell_price", "Product_Name", "Description", "Path"] # single value fields single_value_fields = [ "ISBN", "Product_Name", "Description", "List_price", "Sell_price", "international", "Path", "Nid", "Published", ] for field in single_value_fields: data[field] = "" node = product.xpath(field.split(" ")[0]) if node and len(node) > 0 and node[0].text: data[field] = node[0].text if field in no_html_fields: data[field] = htmlutils.to_text(node[0].text) if data.get("Published", "").lower() == "no": continue if ( "subscription" in data.get("Product_Name", "").lower() and "tinkle" in data.get("Product_Name", "").lower() ): print "skipping tinkle subscription" continue if "robinage" in data.get("Product_Name", "").lower(): data["cleaned_data"]["shipping_duration"] = "15-20 Working Days" elif "brainwave" in data.get("Product_Name", "").lower(): data["cleaned_data"]["shipping_duration"] = "4-6 Weeks" else: 
data["cleaned_data"]["shipping_duration"] = "7-10 Working Days" # ignore blaclists if self.is_blacklisted_sku(data["Nid"]): continue # create cleaned data data["cleaned_data"]["brand"] = ack_brand if "karadi" in data["Product_Name"].lower(): data["cleaned_data"]["brand"] = karadi_brand if "tinkle" in data["Product_Name"].lower(): data["cleaned_data"]["brand"] = tinkle_brand data["cleaned_data"]["category"] = books_and_comics data["cleaned_data"]["sku"] = data["Nid"] data["cleaned_data"]["model"] = data["ISBN"] or "" data["cleaned_data"]["title"] = data["Product_Name"] data["cleaned_data"]["image_url"] = [self.get_image_url(data["Path"])] # data['cleaned_data']['shipping_duration'] = '7-10 Working Days' data["cleaned_data"]["list_price"] = Decimal( self.get_text(data["List_price"]).replace(",", "").replace("Rs", "") ) data["cleaned_data"]["offer_price"] = Decimal( self.get_text(data["Sell_price"]).replace("Rs", "").replace(",", "") ) data["cleaned_data"]["description"] = data["Description"] data["cleaned_data"]["availability"] = AvailabilityMap.objects.get( applies_to="account", account=self.config["ACCOUNT"] ).availability products.append(data) print len(products) return products