def add_prices(self, bundle, prices):
    """Replace all prices of ``bundle.obj`` with the entries of *prices*.

    Nothing happens when *prices* is empty or ``None``.  Otherwise the
    existing prices of the product are deleted and one ``Price`` is saved
    per entry.  Each entry is rewritten in place: ``unit`` is mapped through
    ``UNIT.enum_dict`` and ``started_at`` / ``ended_at`` are parsed with
    ``dateutil``.
    """
    from dateutil.parser import parse as parse_date

    if not prices:
        return

    product = bundle.obj
    product.prices.all().delete()
    for entry in prices:
        # The entries are converted in place, in the same order as before.
        entry["unit"] = UNIT.enum_dict[entry["unit"]]
        for date_key in ("started_at", "ended_at"):
            entry[date_key] = parse_date(entry[date_key])
        Price(product=product, **entry).save()
def update_product_prices(request):
    """Replace the prices of several products with one shared price list.

    The POST body is JSON with two keys:

    * ``products`` -- strings whose second-to-last ``/``-separated segment
      is the product id;
    * ``prices`` -- dicts with ``unit``, ``amount``, ``currency``,
      ``started_at`` and ``ended_at``.

    Every matching product loses its current prices and receives a copy of
    each entry of ``prices``.

    Returns ``HttpResponse`` on success, ``HttpUnauthorized`` when the
    request is not authenticated, ``HttpBadRequest`` for a non-POST request
    or a malformed payload, and a 500 response for any other failure.  The
    view previously fell through and returned ``None`` after an exception.
    """
    from dateutil import parser

    if request.method != 'POST':
        return HttpBadRequest()

    try:
        if not is_authenticated(request):
            return HttpUnauthorized()

        data = json.loads(request.body)
        ids = [s.split("/")[-2] for s in data["products"]]
        products = Product.objects.filter(id__in=ids)
        prices_dicts = [{
            "unit": UNIT.enum_dict[p["unit"]],
            "amount": p["amount"],
            "currency": p["currency"],
            "started_at": parser.parse(p["started_at"]).date(),
            "ended_at": parser.parse(p["ended_at"]).date(),
        } for p in data["prices"]]

        for product in products:
            product.prices.all().delete()
            for price_dict in prices_dicts:
                Price(product=product, **price_dict).save()
        return HttpResponse()
    except (KeyError, IndexError, TypeError, ValueError):
        # Invalid JSON, a missing key, a malformed product string or an
        # unparsable date: the client sent a bad payload.
        traceback.print_exc()
        return HttpBadRequest()
    except Exception:
        # Keep the traceback output, but always answer the request.
        traceback.print_exc()
        return HttpResponse(status=500)
def _product_crawler(self):
    """Scrape every queued product page and store it as a ``Product``.

    ``(product_url, category)`` pairs are popped from ``self.product_links``
    until it is empty.  For each page the picture link, price, deposit,
    description and title are read from the HTML, a ``Product`` owned by
    ``self.patron`` at ``self.address`` is created, and its picture and a
    ``UNIT.DAY`` price are attached.  One dot is written to stdout per
    product.

    A page that fails to load with ``HTTPError`` is reported and skipped.
    Previously the loop carried on with the soup of the previous page, or
    with an unbound name on the first iteration.
    """
    from decimal import Decimal as D

    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _to_decimal(s):
        # "1 234,50 €" -> Decimal("1234.50")
        return D(s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            # Nothing left to crawl.
            break

        product_url = quote(product_url)
        try:
            with closing(urlopen(self.base_url + product_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s'
                  % (self.base_url + product_url))
            continue

        image_url = quote(product_soup.find('div', id='photo').a.get('href'))

        infosProduits = product_soup.find('div', id='infosProduits')
        composition = infosProduits.find('div', id='composition')

        price = _to_decimal(infosProduits.find('div', id='prix').text)
        deposit_amount = _to_decimal(composition.find('strong').text)

        description = infosProduits.find('p', class_='expandable').text
        description += composition.h2.text
        description += '\n'
        description += composition.p.text

        summary = infosProduits.h1.text

        product = Product.objects.create(
            summary=summary,
            description=description,
            deposit_amount=deposit_amount,
            address=self.address,
            owner=self.patron,
            category=Category.objects.get(slug=category_mapping[category]))

        try:
            with closing(urlopen(self.base_url + image_url)) as image:
                product.pictures.add(
                    Picture.objects.create(
                        image=uploadedfile.SimpleUploadedFile(
                            name='img', content=image.read())))
        except HTTPError:
            # A missing picture is not fatal: the product keeps its price.
            print('\nerror loading image for object at url: %s'
                  % (self.base_url + product_url))

        product.prices.add(Price(amount=price, unit=UNIT.DAY))
        sys.stdout.write('.')
        sys.stdout.flush()
def handle(self, *args, **options):
    """Import products for patron 22784 from the first sheet of an xlsx file.

    ``args`` must hold exactly one element, the path of the workbook.  The
    first row is the header, the second row is skipped, and every following
    row becomes a ``Product`` (columns ``photo``, ``titre``,
    ``description``, ``categorie``) with a day price of 1 and, when the
    download succeeds, one picture.

    A row that fails is reported and skipped.  The old ``while True`` loop
    with a bare ``except: break`` ran exactly once per row and hid every
    error, including ``KeyboardInterrupt``.
    """
    from accounts.models import Patron
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    try:
        patron = Patron.objects.get(pk=22784)
        address = patron.addresses.all()[0]
    except Patron.DoesNotExist:
        print("Can't find the user")
        return
    except IndexError:
        print("The user has no address")
        return

    if len(args) != 1:
        print("I need exactly one argument")
        return

    # xlsx is a binary format: read the raw bytes.
    with open(args[0], 'rb') as xlsx:
        sheet = xlrd.open_workbook(file_contents=xlsx.read()).sheets()[0]

    rows = iter(xrange(sheet.nrows))
    header = tuple(next_row(sheet, next(rows)))  # the header line
    next_row(sheet, next(rows))  # the empty line

    for row in rows:
        try:
            product_row = dict(zip(header, next_row(sheet, row)))
            image_url = product_row["photo"]
            category = Category.objects.get(slug=product_row["categorie"])
            product = Product.objects.create(
                summary=product_row["titre"],
                description=product_row["description"],
                deposit_amount=0,
                category=category,
                address=address,
                owner=patron)
            product.prices.add(Price(amount=1, unit=UNIT.DAY))
            try:
                with closing(urlopen(image_url)) as image:
                    picture = Picture.objects.create(
                        image=uploadedfile.SimpleUploadedFile(
                            name='img', content=image.read()))
                    product.pictures.add(picture)
            except HTTPError:
                print("error")
            product.save()
        except Exception as e:
            # Best effort: report the failing row and carry on.
            print("error importing row %s: %r" % (row, e))
def _obj_process_fields(self, product, picture_data, day_price_data):
    """Attach an optional picture and an optional price to *product*.

    *picture_data* is passed through ``decodestring`` and stored via
    ``default_storage``; *day_price_data* is converted with ``D`` and saved
    as a ``Price`` with ``unit=1``.  A falsy argument skips its step.
    """
    if picture_data:
        new_picture = Picture(product=product)
        # Compute the target path, then store the decoded content there.
        target_path = upload_to(new_picture, "")
        decoded_content = ContentFile(decodestring(picture_data))
        stored_path = default_storage.save(target_path, decoded_content)
        new_picture.image.name = stored_path
        new_picture.save()

    if day_price_data:
        day_price = Price(product=product, unit=1, amount=D(day_price_data))
        day_price.save()
def _product_crawler(self):
    """Scrape every queued product page and store it as a ``Product``.

    ``(product_url, category)`` pairs are popped from ``self.product_links``
    until it is empty.  Title, description and the "short description"
    block are read from the ``div#content`` element.  A ``Product`` with a
    zero deposit is created, the first thumbnail is attached as its picture,
    and the number following "Tarif location" in the short description
    becomes its ``UNIT.DAY`` price.

    A page that fails to load with ``HTTPError`` is reported and skipped.
    Previously the loop continued with the soup of the previous page, or
    with an unbound name on the first iteration.
    """
    from decimal import Decimal as D

    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    # Compiled once instead of once per product.
    price_re = re.compile(r"(?<=Tarif location\W:\W)\d+")

    def _to_decimal(s):
        # Return the price in the right format.
        return D(s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        try:
            with closing(urlopen(product_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s'
                  % (self.base_url + product_url))
            continue

        # No need to parse the whole page.
        block = product_soup.find('div', id="content")

        summary = block.find('h1', class_="product_title entry-title").text

        try:
            description = block.find('div', id='tab-description').find('p').text
        except AttributeError:
            # One of the lookups returned None: fall back to the default.
            description = 'odessance'

        deposit_amount = _to_decimal('0.0')
        details = block.find(
            'div', class_="short-description").find('div', class_='std').text

        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(slug=category_mapping[category]))

            try:
                images = block.find(
                    'ul', class_="product_thumbnails").find_all('li')
                # Only the first of the thumbnails is used.
                main_img = images[0].find('a').get('href')
                with closing(urlopen(main_img)) as image:
                    product.pictures.add(
                        Picture.objects.create(
                            image=uploadedfile.SimpleUploadedFile(
                                name='img', content=image.read())))
            except HTTPError:
                print('\nerror loading image for object at url: %s'
                      % (self.base_url + product_url))

            # Add the price to the product.
            try:
                match = price_re.search(details)
                if match is None:
                    raise ValueError('no price found')
                product.prices.add(
                    Price(amount=_to_decimal(match.group(0)), unit=UNIT.DAY))
                sys.stdout.flush()
            except Exception:
                print('ERROR PRICE')
            print('PRODUCT SUCCESSFULY CREATED')
        except Exception:
            print('PRODUCT CANNOT BE CREATED')
def _get_products(self):
    """Create one ``Product`` per entry of ``self.products['data']``.

    Each entry supplies the title (``title_article``), price (``prix``),
    picture path (``imgsrc``, appended to ``self.image_med_base_url``) and
    the fields combined into the description.  Products are created for
    ``self.patron`` at ``self.address`` in the category whose slug is
    ``self.category``, with a zero deposit and a ``UNIT.DAY`` price.
    """
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    for item in self.products['data']:
        summary = item['title_article']
        price = item['prix']

        # Same concatenation as before, so non-string fields still fail.
        description = (
            'Nombre de joueurs: ' + item['nbjoueursmin'] + '-'
            + item['nbjoueursmax'] + '\n'
            + 'Temps(minutes): ' + item['temps_minutes'] + '\n'
            + 'Age minimum: ' + item['agemin'] + '\n'
            + 'Genre: ' + item['title_genre'] + '\n'
            + item['descr'])

        image_url = self.image_med_base_url + item['imgsrc']
        deposit_amount = 0.0

        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(slug=self.category))

            try:
                with closing(urlopen(image_url)) as image:
                    product.pictures.add(
                        Picture.objects.create(
                            image=uploadedfile.SimpleUploadedFile(
                                name='img', content=image.read())))
            except HTTPError:
                print('\nerror loading image for object at url: %s' % image_url)
                # NOTE(review): the flattened source does not show whether
                # this print belongs to the handler or runs for every
                # product; it is kept with the handler -- confirm.
                print(summary)

            try:
                product.prices.add(Price(amount=price, unit=UNIT.DAY))
            except Exception:
                print('PRICE ERROR')
        except Exception as e:
            print('CANNOT CREATE THE PRODUCT %s \n' % (summary))
            print('error: %s' % str(e))
            # NOTE(review): the flattened source does not show whether this
            # `break` sits in the handler (stop at the first failure) or at
            # loop level (import a single product); the handler reading is
            # assumed -- confirm.
            break
def handle_import(self, *args, **options):
    """Import users and their products from the rentalcompare MySQL database.

    Options read: the connection credentials (``user``, ``password``,
    ``host``, ``port``, ``database``), ``dryrun``, ``export-skipped``,
    ``limit-products``, ``limit-users``, ``category``, ``level`` and
    ``owner``.

    Users are read from ``ob_users`` (optionally filtered by level and
    username, newest first, at most ``limit-users``).  For each user a
    ``Patron`` is created, or reused when the e-mail already exists.
    Vendors get a ``ProAgency`` and a ``Subscription``, and the user's rows
    of ``ob_products`` are bulk-created as ``Product`` / ``Price`` /
    ``Picture`` objects.  Everything runs with autocommit disabled and is
    rolled back on error or when ``dryrun`` is set.  With
    ``export-skipped`` the skipped objects are dumped to three JSON files.

    Changes from the previous version:

    * the WHERE clause joins its filters with ``and`` and passes the values
      as query parameters instead of interpolating them into the SQL;
    * ``skipped_total`` is updated once per user; it used to add the
      running per-user counter after every product chunk;
    * ``agency`` is reset for every user so a non-vendor can no longer hit
      an unbound name or reuse the previous vendor's agency;
    * the duplicated "email missing" check is gone, the JSON export uses
      ``with``, and already-imported ids are held in sets.
    """
    credentials_keys = ('user', 'password', 'host', 'port', 'database', )
    credentials = {
        k: v for k, v in options.items() if k in credentials_keys
    }
    dry_run = options['dryrun']
    export_skipped = options['export-skipped']
    lp = int(options['limit-products'])
    lu = int(options['limit-users'])
    cat_id = int(options['category'])
    level = options['level']
    owner = options['owner']

    user_prg = 0
    prod_prg = 0
    user_total = 0
    prod_total = 0
    skipped_total = 0

    product_desc_template = Template(self.PRODUCT_DESC_TEMPLATE)

    transaction.set_autocommit(False)

    try:
        self.stdout.write(
            "Connecting to {user}:*****@{host}:{port}/{database} ...".format(
                **credentials))
        cnx = mysql.connector.connect(**credentials)
        c = cnx.cursor()
        del options['password']
    except:
        self.stderr.write("Could not establish connection " +
                          "to the rentalcompare database. Cause:")
        raise

    el_c = connection.cursor()

    try:
        DIVERS_CAT = Category.objects.get(id=cat_id)
        ELOUE_SITE = Site.objects.get(id=ELOUE_SITE_ID)
        propack = ProPackage.objects.create(name=self.PRO_PACKAGE_NAME,
                                            price=0)
        ir = ImportRecord.objects.create(origin=self.ORIGIN,
                                         file_name=self.FILENAME,
                                         imported_at=datetime.now())

        # Sets give O(1) membership tests in the loops below.
        imported_users_ids = set(self.get_imported_users_qs().values_list(
            'original_id', flat=True))
        imported_products_ids = set(
            self.get_imported_products_qs().values_list(
                'original_id', flat=True))

        # The values go through the driver's parameter substitution rather
        # than into the SQL text.
        filters = []
        filter_params = []
        if level:
            filters.append("level=%s")
            filter_params.append(level)
        if owner:
            filters.append("username=%s")
            filter_params.append(owner)
        where = ("where " + " and ".join(filters)) if filters else ""

        c.execute("select count(*) from ob_users " + where,
                  tuple(filter_params))
        (user_count, ) = c.fetchone()
        user_count = min(user_count, lu)

        if user_count == 1:
            # `owner` may be unset when a level filter matches one user.
            self.stdout.write("Importing user %s" % (owner if owner else "*"))
        elif user_count > 1:
            self.stdout.write("Importing %s users (type=%s)" % (
                user_count,
                level if level else "*", ))
        else:
            raise Exception("No users correspond to your criteria")

        c.execute(
            "select * from ob_users " + where +
            " order by registered desc limit %s",
            tuple(filter_params) + (lu, ))

        RcUser = self.get_user_type(c.column_names)

        # NOTE(review): the product queries below reuse cursor `c`, so the
        # `fetchmany` at the end of the outer loop reads from the last
        # product query, not from this user query -- confirm whether more
        # than one chunk of users is ever expected.
        chunk = c.fetchmany(size=self.USERS_CHUNK_SIZE)

        username_counts = {}

        while len(chunk) > 0:

            for rc_user in imap(RcUser._make, chunk):

                user_prg = user_prg + 1

                email_exists = Patron.objects.exists(email=rc_user.email)

                mess = "Importing user %5s out of %5s: %-50s" % \
                    (user_prg, user_count,
                     ("reusing " if email_exists else "") + rc_user.username, )
                self.stdout.write(mess, ending='\r')

                if not rc_user.email:
                    # TODO do not skip w/o emails
                    self.skip_user(rc_user, "email missing")
                    continue

                # Fields available for professional accounts:
                #   company name (company), e-mail (email), customer name,
                #   username, is_prof=true, phone (phonenumber), address,
                #   website, company logo, company description
                u = {
                    'username': rc_user.username[:30],
                    'password': rc_user.password,
                    'email': rc_user.email,
                    'first_name': rc_user.display_name[:30],
                    'last_name': rc_user.lastname[:30],
                    'url': "http://%s" % rc_user.website
                    if rc_user.website is not None else "",
                    'about': rc_user.about_me,
                    'import_record': ir,
                    'original_id': rc_user.id,
                    'avatar': rc_user.logo,
                }

                if rc_user.level == "vendor":

                    if rc_user.phonenumber is None:
                        self.skip_user(rc_user, "pro with no phone")
                        continue

                    if not rc_user.has_address:
                        self.skip_user(rc_user, "pro with no address")
                        continue

                    # pro-specific fields
                    u.update({
                        'company_name': rc_user.company[:50],
                        'is_professional': True,
                    })

                # TODO handle unallowed characters
                if not email_exists:
                    # Find a free username by appending "_<n>".
                    username_counts[rc_user.username] = 0
                    username_base = rc_user.username if len(
                        rc_user.username) <= 27 else rc_user.username[:27]
                    new_username = rc_user.username[:30]
                    while Patron.objects.exists(username=new_username):
                        username_counts[rc_user.username] = \
                            username_counts[rc_user.username] + 1
                        new_username = username_base + '_' + str(
                            username_counts[rc_user.username])

                    u['username'] = new_username

                    user = Patron(**u)
                    user.init_slug()

                    # Same scheme for the slug, with "-<n>".
                    user.slug = user.slug[:50]
                    slug_base = user.slug if len(
                        user.slug) <= 47 else user.slug[:47]
                    slug_attempt = 0
                    while Patron.objects.exists(slug=user.slug):
                        # TODO generate new slug properly
                        slug_attempt = slug_attempt + 1
                        user.slug = slug_base + "-" + str(slug_attempt)
                else:
                    user = Patron.objects.get(email=rc_user.email)

                if rc_user.id not in imported_users_ids:

                    if rc_user.phonenumber is not None:
                        user.default_number = PhoneNumber(
                            patron=user,
                            number=rc_user.phonenumber,
                            kind=PHONE_TYPES.OTHER)

                    user.save()

                    # user address
                    if rc_user.has_address:  # TODO refactor condition
                        addr = rc_user.make_address(user)
                        addr.save()
                        user.default_address = addr
                        user.save()

                # Reset per user so a non-vendor never inherits the agency
                # of the previous vendor.
                agency = None

                if rc_user.level == "vendor":

                    # TODO refactor into User.build_boutique
                    if rc_user.id not in imported_users_ids:
                        agency = ProAgency.objects.create(
                            patron=user,
                            name=rc_user.company,
                            phone_number=user.default_number,
                            address1=addr.address1,
                            address2=addr.address2,
                            zipcode=addr.zipcode,
                            city=addr.city,
                            state=addr.state,
                            country=addr.country)

                        Subscription.objects.create(patron=user,
                                                    propackage=propack)
                    else:
                        agency = ProAgency.objects.filter(
                            patron=user).first()

                # products
                # NOTE(review): the flattened source does not show whether
                # this section was nested under the vendor branch; it is
                # assumed to run for every user -- confirm.
                c.execute(
                    "select count(*) from ob_products where vendor_id=%(user_id)s limit %(quantity)s;",
                    {
                        'user_id': rc_user.id,
                        'quantity': lp
                    })
                (prod_count, ) = c.fetchone()
                prod_count = min(prod_count, lp)

                c.execute(
                    "select * from ob_products where vendor_id=%(user_id)s order by date_lastaction desc limit %(quantity)s;",
                    {
                        'user_id': rc_user.id,
                        'quantity': lp
                    })

                RcProduct = self.get_product_type(c.column_names)

                def make_product(iterable):
                    # Build the row and normalise its tags: stripped,
                    # de-duplicated, sorted and joined with ", ".
                    res = RcProduct._make(iterable)
                    return res._replace(tags=', '.join(
                        sorted(
                            set(tag.strip()
                                for tag in res.tags.split(',') if tag))))

                prod_chunk = c.fetchmany(
                    size=min(self.PRODUCTS_CHUNK_SIZE, lp))

                products = []
                prices = []
                pictures = []
                prod_cat = []

                prod_prg = 0
                prod_skipped = 0

                while len(prod_chunk) > 0:

                    # Reserve one product id per row, so related objects
                    # can reference products before they are saved.
                    el_c.execute(
                        "select nextval('products_product_id_seq')" +
                        " from generate_series(1,%(prod_count)s)",
                        {'prod_count': len(prod_chunk)})

                    for (rc_product, (alloc_id, )) in izip(
                            imap(make_product, prod_chunk), el_c):

                        if rc_product.id in imported_products_ids:
                            prod_skipped = prod_skipped + 1
                            continue

                        p = {
                            "id": alloc_id,
                            "owner": user,
                            "summary": rc_product.name,
                            "description": product_desc_template
                            .render(Context(rc_product._asdict())).strip(),
                            # TODO Add all attributes
                            # TODO deposit
                            "deposit_amount": 0,
                            "currency": rc_user.currency,  # TODO handle 0
                            "address": user.default_address,
                            "phone": user.default_number,
                            "quantity": rc_product.qty
                            if rc_product.qty is not None else 0,
                            "category": DIVERS_CAT,  # TODO add real category
                            "import_record": ir,
                            "original_id": rc_product.id,
                            "created_at": datetime.combine(
                                rc_product.date_added,
                                datetime_orig.time.min),
                        }

                        product = Product(**p)
                        product.prepare_for_save()
                        products.append(product)

                        prod_cat.append(
                            Product2Category(product=product,
                                             category=DIVERS_CAT,
                                             site=ELOUE_SITE))

                        if rc_product.price:
                            prices.append(
                                Price(product_id=alloc_id,
                                      amount=rc_product.price,
                                      currency=rc_user.currency,
                                      unit=UNIT.DAY))

                        if rc_product.price_weekly:
                            prices.append(
                                Price(product_id=alloc_id,
                                      amount=rc_product.price_weekly,
                                      currency=rc_user.currency,
                                      unit=UNIT.WEEK))

                        if rc_product.primary_photo:
                            pictures.append(
                                Picture(product_id=alloc_id,
                                        image=rc_product.primary_photo,
                                        created_at=datetime.now()))

                    # bulk save products and related objects
                    Product.objects.bulk_create(products)
                    Product2Category.objects.bulk_create(prod_cat)
                    ELOUE_SITE.products.add(*products)
                    if agency is not None:
                        agency.products.add(*products)
                    Price.objects.bulk_create(prices)
                    Picture.objects.bulk_create(pictures)

                    # get next chunk of products
                    prod_prg = prod_prg + len(prod_chunk)
                    prod_chunk = c.fetchmany(
                        size=min(self.PRODUCTS_CHUNK_SIZE, lp))
                    products = []
                    prod_cat = []
                    prices = []
                    pictures = []

                    self.stdout.write(
                        "\rImporting products for user %s: %s / %s (%s skipped)"
                        % (rc_user.username,
                           prod_prg,
                           prod_count,
                           prod_skipped, ),
                        ending='\r')

                # `prod_skipped` is cumulative for this user: add it once.
                skipped_total = skipped_total + prod_skipped
                prod_total = prod_total + prod_prg

                ELOUE_SITE.patrons.add(user)
                ir.patrons.add(user)

                user_total = user_total + 1

            chunk = c.fetchmany(size=self.USERS_CHUNK_SIZE)

        self.stdout.write("\n")
        self.stdout.write("\rTotal users imported: %s" % (user_total, ),
                          ending='\n')
        self.stdout.write("\rTotal products imported: %s" %
                          (prod_total - skipped_total, ),
                          ending='\n')
        self.stdout.write("\rTotal products skipped: %s" %
                          (skipped_total, ),
                          ending='\n')

        if dry_run:
            self.stdout.write(
                self.style.NOTICE("Dry run was enabled, rolling back."),
                ending='\n')
            transaction.rollback()
        else:
            self.stdout.write("Committing...", ending='\n')
            transaction.commit()

        c.close()
        cnx.close()

    except:
        transaction.rollback()
        self.stderr.write("\nGot an error, rolling back. Cause:",
                          ending='\n')
        raise

    self.stdout.write("Import done.", ending='\n')

    if export_skipped:

        import json

        def date_handler(obj):
            # Dates and datetimes are written in ISO format.
            return obj.isoformat() if hasattr(obj, 'isoformat') else obj

        self.stdout.write("Saving skipped objects...", ending='\n')

        exports = (
            ("skipped_users.json", self.skipped_users),
            ("skipped_products.json", self.skipped_products),
            ("skipped_logos.json", self.skipped_logos),
        )
        for file_name, skipped in exports:
            with open(file_name, "w") as f:
                json.dump(skipped, f, default=date_handler, indent=1)

        self.stdout.write("Done.", ending='\n')
def handle(self, *args, **options):
    """Import products from the first sheet of an xlsx file.

    ``args`` must hold exactly one element, the path of the workbook.  The
    first row is the header and the second row is skipped.  For every
    following row the page named in column ``nom de la photo`` is fetched,
    the ``src`` of the image under ``#image-block`` is downloaded from
    ``BASE_URL`` and stored as a ``Picture``, and a ``Product`` with three
    prices (units 1, 2 and 3) is created for the patron.

    A row is retried when a download fails with ``HTTPError``, ``URLError``
    or ``BadStatusLine``, but now at most five times; the old loop retried
    forever on a permanently failing URL.
    """
    from accounts.models import Patron
    from products.models import Category, Picture, Price, Product

    max_attempts = 5

    def _open_url(url, accept_gzip=False):
        # Open *url* and transparently unwrap a gzip-encoded body.
        request = urllib2.Request(url)
        if accept_gzip:
            request.add_header('Accept-Encoding', 'gzip,deflate')
        response = urllib2.urlopen(request)
        if response.info().get('Content-Encoding') == 'gzip':
            response = gzip.GzipFile(fileobj=io.BytesIO(response.read()))
        return response

    try:
        patron = Patron.objects.get(username='******')
        address = patron.addresses.all()[0]
    except Patron.DoesNotExist:
        print("Can't find user 'spotsound'")
        return
    except IndexError:
        print("The user has no address")
        return

    if len(args) != 1:
        print('I need exactly one argument, ')
        return

    # xlsx is a binary format: read the raw bytes.
    with open(args[0], 'rb') as xlsx:
        sheet = xlrd.open_workbook(file_contents=xlsx.read()).sheets()[0]

    rows = iter(xrange(sheet.nrows))
    header = tuple(next_row(sheet, next(rows)))  # the header line
    next_row(sheet, next(rows))  # the empty line
    find = etree.XPath("//*[@id='image-block']/a/img")

    for row in rows:
        for _attempt in xrange(max_attempts):
            try:
                product_row = dict(zip(header, next_row(sheet, row)))
                print('%s %s' % (row, product_row['nom de la photo']))

                # Product page -> image element -> image content.
                page = _open_url(product_row['nom de la photo'],
                                 accept_gzip=True)
                html = etree.parse(
                    page,
                    parser=etree.HTMLParser(encoding='utf-8', recover=True,
                                            remove_comments=True))
                img = find(html)
                image = _open_url(BASE_URL + img[0].attrib['src'])

                picture = Picture.objects.create(
                    image=uploadedfile.SimpleUploadedFile(
                        name='img', content=image.read()))

                product = Product(
                    summary=product_row['titre'],
                    deposit_amount=product_row['caution'],
                    description=product_row['description'],
                    address=address,
                    quantity=product_row[u'quantité'].replace('.0', ''),
                    owner=patron,
                    category=Category.objects.get(
                        pk=category_mapping[product_row[u'catégorie']]))
                product.save()

                day_price = Price(unit=1, amount=product_row[u'Pirx journée'])
                we_price = Price(unit=2, amount=product_row[u'Prix week end'])
                week_price = Price(unit=3, amount=product_row[u'Prix semaine'])
                product.pictures.add(picture)
                product.prices.add(day_price, we_price, week_price)
            except (urllib2.HTTPError, urllib2.URLError,
                    httplib.BadStatusLine) as e:
                log.exception("Exception {0} occured, retry ...".format(e))
                continue
            else:
                break
        else:
            # Every attempt failed: give up on this row.
            log.error("Giving up on row {0} after {1} attempts".format(
                row, max_attempts))
#print "try upload image" with closing(urlopen(image_url)) as image: product.pictures.add(Picture.objects.create( image=uploadedfile.SimpleUploadedFile( name='img', content=image.read()) ) ) #print "picture : %s" % product.pictures.all()[0] except HTTPError as e: print '\nerror loading image for object at url:', self.base_url + product_url # Add the price to the product if self.price_tag: try: if price: product.prices.add(Price(amount=price, unit=UNIT.DAY)) #print "price : %s" % product.prices.all()[0] except Exception, e: print 'PRICE ERROR' pass # sys.stdout.write('.') # sys.stdout.flush() except Exception, e: print 'CANNOT CREATE THE PRODUCT %s \n %s' % (summary, product_url) print 'error: %s' % str(e) pass print "\n %s products created" % self.patron.products.all().count()
#print "try upload image" with closing(urlopen(image_url)) as image: product.pictures.add( Picture.objects.create( image=uploadedfile.SimpleUploadedFile( name='img', content=image.read()))) #print "picture : %s" % product.pictures.all()[0] except HTTPError as e: print '\nerror loading image for object at url:', self.base_url + product_url # Add the price to the product if self.price_tag: try: if price: product.prices.add( Price(amount=price, unit=UNIT.DAY)) #print "price : %s" % product.prices.all()[0] except Exception, e: print 'PRICE ERROR: %s' % str(e) pass # sys.stdout.write('.') # sys.stdout.flush() except Exception, e: print 'CANNOT CREATE THE PRODUCT %s \n %s' % (summary, product_url) print 'error: %s' % str(e) pass print "\n %s products created" % self.patron.products.all().count()
def _product_crawler(self):
    """Scrape every queued product page and store it as a ``Product``.

    ``(product_url, category)`` pairs are popped from ``self.product_links``
    until it is empty.  Picture, title, description and price are scraped
    independently; a missing description becomes ``" "`` and a missing
    price becomes ``"10.00"``.  The product is created with a zero deposit
    for ``self.patron`` at ``self.address``.  The number of products of the
    patron is printed at the end.

    Changes from the previous version: a page that fails to load is
    skipped instead of re-using the previous page's soup; a product with no
    title is reported and skipped instead of re-using the previous title
    (or raising ``NameError`` inside the error handler); a product with no
    picture link is created without a picture; bare ``except:`` clauses no
    longer swallow ``KeyboardInterrupt``.
    """
    from decimal import Decimal as D

    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _to_decimal(s):
        # Return the price in the right format.
        return D(s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        try:
            with closing(urlopen(product_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s' % product_url)
            continue

        # Get the image
        try:
            image_url = product_soup.find(
                'div', class_="imgBox").find('img').get('src')
        except Exception:
            image_url = None
            print("pass image")

        # Get the title
        try:
            summary = product_soup.find('h1').text
        except Exception:
            summary = None
            print("pass title")

        # Get the description
        try:
            description = product_soup.find('p', class_='description').text
        except Exception:
            description = " "
            print('pass description')

        # Get the price
        try:
            price = product_soup.find('div', class_='right').find(
                'table', class_='particuliers').findAll('td')[-3].text
            price = _to_decimal(price)
        except Exception:
            price = "10.00"
            print('pass price')

        if summary is None:
            # Without a title there is nothing meaningful to create.
            print('CANNOT CREATE THE PRODUCT %s \n %s' % (summary, product_url))
            continue

        deposit_amount = 0.0

        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(
                    slug=category_mapping[category]))

            if image_url is not None:
                try:
                    with closing(urlopen(image_url)) as image:
                        product.pictures.add(
                            Picture.objects.create(
                                image=uploadedfile.SimpleUploadedFile(
                                    name='img', content=image.read())))
                except HTTPError:
                    print('\nerror loading image for object at url: %s'
                          % (self.base_url + product_url))

            # Add the price to the product
            try:
                product.prices.add(Price(amount=price, unit=UNIT.DAY))
            except Exception:
                print('PRICE ERROR')
        except Exception as e:
            print('CANNOT CREATE THE PRODUCT %s \n %s' % (summary, product_url))
            print('error: %r' % (e, ))

    print("\n %s products created" % self.patron.products.all().count())
def _subpage_crawler(self):
    """Create the products listed on each family page.

    Family paths are popped from ``self.product_families`` until it is
    empty.  On each page every ``li`` under
    ``ul#photogallery_listlarge_items`` yields a title, a link used as the
    picture URL and an optional description.  ``self.total`` counts the
    listed items and ``self.price_found`` the items for which a price
    ("<number> €") was found in the description or, failing that, in the
    title.

    Changes from the previous version: the price found by the regular
    expression is the one attached to the product (it used to be discarded
    in favour of ``_to_decimal(description)``); the image error message no
    longer references the undefined ``product_url``; a family page that
    fails with ``HTTPError`` is reported and skipped; bare ``except:``
    clauses are narrowed to ``Exception``.
    """
    from decimal import Decimal as D

    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    # Same pattern as before, compiled once.
    price_re = re.compile(u'[0-9]+(\\.[0-9]+)?\\s?\u20AC')

    def _to_decimal(s):
        return D(s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            family = self.product_families.pop()
        except IndexError:
            break

        try:
            with closing(urlopen(self.base_url + family)) as product_list_page:
                product_list_soup = BeautifulSoup(product_list_page,
                                                  'html.parser')
        except HTTPError:
            print('error loading page for object at url %s'
                  % (self.base_url + family))
            continue

        gallery = product_list_soup.find(
            'ul', id='photogallery_listlarge_items')
        if not gallery:
            continue

        product_list = gallery.findAll('li')
        self.total += len(product_list)

        for item in product_list:
            infosProduits = item.find(
                'dt', class_="item_title").find('a').get('title')
            image_url = item.find('a').get('href')

            try:
                description = item.find('dd').text
            except AttributeError:
                # No <dd> element for this item.
                description = ''

            # Look for the price in the description, then in the title.
            price = None
            for text in (description, infosProduits):
                try:
                    match = price_re.search(text or '')
                    if match:
                        price = _to_decimal(match.group(0))
                        self.price_found += 1
                        break
                except Exception:
                    continue

            summary = infosProduits
            deposit_amount = 0.0

            try:
                product = Product.objects.create(
                    summary=summary,
                    description=description,
                    deposit_amount=deposit_amount,
                    address=self.address,
                    owner=self.patron,
                    category=Category.objects.get(
                        slug=category_mapping[family]))

                try:
                    with closing(urlopen(image_url)) as image:
                        product.pictures.add(
                            Picture.objects.create(
                                image=uploadedfile.SimpleUploadedFile(
                                    name='img', content=image.read())))
                except HTTPError:
                    print('\nerror loading image for object at url: %s'
                          % image_url)

                # Add the price to the product
                try:
                    if price is None:
                        raise ValueError('no price found')
                    product.prices.add(Price(amount=price, unit=UNIT.DAY))
                    sys.stdout.flush()
                except Exception:
                    print('PRICE ERROR')
            except Exception:
                print('CANNOT CREATE THE PRODUCT')
def _product_crawler(self):
    """Scrape every queued product page and store it as a ``Product``.

    ``(product_url, category)`` pairs are popped from ``self.product_links``
    until it is empty.  Picture and title come from the ``form#caddy``
    element, the day price from the first ``td.detail_prix`` cell of the
    ``table.bloc-tarif-part`` table, and the description from
    ``div#descrlongue``.  When the table has exactly five price cells, the
    text of the fifth is appended to the description.  One dot is written
    to stdout per product.

    A page that fails to load with ``HTTPError`` is reported and skipped.
    Previously the loop continued with the soup of the previous page, or
    with an unbound name on the first iteration.
    """
    from decimal import Decimal as D

    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _to_decimal(s):
        # Return the price in the right format.
        return D(s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        try:
            with closing(urlopen(self.base_url + product_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s'
                  % (self.base_url + product_url))
            continue

        caddy = product_soup.find('form', id='caddy')

        # Get the image
        image_url = quote(caddy.find('img').get('src'))

        # Get the title
        summary = caddy.find('h1').text
        print(summary)

        # Get the price
        prices = product_soup.find(
            'table', class_='bloc-tarif-part').find_all(
                'td', class_='detail_prix')
        price = _to_decimal(prices[0].text)

        # Get the description
        description = product_soup.find('div', id='descrlongue').text
        if len(prices) == 5:
            description += '\n' + prices[4].text + u'€'

        deposit_amount = 0.0

        product = Product.objects.create(
            summary=summary,
            description=description,
            deposit_amount=deposit_amount,
            address=self.address,
            owner=self.patron,
            category=Category.objects.get(slug=category_mapping[category]))

        try:
            with closing(urlopen(self.base_url + image_url)) as image:
                product.pictures.add(
                    Picture.objects.create(
                        image=uploadedfile.SimpleUploadedFile(
                            name='img', content=image.read())))
        except HTTPError:
            print('\nerror loading image for object at url: %s'
                  % (self.base_url + product_url))

        # Add the price to the product
        product.prices.add(Price(amount=price, unit=UNIT.DAY))
        sys.stdout.write('.')
        sys.stdout.flush()
def _subpage_crawler(self):
    """Create the products listed on each family page.

    Family paths are popped from ``self.product_families`` until it is
    empty; the family is also the key into ``category_mapping``.  Every
    ``div.product-presentation.product-presentation-1`` element of a page
    yields a picture link, a title, a price and a description built from
    its paragraphs (paragraphs whose text is exactly ``LOCATION`` are left
    out).  Products get a zero deposit and a ``UNIT.DAY`` price.

    Changes from the previous version: the image error message no longer
    references the undefined ``product_url``, so a failed picture download
    no longer aborts the price step; the ``HTTPError`` guard covers only
    the page download; bare ``except:`` clauses are narrowed to
    ``Exception``.
    """
    from decimal import Decimal as D

    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _to_decimal(s):
        # Return the price in the right format.
        return D(s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            family = self.product_families.pop()
        except IndexError:
            break
        category = family

        try:
            with closing(urlopen(self.base_url + family)) as product_list_page:
                product_list_soup = BeautifulSoup(product_list_page,
                                                  'html.parser')
        except HTTPError:
            print('error loading page for object at url %s' % self.base_url)
            continue

        product_list = product_list_soup.find_all(
            'div', class_='product-presentation product-presentation-1')

        for product_soup in product_list:
            image_url = product_soup.find(
                'a', class_="showOriginalImage active").get('href')

            # Get the title
            summary = product_soup.find('div', class_='name').text

            # Get the price
            price = _to_decimal(product_soup.find('div', class_='price').text)

            # Get the deposit amount
            deposit_amount = 0.0

            # Get the description
            description = ""
            description_soup = product_soup.find('div', class_='description')
            for p in description_soup.find_all('p'):
                if p.text != 'LOCATION':
                    description += '%s \n' % p.text

            try:
                product = Product.objects.create(
                    summary=summary,
                    description=description,
                    deposit_amount=deposit_amount,
                    address=self.address,
                    owner=self.patron,
                    category=Category.objects.get(
                        slug=category_mapping[category]))

                try:
                    with closing(urlopen(image_url)) as image:
                        product.pictures.add(
                            Picture.objects.create(
                                image=uploadedfile.SimpleUploadedFile(
                                    name='img', content=image.read())))
                except HTTPError:
                    print('\nerror loading image for object at url: %s'
                          % image_url)

                # Add the price to the product
                try:
                    product.prices.add(Price(amount=price, unit=UNIT.DAY))
                    sys.stdout.flush()
                except Exception:
                    print('PRICE ERROR')
            except Exception:
                print('CANNOT CREATE PRODUCT %s' % summary)
def test_amount_values_negative(self):
    """``full_clean`` must raise ``ValidationError`` for an amount of -1."""
    negative_price = Price(currency='EUR', unit=1, product_id=1, amount=-1)
    with self.assertRaises(ValidationError):
        negative_price.full_clean()
def _product_crawler(self):
    """Create a product for every entry of ``self.product_links``.

    Pops ``(product_url, category)`` pairs until the dict is empty. For
    each page it extracts the picture URL, title, description and price,
    creates a ``Product`` owned by ``self.patron`` at ``self.address``,
    then attaches the picture and a per-day price. Extraction is
    best-effort: a missing description becomes ``" "`` and a missing price
    becomes ``"10.00"``; a page that cannot be loaded or has no title is
    skipped. The number of products of the patron is printed at the end.
    """
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _parse_price(text):
        # Build "<units>.<cents>" from the first two digit groups of the
        # displayed price. The cents are kept as text so that a leading
        # zero survives ("12,05" -> "12.05", not "12.5").
        digits = re.findall(r'\d+', text)
        if len(digits) == 1:
            return "%s" % int(digits[0])
        return "%s.%s" % (int(digits[0]), digits[1])

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        try:
            with closing(urlopen(product_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s' % product_url)
            # Without this the previous page's soup would be parsed again.
            continue

        # Get the image
        try:
            image_url = product_soup.find(
                'a', class_="MagicZoomPlus").find('img').get('src')
        except Exception:
            image_url = None
            print("pass image")

        # Get the title
        try:
            summary = product_soup.find('h1').text
        except Exception:
            # No title: skip the page rather than reuse the previous one.
            print("pass title")
            continue

        # Get the description
        try:
            more_info = product_soup.find('div', id='more_info_sheets')
            description1 = product_soup.find(
                'div', id='short_description_content').find('p').text
            description2 = more_info.find('p').text
            description3 = more_info.find_all(
                'span',
                style="box-sizing: border-box; font-size: 12.222222328186px;")
            description4 = "\n".join(span.text for span in description3)
            description = "%s\n%s\n%s" % (
                description1, description2, description4)
        except Exception:
            description = " "
            print('pass description')

        # Get the price
        try:
            price = _parse_price(
                product_soup.find('span', id='our_price_display').text)
        except Exception:
            price = "10.00"
            print('pass price')

        # Create deposit
        deposit_amount = 0.0

        # Create the product
        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(
                    slug=category_mapping[category]))

            if image_url is not None:
                try:
                    with closing(urlopen(image_url)) as image:
                        product.pictures.add(
                            Picture.objects.create(
                                image=uploadedfile.SimpleUploadedFile(
                                    name='img', content=image.read())))
                except HTTPError:
                    print('\nerror loading image for object at url: %s'
                          % image_url)

            # Add the price to the product
            try:
                product.prices.add(Price(amount=price, unit=UNIT.DAY))
            except Exception as e:
                print('PRICE ERROR')
                print('error: %s' % e)
        except Exception as e:
            print('CANNOT CREATE THE PRODUCT %s \n %s' % (summary, product_url))
            print('error: %s' % e)

    print("\n %s products created" % self.patron.products.all().count())
def test_amount_values_positive(self):
    # Building and validating a price with a positive amount must not
    # raise ValidationError; if it does, the test fails with that error.
    fields = dict(amount=20, product_id=4, unit=0, currency='EUR')
    try:
        Price(**fields).full_clean()
    except ValidationError as error:
        self.fail(error)
def _product_crawler(self):
    """Create a product for every entry of ``self.product_links``.

    Pops ``(product_url, category)`` pairs until the dict is empty, loads
    ``self.base_url + product_url`` and reads picture, title, description
    and price from the ``div#catalogue_pwb`` block of the page. A
    ``Product`` owned by ``self.patron`` at ``self.address`` is created,
    then its picture and a per-day price are attached. Pages that cannot
    be loaded or parsed are reported on stdout and skipped.
    """
    from decimal import Decimal
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _to_decimal(s):
        # Return the price in the right format: no euro sign, no spaces,
        # dot as decimal separator.
        return Decimal(
            s.strip().replace(u'€', '').replace(',', '.').replace(' ', ''))

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        page_url = self.base_url + product_url
        try:
            with closing(urlopen(page_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s' % page_url)
            # Without this the previous page's soup would be parsed again.
            continue

        # No need to parse all the page
        block = product_soup.find('div', id="catalogue_pwb")

        try:
            # Get the image
            image_url = self.base_url + '/' + block.find(
                'a', id='zoomlightbox').find('img').get('src')
            # Get the title
            summary = block.find('h1', class_="h1_pwb").find(
                'span', id='grand_titre_nom_produit_fiche_produit').text
            # Get the description (optional on the page)
            details = block.find('div', id="zoneAttributsSimplesContenu")
        except (AttributeError, TypeError) as e:
            # A `find` returned None (or the image has no `src`): skip this
            # page instead of aborting the whole crawl.
            print('ERROR : CANNOT parse PRODUCT page %s' % page_url)
            print('error: %s' % e)
            continue

        description = ''
        if details is not None:
            description = details.text.replace(u'\n', ' ')

        deposit_amount = 0.0

        # Create the product
        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(
                    slug=category_mapping[category]))

            try:
                with closing(urlopen(image_url)) as image:
                    product.pictures.add(
                        Picture.objects.create(
                            image=uploadedfile.SimpleUploadedFile(
                                name='img', content=image.read())))
            except HTTPError:
                print('\nerror loading image for object at url: %s'
                      % image_url)

            # Add the price to the product
            try:
                price = block.find('span', id="zone_prix").find(
                    'span', id="prix_pas_promotion_euro_fiche_produit").text
                price = _to_decimal(price)
                print(price)
                product.prices.add(Price(amount=price, unit=UNIT.DAY))
                sys.stdout.flush()
            except Exception as e:
                # The original handler held a bare string and printed
                # nothing.
                print('PRICE ERROR')
                print('error: %s' % e)
        except Exception as e:
            print('ERROR : CANNOT create PRODUCT %s' % summary)
            print('error: %s' % e)
def _product_crawler(self):
    """Create a product for every entry of ``self.product_links``.

    Pops ``(product_url, category)`` pairs until the dict is empty and
    loads ``self.base_url + product_url``. Description, price and picture
    are located with the ``self.description_tag`` / ``self.price_tag`` /
    ``self.image_tag`` dicts (``"name"`` and ``"attrs"`` keys, passed to
    ``find``). Products are created with ``is_allowed=False`` and
    ``redirect_url`` set to the page URL; ``self.nb_product`` is
    incremented for every page that was parsed.

    The crawl stops at the first product or price that cannot be saved.
    """
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        redirect_url = self.base_url + product_url
        try:
            with closing(urlopen(redirect_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s' % product_url)
            # Without this the previous page's soup would be parsed again.
            continue

        # Get description of product
        description = product_soup.find(
            self.description_tag["name"], self.description_tag["attrs"]).text

        # Get price of product: every non-digit character (including any
        # decimal separator) is dropped before the conversion.
        # NOTE(review): the 1.2 factor presumably adds 20% VAT to a
        # tax-free price -- confirm.
        price_text = product_soup.find(
            self.price_tag["name"], self.price_tag["attrs"]).find('h4').text
        price = str(float(''.join(x for x in price_text if x.isdigit())) * 1.2)

        # Get summary of product: the part after the first "//" of the <h1>
        summary = product_soup.find('h1').text.split("//")[1].strip()

        # Get image of product; spaces and "é" are percent-encoded by hand
        image_url = self.base_url + product_soup.find(
            self.image_tag["name"], self.image_tag["attrs"]).get('src')
        image_url = image_url.replace(' ', '%20')
        image_url = image_url.encode('utf-8')
        image_url = image_url.replace('é', '%C3%A9')

        self.nb_product += 1
        deposit_amount = 0.0

        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(
                    slug=category_mapping[category]),
                is_allowed=False,
                redirect_url=redirect_url)

            try:
                with closing(urlopen(image_url)) as image:
                    product.pictures.add(
                        Picture.objects.create(
                            image=uploadedfile.SimpleUploadedFile(
                                name='img', content=image.read())))
            except HTTPError:
                print('\nerror loading image for object at url: %s'
                      % image_url)

            try:
                product.prices.add(Price(amount=price, unit=UNIT.DAY))
            except Exception as e:
                print('PRICE ERROR')
                print('error: %s' % str(e))
                break
        except Exception as e:
            print('CANNOT CREATE THE PRODUCT %s \n' % (summary))
            print('error: %s' % str(e))
            break
def _product_crawler(self):
    """Create a product for every entry of ``self.product_links``.

    Pops ``(product_url, category)`` pairs until the dict is empty. For
    each page it extracts the picture URL, title, description and price,
    creates a ``Product`` owned by ``self.patron`` at ``self.address``,
    then attaches the picture and a per-day price. Extraction is
    best-effort and every value is reset for each page, so nothing leaks
    from the previous product: a page that cannot be loaded or has no
    title is skipped, a missing description becomes ``" "``, a missing
    picture or price is simply not attached. The number of products of the
    patron is printed at the end.
    """
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    def _parse_price(text):
        # Build "<units>.<cents>" from the first two digit groups of the
        # displayed price. The cents are kept as text so that a leading
        # zero survives ("12,05" -> "12.05", not "12.5").
        digits = re.findall(r'\d+', text)
        if len(digits) == 1:
            return "%s" % int(digits[0])
        return "%s.%s" % (int(digits[0]), digits[1])

    while True:
        try:
            product_url, category = self.product_links.popitem()
        except KeyError:
            break

        try:
            with closing(urlopen(product_url)) as product_page:
                product_soup = BeautifulSoup(product_page, 'html.parser')
        except HTTPError:
            print('error loading page for object at url %s' % product_url)
            # Without this the previous page's soup would be parsed again.
            continue

        # Get the image
        try:
            image_href = product_soup.find(
                'a', class_="elevatezoom-gallery").find('img').get('src')
            image_url = "%s/boutique/%s" % (self.base_url, image_href)
        except Exception:
            image_url = None
            print("pass image")

        # Get the title
        try:
            summary = product_soup.find('h1').text
        except Exception:
            # No title: skip the page rather than reuse the previous one.
            print("pass title")
            continue

        # Get the description
        try:
            description = product_soup.find(
                'span', id='texte_description_fiche_produit').text
        except Exception:
            description = " "
            print('pass description')

        # Get the price
        try:
            price = _parse_price(product_soup.find(
                'span', id='texte_prix_si_prix_desactive').text)
        except Exception:
            price = None
            print('pass price')

        # Create deposit
        deposit_amount = 0.0

        # Create the product
        try:
            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                address=self.address,
                owner=self.patron,
                category=Category.objects.get(
                    slug=category_mapping[category]))

            if image_url is not None:
                try:
                    with closing(urlopen(image_url)) as image:
                        product.pictures.add(
                            Picture.objects.create(
                                image=uploadedfile.SimpleUploadedFile(
                                    name='img', content=image.read())))
                except HTTPError:
                    print('\nerror loading image for object at url: %s'
                          % image_url)

            # Add the price to the product
            if price is None:
                print('PRICE ERROR')
            else:
                try:
                    product.prices.add(Price(amount=price, unit=UNIT.DAY))
                except Exception as e:
                    print('PRICE ERROR')
                    print('error: %s' % e)
        except Exception as e:
            print('CANNOT CREATE THE PRODUCT %s \n %s' % (summary, product_url))
            print('error: %s' % e)

    print("\n %s products created" % self.patron.products.all().count())
def handle(self, *args, **options):
    """Import the products of one patron from a spreadsheet.

    Expects exactly three positional arguments: the spreadsheet path, the
    folder holding the product images, and the patron id.

    The first sheet is read: row 0 is the header, row 1 is skipped (empty
    line), every following row describes one product. Columns read by
    name: ``titre``, ``description``, ``categorie`` (category slug),
    ``caution`` (deposit, defaults to 0), the ``prix_*`` price columns,
    ``photo`` (file name inside the image folder) and ``photo_url``.

    Each row is handled independently: an error is printed and the import
    continues with the next row.
    """
    from accounts.models import Patron
    from products.choices import UNIT
    from products.models import Category, Picture, Price, Product

    if len(args) != 3:
        print("I need exactly three argument table path, image folder path and patron id")
        return
    table_path, image_folder, patron_id = args

    try:
        patron = Patron.objects.get(pk=patron_id)
    except Patron.DoesNotExist:
        print("Can't find the user")
        return
    try:
        address = patron.addresses.all()[0]
    except IndexError:
        print("Can't find an address for the user")
        return

    # Spreadsheets are binary files: read them in binary mode.
    with open(table_path, 'rb') as xlsx:
        sheet = xlrd.open_workbook(file_contents=xlsx.read()).sheets()[0]
    if not sheet.nrows:
        print("The table is empty")
        return

    header = tuple(next_row(sheet, 0))  # the header line

    # Spreadsheet column -> rental unit of the price it holds.
    price_columns = (
        ('prix_jour', UNIT.DAY),
        ('prix_weekend', UNIT.WEEK_END),
        ('prix_semaine', UNIT.WEEK),
        ('prix_2semaines', UNIT.TWO_WEEKS),
        ('prix_mois', UNIT.MONTH),
    )

    # Row 1 is the empty line below the header; data starts at row 2.
    for row in range(2, sheet.nrows):
        try:
            product_row = dict(zip(header, next_row(sheet, row)))
            summary = product_row["titre"].lower()
            description = product_row["description"]

            try:
                category = Category.objects.get(slug=product_row["categorie"])
            except Category.DoesNotExist:
                # Skip the row instead of reusing the previous row's
                # category.
                print("error category: %s" % product_row["categorie"])
                continue

            deposit_amount = product_row.get('caution') or 0

            product = Product.objects.create(
                summary=summary,
                description=description,
                deposit_amount=deposit_amount,
                category=category,
                address=address,
                owner=patron)

            for column, unit in price_columns:
                amount = product_row.get(column)
                if amount in (None, ''):
                    # Column absent or cell left empty: no price for this
                    # unit.
                    continue
                try:
                    product.prices.add(Price(amount=amount, unit=unit))
                except Exception as e:
                    print("error price %s: %s" % (column, e))

            # Picture stored in the image folder
            image_name = product_row.get("photo")
            if image_name:
                image_path = '%s/%s' % (image_folder, image_name)
                try:
                    with open(image_path, 'rb') as image:
                        picture = Picture.objects.create(
                            image=uploadedfile.SimpleUploadedFile(
                                name='img', content=image.read()))
                    product.pictures.add(picture)
                except Exception:
                    print(image_path)

            # Picture available online
            image_url = product_row.get("photo_url")
            if image_url:
                try:
                    with closing(urlopen(image_url)) as image:
                        picture = Picture.objects.create(
                            image=uploadedfile.SimpleUploadedFile(
                                name='img', content=image.read()))
                    product.pictures.add(picture)
                except Exception as e:
                    print("error image url %s: %s" % (image_url, e))

            product.save()
            print(product)
        except Exception as e:
            print(e)