def _find_specials(product_page):
    """Return the set of image titles found in table cells marked as specials.

    A ``td`` cell counts as a special when its markup contains
    "specials.gif".  Inside such a cell, the ``title="..."`` value of every
    ``img`` is collected, except images whose markup mentions
    "Coles Special" or "More product information available".

    Returns an empty set when ``product_page`` is None.
    """
    specials = set()
    if product_page is None:
        # None is the declared default of filter_products; nothing to parse.
        return specials

    soup = BeautifulSoup(product_page)
    for cell in soup.findAll('td'):
        cell_html = str(cell)
        if "specials.gif" not in cell_html:
            continue
        # Re-parse just this cell so only its own images are inspected.
        for img in BeautifulSoup(cell_html).findAll('img'):
            img_html = str(img)
            if ("title=\"" in img_html
                    and "Coles Special" not in img_html
                    and "More product information available" not in img_html):
                specials.add(img_html.split("title=\"")[1].split("\"")[0])
    return specials


def _record_price(product_name, decimal_price, sale):
    """Store today's price for ``product_name``, creating the Product if needed.

    Nothing is written when the product's price history already holds a
    record whose ``str(date)`` equals today's date string.
    """
    # NOTE(review): month and day are not zero-padded here.  If Price.date is
    # a DateField, str(p.date) is zero-padded and the clash check below would
    # miss single-digit months/days -- confirm against the model definition.
    date = str(now.year) + "-" + str(now.month) + "-" + str(now.day)

    if len(Product.objects.filter(name=product_name)) == 0:
        this_product = Product(name=product_name)
        this_product.save()
        print("New Product: " + product_name)
    else:
        assert len(Product.objects.filter(name=product_name)) == 1
        this_product = Product.objects.get(name=product_name)
        print("Existing Product: " + product_name)

    date_clash = False
    for p in this_product.price_history.all():
        if str(p.date) == date:
            print("Pre-existing record for this date")
            date_clash = True

    if not date_clash:
        this_price = Price(price=decimal_price, date=date, sale=sale,
                           product_id=this_product.id)
        this_price.save()
        this_product.price_history.add(this_price)
        this_product.save()


def filter_products(links, product_page=None, products=None, running_total=None):
    """Collect (name, price, on_sale) tuples from a page's links and store prices.

    A product is recognised as the link that immediately precedes a link
    whose text is "Add to trolley"; the price is read from that
    "Add to trolley" link's url (the text between the first "$" and the
    following "'").

    Args:
        links: iterable of link objects exposing ``.text`` and ``.url``.
        product_page: markup of the page, used to detect specials; may be
            None, in which case no product is flagged as on sale.
        products: optional list to append results to; a new list is created
            when omitted.
        running_total: optional object whose ``increment(product_name)`` is
            called once per product found.

    Returns:
        The ``products`` list, with one ``(name, "$price", sale)`` tuple
        appended per product found.

    Note:
        The original comments describe this function as recursive with a
        ``has_next`` flag as the base case; no recursive call is present in
        this block, so it processes exactly one page.
    """
    if products is None:
        products = []

    specials = _find_specials(product_page)

    # Products are identified by the link that precedes "Add to trolley",
    # so remember the previous link while walking the list.
    previous_link = None
    for link in links:
        if link.text == "Add to trolley" and previous_link is not None:
            product_name = previous_link.text
            sale = product_name in specials
            price_text = link.url.split("$")[1].split("'")[0]
            product_price = "$" + price_text
            products.append((product_name, product_price, sale))

            # Only the conversion is guarded, so a ValueError raised by the
            # database code is not misreported as a bad price.
            try:
                decimal_price = float(price_text)
            except ValueError:
                print("Unable to parse price, not a valid number:" + product_price)
            else:
                _record_price(product_name, decimal_price, sale)

            if running_total:
                running_total.increment(product_name)
        previous_link = link

    return products
# Manual smoke test: make the Django project importable, then make sure a
# product called "foo" exists and attach a price record dated today to it.
import sys
sys.path.extend([
    "/var/www_coles_dj/coles_scrape",
    "/var/www_coles_dj/",
])

import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'

from django.db import models
import datetime
now = datetime.datetime.now()

# Imported only after the settings module has been named above.
from coles_data.models import Product, Price

new_product = "foo"

if len(Product.objects.filter(name=new_product)) != 0:
    # The name is expected to match exactly one row.
    assert len(Product.objects.filter(name=new_product)) == 1
    x = Product.objects.get(name=new_product)
    print("Existing Product: " + new_product)
else:
    x = Product(name=new_product)
    x.save()
    print("New Product: " + new_product)

# Year-month-day joined with dashes, without zero padding.
y = Price(
    price=0.5,
    date="-".join(str(part) for part in (now.year, now.month, now.day)),
    sale=False,
    product_id=x.id,
)
y.save()
x.price_history.add(y)
x.save()

print(Price.objects.all())
from coles_data.models import Product, Price try: f = open(sys.argv[1], 'r') date = raw_input("Date for this file: ") for line in f: try: name, price, sale = line.split("||") price = float(price[1:]) sale = sale.rstrip() sale = (sale == "True") #If this product does not already exist in the database if len(Product.objects.filter(name=name)) == 0: this_product = Product(name = name) this_product.save() else: assert len(Product.objects.filter(name=name)) == 1 this_product = Product.objects.get(name=name) date_clash = False for p in this_product.price_history.all(): if str(p.date) == date: print "Pre-existing record for this date" date_clash = True if not date_clash: this_price = Price(price=price,date=date,sale=sale,product_id=this_product.id) this_price.save() this_product.price_history.add(this_price) this_product.save()