import json from pprint import pprint from byteify import byteify with open('brussels_metro_v2.json') as data_file: data = json.load(data_file) #lets byteify our input json input so it gets rid of u's in front of all attributes #the module for converting encoding is in byteify.py data=byteify(data) #modules for parsing a metro and apply blind search algorithm are: #i. Breadth first search #ii. Uniform cost search #iii. Depth first search #iv. Iterative deepening #Breath First Search def BFS(input_data,start,end): #breath search can be implemented by queues counter=0; array=[] array.append(start) findstation=0; while (counter<len(array) and findstation==0): if array[counter]==end: findstation=1; break; else: for station in input_data['stations']:
THRESHOLD = 50 # Any deal with votes above the threshold will be included in the email SENDER = "*****@*****.**" RECIPIENTS = ["*****@*****.**", "*****@*****.**"] USERNAME = "******" PASSWORD = "******" FILE_DIR = "/your/working/directory/here/deals_data" URLS = [] URLS.append("http://ozbargain.com.au/deals?page=0") URLS.append("http://ozbargain.com.au/deals?page=1") URLS.append("http://ozbargain.com.au/deals?page=2") URLS.append("http://ozbargain.com.au/deals?page=3") URLS.append("http://ozbargain.com.au/deals?page=4") newDeals = getDeals(URLS) newDeals = byteify(newDeals) print newDeals if os.path.isfile(FILE_DIR): f = open(FILE_DIR, 'r') oldDeals = json.load(f) f.close() oldDeals = byteify(oldDeals) mailContent = "" htmlContent = "" for id in newDeals: if id in oldDeals: print "Votes: ", oldDeals[id]['votes'], "->", newDeals[id]['votes'] if ((int(newDeals[id]['votes']) - int(oldDeals[id]['votes']) >= VOTE_DIFF) or int(newDeals[id]['votes']) >= THRESHOLD) and (str(oldDeals[id]['emailed']) == 'False'): print "#####################################" print "ID: ", id
def _parse_item_details(cell):
    """Extract ``(description, info, location)`` from one catalog detail cell.

    Every ``<b>`` element in *cell* is treated as a field label, and the node
    that immediately follows it as that field's value.  Fields that are
    missing or empty are returned as ``None``.
    """
    description = None
    info = None
    location = None
    for label in cell.findAll('b'):
        tag = label.find(text=True)
        sibling = label.next_sibling
        # An empty <b></b>, or a label that is the last node in the cell,
        # has no value to read; skip it instead of crashing the whole scrape.
        if tag is None or sibling is None:
            continue
        content = sibling.strip()
        if not content:
            continue
        if content[0] == ":":
            # Drop the colon and the character after it (presumably the
            # space separating label and value).
            content = content[2:]
        if "Description" in tag:
            description = content
        elif "Additional Info" in tag:
            info = content
        elif "Item Location" in tag:
            location = content
        # Nothing left to look for once all three fields are filled in.
        if description and info and location:
            break
    return description, info, location


def scrape_sale(sale):
    """Scrape one sale catalog page and store its items in ``fta.sqlite``.

    The page at URL *sale* is downloaded and parsed; every row of the
    ``DataTable`` table except the first becomes one record in the ``items``
    table.  ``itemId`` is unique with ``ON CONFLICT REPLACE``, so scraping the
    same sale again overwrites the earlier rows.

    Args:
        sale: URL of the sale's catalog page.  The per-item link is built
            from this URL with ``"mnprint"`` replaced by ``"mnlist"``.

    Returns:
        None.  The result is the rows committed to the database.
    """
    page = urllib2.urlopen(sale)
    try:
        # BeautifulSoup consumes the whole response while constructing the
        # tree, so the handle can be released straight afterwards.
        soup = BeautifulSoup(page, 'lxml')
    finally:
        page.close()
    catalog = soup.find('table', {'id': 'DataTable'})

    conn = sqlite3.connect("fta.sqlite")
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS items (\
            location text, \
            timeout text, \
            sale text, \
            itemId text, \
            link text, \
            info text, \
            description text, \
            UNIQUE(itemId) ON CONFLICT REPLACE)")

        # The closing time is the last " - "-separated piece of the second
        # text node in the centered header paragraph.
        header = soup.find('div', id='wrapper').find('p', align='center')
        title = header.findAll(text=True)[1]
        timeout = byteify(title.split(' - ')[-1])

        # Identical for every row, so compute it once.
        item_page = sale.replace("mnprint", "mnlist")

        args = []
        # The first <tr> is skipped (presumably the table's header row).
        for row in catalog.findAll("tr")[1:]:
            cells = row.findAll('td')
            itemId = cells[0].find(text=True).strip(".")
            link = "{}/{}".format(item_page, itemId)
            description, info, location = _parse_item_details(cells[1])
            args.append((
                location,
                timeout,
                sale,
                itemId,
                link,
                info,
                description))

        cursor.executemany(
            "INSERT INTO items(location, timeout, sale, itemId, \
            link, info, description) \
            VALUES(?,?,?,?,?,?,?)", args)
        conn.commit()
    finally:
        # Always release the database handle, including when parsing or the
        # insert raises part-way through.
        conn.close()
VOTE_DIFF = 5 # Number of change in votes required to include deal in the email THRESHOLD = 50 # Any deal with votes above the threshold will be included in the email SENDER = "*****@*****.**" RECIPIENTS = ["*****@*****.**", "*****@*****.**"] USERNAME = "******" PASSWORD = "******" FILE_DIR = "/your/working/directory/here/deals_data" URLS = [] URLS.append("http://ozbargain.com.au/deals?page=0") URLS.append("http://ozbargain.com.au/deals?page=1") URLS.append("http://ozbargain.com.au/deals?page=2") URLS.append("http://ozbargain.com.au/deals?page=3") URLS.append("http://ozbargain.com.au/deals?page=4") newDeals = getDeals(URLS) newDeals = byteify(newDeals) print newDeals if os.path.isfile(FILE_DIR): f = open(FILE_DIR, 'r') oldDeals = json.load(f) f.close() oldDeals = byteify(oldDeals) mailContent = "" htmlContent = "" for id in newDeals: if id in oldDeals: print "Votes: ", oldDeals[id]['votes'], "->", newDeals[id]['votes'] if ((int(newDeals[id]['votes']) - int(oldDeals[id]['votes']) >= VOTE_DIFF) or int(newDeals[id]['votes']) >= THRESHOLD) and (str(
#!/usr/bin/python
from byteify import byteify
import json

# Prints out the deals_data json file in an easy to read format
DEALS_DATA_DIR = "file location of deals_data"

# The context manager closes the file even when the JSON fails to parse.
with open(DEALS_DATA_DIR, 'r') as f:
    data = json.load(f)
data = byteify(data)

# One line per deal: id, date, vote count, emailed flag and title.
for deal_id in data:
    deal = data[deal_id]
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("{} {} Votes: {} Emailed: {} {}".format(
        deal_id, deal['date'], deal['votes'], deal['emailed'],
        deal['title']))