def _digits_only(text):
    """Return the characters of *text* that ``int()`` accepts, joined together."""
    kept = []
    for ch in text:
        try:
            int(ch)
        except ValueError:
            continue
        kept.append(ch)
    return "".join(kept)


def _int_or_zero(text):
    """Parse *text* (whitespace-stripped) as an int; return 0 if it is not numeric."""
    try:
        return int(text.strip())
    except ValueError:
        return 0


def _extract_machine_type(content):
    """Return the first run of digits in the second comma-separated piece of
    the ``modelClick(...);`` call found in *content*.

    Raises:
        AttributeError: no ``modelClick(...);`` call is present, or the
            second piece contains no digits (``re.search`` returned None).
        IndexError: the matched call text contains no comma.
    """
    call = re.search(r'modelClick\((.*)\);', content, re.M | re.I)
    second_piece = call.group().split(",")[1].strip()
    return re.search(r'([0-9]+)', second_piece).group().strip()


def getData(gh, hallcode, machine_range):
    """Parse one machine page and store its table rows in the database.

    The page HTML is read from ``gh.content``. Every ``<tr>`` except the first
    becomes a record that is upserted into ``con['data']``, keyed on
    hallcode / machine / machine_type / date / time_of_win. After the rows,
    the hall, machine type and machine are registered through ``mdb`` when
    ``mdb.machine_details`` has no matching entry, and a raw dump of all
    stripped cell texts is inserted into ``pachinko_dump2``.

    Args:
        gh: object whose ``content`` attribute holds the page HTML.
        hallcode: hall identifier stored on every record.
        machine_range: stored on every record; for a new record it is
            replaced by ``get_highest_range(...)`` of the same day's records
            when that value is larger.

    Returns:
        None. Returns early, storing nothing, when the page has no
        ``<h4>`` text under ``div#dedama_past_table``.

    Raises:
        AttributeError, IndexError: see ``_extract_machine_type``.
        IndexError: a data row has fewer than three td/th text cells.
    """
    mdb = DBConnection()
    con = mdb.db
    content = unicode(gh.content)
    hxs = HtmlXPathSelector(text=content)
    rows = hxs.select('//table//tr').extract()

    try:
        heading = hxs.select('//div[@id="dedama_past_table"]//h4/text()').extract()[0]
    except IndexError:
        # No heading to take the machine number from: nothing to store.
        return
    machine = _digits_only(heading)

    # Timestamp of the dump is taken before the rows are processed.
    dump = {
        "timestamp": datetime.now(),
        "hallcode": hallcode,
    }

    machine_type = _extract_machine_type(content)
    # Single-argument form: prints the same text as `print "machine_set", x`
    # under the Python 2 print statement and also parses as a function call.
    print("%s %s" % ("machine_set", machine_type))

    dump["machine_type"] = machine_type
    dump["machine"] = machine
    dump["date"] = today_date
    dump["machine_range"] = machine_range

    jackpots = []
    for row_html in rows[1:]:  # the first <tr> is always skipped
        row_sel = HtmlXPathSelector(text=row_html)
        # All <td> texts first, then all <th> texts.
        cells = row_sel.select('//td/text()').extract() + row_sel.select('//th/text()').extract()

        # NOTE: cells[0..2] are required; a shorter row raises IndexError.
        win_text = cells[0].strip()
        time_of_win = cells[1].strip()
        if time_of_win == "--":
            time_of_win = "NaN"

        res = {
            "timestamp": datetime.now(),
            "hallcode": hallcode,
            "machine_type": machine_type,
            "machine": machine,
            "date": today_date,
            "machine_range": machine_range,
            # A "*" in the first cell sets the renchan flag.
            "renchan": 1 if "*" in win_text else 0,
            "win_number": _int_or_zero(win_text.replace("*", "")),
            "time_of_win": time_of_win,
            "spin_count_of_win": _int_or_zero(cells[2]),
            # The fourth cell is optional; missing or non-numeric -> 0.
            "total_balls_out": _int_or_zero(cells[3]) if len(cells) > 3 else 0,
        }
        # The fifth cell is optional; the key is omitted when absent.
        if len(cells) > 4:
            res["column5"] = cells[4].strip()

        jackpots.append([cell.strip() for cell in cells])

        key = {
            "hallcode": hallcode,
            "machine": machine,
            "machine_type": machine_type,
            "date": today_date,
            "time_of_win": time_of_win,
        }
        print("%s %s" % ("saving ", res))

        existing = con['data'].find_one(key)
        if not existing:
            same_day_records = con['data'].find({
                "hallcode": hallcode,
                "machine": machine,
                "machine_type": machine_type,
                "date": today_date,
            })
            highest_range = get_highest_range(same_day_records)
            if machine_range < highest_range:
                res['machine_range'] = highest_range
            con["data"].update(key, res, upsert=True)
        elif time_of_win == 'NaN':
            # An existing record is only overwritten for the "--" row.
            con["data"].update(key, res, upsert=True)

    # Save hallcode, machine_type, machine if one is new.
    if not mdb.machine_details.find({'hallcode': hallcode}).count():
        mdb.insert_hallcode(hallcode)
    # Fixed: this check previously tested the cursor object itself (no
    # .count()), which is truthy regardless of matches, so the insert below
    # never ran. Assumes machine_details is a pymongo-style collection, as
    # the sibling checks' .find(...).count() usage suggests.
    if not mdb.machine_details.find(
            {'machine_type': machine_type, 'ancestors': [hallcode]}).count():
        mdb.insert_machine_type(hallcode, machine_type)
    if not mdb.machine_details.find(
            {'machine': machine, 'ancestors': [machine_type, hallcode]}).count():
        mdb.insert_machine(hallcode, machine_type, machine)

    dump["series"] = jackpots
    # NOTE(review): con is already mdb.db, so this goes through an attribute
    # named `db` on it; if con is a pymongo Database this presumably targets
    # a collection named "db.pachinko_dump2" - confirm that is intended.
    con.db["pachinko_dump2"].insert(dump)