def load_transactions():
    """Copy every document in soccer_db.transactions into the SQL table.

    Each document's team names are resolved through Team.objects.find
    (creating the team when needed) and the person through Bio.objects.find.
    Any failure while converting a document drops into the debugger.
    """
    print("loading {} transactions".format(soccer_db.transactions.count()))

    rows = []
    for doc in soccer_db.transactions.find():
        try:
            doc.pop('_id')

            # A missing or empty team name maps to a NULL foreign key.
            origin_id = None
            if doc.get('team_from'):
                origin_id = Team.objects.find(doc['team_from'], create=True).id

            destination_id = None
            if doc.get('team_to'):
                destination_id = Team.objects.find(doc['team_to'], create=True).id

            person = Bio.objects.find(doc['person'])
            rows.append({
                'ttype': doc['ttype'],
                'person_id': person.id,
                'team_to_id': destination_id,
                'team_from_id': origin_id,
                'date': doc['date'],
            })
        except:
            import pdb; pdb.set_trace()

    insert_sql("transactions_transaction", rows)
def load_assists():
    """Build assist rows for every goal in soccer_db.goals and bulk-insert them.

    For each goal document the team and scorer are resolved to ids, the
    matching goal id is looked up through goal_getter, and one row per
    distinct assisting player is queued for the 'goals_assist' table.
    Goals with no assists, no date, or no matching goal id contribute nothing.
    """
    print("\nloading assists\n")

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    goal_getter = make_goal_getter()

    def create_assists(goal):
        """Return the list of assist rows for one goal document (may be empty)."""
        if not goal['assists'] or goal['assists'] == ['']:
            return []

        team_id = team_getter(goal['team'])
        bio_id = None
        if goal['goal']:
            bio_id = bio_getter(goal['goal'])

        if not goal['date']:
            return []

        # Rebuild the date from its year/month/day parts before the lookup.
        d = datetime.date(goal['date'].year, goal['date'].month, goal['date'].day)
        goal_id = goal_getter(team_id, bio_id, goal['minute'], d)
        if not goal_id:
            print("Cannot create assists for %s" % goal)
            return []

        # Resolve every assister exactly once. `order` is the 1-based position
        # in the source list, so a skipped duplicate still uses up its slot.
        assist_ids = [bio_getter(name) for name in goal['assists']]
        rows = []
        seen = set()
        for order, assist_id in enumerate(assist_ids, start=1):
            if assist_id and assist_id not in seen:
                seen.add(assist_id)
                rows.append({
                    'player_id': assist_id,
                    'goal_id': goal_id,
                    'order': order,
                })
        return rows

    assists = []
    i = 0  # keeps the final print valid when there are no goals
    for i, goal in enumerate(soccer_db.goals.find()):
        if i % 50000 == 0:
            print(i)
        assists.extend(create_assists(goal))

    print(i)
    print(len(assists))
    insert_sql('goals_assist', assists)
def generate_stats_generic(table, qs, make_key, update_dict):
    """Aggregate stat rows by a caller-supplied key and bulk-insert the totals.

    Args:
        table: Name of the destination table handed to insert_sql.
        qs: Object whose ``values()`` yields one dict per stat row.
        make_key: Callable mapping a stat dict to its grouping key, e.g.
            ``lambda s: s['player']`` for all-time player stats.
        update_dict: Mapping whose keys are stripped from every aggregated
            row before insertion.

    The first row seen for a key becomes the accumulator; later rows with the
    same key are added into it field by field, skipping the id-like columns
    listed in ``excluded``.
    """
    final_dict = {}

    # Don't try to add these items.
    excluded = ('player_id', 'team_id', 'competition_id', 'season_id', 'source_id')

    for stat in qs.values():
        # Set unaddable placeholder values to None.
        for k, v in stat.items():
            if v in ('?', 'None', '-'):
                stat[k] = None

        key = make_key(stat)

        if key not in final_dict:
            # The first row for a key supplies all necessary fields.
            final_dict[key] = stat
            continue

        # This lookup was commented out in the previous revision, which made
        # every merge fail with NameError on `d`.
        merged = final_dict[key]
        for field, value in stat.items():
            if field in excluded:
                continue
            if not merged[field]:
                merged[field] = value
            elif value:
                try:
                    merged[field] += value
                except Exception:
                    # Debugging hook kept from the original for values that
                    # cannot be added together.
                    import pdb; pdb.set_trace()

    for stat in final_dict.values():
        stat.pop('id', None)
        for e in update_dict:
            stat.pop(e, None)

    insert_sql(table, list(final_dict.values()))
def vmadd():
    """Handle a virtual-machine request submitted by the logged-in user.

    Redirects to the login page when no user is in the session. On POST,
    sessions whose role is 0 or 1 are refused; for any other role the form is
    passed through vmappend(), stamped with placeholder dates, the session
    user as owner and status 0, then inserted via insert_sql().

    Returns:
        A redirect response, or a JSON string with 'code' and 'msg' keys.
    """
    if 'username' not in session:
        return redirect('/login/')

    if request.method != 'POST':
        # Previously a non-POST request fell through without a usable response.
        return json.dumps({'code': 1, 'msg': "unsupported request method"})

    msg = sessionmsg()
    if msg['role'] in (0, 1):
        return json.dumps({'code': 1, 'msg': "管理员无法申请账号"})

    # Sample form_result data:
    # {'project': u'1', 'term': u'1', 'used': u'22222222222', 'resource': u'2', 'system': u'os6'}
    form_result = request.form.to_dict()
    data = vmappend(form_result)
    field = [
        "hostname", "system", "term", "cpu", "mem", "disk", "project",
        "used", "ip", "mask", "gateway", "create_date", "expire_date",
        "owner", "status",
    ]
    data.update(
        create_date="1900-01-01",
        expire_date="1900-01-01",
        owner=msg['username'],
        status=0,
    )

    try:
        result = insert_sql(table_name, field, data)
        if result['code'] == 0:
            result = {
                'code': 0,
                'msg': data['hostname'] + "虚机申请成功,等待审核",
            }
    except Exception:
        # Any failure of the insert is reported with the "already exists" message.
        result = {'code': 1, 'msg': data['hostname'] + "虚机已存在"}
    return json.dumps(result)
def vpnadd():
    """Handle a VPN account request submitted by the logged-in user.

    Redirects to the login page when no user is in the session. On POST,
    sessions whose role is 0 or 1 are refused; for any other role the form
    values are stamped with placeholder dates, the session user as owner and
    status 0, then inserted via insert_sql().

    Returns:
        A redirect response, or a JSON string with 'code' and 'msg' keys.
    """
    if 'username' not in session:
        return redirect('/login/')

    if request.method != 'POST':
        # Previously a non-POST request fell through without a usable response.
        return json.dumps({'code': 1, 'msg': "unsupported request method"})

    msg = sessionmsg()
    if msg['role'] in (0, 1):
        return json.dumps({'code': 1, 'msg': "管理员无法申请账号"})

    field = ["name", "term", "used", "create_date", "expire_date", "owner", "status"]
    data = {k: v[0] for k, v in dict(request.form).items()}
    data.update(
        create_date="1900-01-01",
        expire_date="1900-01-01",
        owner=msg['username'],
        status=0,
    )

    try:
        result = insert_sql(table_name, field, data)
        if result['code'] == 0:
            result = {'code': 0, 'msg': data['name'] + "账号申请成功,等待审核"}
    except Exception:
        # Any failure of the insert is reported with the "already exists" message.
        result = {'code': 1, 'msg': data['name'] + "已存在"}
    return json.dumps(result)
def vpnadd1():
    """List usernames (GET) or add a VPN account for a named owner (POST).

    Redirects to the login page when no user is in the session. The POST
    path verifies the submitted owner through check() before inserting.
    """
    if 'username' not in session:
        return redirect('/login/')

    # GET: fetch the current user list so the front end can fill the <select>
    # of the add-VPN modal.
    if request.method == 'GET':
        users = getall('user', "username")
        print("result:")
        print(users)
        return json.dumps(users['msg'])

    # POST: add the VPN account.
    if request.method == 'POST':
        columns = ["name", "term", "used", "create_date", "expire_date", "owner", "status"]
        data = {k: v[0] for k, v in dict(request.form).items()}
        data.update(create_date="1900-01-01", expire_date="1900-01-01", status=0)

        # Check that the owner account exists in the user table.
        lookup = check('user', ['username'], {'username': data['owner']})
        if lookup['code'] == 1:
            return json.dumps({'code': 1, 'msg': data['owner'] + "申请人账号不存在"})

        # Insert the VPN record.
        try:
            result = insert_sql(table_name, columns, data)
            if result['code'] == 0:
                result = {'code': 0, 'msg': data['name'] + "账号添加成功"}
        except:
            result = {'code': 1, 'msg': data['name'] + "账号已存在"}
        return json.dumps(result)
def generate_stats_generic(table, qs, make_key, update_dict):
    """Sum stat rows that share a key and insert the totals into `table`.

    `make_key` decides the grouping (e.g. ``lambda s: s['player']`` for
    all-time player stats). The first row seen for a key is kept as the
    accumulator; the keys of `update_dict` and the 'id' column are removed
    from every accumulated row before insertion.
    """
    placeholders = ('?', 'None', '-')
    skip = ('player_id', 'team_id', 'competition_id', 'season_id', 'source_id')

    totals = {}
    for row in qs.values():
        # Guard against unaddable values.
        for name, val in row.items():
            if val in placeholders:
                row[name] = None

        bucket = make_key(row)
        if bucket not in totals:
            # The first row for this bucket provides every field.
            totals[bucket] = row
            continue

        acc = totals[bucket]
        for name, val in row.items():
            if name in skip:
                continue
            if not acc[name]:
                acc[name] = val
            elif val:
                try:
                    acc[name] += val
                except:
                    import pdb; pdb.set_trace()
                    _ = 0

    for row in totals.values():
        row.pop('id')
        for name in update_dict.keys():
            if name in row:
                row.pop(name)

    insert_sql(table, list(totals.values()))
def generate_stats_generic(table, qs, make_key, update_dict):
    """Collapse the rows of `qs` into one summed row per key and insert them.

    Rows are grouped by ``make_key(row)``. Id-like columns are never summed,
    placeholder strings are treated as missing, and the 'id' column plus the
    keys of `update_dict` are dropped before the rows go to insert_sql.
    """
    non_additive = ('player_id', 'team_id', 'competition_id', 'season_id', 'source_id')
    blanks = ('?', 'None', '-')

    def absorb(total, stat):
        # Fold one stat row into the running total for its group.
        for field, amount in stat.items():
            if field in non_additive:
                continue
            if not total[field]:
                total[field] = amount
                continue
            if not amount:
                continue
            try:
                total[field] += amount
            except:
                import pdb
                pdb.set_trace()
                _ = 0

    combined = {}
    for stat in qs.values():
        # Guard against unaddable values.
        for field, raw in stat.items():
            if raw in blanks:
                stat[field] = None

        group = make_key(stat)
        if group in combined:
            absorb(combined[group], stat)
        else:
            # The first row of a group seeds all of its fields.
            combined[group] = stat

    for stat in combined.values():
        stat.pop('id')
        for dropped in update_dict.keys():
            if dropped in stat:
                stat.pop(dropped)

    insert_sql(table, list(combined.values()))
def namedadd():
    """Insert a DNS record built from the submitted form.

    Redirects to the login page when no user is in the session.

    Returns:
        A redirect response, or a JSON string with 'code' and 'msg' keys.
    """
    if 'username' not in session:
        return redirect('/login/')

    # Still invoked as before; its return value was never used here.
    sessionmsg()

    if request.method != 'POST':
        # Previously a non-POST request fell through without a usable response.
        return json.dumps({'code': 1, 'msg': "unsupported request method"})

    data = {k: v[0] for k, v in dict(request.form).items()}
    # `fields` is not defined in this function; it comes from the enclosing scope.
    result = insert_sql('dns_records', fields, data)
    if result['code'] == 0:
        result = {'code': 0, 'msg': "Add Zone Successful"}
    return json.dumps(result)
def idcadd():
    """Insert an IDC row built from the submitted form.

    Redirects to the login page when no user is in the session.

    Returns:
        A redirect response, or a JSON string with 'code' and 'msg' keys.
    """
    if 'username' not in session:
        return redirect('/login/')

    # Still invoked as before; its return value was never used here.
    sessionmsg()

    if request.method != 'POST':
        # Previously a non-POST request fell through without a usable response.
        return json.dumps({'code': 1, 'msg': "unsupported request method"})

    idc = {k: v[0] for k, v in dict(request.form).items()}
    field = ['name', 'name_cn', 'address', 'adminer', 'phone']
    result = insert_sql('idc', field, idc)
    if result['code'] == 0:
        result = {'code': 0, 'msg': "IDC user success"}
    return json.dumps(result)
def add():
    """Insert a user row built from the submitted form.

    Redirects to the login page when no user is in the session.

    Returns:
        A redirect response, or a JSON string with 'code' and 'msg' keys.
    """
    if 'username' not in session:
        return redirect('/login/')

    if request.method != 'POST':
        # Previously a non-POST request fell through without a usable response.
        return json.dumps({'code': 1, 'msg': "unsupported request method"})

    field = ["username", "name_cn", "password", "mobile", "email", "role", "status"]
    data = {k: v[0] for k, v in dict(request.form).items()}
    print(data)
    result = insert_sql('user', field, data)
    if result['code'] == 0:
        result = {'code': 0, 'msg': "add user success"}
    return json.dumps(result)
def register():
    """Render the user-add page, or create a user from the submitted form.

    On POST the password is replaced by the hex MD5 digest of
    password + salt before the row is inserted into the 'user' table.

    Returns:
        A status string after a POST, otherwise the rendered
        'user-add.html' template.
    """
    if request.method == 'POST':
        data = {k: v[0] for k, v in dict(request.form).items()}
        # NOTE(review): salted MD5 is weak for password storage. It is left
        # unchanged because existing stored hashes presumably depend on it.
        data['password'] = hashlib.md5(data['password'] + salt).hexdigest()
        field = ['id', 'username', 'password', 'role']
        # Was `fieald`, which raised NameError on every POST.
        result = insert_sql('user', field, data)
        print(result)
        if result['code'] == 0:
            return '用添加成功'
        else:
            return '用户添加失败'
    return render_template('user-add.html')
def addhost():
    """Render the add-host page, or insert a host row from the submitted form.

    Redirects to the login page when no user is in the session.
    """
    if 'username' not in session:
        return redirect('/login/')

    if request.method != 'POST':
        return render_template('add-host.html')

    form = dict((k, v[0]) for k, v in dict(request.form).items())
    outcome = insert_sql('host', ['username', 'password', 'ip'], form)
    return '用添加成功' if outcome['code'] == 0 else '用户添加失败'
def serveradd():
    """Show the add-server form (GET) or insert a server row (POST).

    Redirects to the login page when no user is in the session. The GET
    page is rendered with the IDC and cabinet listings.
    """
    if 'username' not in session:
        return redirect('/login/')

    msg = sessionmsg()

    if request.method == 'GET':
        # `list` here is called with (table, fields), so it is presumably a
        # project helper rather than the builtin - confirm.
        idc_rows = list('idc', idc_fields)
        cabinet_rows = list('cabinet', cabinet_fields)
        return render_template(
            'serveradd.html',
            msg=msg,
            idc=idc_rows['msg'],
            cabinet=cabinet_rows['msg'],
        )

    if request.method == 'POST':
        form = {k: v[0] for k, v in dict(request.form).items()}
        outcome = insert_sql('server', server_field, form)
        if outcome['code'] == 0:
            outcome = {'code': 0, 'msg': "success"}
        return json.dumps(outcome)
def load_transactions():
    """Load soccer_db.transactions into the 'transactions_transaction' table.

    Team names are resolved (and created when needed) through
    Team.objects.find; the person is resolved through Bio.objects.find.
    A document that fails to convert drops into the debugger.
    """
    def resolve_team_id(name):
        # A missing or empty name maps to None without touching Team.
        if not name:
            return None
        return Team.objects.find(name, create=True).id

    print("loading {} transactions".format(soccer_db.transactions.count()))

    pending = []
    for record in soccer_db.transactions.find():
        try:
            record.pop('_id')
            from_id = resolve_team_id(record.get('team_from'))
            to_id = resolve_team_id(record.get('team_to'))
            person = Bio.objects.find(record['person'])
            pending.append({
                'ttype': record['ttype'],
                'person_id': person.id,
                'team_to_id': to_id,
                'team_from_id': from_id,
                'date': record['date'],
            })
        except:
            import pdb
            pdb.set_trace()

    insert_sql("transactions_transaction", pending)
def cabinetadd():
    """Show the add-cabinet form (GET) or insert a cabinet row (POST).

    Redirects to the login page when no user is in the session. The GET
    page is rendered with the id/name listing of IDCs.
    """
    if 'username' not in session:
        return redirect('/login/')

    msg = sessionmsg()

    if request.method == 'GET':
        # `list` here is called with (table, fields), so it is presumably a
        # project helper rather than the builtin - confirm.
        idc_rows = list('idc', ['id', 'name'])
        return render_template('cabinetadd.html', msg=msg, idc=idc_rows['msg'])

    if request.method == 'POST':
        form = {k: v[0] for k, v in dict(request.form).items()}
        columns = ['name', 'idc_id', 'u_num', 'power']
        outcome = insert_sql('cabinet', columns, form)
        if outcome['code'] == 0:
            outcome = {'code': 0, 'msg': "success"}
        return json.dumps(outcome)
def jobadd():
    """Show the add-job form (GET) or insert a job row (POST).

    Redirects to the login page when no user is in the session. The
    inserted row's 'apply_name' is always the session's username.
    """
    if 'username' not in session:
        return redirect('/login/')

    msg = sessionmsg()

    if request.method == 'GET':
        return render_template('jobadd.html', msg=msg)

    if request.method == 'POST':
        form = {k: v[0] for k, v in dict(request.form).items()}
        form['apply_name'] = session['username']
        columns = ['apply_type', 'apply_name', 'apply_desc']
        outcome = insert_sql('job', columns, form)
        if outcome['code'] == 0:
            outcome = {'code': 0, 'msg': "Job Add success"}
        return json.dumps(outcome)
def add():
    """Show the add-user form (GET) or insert a user row (POST).

    Redirects to the login page when no user is in the session.
    """
    if 'username' not in session:
        return redirect('/login/')

    # Called as in the original; the GET branch builds its own template context.
    msg = sessionmsg()

    if request.method == 'GET':
        context = {'username': session['username'], 'role': session['role']}
        return render_template('add.html', msg=context)

    if request.method == 'POST':
        columns = ["username", "name_cn", "password", "mobile", "email", "role", "status"]
        form = {k: v[0] for k, v in dict(request.form).items()}
        outcome = insert_sql('user', columns, form)
        if outcome['code'] == 0:
            outcome = {'code': 0, 'msg': "add user success"}
        return json.dumps(outcome)
def add():
    """Create a user from the submitted form, storing a salted MD5 password.

    Redirects to the login page when no user is in the session.

    Returns:
        A redirect response, or a JSON string with 'code' and 'msg' keys.
    """
    if 'username' not in session:
        return redirect('/login/')

    if request.method != 'POST':
        # Previously a non-POST request fell through without a usable response.
        return json.dumps({'code': 1, 'msg': "unsupported request method"})

    field = [
        "username", "name_cn", "password", "mobile", "email", "role", "status",
    ]
    data = {k: v[0] for k, v in dict(request.form).items()}
    # NOTE(review): salted MD5 is weak for password storage. It is left
    # unchanged because existing stored hashes presumably depend on it.
    data['password'] = hashlib.md5(data['password'] + salt).hexdigest()

    try:
        result = insert_sql('user', field, data)
        if result['code'] == 0:
            result = {'code': 0, 'msg': "add user success"}
        else:
            result = {'code': 1, 'msg': "add user failed"}
    except Exception:
        # Any failure of the insert is reported as a duplicate username.
        result = {'code': 1, 'msg': "username already exsit"}
    return json.dumps(result)
def vmadd1():
    """List usernames (GET) or add a VM record for a named owner (POST).

    Redirects to the login page when no user is in the session. The POST
    path verifies the submitted owner through check() before inserting.
    """
    if 'username' not in session:
        return redirect('/login/')

    # GET: fetch the current user list so the front end can fill the <select>
    # of the add modal.
    if request.method == 'GET':
        users = getall('user', "username")
        print("result:")
        print(users)
        return json.dumps(users['msg'])

    # POST: add the VM.
    if request.method == 'POST':
        data = vmappend(request.form.to_dict())
        columns = [
            "hostname", "system", "term", "cpu", "mem", "disk", "project",
            "used", "ip", "mask", "gateway", "create_date", "expire_date",
            "owner", "status",
        ]
        data.update(create_date="1900-01-01", expire_date="1900-01-01", status=0)

        # Check that the owner account exists in the user table.
        lookup = check('user', ['username'], {'username': data['owner']})
        if lookup['code'] == 1:
            return json.dumps({'code': 1, 'msg': data['owner'] + "申请人账号不存在"})

        # Insert the VM record.
        try:
            result = insert_sql(table_name, columns, data)
            if result['code'] == 0:
                result = {'code': 0, 'msg': data['hostname'] + "添加成功"}
        except:
            result = {'code': 1, 'msg': data['hostname'] + "添加失败"}
        return json.dumps(result)
def register():
    """Render the registration page, or register a user from the form.

    On POST the submission is rejected when the username or password is
    empty or when check_user() reports the user already exists; otherwise
    the row is inserted and the client is redirected to the login page.

    Returns:
        A redirect to '/login/' on success, otherwise the rendered
        'register.html' template (with `error` set for rejected input).
    """
    if request.method == 'POST':
        data = dict((k, v[0]) for k, v in dict(request.form).items())

        # 1. Reject empty credentials. This must run before hashing: the
        #    digest of an empty password is a non-empty string, so the old
        #    post-hash check could never fire.
        if not data.get("username") or not data.get("password"):
            error = 'UserName Or Password Not Null !'
            return render_template('register.html', error=error)

        # NOTE(review): salted MD5 is weak for password storage. It is left
        # unchanged because existing stored hashes presumably depend on it.
        data['password'] = hashlib.md5(data['password'] + salt).hexdigest()

        # 2. Reject a user that already exists.
        if check_user('user_messages', data):
            error = u'用户 %s 已经存在' % (data["username"])
            return render_template('register.html', error=error)

        # 3. Store the new user in the database.
        field = ['username', 'password', 'role']
        # NOTE(review): the arguments are passed as (table, data, field).
        # Confirm this matches the signature of this module's insert_sql.
        if insert_sql('user_messages', data, field):
            return redirect("/login/")
    return render_template('register.html')
def load_goals():
    """Load soccer_db.goals into 'goals_goal' and mirror them as events.

    Every goal that resolves to a game becomes a row in 'goals_goal'; each
    such row that has a scorer also yields a 'goal' event in 'events_event'.
    """
    print("\nloading goals\n")

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    game_getter = make_game_getter()
    gid_getter = make_gid_getter()

    def create_goal(goal):
        # Build the row for one goal document; an empty dict means "skip".
        team_id = team_getter(goal['team'])

        scorer_id = None
        if goal['goal']:
            scorer_id = bio_getter(goal['goal'])

        own_goal_scorer_id = None
        if goal.get('own_goal_player'):
            own_goal_scorer_id = bio_getter(goal['own_goal_player'])

        # Tough to apply a goal without a date...
        if not goal['date']:
            return {}

        # Coerce to date to match dict.
        when = goal['date']
        day = datetime.date(when.year, when.month, when.day)

        # Try gid first, fall back on team/date.
        game_id = gid_getter(goal['gid']) if 'gid' in goal else None
        if game_id is None:
            game_id = game_getter(team_id, day)

        if not game_id:
            print("Cannot create %s" % goal)
            return {}

        return {
            'date': goal['date'],
            'minute': goal['minute'],
            'team_id': team_id,
            'player_id': scorer_id,
            'own_goal_player_id': own_goal_scorer_id,
            'game_id': game_id,
            'own_goal': goal.get('own_goal', False),
            'penalty': goal.get('penalty', False),
        }

    i = 0  # keeps the final print valid when there are no goals
    goals = []
    for i, doc in enumerate(soccer_db.goals.find()):
        if i % 50000 == 0:
            print(i)
        row = create_goal(doc)
        if row:
            goals.append(row)

    print(i)
    insert_sql('goals_goal', goals)

    # Only goals with a known scorer become events.
    events = [
        {
            'game_id': row['game_id'],
            'team_id': row['team_id'],
            'minute': row['minute'],
            'etype': 'goal',
            'subject_id': row['player_id'],
            'object_id': None,
            'description': '',
        }
        for row in goals
        if row['player_id']
    ]
    insert_sql('events_event', events)
def load_fouls():
    """Load soccer_db.fouls into 'events_event' as red/yellow card events.

    Fouls without a team, without a date, or that cannot be matched to a
    game are skipped.
    """
    print("loading fouls")
    print(soccer_db.fouls.count())

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    game_getter = make_game_getter()
    gid_getter = make_gid_getter()

    def create_foul(foul):
        """Return the event row for one foul document, or {} to skip it."""
        if foul['team'] is None:
            return {}

        team_id = team_getter(foul['team'])
        bio_id = bio_getter(foul['name'])

        if foul['type'] == 'red':
            ftype = 'red'
        elif foul['type'] == 'yellow':
            ftype = 'yellow'
        else:
            # Debugging hook kept from the original. Skip the record afterwards
            # rather than crash on the unset `ftype` further down.
            import pdb; pdb.set_trace()
            return {}

        if not foul['date']:
            return {}

        # Coerce to date to match dict.
        d = datetime.date(foul['date'].year, foul['date'].month, foul['date'].day)

        # Try gid first, fall back on team/date.
        game_id = None
        if 'gid' in foul:
            game_id = gid_getter(foul['gid'])
        if game_id is None:
            game_id = game_getter(team_id, d)

        if not game_id:
            # Was `% goal`, an undefined name that raised NameError here.
            print("Cannot create %s" % foul)
            return {}

        return {
            'game_id': game_id,
            'team_id': team_id,
            'minute': foul['minute'],
            'etype': ftype,
            'subject_id': bio_id,
            'description': '',
        }

    fouls = []
    for i, foul in enumerate(soccer_db.fouls.find()):
        if i % 50000 == 0:
            print(i)
        f = create_foul(foul)
        if f:
            fouls.append(f)

    insert_sql('events_event', fouls)
def load_standings():
    """Load soccer_db.standings into the 'standings_standing' table.

    A second insert follows for "final" standings derived from the
    bookkeeping sets below. While the tracking block inside the loop stays
    disabled those sets remain empty, so that insert receives an empty list.
    """
    print("\n loading {} standings\n".format(soccer_db.standings.count()))

    team_getter = make_team_getter()
    competition_getter = make_competition_getter()
    season_getter = make_season_getter()

    # Create final standings from day-to-day standings
    final_standings = set()
    all_standings = set()
    max_game_standings = {}

    rows = []
    for doc in soccer_db.standings.find().sort('team', 1):
        doc.pop('_id')

        competition_id = competition_getter(doc['competition'])
        season_id = season_getter(doc['season'], competition_id)
        team_id = team_getter(doc['team'])

        # 'division' is intentionally no longer read; make sure it is gone
        # everywhere.
        rows.append({
            'competition_id': competition_id,
            'season_id': season_id,
            'group': doc.get('group') or '',
            'stage': doc.get('stage') or '',
            'team_id': team_id,
            'date': doc.get('date'),
            'games': doc['games'],
            'goals_for': doc.get('goals_for'),
            'goals_against': doc.get('goals_against'),
            'wins': doc['wins'],
            'shootout_wins': doc['shootout_wins'],
            'losses': doc['losses'],
            'shootout_losses': doc['shootout_losses'],
            'ties': doc['ties'],
            'points': doc.get('points'),
            'final': doc.get('final', False),
            'deduction_reason': '',
        })

        """
        key = (standing['team'], standing['competition'], standing['season'])
        all_standings.add(key)
        if final:
            final_standings.add(key)
        if key not in max_game_standings or standing['games'] > max_game_standings[key]:
            max_game_standings[key] = d
        """

    insert_sql("standings_standing", rows)

    # Handle this somewhere else.
    # Generate appropriate final standings.
    print("Generating final standings.")
    derived = [
        max_game_standings[key].copy()
        for key in all_standings - final_standings
    ]
    insert_sql("standings_standing", derived)
def insert_game_minutes():
    """Report how many game-minute rows are queued and bulk-insert them.

    The rows come from `l`, which is not defined in this function and is
    resolved from the enclosing scope.
    """
    minute_rows = l
    print("Generating {} game minutes".format(len(minute_rows)))
    insert_sql("games_gameminute", minute_rows)
def load_drafts():
    """Load soccer_db.picks into 'transactions_transaction' as draft picks.

    Creating Draft objects and inserting into 'drafts_pick' are both
    disabled; every pick is stored as a 'draft pick' transaction instead.
    """
    print("\nloading drafts\n")

    competition_getter = make_competition_getter()
    season_getter = make_season_getter()
    team_getter = make_team_getter()

    # Create the set of drafts.
    """
    for draft in soccer_db.drafts.find().sort('team', 1):
        draft.pop('_id')
        if draft['competition']:
            competition_id = competition_getter(draft['competition'])
        else:
            competition_id = None
        season_id = season_getter(draft['season'], competition_id)
        Draft.objects.create(**{
            'name': draft['name'],
            'season_id': season_id,
            'competition_id': competition_id,
            'start': draft.get('start'),
            'end': draft.get('end'),
        })
    """

    print("\nloading {} picks\n".format(soccer_db.picks.count()))

    # Create picks
    picks = []
    for pick in soccer_db.picks.find():
        # draft, text, player, position, team
        competition_name = pick.get('competition')
        competition_id = competition_getter(competition_name) if competition_name else None

        # The season is still resolved as before, although the row built
        # below does not use it.
        season_getter(pick.get('season'), competition_id)

        team_id = team_getter(pick['team'])

        former_team_id = None
        if pick.get('former_team'):
            former_team_id = team_getter(pick['former_team'])

        # Set the player reference.
        text = pick['text']

        # Draft picks were "drafted" in the MLS Allocation and Dispersal drafts.
        if "SuperDraft" in text or text.lower() == 'pass':
            player_id = None
        else:
            player_id = Bio.objects.find(text).id

        """
        picks.append({
            #'draft_id': draft.id,
            'player_id': player_id,
            'team_id': team_id,
            'text': text,
            'position': pick.get('position') or '',
            'former_team_id': former_team_id,
            'number': pick['number'],
        })
        """

        picks.append({
            'ttype': 'draft pick',
            'date': pick['date'],
            'person_id': player_id,
            'team_to_id': team_id,
            'team_from_id': former_team_id,
        })

    insert_sql("transactions_transaction", list(picks))
def load_lineups():
    """Load soccer_db.lineups into the 'lineups_appearance' table.

    Author's notes: this should be removed in favor of substitutions, and is
    ultimately subsumed by a combination of game stats and substitutions.
    The data set is too big for anything but raw SQL plus plain dicts.
    """
    print("\nloading lineups\n")

    from django.db import connection

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    game_getter = make_game_getter()
    gid_getter = make_gid_getter()

    def create_appearance(a):
        # Build one appearance row; a falsy return means "skip".
        if not a['name']:
            print(a)
            return None

        team_id = team_getter(a['team'])
        player_id = bio_getter(a['name'])

        # Prefer the explicit game id, then fall back on team/date.
        game_id = gid_getter(a['gid']) if 'gid' in a else None
        if game_id is None:
            game_id = game_getter(team_id, a['date'])

        if not game_id:
            import pdb; pdb.set_trace()
            print("Cannot create %s" % a)
            return {}

        minutes = None
        if a['on'] is not None and a['off'] is not None:
            try:
                minutes = int(a['off']) - int(a['on'])
            except:
                print("Fail on %s" % str(a))
                minutes = None

        return {
            'team_id': team_id,
            'game_id': game_id,
            'player_id': player_id,
            'on': a['on'],
            'off': a['off'],
            'minutes': minutes,
            'order': a.get('order', None),
        }

    # Create the appearance objects.
    appearances = []
    i = 0  # keeps the final print valid when there are no lineups
    for i, doc in enumerate(soccer_db.lineups.find()):
        row = create_appearance(doc)
        if row:
            appearances.append(row)
        if i % 50000 == 0:
            print(i)

    print(i)
    print("Creating lineups")
    insert_sql('lineups_appearance', appearances)
def load_game_stats():
    """Load per-player, per-game stats from soccer_db.gstats into 'stats_gamestat'.

    For each stat document the player, team and game are resolved to ids and
    the player's age on the stat date is computed from the Bio birthdate.
    Documents are skipped when the player is blank or cannot be resolved, or
    when no game or team id is found.
    """
    print("\nloading game stats\n")

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    # NOTE(review): source_getter is created but never used in this function.
    source_getter = make_source_getter()
    game_getter = make_game_getter()
    gid_getter = make_gid_getter()
    game_result_getter = make_game_result_getter()

    # Maps Bio id -> birthdate for every Bio that has one.
    birthdate_dict = dict(Bio.objects.exclude(birthdate=None).values_list("id", "birthdate"))

    print("\nprocessing")

    l = []
    i = 0  # keeps the final print valid when there are no stats
    for i, stat in enumerate(soccer_db.gstats.find(timeout=False)):  # no timeout because this query takes forever.
        if i % 50000 == 0:
            print(i)

        if stat['player'] == '':
            #import pdb; pdb.set_trace()
            continue

        # Any failure to resolve the player silently skips the document.
        try:
            bio_id = bio_getter(stat['player'])
        except:
            continue

        if bio_id is None:
            continue

        bd = birthdate_dict.get(bio_id)
        if stat['date'] and bd:
            # Combine the birthdate with a default (midnight) time so it can be
            # subtracted from stat['date'].
            bdt = datetime.datetime.combine(bd, datetime.time())
            age = (stat['date'] - bdt).days / 365.25
        else:
            age = None

        team_id = team_getter(stat['team'])

        # Unlike the other loaders there is no team/date fallback when a 'gid'
        # is present but does not resolve.
        if 'gid' in stat:
            game_id = gid_getter(stat['gid'])
        else:
            game_id = game_getter(team_id, stat['date'])
        #game_id = game_getter(team_id, stat['date'])

        result = game_result_getter(team_id, stat['date'])

        if game_id is None or team_id is None:
            continue

        def c2i(key, coerce_none=True):
            # Return stat[key]: a present None becomes 0, a missing key gives
            # None. A non-int value drops into the debugger and is then
            # returned as-is.
            # NOTE(review): coerce_none is never read, so c2i('on', False)
            # still turns None into 0 - confirm whether that is intended.
            if key in stat and stat[key] != None:
                if type(stat[key]) != int:
                    import pdb; pdb.set_trace()
                return stat[key]
            elif key in stat and stat[key] == None:
                return 0
            else:
                return None

        l.append({
            'player_id': bio_id,
            'team_id': team_id,
            'game_id': game_id,
            'games_started': c2i('games_started'),
            'games_played': c2i('games_played'),
            'minutes': c2i('minutes'),
            'goals': c2i('goals'),
            'assists': c2i('assists'),
            'shots': c2i('shots'),
            'shots_on_goal': c2i('shots_on_goal'),
            'fouls_committed': c2i('fouls_committed'),
            'fouls_suffered': c2i('fouls_suffered'),
            'yellow_cards': c2i('yellow_cards'),
            'red_cards': c2i('red_cards'),
            'on': c2i('on', False),
            'off': c2i('off', False),
            'age': age,
            'result': result,
        })

    print(i)
    insert_sql("stats_gamestat", l)
def load_games():
    """Load soccer_db.games into 'games_game', then their sources into 'games_gamesource'.

    Games are read in date order. Each game's stadium/city/country, teams,
    competition, season and officials are resolved to ids. Sources are
    collected during the first pass and attached afterwards, once the games
    exist and can be looked up by team and date.
    """
    print("\n loading {} games\n".format(soccer_db.games.count()))

    stadium_getter = make_stadium_getter()
    team_getter = make_team_getter()
    competition_getter = make_competition_getter()
    source_getter = make_source_getter()
    bio_getter = make_bio_getter()
    city_getter = make_city_getter()
    country_getter = make_country_getter()
    season_getter = make_season_getter()

    games = []
    # Tuples of (date, team1_id, source_id, source_url), consumed after the
    # games have been inserted.
    game_sources = []

    for game in soccer_db.games.find().sort('date', 1):

        # Debugging hook: a stadium name stored in the 'city' field.
        if game.get('city') == 'Columbus Crew Stadium':
            import pdb; pdb.set_trace()

        # Apply stadium / state / country information.
        # Precedence: stadium (and its city), then city, then location. The
        # location is tried as a country first and as a city otherwise.
        stadium_id = city_id = country_id = None
        if game.get('stadium'):
            stadium_id = stadium_getter(game['stadium'])
            s = Stadium.objects.get(id=stadium_id)
            if s.city:
                city_id = s.city.id
            else:
                city_id = None
        elif game.get('city'):
            city_id = city_getter(game['city']).id
        elif game.get('location'):
            country_id = country_getter(game['location'])
            if country_id is None:
                city_id = city_getter(game['location']).id

        competition_id = competition_getter(game['competition'])
        #game['competition'] = Competition.objects.get(id=game['competition'])
        #season_id = Season.objects.find(game['season'], competition_id).id # this!!
        season_id = season_getter(game['season'], competition_id)
        # Debugging hook: games without a season.
        if game['season'] is None:
            import pdb; pdb.set_trace()

        team1_id = team_getter(game['team1'])
        team2_id = team_getter(game['team2'])

        home_team_id = None
        if game.get('home_team'):
            home_team_id = team_getter(game['home_team'])

        # Total goals; a missing score counts as 0.
        goals = (game['team1_score'] or 0) + (game['team2_score'] or 0)

        referee_id = linesman1_id = linesman2_id = linesman3_id = None
        if game['referee']:
            referee_id = bio_getter(game['referee'])
        if game.get('linesman1'):
            linesman1_id = bio_getter(game['linesman1'])
        if game.get('linesman2'):
            linesman2_id = bio_getter(game['linesman2'])
        if game.get('linesman3'):
            linesman3_id = bio_getter(game['linesman3'])

        # Accept either a 'sources' list (deduplicated and sorted) or a single
        # 'source' value.
        if game.get('sources'):
            sources = sorted(set(game.get('sources')))
        elif game.get('source'):
            sources = [game['source']]
        else:
            sources = []

        for source in sources:
            if source.strip() == '':
                continue
            elif source.startswith('http'):
                source_url = source
            else:
                source_url = ''
            source_id = source_getter(source)
            t = (game['date'], team1_id, source_id, source_url)
            game_sources.append(t)

        # Normalize optional flags and values to their defaults.
        result_unknown = game.get('result_unknown') or False
        not_played = game.get('not_played') or False
        forfeit = game.get('forfeit') or False
        minigame = game.get('minigame') or False
        indoor = game.get('indoor') or False
        minutes = game.get('minutes') or 90
        neutral = game.get('neutral') or False
        attendance = game.get('attendance')
        stage = game.get('stage') or ''
        group = game.get('group') or ''
        rnd = game.get('round') or ''

        # There are lots of problems with the NASL games,
        # And probably ASL as well. Need to spend a couple
        # of hours repairing those schedules.
        if game['shootout_winner']:
            shootout_winner = team_getter(game['shootout_winner'])
        else:
            shootout_winner = None

        location = game.get('location', '')
        location = location or ''

        # Assign an id to games that lack one.
        if 'gid' not in game:
            game['gid'] = get_id_by_time()

        games.append({
            'date': game['date'],
            'has_date': bool(game['date']),
            'team1_id': team1_id,
            'team1_original_name': game['team1_original_name'],
            'team2_id': team2_id,
            'team2_original_name': game['team2_original_name'],
            'team1_score': game['team1_score'],
            'official_team1_score': game.get('official_team1_score'),
            'team2_score': game['team2_score'],
            'official_team2_score': game.get('official_team2_score'),
            'shootout_winner_id': shootout_winner,
            'team1_result': game['team1_result'],
            'team2_result': game['team2_result'],
            'result_unknown': result_unknown,
            'not_played': not_played,
            'forfeit': forfeit,
            'goals': goals,
            'minigame': minigame,
            'indoor': indoor,
            'minutes': minutes,
            'competition_id': competition_id,
            'season_id': season_id,
            'stage': stage,
            'group': group,
            'round': rnd,
            'home_team_id': home_team_id,
            'neutral': neutral,
            'stadium_id': stadium_id,
            'city_id': city_id,
            'country_id': country_id,
            'location': location,
            'notes': game.get('notes', ''),
            'video': game.get('video', ''),
            'attendance': attendance,
            'referee_id': referee_id,
            'linesman1_id': linesman1_id,
            'linesman2_id': linesman2_id,
            'linesman3_id': linesman3_id,
            'merges': game['merges'],
            'gid': game['gid'],
        })

    print("Inserting {} games results.".format(len(games)))

    # Broke on massive attendance.
    # Watch out for crazy integer values.
    insert_sql("games_game", games)

    print("Inserting games sources.")
    # Built only now, after the games insert above.
    game_getter = make_game_getter()

    l = []
    for date, team_id, source_id, source_url in game_sources:
        # Don't call game_getter without date. Need to give games unique id's.
        if date:
            game_id = game_getter(team_id, date)
            if game_id:
                l.append({
                    'game_id': game_id,
                    'source_id': source_id,
                    'source_url': source_url,
                })

    insert_sql("games_gamesource", l)
def load_bios():
    """Create a Bio for every document in soccer_db.bios, then load images.

    Bios are created one at a time through Bio.objects.create. Afterwards
    every bio that has an 'img' value gets a row in 'images_image' pointing
    at the Bio content type.
    """
    print("loading bios")

    cg = make_city_getter()

    # Scalar fields copied straight from the document; falsy values become None.
    copied_fields = ('name', 'height', 'birthdate', 'weight')

    # Load bios.
    for doc in soccer_db.bios.find().sort('name', 1):
        doc.pop('_id')

        if not doc['name']:
            import pdb; pdb.set_trace()
            print("NO BIO: %s" % str(doc))
            continue

        # nationality should be many-to-many
        doc.pop('nationality', None)

        attrs = {}
        for field in copied_fields:
            if field in doc:
                attrs[field] = doc[field] or None

        if doc.get('birthplace'):
            attrs['birthplace'] = cg(doc['birthplace'])

        if doc.get('deathplace'):
            attrs['deathplace'] = cg(doc['deathplace'])

        # Having unexpected problems here...
        # Anything that is not equal to True or False is stored as False.
        attrs['hall_of_fame'] = doc.get('hall_of_fame')
        if attrs['hall_of_fame'] not in (True, False):
            attrs['hall_of_fame'] = False

        Bio.objects.create(**attrs)

    bio_getter = make_bio_getter()
    bio_ct_id = ContentType.objects.get(app_label='bios', model='bio').id

    images = []
    for doc in soccer_db.bios.find().sort('name', 1):
        if not doc.get('img'):
            continue
        bio_id = bio_getter(doc['name'])
        # Keep only the last path component of the image location.
        filename = doc['img'].rsplit('/')[-1]
        images.append({
            'filename': filename,
            'content_type_id': bio_ct_id,
            'object_id': bio_id,
        })

    insert_sql("images_image", images)
def load_games():
    """Bulk-load game rows and their source links from ``soccer_db.games``.

    Games are read in date order, their foreign keys are resolved through
    the getter helpers, and all rows are written to ``games_game`` in one
    insert. Source references collected along the way are written to
    ``games_gamesource`` afterwards, looking each game up by team and date.
    """
    print("\n loading {} games\n".format(soccer_db.games.count()))

    stadium_getter = make_stadium_getter()
    team_getter = make_team_getter()
    competition_getter = make_competition_getter()
    source_getter = make_source_getter()
    bio_getter = make_bio_getter()
    city_getter = make_city_getter()
    country_getter = make_country_getter()
    season_getter = make_season_getter()

    game_rows = []
    pending_sources = []

    for game in soccer_db.games.find().sort('date', 1):
        # Interactive stop for one specific suspicious city value.
        if game.get('city') == 'Columbus Crew Stadium':
            import pdb
            pdb.set_trace()

        # Venue: a stadium is preferred over a bare city, which is preferred
        # over a free-text location.
        stadium_id = city_id = country_id = None
        if game.get('stadium'):
            stadium_id = stadium_getter(game['stadium'])
            stadium = Stadium.objects.get(id=stadium_id)
            city_id = stadium.city.id if stadium.city else None
        elif game.get('city'):
            city_id = city_getter(game['city']).id
        elif game.get('location'):
            # The location is tried as a country first, then as a city.
            country_id = country_getter(game['location'])
            if country_id is None:
                city_id = city_getter(game['location']).id

        competition_id = competition_getter(game['competition'])
        season_id = season_getter(game['season'], competition_id)
        if game['season'] is None:
            import pdb
            pdb.set_trace()

        team1_id = team_getter(game['team1'])
        team2_id = team_getter(game['team2'])
        home_team_id = (
            team_getter(game['home_team']) if game.get('home_team') else None
        )

        # A missing score counts as zero toward the goal total.
        goals = (game['team1_score'] or 0) + (game['team2_score'] or 0)

        # Officials, resolved in the order referee, linesman1..3.
        referee_id = bio_getter(game['referee']) if game['referee'] else None
        linesman1_id, linesman2_id, linesman3_id = [
            bio_getter(game[slot]) if game.get(slot) else None
            for slot in ('linesman1', 'linesman2', 'linesman3')
        ]

        # 'sources' (de-duplicated and sorted) is preferred over 'source'.
        if game.get('sources'):
            sources = sorted(set(game.get('sources')))
        elif game.get('source'):
            sources = [game['source']]
        else:
            sources = []

        for source in sources:
            if source.strip() == '':
                continue
            source_url = source if source.startswith('http') else ''
            source_id = source_getter(source)
            pending_sources.append(
                (game['date'], team1_id, source_id, source_url))

        # There are lots of problems with the NASL games, and probably ASL
        # as well; those schedules still need repairing.
        if game['shootout_winner']:
            shootout_winner = team_getter(game['shootout_winner'])
        else:
            shootout_winner = None

        if 'gid' not in game:
            game['gid'] = get_id_by_time()

        game_rows.append({
            'date': game['date'],
            'has_date': bool(game['date']),
            'team1_id': team1_id,
            'team1_original_name': game['team1_original_name'],
            'team2_id': team2_id,
            'team2_original_name': game['team2_original_name'],
            'team1_score': game['team1_score'],
            'official_team1_score': game.get('official_team1_score'),
            'team2_score': game['team2_score'],
            'official_team2_score': game.get('official_team2_score'),
            'shootout_winner_id': shootout_winner,
            'team1_result': game['team1_result'],
            'team2_result': game['team2_result'],
            'result_unknown': game.get('result_unknown') or False,
            'not_played': game.get('not_played') or False,
            'forfeit': game.get('forfeit') or False,
            'goals': goals,
            'minigame': game.get('minigame') or False,
            'indoor': game.get('indoor') or False,
            'minutes': game.get('minutes') or 90,
            'competition_id': competition_id,
            'season_id': season_id,
            'stage': game.get('stage') or '',
            'group': game.get('group') or '',
            'round': game.get('round') or '',
            'home_team_id': home_team_id,
            'neutral': game.get('neutral') or False,
            'stadium_id': stadium_id,
            'city_id': city_id,
            'country_id': country_id,
            'location': game.get('location') or '',
            'notes': game.get('notes', ''),
            'video': game.get('video', ''),
            'attendance': game.get('attendance'),
            'referee_id': referee_id,
            'linesman1_id': linesman1_id,
            'linesman2_id': linesman2_id,
            'linesman3_id': linesman3_id,
            'merges': game['merges'],
            'gid': game['gid'],
        })

    print("Inserting {} games results.".format(len(game_rows)))
    # Broke on massive attendance.
    # Watch out for crazy integer values.
    insert_sql("games_game", game_rows)

    print("Inserting games sources.")
    # Built only after the games above have been inserted.
    game_getter = make_game_getter()

    source_rows = []
    for date, team_id, source_id, source_url in pending_sources:
        # Don't call game_getter without a date. Need to give games unique ids.
        if not date:
            continue
        game_id = game_getter(team_id, date)
        if not game_id:
            continue
        source_rows.append({
            'game_id': game_id,
            'source_id': source_id,
            'source_url': source_url,
        })

    insert_sql("games_gamesource", source_rows)
def load_stats():
    """Bulk-load season stat lines from ``soccer_db.stats`` into ``stats_stat``.

    Every document with a non-empty ``name`` becomes one row whose player,
    team, competition, season and source are resolved through the getter
    helpers; the counting fields are copied through as-is.

    NOTE(review): a second ``load_stats`` is defined later in this file and
    rebinds the name, so this version is not the one callers end up with.
    """
    print("\nloading stats\n")

    # Never called; kept so `timer` is still applied exactly as before.
    @timer
    def f():
        return

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    competition_getter = make_competition_getter()
    season_getter = make_season_getter()
    source_getter = make_source_getter()

    counted_fields = (
        'games_started',
        'games_played',
        'minutes',
        'goals',
        'assists',
        'shots',
        'shots_on_goal',
        'fouls_committed',
        'fouls_suffered',
        'yellow_cards',
        'red_cards',
    )

    def checked_int(record, key):
        """Return record[key], or None when the key is absent or None."""
        value = record.get(key)
        if value is None:
            return None
        if type(value) != int:
            # Interactive stop on a non-int value; on resume the value is
            # returned unchanged.
            import pdb
            pdb.set_trace()
        return value

    print("\nprocessing\n")

    rows = []
    index = 0
    # no_cursor_timeout because this query takes forever.
    for index, stat in enumerate(soccer_db.stats.find(no_cursor_timeout=True)):
        if index % 50000 == 0:
            print(index)

        if stat['name'] == '':
            continue

        team_id = team_getter(stat['team'])
        bio_id = bio_getter(stat['name'])
        competition_id = competition_getter(stat['competition'])
        season_id = season_getter(stat['season'], competition_id)

        # Only the single 'source' field is used; a multi-source variant
        # (cf. the game sources handling) has not been wired in.
        source_id = source_getter(stat['source']) if stat.get('source') else None

        row = {
            'player_id': bio_id,
            'team_id': team_id,
            'competition_id': competition_id,
            'season_id': season_id,
        }
        row.update((name, checked_int(stat, name)) for name in counted_fields)
        row['source_id'] = source_id
        rows.append(row)

    print(index)
    insert_sql("stats_stat", rows)
def load_fouls():
    """Load card events from ``soccer_db.fouls`` into ``events_event``.

    Each foul document is matched to a game (by ``gid`` when present,
    otherwise by team and date) and written as an event row whose ``etype``
    is ``'red'`` or ``'yellow'``. Documents with no team, no date, an
    unrecognised type, or no matching game are skipped; the last two cases
    are reported on stdout.
    """
    print("loading fouls")
    print(soccer_db.fouls.count())

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    game_getter = make_game_getter()
    gid_getter = make_gid_getter()

    def create_foul(foul):
        """Return the event row for one foul document, or {} to skip it."""
        if foul['team'] is None:
            return {}

        team_id = team_getter(foul['team'])
        bio_id = bio_getter(foul['name'])

        ftype = foul['type']
        if ftype not in ('red', 'yellow'):
            # Previously this path stopped in pdb and then failed on an
            # unbound `ftype`; report the document and skip it instead.
            print("Unknown foul type %r in %s" % (ftype, foul))
            return {}

        if not foul['date']:
            return {}

        # Coerce to a plain date for the team/date game lookup.
        d = datetime.date(
            foul['date'].year, foul['date'].month, foul['date'].day)

        # Try gid first, fall back on team/date.
        game_id = None
        if 'gid' in foul:
            game_id = gid_getter(foul['gid'])
        if game_id is None:
            game_id = game_getter(team_id, d)

        if not game_id:
            # Wrap in a tuple so the dict is formatted as a single value.
            print("Cannot create %s" % (foul,))
            return {}

        return {
            'game_id': game_id,
            'team_id': team_id,
            'minute': foul['minute'],
            'etype': ftype,
            'subject_id': bio_id,
            'description': '',
        }

    fouls = []
    for i, foul in enumerate(soccer_db.fouls.find()):
        if i % 50000 == 0:
            print(i)
        row = create_foul(foul)
        if row:
            fouls.append(row)

    insert_sql('events_event', fouls)
def load_awards():
    """Create Award rows and bulk-load their recipients.

    First every distinct (competition, award, type) triple found in
    ``soccer_db.awards`` becomes an ``Award``. Then each award document is
    turned into an ``awards_awarditem`` row pointing at its recipient (a Bio
    or a Team, via content types) and, when the award has a competition, at
    the matching season.

    Raises:
        ValueError: if a document's ``model`` is neither ``'Bio'`` nor
            ``'Team'``.
    """
    print("\nloading awards\n")

    season_getter = make_season_getter()
    team_getter = make_team_getter()
    bio_getter = make_bio_getter()

    # Create the set of all awards.
    awards = {
        (item['competition'], item['award'], item.get('type', ''))
        for item in soccer_db.awards.find()
    }

    # Create all awards.
    award_dict = {}
    for key in awards:
        competition, name, award_type = key
        # Using find because currently using NCAA awards but don't have
        # NCAA standings. Should be able to switch to a competition getter.
        if competition:
            competition = Competition.objects.find(competition)
        award_dict[key] = Award.objects.create(
            competition=competition, name=name, type=award_type)

    bio_ct_id = ContentType.objects.get(app_label='bios', model='bio').id
    team_ct_id = ContentType.objects.get(app_label='teams', model='team').id

    # Create award items.
    items = []
    for item in soccer_db.awards.find().sort('recipient', 1):
        item.pop('_id')

        award = award_dict[
            (item['competition'], item['award'], item.get('type', ''))]

        # NCAA seasons don't exist, so awards without a competition get no
        # season.
        if award.competition:
            season_id = season_getter(item['season'], award.competition.id)
        else:
            season_id = None

        model_name = item.pop('model')
        if model_name == 'Bio':
            content_type_id = bio_ct_id
            object_id = bio_getter(item['recipient'])
        elif model_name == 'Team':
            content_type_id = team_ct_id
            object_id = team_getter(item['recipient'])
        else:
            # The original bare `raise` had no active exception and only
            # produced "No active exception to reraise"; name the bad value.
            raise ValueError(
                "Unknown award recipient model %r in %s" % (model_name, item))

        items.append({
            'award_id': award.id,
            'season_id': season_id,
            'content_type_id': content_type_id,
            'object_id': object_id,
        })

    insert_sql("awards_awarditem", items)
def load_game_stats():
    """Bulk-load per-game stat lines from ``soccer_db.gstats``.

    Each document with a resolvable player, team and game becomes one
    ``stats_gamestat`` row. The player's age on the game date (in years,
    days / 365.25) is added when both the date and a birthdate are known,
    together with the team's result for that date.
    """
    print("\nloading game stats\n")

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    game_getter = make_game_getter()
    gid_getter = make_gid_getter()
    game_result_getter = make_game_result_getter()

    birthdate_dict = dict(
        Bio.objects.exclude(birthdate=None).values_list("id", "birthdate"))

    def c2i(stat, key, coerce_none=True):
        """Read an integer field from a stat document.

        Returns None when the key is absent. An explicit None becomes 0,
        unless coerce_none is False, in which case it stays None.
        """
        if key not in stat:
            return None
        value = stat[key]
        if value is None:
            # The flag used to be ignored, so 'on'/'off' were stored as 0.
            return 0 if coerce_none else None
        if type(value) is not int:
            # NOTE(review): existing interactive stop on non-int data,
            # left in place; the value is returned unchanged on resume.
            import pdb
            pdb.set_trace()
        return value

    print("\nprocessing")

    rows = []
    i = 0
    # no_cursor_timeout because this query takes forever.
    for i, stat in enumerate(soccer_db.gstats.find(no_cursor_timeout=True)):
        if i % 50000 == 0:
            print(i)

        if stat['player'] == '':
            continue

        # Best effort: skip players the getter cannot resolve, but do not
        # swallow KeyboardInterrupt / SystemExit the way a bare except did.
        try:
            bio_id = bio_getter(stat['player'])
        except Exception:
            continue
        if bio_id is None:
            continue

        bd = birthdate_dict.get(bio_id)
        if stat['date'] and bd:
            # Promote the birthdate to a datetime so it can be subtracted
            # from the game datetime.
            bdt = datetime.datetime.combine(bd, datetime.time())
            age = (stat['date'] - bdt).days / 365.25
        else:
            age = None

        team_id = team_getter(stat['team'])

        if 'gid' in stat:
            game_id = gid_getter(stat['gid'])
        else:
            game_id = game_getter(team_id, stat['date'])

        result = game_result_getter(team_id, stat['date'])

        if game_id is None or team_id is None:
            continue

        rows.append({
            'player_id': bio_id,
            'team_id': team_id,
            'game_id': game_id,
            'games_started': c2i(stat, 'games_started'),
            'games_played': c2i(stat, 'games_played'),
            'minutes': c2i(stat, 'minutes'),
            'goals': c2i(stat, 'goals'),
            'assists': c2i(stat, 'assists'),
            'shots': c2i(stat, 'shots'),
            'shots_on_goal': c2i(stat, 'shots_on_goal'),
            'fouls_committed': c2i(stat, 'fouls_committed'),
            'fouls_suffered': c2i(stat, 'fouls_suffered'),
            'yellow_cards': c2i(stat, 'yellow_cards'),
            'red_cards': c2i(stat, 'red_cards'),
            'on': c2i(stat, 'on', False),
            'off': c2i(stat, 'off', False),
            'age': age,
            'result': result,
        })

    print(i)
    insert_sql("stats_gamestat", rows)
def load_stats():
    """Bulk-load season stat lines from ``soccer_db.stats`` into ``stats_stat``.

    Every document with a non-empty ``name`` becomes one row whose player,
    team, competition, season and source ids are resolved through the getter
    helpers; the counting fields are copied through unchanged.

    NOTE(review): this definition rebinds an earlier ``load_stats`` in the
    same file, so it is the one that takes effect.
    """
    print("\nloading stats\n")

    team_getter = make_team_getter()
    bio_getter = make_bio_getter()
    competition_getter = make_competition_getter()
    season_getter = make_season_getter()
    source_getter = make_source_getter()

    def c2i(stat, key):
        """Return stat[key], or None when the key is absent or None."""
        value = stat.get(key)
        if value is None:
            return None
        if type(value) is not int:
            # NOTE(review): existing interactive stop on non-int data,
            # left in place; the value is returned unchanged on resume.
            import pdb
            pdb.set_trace()
        return value

    print("\nprocessing\n")

    rows = []
    i = 0
    # No cursor timeout because this query takes forever. `timeout=False`
    # is the pre-3.0 PyMongo spelling; the rest of this file already uses
    # `no_cursor_timeout=True`.
    for i, stat in enumerate(soccer_db.stats.find(no_cursor_timeout=True)):
        if i % 50000 == 0:
            print(i)

        if stat['name'] == '':
            continue

        team_id = team_getter(stat['team'])
        bio_id = bio_getter(stat['name'])
        competition_id = competition_getter(stat['competition'])
        season_id = season_getter(stat['season'], competition_id)

        # Only the single 'source' field is used for now; multi-source
        # handling (cf. the game sources) has not been wired in.
        if stat.get('source'):
            source_id = source_getter(stat['source'])
        else:
            source_id = None

        rows.append({
            'player_id': bio_id,
            'team_id': team_id,
            'competition_id': competition_id,
            'season_id': season_id,
            'games_started': c2i(stat, 'games_started'),
            'games_played': c2i(stat, 'games_played'),
            'minutes': c2i(stat, 'minutes'),
            'goals': c2i(stat, 'goals'),
            'assists': c2i(stat, 'assists'),
            'shots': c2i(stat, 'shots'),
            'shots_on_goal': c2i(stat, 'shots_on_goal'),
            'fouls_committed': c2i(stat, 'fouls_committed'),
            'fouls_suffered': c2i(stat, 'fouls_suffered'),
            'yellow_cards': c2i(stat, 'yellow_cards'),
            'red_cards': c2i(stat, 'red_cards'),
            'source_id': source_id,
        })

    print(i)
    insert_sql("stats_stat", rows)