def process(start_year, end_year, TBA, SQL_Write, SQL_Read, clean=False, cache=True):
    """Pull teams, team-years, events, team-events and matches from TBA.

    Everything fetched is handed to ``SQL_Write`` with ``add=False`` and
    ``commit=False``; the rows are flushed with explicit ``SQL_Write.add()``
    calls after each batch.

    Args:
        start_year: first season to load (inclusive).
        end_year: last season to load (inclusive).
        TBA: data source; ``getTeams``, ``getTeamYears``, ``getEvents``,
            ``getTeamEvents`` and ``getMatches`` are called on it.
        SQL_Write: writer receiving the ``add*`` calls.
        SQL_Read: reader used to list the teams and team-years already stored.
        clean: when true, teams are loaded first, year/event/team-event/match
            rows are written with ``check=False``, and every event is loaded
            regardless of its age.
        cache: forwarded unchanged to every ``TBA`` call.
    """
    # Unless `clean` is set, only events whose timestamp is within this many
    # seconds of the current time are reloaded (432000 s = five days).
    recent_window_seconds = 432000

    if clean:
        print("Loading Teams")
        for team in TBA.getTeams(cache=cache):
            SQL_Write.addTeam(team, check=False, add=False, commit=False)
        SQL_Write.add()

    # Sets: both collections are only used for membership tests below.
    all_teams = {t.id for t in SQL_Read.getTeams()}

    for year in range(start_year, end_year + 1):
        print("Year " + str(year))
        SQL_Write.addYear({"year": year}, check=(not clean), add=False, commit=False)

        team_years = {t.team_id for t in SQL_Read.getTeamYears(year=year)}
        for teamYear in TBA.getTeamYears(year, cache=cache):
            # handles teams with no matches removed in postprocessing
            team = teamYear["team"]
            if team not in team_years and team in all_teams:
                SQL_Write.addTeamYear(teamYear, check=False, add=False, commit=False)
        SQL_Write.add()

        events = TBA.getEvents(year, cache=cache)
        for event in events:
            event_key = event["key"]
            _, event_id = SQL_Write.addEvent(
                event, check=(not clean), add=False, commit=False
            )

            event_time = event["time"]
            # if the event is within five days (or a full reload was requested)
            if clean or abs(int(time.time()) - event_time) < recent_window_seconds:
                teamEvents = TBA.getTeamEvents(event_key, cache=cache)
                for teamEvent in teamEvents:
                    teamEvent["year"] = year
                    teamEvent["event_id"] = event_id
                    teamEvent["time"] = event_time
                    SQL_Write.addTeamEvent(
                        teamEvent, check=(not clean), add=False, commit=False
                    )

                matches = TBA.getMatches(year, event_key, event_time, cache=cache)
                for match in matches:
                    match["year"] = year
                    match["event"] = event_id
                    SQL_Write.addMatch(
                        match, check=(not clean), add=False, commit=False
                    )
                SQL_Write.add()
        SQL_Write.add()
        printStats(TBA, SQL_Write, SQL_Read)

    SQL_Write.add()
    printStats(TBA, SQL_Write, SQL_Read)
def pushClean(SQL_Read, cloud_engine):
    """Overwrite every ``rankings_*`` table on ``cloud_engine``.

    Each loader is called with ``SQL_Read`` and its result is written with
    ``to_sql(..., if_exists="replace", index=False)``, one table at a time in
    the order listed below.

    Args:
        SQL_Read: reader passed to each ``get*`` loader.
        cloud_engine: connection/engine handed to ``to_sql``.
    """
    # (loader, destination table); presumably each loader returns a pandas
    # DataFrame, since only ``to_sql`` is called on the result.
    exports = (
        (getYears, "rankings_year"),
        (getTeams, "rankings_team"),
        (getTeamYears, "rankings_teamyear"),
        (getEvents, "rankings_event"),
        (getTeamEvents, "rankings_teamevent"),
        (getMatches, "rankings_match"),
        (getTeamMatches, "rankings_teammatch"),
    )
    for load_frame, table_name in exports:
        frame = load_frame(SQL_Read)
        frame.to_sql(table_name, cloud_engine, if_exists="replace", index=False)

    printStats()
def process(start_year, end_year, SQL_Write, SQL_Read):
    """Compute Elo ratings and Elo-derived statistics for a range of seasons.

    For each season the function seeds every team-year with a starting
    rating, runs the season's matches (in sorted order) through the ``elo``
    model, and writes the results onto the match, team-match, team-event,
    event, team-year and year objects returned by ``SQL_Read``. After the
    last season it aggregates career totals onto every team.

    Team-events and team-years that took part in no match are passed to
    ``SQL_Write.remove``.

    Args:
        start_year: first season to process (inclusive).
        end_year: last season to process (inclusive).
        SQL_Write: writer; ``add``, ``commit`` and ``remove`` are called on it.
        SQL_Read: reader used to fetch teams, years, team-years, matches,
            team-matches and events.
    """
    # constants from elo model
    start, mean_reversion = elo.start_rating(), elo.mean_reversion()

    # season -> {team number -> team-year}; seeded with the two seasons that
    # precede start_year so that the first processed season can look back.
    team_years_all = {}
    if start_year > 2003:
        team_years_all[start_year - 2] = {
            team_year.team_id: team_year
            for team_year in SQL_Read.getTeamYears(year=start_year - 2)
        }
    if start_year > 2002:
        team_years_all[start_year - 1] = {
            team_year.team_id: team_year
            for team_year in SQL_Read.getTeamYears(year=start_year - 1)
        }

    # Index of the win/loss/tie slot to bump, from each alliance's viewpoint.
    red_mapping = {"red": 0, "blue": 1, "draw": 2}
    blue_mapping = {"blue": 0, "red": 1, "draw": 2}
    # Actual outcome expressed as a red win probability.
    win_probs = {"red": 1, "blue": 0, "draw": 0.5}

    for year in range(start_year, end_year + 1):
        print(year)
        # team number -> team-year / list of post-match elos / latest elo;
        # team_events is keyed by team-event id and holds [elo, playoff] rows
        team_years, team_events, team_matches, team_elos = {}, {}, {}, {}
        # match id -> {team number -> elo going into that match}
        team_match_ids = {}
        # win, loss, tie, count
        team_year_stats = defaultdict(lambda: [0, 0, 0, 0])
        team_event_stats = defaultdict(lambda: [0, 0, 0, 0])

        sd_score = SQL_Read.getYear(year=year).score_sd
        teamYears = SQL_Read.getTeamYears(year=year)

        print("Team Year Setup")
        processed = 0
        for teamYear in teamYears:
            # eventually will need 2021 logic here (continuation season)
            num = teamYear.team_id
            team_years[num] = teamYear
            team_matches[num] = []

            # gets elo using mean reversion: fall back to the model's
            # mean-reversion value when a prior season's max is unavailable
            prior_2 = team_years_all.get(year - 2, {}).get(num)
            elo_2yr = mean_reversion
            if prior_2 is not None and prior_2.elo_max is not None:
                elo_2yr = prior_2.elo_max

            prior_1 = team_years_all.get(year - 1, {}).get(num)
            elo_1yr = mean_reversion
            if prior_1 is not None and prior_1.elo_max is not None:
                elo_1yr = prior_1.elo_max

            start_rating = elo.existing_rating(elo_1yr, elo_2yr)
            team_elos[num] = start if year == 2002 else start_rating
            teamYear.elo_start = team_elos[num]  # saves new elo

            processed += 1
            if processed % 1000 == 0:
                SQL_Write.add()

        print("Matches")
        acc, mse, count = 0, 0, 0  # for statistics
        event_stats = defaultdict(lambda: [0, 0, 0])  # acc, mse, count
        matches = SQL_Read.getMatches_year(year=year)
        for match in sorted(matches):
            event_id = match.event_id
            red, blue = match.getTeams()

            red_elo_pre = {t: team_elos[t] for t in red}
            blue_elo_pre = {t: team_elos[t] for t in blue}
            team_match_ids[match.id] = {**red_elo_pre, **blue_elo_pre}

            # update object
            match.red_elo_sum = sum(red_elo_pre.values())
            match.blue_elo_sum = sum(blue_elo_pre.values())

            # updates win probability fields
            win_prob = elo.win_prob(red_elo_pre, blue_elo_pre)
            match.elo_win_prob = win_prob
            match.elo_winner = "red" if win_prob > 0.5 else "blue"

            # compute elo changes
            red_elo_post, blue_elo_post = elo.update_rating(
                sd_score,
                red_elo_pre,
                blue_elo_pre,
                match.red_score,
                match.blue_score,
                match.playoff,
            )

            winner = match.winner

            # update dictionaries
            for alliance, elo_pre, elo_post, mapping in (
                (red, red_elo_pre, red_elo_post, red_mapping),
                (blue, blue_elo_pre, blue_elo_post, blue_mapping),
            ):
                for t in alliance:
                    team_elos[t] = elo_post[t]
                    team_matches[t].append(elo_post[t])
                    team_event_id = utils.getTeamEventId(t, match.event_id)
                    if team_event_id not in team_events:
                        # first row is the rating the team brought to the event
                        team_events[team_event_id] = [[elo_pre[t], match.playoff]]
                    team_events[team_event_id].append([elo_post[t], match.playoff])
                    team_year_stats[t][3] += 1
                    team_year_stats[t][mapping[winner]] += 1
                    team_event_stats[team_event_id][3] += 1
                    team_event_stats[team_event_id][mapping[winner]] += 1

            # update stats
            error = (win_probs[winner] - match.elo_win_prob) ** 2
            event_stats[event_id][1] += error  # mse
            mse += error

            if winner == match.elo_winner:
                event_stats[event_id][0] += 1  # acc
                acc += 1

            count += 1
            event_stats[event_id][2] += 1  # count

            if count % 1000 == 0:
                SQL_Write.add()

        # aggregate stats
        acc = round(acc / len(matches), 4)
        mse = round(mse / len(matches), 4)

        print("Team Matches")
        for team_match in SQL_Read.getTeamMatches(year=year):
            team_match.elo = team_match_ids[team_match.match_id][team_match.team_id]

        SQL_Write.commit()  # optional, reduces memory overhead

        print("Team Events")
        # Flush counter; deliberately a different name from the per-team-event
        # match count unpacked inside the loop.
        processed = 0
        for team in teamYears:
            for team_event in team.team_events:
                team_event_id = team_event.id
                if team_event_id not in team_events:
                    # the team played no match at this event
                    SQL_Write.remove(team_event)
                    continue

                data = team_events[team_event_id]
                elos = [row[0] for row in data]
                team_event.elo_start = elos[0]
                team_event.elo_end = elos[-1]
                team_event.elo_max = max(elos)
                team_event.elo_mean = sum(elos) / len(elos)
                team_event.elo_diff = elos[-1] - elos[0]

                # Default to the starting elo, then look for the last row
                # flagged as non-playoff and take the row right after it
                # (capped at the final row).
                # NOTE(review): rows after the first hold the rating *after*
                # a match, so i + 1 looks like the rating after the first
                # playoff match rather than before it - confirm the intent.
                team_event.elo_pre_playoffs = elos[0]
                for i in range(len(data) - 1, -1, -1):
                    if data[i][1] == 0:
                        ind = min(i + 1, len(data) - 1)
                        team_event.elo_pre_playoffs = data[ind][0]
                        break

                wins, losses, ties, played = team_event_stats[team_event_id]
                winrate = max(-1, round((wins + ties / 2) / played, 4))
                team_event.wins = wins
                team_event.losses = losses
                team_event.ties = ties
                team_event.count = played
                team_event.winrate = winrate

                processed += 1
                if processed % 1000 == 0:
                    SQL_Write.add()

        SQL_Write.commit()

        print("Events")
        # all event elo stats based on pre-playoff elos
        for event in SQL_Read.getEvents_year(year=year):
            event_id = event.id
            elos = [team_event.elo_pre_playoffs for team_event in event.team_events]
            elos.sort(reverse=True)
            event.elo_max = elos[0]
            event.elo_top8 = -1 if len(elos) < 8 else elos[7]
            event.elo_top24 = -1 if len(elos) < 24 else elos[23]
            event.elo_mean = round(sum(elos) / len(elos), 2)
            event.elo_sd = round(statistics.pstdev(elos), 2)
            event_acc, event_mse, event_count = event_stats[event_id]
            event.elo_acc = round(event_acc / event_count, 4)
            event.elo_mse = round(event_mse / event_count, 4)

        print("Team Years")
        year_elos = []
        remove = []
        for team in team_years:
            elos = team_matches[team]
            if not elos:
                # team-year without a single match
                SQL_Write.remove(team_years[team])
                remove.append(team)
            else:
                # max over the elos from the 9th match on (or just the last
                # one when fewer were played)
                year_elos.append(max(elos[min(len(elos) - 1, 8):]))
        for team in remove:
            team_years.pop(team)

        year_elos.sort(reverse=True)
        team_year_count = len(team_years)
        # Flush counter; kept apart from the per-team match count below.
        processed = 0
        for team in team_years:
            obj = team_years[team]
            elos = team_matches[team]
            elo_max = max(elos[min(len(elos) - 1, 8):])
            obj.elo_max = elo_max
            obj.elo_mean = round(sum(elos) / len(elos), 2)
            obj.elo_end = team_elos[team]
            obj.elo_diff = obj.elo_end - obj.elo_start

            pre_champs = obj.elo_start
            for team_event in sorted(obj.team_events):
                # goes from team_event to event
                if team_event.event.type < 3:
                    pre_champs = team_event.elo_end
            obj.elo_pre_champs = pre_champs

            wins, losses, ties, played = team_year_stats[team]
            winrate = round((wins + 0.5 * ties) / played, 4)
            obj.wins = wins
            obj.losses = losses
            obj.ties = ties
            obj.count = played
            obj.winrate = winrate

            # year_elos is sorted descending, so the first occurrence of this
            # team's max elo is its (1-based) rank; ties share a rank.
            obj.elo_rank = rank = year_elos.index(elo_max) + 1
            obj.elo_percentile = round(rank / team_year_count, 4)

            processed += 1
            if processed % 1000 == 0:
                SQL_Write.add()

        print("Years")
        # year_elos is still sorted in descending order from above
        year_obj = SQL_Read.getYear(year=year)
        year_obj.elo_max = year_elos[0]
        year_obj.elo_1p = year_elos[round(0.01 * len(year_elos))]
        year_obj.elo_5p = year_elos[round(0.05 * len(year_elos))]
        year_obj.elo_10p = year_elos[round(0.10 * len(year_elos))]
        year_obj.elo_25p = year_elos[round(0.25 * len(year_elos))]
        year_obj.elo_median = year_elos[round(0.50 * len(year_elos))]
        year_obj.elo_mean = round(sum(year_elos) / len(year_elos), 2)
        year_obj.elo_sd = round(statistics.pstdev(year_elos), 2)
        year_obj.elo_acc = acc
        year_obj.elo_mse = mse

        team_years_all[year] = team_years
        # keeps memory down
        if year - 2 in team_years_all:
            team_years_all.pop(year - 2)

        SQL_Write.commit()
        printStats(SQL_Write=SQL_Write, SQL_Read=SQL_Read)

    print("Teams")
    for team in SQL_Read.getTeams():
        years = {}  # season -> max elo of that season
        wins, losses, ties, count = 0, 0, 0, 0
        for team_year in team.team_years:
            years[team_year.year_id] = team_year.elo_max
            wins += team_year.wins
            losses += team_year.losses
            ties += team_year.ties
            count += team_year.count

        vals = years.values()
        recent = [years[season] for season in range(2017, end_year + 1) if season in years]
        r_y, y = len(recent), len(vals)

        # A team with no team-years gets the same -1 sentinel as an inactive
        # one instead of failing on max() of an empty collection.
        latest_season = max(years) if years else None
        team.elo = years[latest_season] if team.active and years else -1

        # temporary solution applying mean reversion if no 2020 matches
        if team.active and latest_season == 2019:
            yr_1 = 1450 if 2019 not in years else years[2019]
            yr_2 = 1450 if 2018 not in years else years[2018]
            team.elo = 0.56 * yr_1 + 0.24 * yr_2 + 0.20 * 1450
        # end temporary block

        team.elo_recent = -1 if r_y == 0 else round(sum(recent) / r_y, 2)
        team.elo_mean = -1 if y == 0 else round(sum(vals) / y, 2)
        team.elo_max = -1 if y == 0 else max(vals)

        # -1 marks "no matches", matching the other sentinel fields above
        winrate = -1 if count == 0 else round((wins + ties / 2) / count, 4)
        team.wins = wins
        team.losses = losses
        team.ties = ties
        team.count = count
        team.winrate = winrate

    SQL_Write.commit()
    printStats(SQL_Write=SQL_Write, SQL_Read=SQL_Read)
def main(start_year, end_year, TBA, SQL_Write, SQL_Read, clean):
    """Run ``process`` and then ``test`` for the given seasons and print stats.

    Both calls receive ``(start_year, end_year, SQL_Write, SQL_Read)``
    positionally. ``TBA`` and ``clean`` are accepted but not used here.
    """
    shared_args = (start_year, end_year, SQL_Write, SQL_Read)
    process(*shared_args)
    test(*shared_args)
    printStats(SQL_Write=SQL_Write, SQL_Read=SQL_Read)
def main(start_year, end_year, TBA, SQL_Write, SQL_Read, clean, cache=True):
    """Load data with ``process``, run ``post_process``, then print stats.

    Args:
        start_year: first season, forwarded to ``process``.
        end_year: last season, forwarded to ``process``.
        TBA: data source, forwarded to both steps.
        SQL_Write: writer, forwarded to both steps.
        SQL_Read: reader, forwarded to both steps.
        clean: forwarded to both steps.
        cache: forwarded to ``process`` only.
    """
    handles = (TBA, SQL_Write, SQL_Read)
    process(start_year, end_year, *handles, clean, cache)
    post_process(*handles, clean)
    printStats()
def process(start_year, end_year, SQL_Read, SQL_Write):
    """Compute OPR and ILS values for every season in the given range.

    For each season the function seeds every team-year with a prior OPR
    (carried over from the previous season when available), walks the
    season's events in sorted order, lets ``process_event`` compute per-team
    OPR/ILS rows for each event, and writes the results onto the
    team-match, team-event, event, team-year and year objects.

    Team-years that received no result from any event are dropped from the
    season's ranking.

    Args:
        start_year: first season to process (inclusive).
        end_year: last season to process (inclusive).
        SQL_Read: reader used to fetch teams, years, team-years, events and
            matches.
        SQL_Write: writer; ``add`` and ``commit`` are called on it.
    """
    # Component OPR attributes in the column order of the per-team rows
    # returned by process_event: column 0 is the total, columns 1..7 follow.
    component_fields = (
        "opr_auto",
        "opr_teleop",
        "opr_1",
        "opr_2",
        "opr_endgame",
        "opr_fouls",
        "opr_no_fouls",
    )

    team_ids = {team.id for team in SQL_Read.getTeams()}
    means = {}  # season -> mean score

    team_years_all = {}  # master dictionary: season -> {team number -> team-year}
    if start_year > 2003:
        team_years_all[start_year - 2] = {
            team_year.team_id: team_year
            for team_year in SQL_Read.getTeamYears(year=start_year - 2)
        }
    if start_year > 2002:
        team_years_all[start_year - 1] = {
            team_year.team_id: team_year
            for team_year in SQL_Read.getTeamYears(year=start_year - 1)
        }
        means[start_year - 1] = SQL_Read.getYear(year=start_year - 1).score_mean

    for year in range(start_year, end_year + 1):
        print(year)
        year_obj = SQL_Read.getYear(year=year)
        sd_score = year_obj.score_sd
        TM = 2 if year <= 2004 else 3  # divisor turning per-alliance means into per-team values

        team_years, team_oprs = {}, {}
        opr_acc, opr_mse, mix_acc, mix_mse, count = 0, 0, 0, 0, 0
        rp1_acc, rp1_mse, rp2_acc, rp2_mse, count_rp = 0, 0, 0, 0, 0

        # populate starting OPR from the previous year
        mean_score = year_obj.score_mean
        means[year] = mean_score
        prior_opr_global = mean_score / TM
        ils_1_seed = logistic_inv(year_obj.rp_1_mean / TM)
        ils_2_seed = logistic_inv(year_obj.rp_2_mean / TM)
        team_ils_1, team_ils_2 = {}, {}

        processed = 0
        for teamYear in SQL_Read.getTeamYears(year=year):
            num = teamYear.team_id
            prior_opr = prior_opr_global
            previous = team_years_all.get(year - 1, {}).get(num)
            if previous is not None and previous.opr_end is not None:
                # rescale last season's OPR to this season's scoring level,
                # then pull it 10% towards the season-wide prior
                prior_opr = previous.opr_end
                prior_opr = prior_opr / means[year - 1] * mean_score
                prior_opr = 0.90 * prior_opr + 0.10 * prior_opr_global

            teamYear.opr_start = prior_opr
            teamYear.opr_end = prior_opr  # will be overwritten

            rate = prior_opr / prior_opr_global
            teamYear.opr_auto = rate * year_obj.auto_mean / TM
            teamYear.opr_teleop = rate * year_obj.teleop_mean / TM
            teamYear.opr_1 = rate * year_obj.one_mean / TM
            teamYear.opr_2 = rate * year_obj.two_mean / TM
            teamYear.opr_endgame = rate * year_obj.endgame_mean / TM
            teamYear.opr_fouls = year_obj.foul_mean / TM  # no rate
            teamYear.opr_no_fouls = rate * year_obj.no_foul_mean / TM

            boost = (teamYear.elo_start - 1500) * 0.001
            teamYear.ils_1 = team_ils_1[num] = max(-1 / 3, ils_1_seed + boost)
            teamYear.ils_2 = team_ils_2[num] = max(-1 / 3, ils_2_seed + boost)

            team_years[num] = teamYear
            team_oprs[num] = prior_opr

            processed += 1
            if processed % 1000 == 0:
                SQL_Write.add()

        # team number -> list of per-event result dicts, in event order
        team_events = defaultdict(list)
        events = sorted(SQL_Read.getEvents(year=year))
        for event in events:
            # Seed every team-event with the team's current values; the
            # *_end fields are overwritten below once the event is processed.
            for team_event in event.team_events:
                num = team_event.team_id
                if num in team_ids:
                    team_event.opr_start = team_oprs[num]
                    team_event.opr_end = team_oprs[num]  # overwritten later
                    for field in component_fields:
                        setattr(team_event, field, getattr(team_years[num], field))
                    team_event.ils_1_start = team_ils_1[num]
                    team_event.ils_2_start = team_ils_2[num]
                    team_event.ils_1_end = team_ils_1[num]  # overwritten later
                    team_event.ils_2_end = team_ils_2[num]  # overwritten later

            quals = sorted(SQL_Read.getMatches(event=event.id, playoff=False))
            playoffs = sorted(SQL_Read.getMatches(event=event.id, playoff=True))
            oprs, ils, team_ils_1, team_ils_2, stats = process_event(
                event, quals, playoffs, year, sd_score, team_ils_1, team_ils_2
            )

            opr_acc += stats[0]
            opr_mse += stats[1]
            mix_acc += stats[2]
            mix_mse += stats[3]
            count += stats[4]
            rp1_acc += stats[5]
            rp1_mse += stats[6]
            rp2_acc += stats[7]
            rp2_mse += stats[8]
            count_rp += stats[9]

            for team_event in event.team_events:
                num = team_event.team_id
                if num not in oprs:
                    continue

                final_row = oprs[num][-1]
                opr = clean(final_row[0])
                ils_1 = clean(ils[num][-1][0])
                ils_2 = clean(ils[num][-1][1])

                event_dict = {"opr_end": opr}
                for column, field in enumerate(component_fields, start=1):
                    event_dict[field] = clean(final_row[column])
                event_dict["ils_1_end"] = ils_1
                event_dict["ils_2_end"] = ils_2

                for field, value in event_dict.items():
                    setattr(team_event, field, value)

                team_events[num].append(event_dict)
                team_oprs[num] = opr
                team_ils_1[num] = ils_1
                team_ils_2[num] = ils_2

                for i, m in enumerate(sorted(team_event.team_matches)):
                    # playoff matches all use the final row; others use the
                    # row at the match's position in sorted order
                    index = -1 if m.match.playoff else i
                    opr_row = oprs[num][index]
                    m.opr_score = clean(opr_row[0])
                    for column, field in enumerate(component_fields, start=1):
                        setattr(m, field, clean(opr_row[column]))
                    m.ils_1 = clean(ils[num][index][0])
                    m.ils_2 = clean(ils[num][index][1])

            oprs_end = sorted([clean(oprs[t][-1][0]) for t in oprs], reverse=True)
            event.opr_max = oprs_end[0]
            event.opr_top8 = -1 if len(oprs_end) < 8 else oprs_end[7]
            event.opr_top24 = -1 if len(oprs_end) < 24 else oprs_end[23]
            event.opr_mean = round(sum(oprs_end) / len(oprs_end), 2)
            event.opr_sd = round(statistics.pstdev(oprs_end), 2)

            SQL_Write.add()

        # Attribute used to pick a team's best event and to rank the season.
        rank_key = "opr_end" if year < 2016 else "opr_no_fouls"

        oprs = []
        # Iterate over a snapshot: entries are removed from team_years below.
        for num in list(team_years):
            # 1771 in 2004 only played in elims shrug
            if num not in team_events:
                team_years.pop(num)
                continue

            obj = team_years[num]
            # sorted(...)[-1] keeps the latest event among equal values
            best_event = sorted(team_events[num], key=lambda e: e[rank_key])[-1]
            obj.opr_end = best_event["opr_end"]
            for field in component_fields:
                setattr(obj, field, best_event[field])
            obj.ils_1 = team_ils_1[num]
            obj.ils_2 = team_ils_2[num]
            oprs.append(best_event[rank_key])

        oprs.sort(reverse=True)
        team_year_count = len(oprs)
        for num in team_years:
            obj = team_years[num]
            # oprs is sorted descending, so the first match is the 1-based rank
            obj.opr_rank = rank = oprs.index(getattr(obj, rank_key)) + 1
            obj.opr_percentile = round(rank / team_year_count, 4)

        team_years_all[year] = team_years
        # keeps memory down
        if year - 2 in team_years_all:
            team_years_all.pop(year - 2)

        year_obj = SQL_Read.getYear(year=year)
        year_obj.opr_max = oprs[0]
        year_obj.opr_1p = oprs[round(0.01 * len(oprs))]
        year_obj.opr_5p = oprs[round(0.05 * len(oprs))]
        year_obj.opr_10p = oprs[round(0.10 * len(oprs))]
        year_obj.opr_25p = oprs[round(0.25 * len(oprs))]
        year_obj.opr_median = oprs[round(0.50 * len(oprs))]
        year_obj.opr_mean = round(sum(oprs) / len(oprs), 2)
        year_obj.opr_sd = round(statistics.pstdev(oprs), 2)
        year_obj.opr_acc = round(opr_acc / count, 4)
        year_obj.opr_mse = round(opr_mse / count, 4)
        year_obj.mix_acc = round(mix_acc / count, 4)
        year_obj.mix_mse = round(mix_mse / count, 4)
        # ranking-point stats are only written from 2016 on; -1 before that
        year_obj.rp1_acc = -1 if year < 2016 else round(rp1_acc / count_rp, 4)
        year_obj.rp1_mse = -1 if year < 2016 else round(rp1_mse / count_rp, 4)
        year_obj.rp2_acc = -1 if year < 2016 else round(rp2_acc / count_rp, 4)
        year_obj.rp2_mse = -1 if year < 2016 else round(rp2_mse / count_rp, 4)

        # for faster feedback, could be removed
        SQL_Write.commit()
        printStats(SQL_Write=SQL_Write, SQL_Read=SQL_Read)

    SQL_Write.commit()
    printStats(SQL_Write=SQL_Write, SQL_Read=SQL_Read)
def main(SQL, SQL_Read):
    """Push the data read through ``SQL_Read`` to the cloud and print stats.

    Args:
        SQL: object providing ``getCloudEngine()``; its result is the
            destination handed to ``pushClean``.
        SQL_Read: reader forwarded to ``pushClean``.
    """
    pushClean(SQL_Read, SQL.getCloudEngine())
    printStats()