def list_all_games(self):
    """Collect the games of every season in ``self.seasons`` into ``self.games_lst``.

    Each entry of ``self.seasons`` is a string whose first four characters,
    once the ``'Season_'`` marker is removed, are the starting year of the
    season.  The ``games_info`` table of each season is stacked row-wise.

    The per-season tables are gathered first and concatenated once; calling
    ``pd.concat`` inside the loop would re-copy the accumulated rows on every
    iteration.  With no seasons the result is an empty DataFrame.
    """
    season_frames = []
    for iy in self.seasons:
        year_begin = int(iy.replace('Season_', '')[:4])
        iSea = Season(self.db_root, year_begin)
        season_frames.append(iSea.games_info)
    # pd.concat raises on an empty sequence, so keep the empty-table result explicit.
    if season_frames:
        self.games_lst = pd.concat(season_frames, axis=0)
    else:
        self.games_lst = pd.DataFrame()
def setUp(self):
    """Prepare the fixtures: database root, model folder, one season and one game."""
    configuration = Config()
    # Root folder of the Hockey database.
    self.db_root = configuration.data_dir
    self.repoCode = get_git_root()
    model_relative_path = 'ReinforcementLearning/NHL/playerstats/offVSdef/Automatic_classification/MODEL_perceptron_1layer_10units_relu'
    self.repoModel = path.join(self.repoCode, model_relative_path)

    # Season starting in 2012.
    self.season = Season(
        db_root=self.db_root,
        repo_model=self.repoModel,
        year_begin=2012,
    )

    # Montreal hosted Ottawa on March 13, 2013: translate that date into a game code.
    game_day = datetime.date(2013, 3, 13)
    game_code = self.season.get_game_id(home_team_abbr='MTL', game_date=game_day)
    self.a_game = Game(self.season, gameId=game_code)
def test_games_before_january_not_2012(self):
    """Can I fetch games for home teams on the months October-December of a season?"""
    # Every starting year from 2005 to 2013, except 2012.
    eligible_years = list(set(range(2005, 2014)) - {2012})
    repetitions = 20  # number of times the check is repeated
    for _ in range(repetitions):
        year_begin = choice(eligible_years)
        season = Season(
            global_logger,
            db_root=self.db_root,
            repo_model=self.repoModel,
            year_begin=year_begin,
        )
        home_team = choice(list(season.get_teams()))
        the_month = randint(10, 12)
        the_day = randint(15, 30)
        base_date = datetime.date(
            year=season.year_begin, month=the_month, day=the_day)
        # Large enough window for a game to always exist.
        delta_in_days = randint(20, 40)
        query = {
            'game_date': base_date,
            'home_team_abbr': home_team,
            'delta_in_days': delta_in_days,
        }
        result = season.get_game_at_or_just_before(**query)
        if result is None:
            # NOTE(review): the outcome of this second lookup is discarded;
            # it looks like a debugging hook -- confirm before removing.
            season.get_game_at_or_just_before(**query)
        failure_message = (
            "[%s] Impossible to find a game for '%s' up to %d days before %s"
            % (season, home_team, delta_in_days, base_date))
        self.assertIsNotNone(result, failure_message)
class TestGame(unittest.TestCase):
    """Testing definitions of Game's."""

    def setUp(self):
        """Load the season starting in 2012 and one known MTL home game as fixtures."""
        self.db_root = Config().data_dir  # This is the location of the Hockey database
        self.repoCode = get_git_root()
        self.repoModel = path.join(
            self.repoCode,
            'ReinforcementLearning/NHL/playerstats/offVSdef/Automatic_classification/MODEL_perceptron_1layer_10units_relu'
        )
        # Now lets get game data
        self.season = Season(
            db_root=self.db_root, repo_model=self.repoModel, year_begin=2012)
        # Montreal received Ottawa on march 13, 2013, let's convert game date to game code
        gameId = self.season.get_game_id(
            home_team_abbr='MTL',
            game_date=datetime.date(year=2013, month=3, day=13))
        self.a_game = Game(self.season, gameId=gameId)

    def _random_home_games(self, how_many=10, days_before=20):
        """Yield up to ``how_many`` randomly chosen home games of ``self.season``.

        For each attempt a random date between January and April 2013 and a
        random team are drawn; the game that team hosted at or just before
        that date (looking back ``days_before`` days) is yielded.  Attempts
        for which the season returns no game are reported and skipped.
        """
        # random.sample() stopped accepting sets in Python 3.11; materialise
        # the teams as a list so the draw works whatever collection is returned.
        all_teams = list(self.season.get_teams())
        for _ in range(how_many):
            random_date = datetime.date(year=2013,
                                        month=random.randint(1, 4),
                                        day=random.randint(1, 28))
            random_team = random.choice(all_teams)
            result = self.season.get_game_at_or_just_before(
                random_date,
                home_team_abbr=random_team,
                delta_in_days=days_before)
            if result is None:
                print("WARNING => No home game for '%s' up to %d days before %s"
                      % (random_team, days_before, random_date))
                continue
            random_game_id, game_date = result
            print("[home team: '%s'] Examining game %d (from %s)" %
                  (random_team, random_game_id, game_date))
            yield Game(self.season, gameId=random_game_id)

    def test_shifts_differential(self):
        """Does every change of the differential coincide with a goal?"""
        for random_game in self._random_home_games():
            shifts = random_game.lineShifts.shifts
            df_differential = shifts['differential'].reset_index()
            deltas = df_differential.diff()
            idxs_change_differential = deltas[
                deltas.differential != 0].index.values
            # Skip the first one: the first row of a diff is NaN, which
            # compares as "different from 0".
            for idx in idxs_change_differential[1:]:
                row = shifts.iloc[idx]
                self.assertNotEqual(
                    row['GOAL'], 0,
                    "Differential is %d but there was no goal scored" %
                    (row['differential']))

    def test_shifts_goals_generate_differential(self):
        """Does each goal move the differential by exactly the goal value?"""
        for random_game in self._random_home_games():
            shifts = random_game.lineShifts.shifts
            df_goals = shifts['GOAL'].reset_index()
            # Position 0 is excluded because it has no previous shift to compare to.
            idxs_goals = [
                a_val for a_val in df_goals[df_goals.GOAL != 0].index.values
                if a_val > 0
            ]
            for idx in idxs_goals:
                diff = shifts.iloc[idx]['differential']
                diff_before = shifts.iloc[idx - 1]['differential']
                goals_now = shifts.iloc[idx]['GOAL']
                expected_result = diff_before + goals_now
                # Clamp the start of the context window: a negative start
                # would wrap around and show an empty table in the message.
                context = shifts.iloc[max(idx - 2, 0):idx + 2][[
                    'GOAL', 'differential'
                ]]
                self.assertEqual(
                    diff, expected_result,
                    "\n%s\n [index: %d] Differential is %d but it should be => %d (== (diff before) %d + %d (goals now))"
                    % (context, idx, diff, expected_result, diff_before,
                       goals_now))
def pull_RL_data(self, repoModel, repoSave=None, verbose=0, fetcher='default'):
    """Build the (state, action, reward) table from every game in ``self.games_lst``.

    :param repoModel: folder holding the players' classifier model and the
        pickled ``baseVariables.p`` file.
    :param repoSave: optional folder; when given, the accumulated tables are
        pickled there whenever the running game counter is a multiple of 20
        and the current game produced data.
    :param verbose: any value above 0 prints a progress bar and reports
        games for which no data could be retrieved.
    :param fetcher: value stored on each game's ``players_classes_mgr``.

    Sets ``self.classifier``, ``self.players_model``, ``self.RL_data``,
    ``self.state_size`` and ``self.action_size``.  The two sizes are ``None``
    when no game produced any data.
    """

    def _dump(payload, file_name):
        # Close the handle deterministically so the pickle is fully written.
        with open(path.join(repoSave, file_name), 'wb') as out_file:
            pickle.dump(payload, out_file)

    # Prepare players model: reload info
    CLS = ANN_classifier()
    sess, annX, annY = CLS.ann_reload_model(repoModel)
    self.classifier = {'annX': annX, 'annY': annY, 'sess': sess}
    with open(path.join(repoModel, 'baseVariables.p'), 'rb') as model_file:
        self.players_model = pickle.load(model_file)
    # Make lines dictionary
    self.make_line_dictionary()
    # List line shifts
    RL_data = pd.DataFrame()
    GAME_data = pd.DataFrame()
    PLAYER_data = pd.DataFrame()
    count = 0
    # Defined up front so the final assignments cannot hit an unbound name
    # when no game yields a state space.
    nS = nA = None
    n_games_total = len(self.games_lst)
    # Loop on seasons
    for iy in np.unique(self.games_lst['season'].values):
        # Extract season data
        iSea = Season(self.db_root, int(str(iy)[:4]))
        # List games
        games = self.games_lst[self.games_lst['season'] == iy]
        # Loop on games
        for ic, ih, ia in zip(games['gcode'].values,
                              games['hometeam'].values,
                              games['awayteam'].values):
            # Extract game data
            iGame = Game(iSea, gameId=ic)
            # Check if some data was retrieved:
            if len(iGame.df_wc) > 0:
                # NOTE(review): this assigns to `set_stats_fetcher` rather than
                # calling it -- confirm that it is meant to be an attribute.
                iGame.players_classes_mgr.set_stats_fetcher = fetcher
                player_classes = iGame.players_classes_mgr.get(
                    equal_strength=True,
                    regular_time=True,
                    min_duration=20,
                    nGames=30)
                # update shifts to reflect the same parameters
                lineSHFT = iGame.as_df('both', True, True, 20)
                # Check if some data was retrieved:
                if len(player_classes) > 0:
                    # Add game identifier data
                    lineSHFT['season'] = iy
                    lineSHFT['gameCode'] = ic
                    lineSHFT['hometeam'] = ih
                    lineSHFT['awayteam'] = ia
                    S, A, R, nS, nA, coded = iGame.build_statespace(
                        self.line_dictionary)
                    # Concatenate data
                    df_ic = np.transpose(
                        np.reshape(np.concatenate((S, A, R)), [3, -1]))
                    RL_data = pd.concat(
                        (RL_data,
                         pd.DataFrame(df_ic,
                                      columns=['state', 'action', 'reward'])),
                        axis=0)
                    GAME_data = pd.concat((GAME_data, lineSHFT[coded]),
                                          axis=0)
                    # Players data
                    plDT = player_classes
                    plDT['season'] = iy
                    plDT['gameCode'] = ic
                    PLAYER_data = pd.concat((PLAYER_data, plDT), axis=0)
                    # Save data
                    if repoSave is not None and count % 20 == 0:
                        _dump(
                            {
                                'RL_data': RL_data,
                                'nStates': nS,
                                'nActions': nA
                            }, 'RL_teaching_data.p')
                        _dump(GAME_data, 'GAME_data.p')
                        _dump(PLAYER_data, 'PLAYER_data.p')
                elif verbose > 0:
                    print('*** EMPTY GAME ***')
            elif verbose > 0:
                print('*** EMPTY GAME ***')
            # Status bar
            if verbose > 0:
                stdout.write('\r')
                stdout.write(
                    "Game %i/%i - season %s game %s: [%-60s] %d%%, completed"
                    % (count, n_games_total, iy, ic,
                       '=' * int(count / n_games_total * 60),
                       100 * count / n_games_total))
                stdout.flush()
            count += 1
    self.RL_data = RL_data
    self.state_size = nS
    self.action_size = nA
def evaluate_all_coaches(self, season_year_begin: int,
                         teams_opt: Optional[List[str]], n_games: int):
    """Score the home coach of each team against the Q-table and pickle the tallies.

    :param season_year_begin: year in which the season to evaluate starts.
    :param teams_opt: team abbreviations to evaluate; ``None`` evaluates
        every team returned by the season.
    :param n_games: number of games requested per team.

    For each team the dictionary returned by ``get_teams_coach_performance``
    is written to ``<self.base_dir>/<team>.pkl``.
    """
    # Function-level import, as in the original code.
    from ReinforcementLearning.NHL.playbyplay.state_space_data import HockeySS

    os.makedirs(self.base_dir, exist_ok=True)
    my_config = Config()
    self.alogger.debug("Data configured to be in '%s'" %
                       (my_config.data_dir))
    db_root = my_config.data_dir
    repoCode = get_git_root()
    repoModel = path.join(
        repoCode,
        'ReinforcementLearning/NHL/playerstats/offVSdef/Automatic_classification/MODEL_perceptron_1layer_10units_relu'
    )
    season = Season(self.alogger,
                    db_root=db_root,
                    year_begin=season_year_begin,
                    repo_model=repoModel)

    # Line translation table
    linedict = HockeySS(db_root)
    linedict.make_line_dictionary()
    linedict = linedict.line_dictionary

    # Load the Qvalues table; the `with` block closes the file handle.
    q_table_file = path.join(
        repoCode,
        'ReinforcementLearning/NHL/playbyplay/data/stable/RL_action_values.p')
    with open(q_table_file, 'rb') as q_file:
        Qvalues = pickle.load(q_file)['action_values']

    # Visualize it dimensions (period x differential x away line's code x home line's code)
    print('Q-table dimensions: ', Qvalues.shape)

    # for what teams will we run this calculation?
    calc_teams = season.get_teams() if teams_opt is None else teams_opt
    for a_team in calc_teams:
        season.alogger.debug("=============> calculating %s" % (a_team))
        seconds = get_teams_coach_performance(season,
                                              team_abbr=a_team,
                                              maybe_a_starting_date=None,
                                              line_dict=linedict,
                                              Qvalues=Qvalues,
                                              how_many_games=n_games)
        season.alogger.debug(seconds)
        if seconds["does_not_match_optimal"] == seconds[
                "matches_optimal"] == 0:
            season.alogger.info(
                "[team: '%s'] No evidence for coach to be evaluated on." %
                (a_team))
        else:
            total_secs = seconds['matches_optimal'] + seconds[
                'does_not_match_optimal']
            season.alogger.info(
                "['%s'] Home coach's score is %d (secs. optimal) / %d (secs. total) = %.2f (in [0,1])"
                % (a_team, seconds['matches_optimal'], total_secs,
                   seconds['matches_optimal'] / total_secs))
        # The tally is saved even when there was no evidence for the team.
        file_to_save = os.path.join(self.base_dir, a_team + ".pkl")
        self.alogger.debug("Saving data for '%s' in file %s" %
                           (a_team, file_to_save))
        with open(file_to_save, 'wb') as dict_file:
            pickle.dump(seconds, dict_file)
    self.alogger.debug("DONE")
def get_teams_coach_performance(season: Season, team_abbr: str,
                                maybe_a_starting_date: Optional[datetime.date],
                                how_many_games: int, line_dict: dict,
                                Qvalues) -> dict:
    """Tally how long a home coach's line choices matched the recommended ones.

    Starting from ``maybe_a_starting_date`` (or February 1st of the season's
    ending year when it is ``None``), up to ``how_many_games`` home games of
    ``team_abbr`` are examined.  For each game the lines of the away team are
    predicted from its previous games, home lines are recommended against
    them, and every shift of the game adds its ice duration to one of the
    counters of the result.

    :param season: season to take the games from.
    :param team_abbr: abbreviation of the home team whose coach is evaluated.
    :param maybe_a_starting_date: date to start looking for games at, or ``None``.
    :param how_many_games: number of games to attempt; must be positive.
    :param line_dict: line translation table handed to the game's line recoding.
    :param Qvalues: Q-values table, indexed by
        (period, differential, line code, line code).
    :return: dictionary with the number of seconds under the keys
        ``'matches_optimal'``, ``'does_not_match_optimal'`` and
        ``'unknown_adversary'``.
    """
    params = {
        "games_to_predict_away_lines": 7,
        "optimal_examine_num_first_lines": 5,  # None examines ALL
        "first_day_of_season": datetime.date(year=season.year_end,
                                             month=2,
                                             day=1),
        # TODO: check why nothing is found when starting in
        # season.year_begin (e.g. on December 1st).
        # Upper bound on the day-by-day search for the next home game, so
        # that the search ends when the team has no further home game.
        "max_days_to_next_home_game": 60,
    }
    assert (how_many_games > 0)
    one_day = datetime.timedelta(days=1)
    last_game_date_accounted_for = None
    seconds = {  # result will be stored here.
        # number of seconds the action of coach did not match the optimal
        'does_not_match_optimal': 0,
        # number of seconds the action of coach matched the optimal
        'matches_optimal': 0,
        # number of seconds we can't determine the value of the action of coach
        'unknown_adversary': 0
    }
    while how_many_games > 0:
        season.alogger.info("games left: %d; so far: %s" %
                            (how_many_games, seconds))
        how_many_games -= 1
        if last_game_date_accounted_for is None:
            base_date = maybe_a_starting_date if maybe_a_starting_date is not None else params[
                "first_day_of_season"]
            result = season.get_game_at_or_just_before(
                game_date=base_date,
                home_team_abbr=team_abbr,
                delta_in_days=20)
            if result is None:
                season.alogger.info(
                    "There is no game for '%s' just before %s" %
                    (team_abbr, base_date))
                return seconds
            gameId, d = result
            base_date = base_date + one_day  # convenient for next cycle of computation
        else:
            # Walk forward one day at a time until the lookup returns a game
            # other than the one already accounted for.
            found = False
            days_searched = 0
            while not found:
                if days_searched > params["max_days_to_next_home_game"]:
                    season.alogger.info(
                        "No further game for '%s' after %s" %
                        (team_abbr, last_game_date_accounted_for))
                    return seconds
                result = season.get_game_at_or_just_before(
                    game_date=base_date, home_team_abbr=team_abbr)
                if result is None:
                    season.alogger.info(
                        "There is no game for '%s' just before %s" %
                        (team_abbr, base_date))
                    return seconds
                gameId, d = result
                base_date = base_date + one_day  # convenient for next cycle of computation
                days_searched += 1
                found = (d != last_game_date_accounted_for)
        # Remember the date of the game itself (not the search date), so that
        # the next cycle can recognise this game and skip it.
        last_game_date_accounted_for = d

        season.alogger.info("Fetched game %d, played on %s" % (gameId, d))
        data_for_a_game = Game(season, gameId)
        home_players = data_for_a_game.get_ids_of_home_players()
        if len(home_players) < 12:
            season.alogger.info(
                "Can't get enough info for home players (WEIRD!!). Ignoring game %d"
                % (data_for_a_game.gameId))
            continue

        # prediction of the lines that the 'away' team will use:
        formation_opt = season.get_lines_for(
            d - one_day,
            how_many_games_back=params["games_to_predict_away_lines"],
            team_abbrev=data_for_a_game.away_team)
        if formation_opt is None:
            season.alogger.debug(
                "Couldn't get a prediction of lines %s will use" %
                (data_for_a_game.away_team))
            continue
        away_lines_names = formation_opt.as_names
        away_lines = formation_opt.as_categories
        season.alogger.info(away_lines_names)
        season.alogger.info(away_lines)

        # === Now we get the indices in the Q-values tables corresponding to lines
        # Get lines and translate them
        playersCode = data_for_a_game.encode_line_players()
        linesCode = np.array(
            [[data_for_a_game.recode_line(line_dict, a) for a in b]
             for b in playersCode])
        # Get the Q-value for that specific line
        iShift = 0  # First shift
        lineShifts = data_for_a_game.lineShifts.as_df(
            team='both',
            equal_strength=data_for_a_game.shifts_equal_strength,
            regular_time=data_for_a_game.shifts_regular_time,
            min_duration=20)
        player_classes = data_for_a_game.players_classes_mgr.get(
            equal_strength=True,
            regular_time=True,
            min_duration=20,
            nGames=30)  # TODO: why these parameters?
        plList = list(player_classes.loc[lineShifts['playersID'].iloc[iShift][0]]['firstlast'].values) + \
            list(player_classes.loc[lineShifts['playersID'].iloc[iShift][1]]['firstlast'].values)
        diff = data_for_a_game.recode_differential(
            lineShifts.iloc[iShift].differential)
        period = data_for_a_game.recode_period(lineShifts.iloc[iShift].period)
        q_values = Qvalues[period, diff, linesCode[iShift, 0],
                           linesCode[iShift, 1]]
        season.alogger.info(
            '[diff = %d, period = %d] First shift: \n\thome team: %s, %s, %s \n\taway team: %s, %s, %s \n\tQvalue: %.2f'
            % (diff, period, plList[0], plList[1], plList[2], plList[3],
               plList[4], plList[5], q_values))

        q_values_fetcher_from_game_data = QValuesFetcherFromGameData(
            game_data=data_for_a_game, lines_dict=line_dict, q_values=Qvalues)
        line_rec = LineRecommender(
            game=data_for_a_game,
            player_category_fetcher=CategoryFetcher(
                data_for_game=data_for_a_game),
            q_values_fetcher=q_values_fetcher_from_game_data)
        home_lines_rec = line_rec.recommend_lines_maximize_average(
            home_team_players_ids=home_players,
            away_team_lines=away_lines,
            examine_max_first_lines=params["optimal_examine_num_first_lines"])
        season.alogger.info(home_lines_rec)
        season.alogger.info(
            data_for_a_game.formation_ids_to_str(home_lines_rec))

        # let's examine actual decisions and how it compares with optimal:
        for data in lineShifts[['home_line', 'away_line',
                                'iceduration']].itertuples():
            home_line = data.home_line
            away_line = data.away_line
            num_seconds = data.iceduration
            away_line_cats = data_for_a_game.classes_of_line(away_line)
            if None in away_line_cats:
                season.alogger.info(
                    "Can't get category of one of away players")
                continue
            away_line_cats = tuple(np.sort(away_line_cats))
            if away_line_cats not in away_lines:
                # No recommendation was calculated against this away line.
                seconds['unknown_adversary'] += num_seconds
                continue
            idx_of_away = away_lines.index(away_line_cats)
            cats_of_optimal = data_for_a_game.classes_of_line(
                home_lines_rec[idx_of_away])
            home_line_cats = data_for_a_game.classes_of_line(home_line)
            # Lines are compared as sets of categories, ignoring order.
            if set(cats_of_optimal) == set(home_line_cats):
                seconds['matches_optimal'] += num_seconds
            else:
                seconds['does_not_match_optimal'] += num_seconds
    return seconds