def betting_stats(self, stat_names=None, window=None):
    """Return games joined with betting data and home/away team statistics.

    Per-game team stats from ``self.game_stats()`` are extended with derived
    columns (pace, possessions, ratings, shooting/turnover/rebound rates and
    two weighted "four factors" composites), restricted to ``stat_names``,
    passed through ``self.windowed_stats`` and then merged twice onto the
    ``games JOIN betting`` result: once keyed on the home team and once on
    the away team.

    Args:
        stat_names: Column names to keep and hand to ``windowed_stats``.
            When ``None``, a default list is used: TEAM_/OPP_ prefixed box
            score stats plus every derived column computed here.
        window: Forwarded unchanged to ``self.windowed_stats``.

    Returns:
        DataFrame with one row per game. Home-team stat columns keep their
        names, away-team columns that clash get an ``_AWAY`` suffix. Rows
        whose ``HOME_SPREAD_WL`` equals ``'P'`` are removed
        (presumably pushes -- TODO confirm against the betting table).
    """
    frame = self.game_stats()

    # Derived columns are added one at a time, in this order, because the
    # helpers receive the frame and may read columns added just before.
    frame['PACE'] = team_stats.pace(frame)
    frame['POSSESSIONS'] = team_stats.possessions(frame)
    frame['TEAM_OFF_RTG'] = team_stats.off_rating(frame)
    frame['TEAM_DEF_RTG'] = team_stats.def_rating(frame)
    frame['TEAM_NET_RTG'] = frame['TEAM_OFF_RTG'] - frame['TEAM_DEF_RTG']
    frame['TEAM_EFG'] = stats.eff_fg_pct(frame, 'TEAM_')
    frame['TEAM_TOV_PCT'] = stats.tov_pct(frame, 'TEAM_')
    frame['TEAM_OREB_PCT'] = team_stats.oreb_pct(frame)
    frame['TEAM_DREB_PCT'] = team_stats.dreb_pct(frame)
    frame['TEAM_FT_PER_FGA'] = stats.ft_per_fga(frame, 'TEAM_')

    shooting = frame['TEAM_EFG']
    off_boards = frame['TEAM_OREB_PCT']
    def_boards = frame['TEAM_DREB_PCT']
    free_throws = frame['TEAM_FT_PER_FGA']
    turnovers = frame['TEAM_TOV_PCT']

    # Weighted composites; the _REB variant splits the rebounding weight
    # evenly between offensive and defensive rebounding.
    frame['TEAM_FOUR_FACTORS'] = (
        0.4 * shooting + 0.2 * off_boards + 0.15 * free_throws
        - 0.25 * turnovers
    )
    frame['TEAM_FOUR_FACTORS_REB'] = (
        0.4 * shooting + 0.1 * off_boards + 0.1 * def_boards
        + 0.15 * free_throws - 0.25 * turnovers
    )

    if stat_names is None:
        box_score = [
            'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'OREB', 'DREB',
            'REB', 'AST', 'TOV', 'STL', 'BLK',
        ]
        derived = [
            'TEAM_OFF_RTG', 'TEAM_DEF_RTG', 'TEAM_NET_RTG', 'TEAM_EFG',
            'TEAM_TOV_PCT', 'TEAM_OREB_PCT', 'TEAM_DREB_PCT',
            'TEAM_FT_PER_FGA', 'TEAM_FOUR_FACTORS', 'TEAM_FOUR_FACTORS_REB',
            'PACE', 'POSSESSIONS',
        ]
        stat_names = (
            ['TEAM_' + name for name in box_score]
            + ['OPP_' + name for name in box_score]
            + derived
        )

    stat_keys = ['SEASON', 'GAME_ID', 'TEAM_ID']
    frame = frame[stat_keys + stat_names]
    frame = self.windowed_stats(frame, stat_names, window=window)

    games = pd.read_sql(
        'SELECT * FROM games JOIN betting ON games.ID is betting.GAME_ID',
        self.__conn)

    # Home-side merge uses pandas' default suffixes; the away-side merge
    # leaves existing names alone and tags the new clashes with _AWAY.
    with_home = games.merge(
        frame,
        left_on=['SEASON', 'ID', 'HOME_TEAM_ID'],
        right_on=stat_keys)
    with_both = with_home.merge(
        frame,
        left_on=['SEASON', 'ID', 'AWAY_TEAM_ID'],
        right_on=stat_keys,
        suffixes=('', '_AWAY'))

    return with_both[with_both['HOME_SPREAD_WL'] != 'P']
def season_stats(self):
    """Return per-season, per-team average stats with derived ratings.

    Box score columns are averaged per (SEASON, TEAM_ID) over the rows of
    ``self.__game_query``. The same derived columns as the per-game stats
    are then added (pace, possessions, offensive/defensive/net rating,
    shooting/turnover/rebound rates and the two weighted "four factors"
    composites), followed by ``TEAM_SRS``.

    ``TEAM_SRS`` is computed per season by fixed-point iteration: starting
    from each team's average plus/minus, the rating is repeatedly replaced
    by ``plus_minus + schedule . rating`` for 10 rounds, where ``schedule``
    holds the games played between each pair of teams divided by the
    average number of games per team.

    Returns:
        DataFrame with one row per (SEASON, TEAM_ID).
    """
    query = '''
        SELECT
            SEASON,
            TEAM_ID,
            AVG(TEAM_MIN) AS TEAM_MIN,
            AVG(TEAM_FGM) AS TEAM_FGM,
            AVG(TEAM_FGA) AS TEAM_FGA,
            AVG(TEAM_FG3M) AS TEAM_FG3M,
            AVG(TEAM_FG3A) AS TEAM_FG3A,
            AVG(TEAM_FTM) AS TEAM_FTM,
            AVG(TEAM_FTA) AS TEAM_FTA,
            AVG(TEAM_OREB) AS TEAM_OREB,
            AVG(TEAM_DREB) AS TEAM_DREB,
            AVG(TEAM_REB) AS TEAM_REB,
            AVG(TEAM_AST) AS TEAM_AST,
            AVG(TEAM_TOV) AS TEAM_TOV,
            AVG(TEAM_STL) AS TEAM_STL,
            AVG(TEAM_BLK) AS TEAM_BLK,
            AVG(TEAM_PTS) AS TEAM_PTS,
            AVG(TEAM_PLUS_MINUS) AS TEAM_PLUS_MINUS,
            AVG(OPP_MIN) AS OPP_MIN,
            AVG(OPP_FGM) AS OPP_FGM,
            AVG(OPP_FGA) AS OPP_FGA,
            AVG(OPP_FG3M) AS OPP_FG3M,
            AVG(OPP_FG3A) AS OPP_FG3A,
            AVG(OPP_FTM) AS OPP_FTM,
            AVG(OPP_FTA) AS OPP_FTA,
            AVG(OPP_OREB) AS OPP_OREB,
            AVG(OPP_DREB) AS OPP_DREB,
            AVG(OPP_REB) AS OPP_REB,
            AVG(OPP_AST) AS OPP_AST,
            AVG(OPP_TOV) AS OPP_TOV,
            AVG(OPP_STL) AS OPP_STL,
            AVG(OPP_BLK) AS OPP_BLK,
            AVG(OPP_PTS) AS OPP_PTS,
            AVG(OPP_PLUS_MINUS) AS OPP_PLUS_MINUS
        FROM ({})
        GROUP BY SEASON, TEAM_ID
    '''.format(self.__game_query)
    data = pd.read_sql(query, self.__conn)

    # Derived columns are added one at a time, in this order, because the
    # helpers receive the frame and may read columns added just before.
    data['PACE'] = team_stats.pace(data)
    data['POSSESSIONS'] = team_stats.possessions(data)
    data['TEAM_OFF_RTG'] = team_stats.off_rating(data)
    data['TEAM_DEF_RTG'] = team_stats.def_rating(data)
    data['TEAM_NET_RTG'] = data['TEAM_OFF_RTG'] - data['TEAM_DEF_RTG']
    data['TEAM_EFG'] = stats.eff_fg_pct(data, 'TEAM_')
    data['TEAM_TOV_PCT'] = stats.tov_pct(data, 'TEAM_')
    data['TEAM_OREB_PCT'] = team_stats.oreb_pct(data)
    data['TEAM_DREB_PCT'] = team_stats.dreb_pct(data)
    data['TEAM_FT_PER_FGA'] = stats.ft_per_fga(data, 'TEAM_')

    efg = data.TEAM_EFG
    oreb = data.TEAM_OREB_PCT
    dreb = data.TEAM_DREB_PCT
    ftr = data.TEAM_FT_PER_FGA
    tov = data.TEAM_TOV_PCT
    data['TEAM_FOUR_FACTORS'] = (
        0.4 * efg + 0.2 * oreb + 0.15 * ftr - 0.25 * tov
    )
    data['TEAM_FOUR_FACTORS_REB'] = (
        0.4 * efg + 0.1 * oreb + 0.1 * dreb + 0.15 * ftr - 0.25 * tov
    )

    query = '''
        SELECT SEASON, TEAM_ID, OPP_ID, COUNT(OPP_ID) AS GAMES_PLAYED
        FROM ({})
        GROUP BY SEASON, TEAM_ID, OPP_ID
    '''.format(self.__game_query)
    opponents = pd.read_sql(query, self.__conn)

    for season in pd.unique(data.SEASON):
        in_season = data.SEASON == season
        season_opponents = opponents[opponents.SEASON == season]

        # Take the team order from `data` itself so that the schedule
        # matrix, the plus/minus vector and the rows written back below are
        # all aligned by construction. Neither query has an ORDER BY, so
        # relying on the two result sets sharing a row order is unsafe.
        teams = data.loc[in_season, 'TEAM_ID'].values

        # Label-based games-played matrix: rows are teams, columns are
        # opponents, both in `teams` order. Pairs that never met become 0;
        # ids absent from `teams` are dropped by the reindex.
        games_played = season_opponents.pivot(
            index='TEAM_ID', columns='OPP_ID', values='GAMES_PLAYED')
        games_played = games_played.reindex(
            index=teams, columns=teams).fillna(0)

        # Scale by the average number of games per team. Not done in place:
        # the array may be a view of pandas-owned memory.
        games_per_team = season_opponents.GAMES_PLAYED.sum() / len(teams)
        schedule = games_played.values / games_per_team

        point_diff = data.loc[in_season, 'TEAM_PLUS_MINUS'].values
        srs = point_diff
        for _ in range(10):
            srs = point_diff + schedule.dot(srs)
        data.loc[in_season, 'TEAM_SRS'] = srs

    return data