def get_nba_team_stats(start_year, type='regular'):
    """Collect year-by-year stats for every team for seasons after ``start_year``.

    One ``TeamYearByYearStats`` request is made per team returned by
    ``teams.get_teams()``.  Each team's frame gets a ``START_YEAR`` column
    (a copy of ``YEAR``) and is reduced to the seasons whose starting year is
    strictly greater than ``start_year``.  The season that starts in
    ``start_year`` itself is excluded, as it was before this rewrite.

    Args:
        start_year: Cut-off starting year, as an int or a numeric string
            (compared with the first four characters of ``YEAR``).
        type: Unused; kept so existing callers keep working.

    Returns:
        A DataFrame indexed by ``(TEAM_ID, YEAR)`` whose remaining columns
        are in sorted order.
    """
    cutoff = int(start_year)

    frames = []
    for team in teams.get_teams():
        team_yby_stats = teamyearbyyearstats.TeamYearByYearStats(
            team_id=team['id']).get_data_frames()[0]
        team_yby_stats['START_YEAR'] = team_yby_stats['YEAR']

        # Filter on the numeric starting year instead of looking up the row
        # position of the cut-off season: a team with no season starting in
        # `start_year` then simply contributes whatever later seasons it has
        # instead of raising IndexError.
        season_start = team_yby_stats['YEAR'].str.slice(stop=4).astype(int)
        frames.append(team_yby_stats[season_start > cutoff])

    # Concatenate once (DataFrame.append was removed in pandas 2.0) and keep
    # the alphabetical column order that append(..., sort=True) used to give.
    reg_season_yby = pd.concat(frames).sort_index(axis=1)

    print('Number of unique team IDs: ', reg_season_yby['TEAM_ID'].nunique())
    return reg_season_yby.set_index(['TEAM_ID', 'YEAR'])
def fusion(l):
    """Build one table holding every season's statistics for all given teams.

    Args:
        l: Iterable of team ids; each one is passed to
            ``TeamYearByYearStats`` as ``team_id``.

    Returns:
        A DataFrame with the first data frame of every team stacked in the
        order of ``l`` and re-indexed from 0.  An empty DataFrame is returned
        when ``l`` is empty.
    """
    frames = [
        teamyearbyyearstats.TeamYearByYearStats(
            team_id=team_id).get_data_frames()[0]
        for team_id in l
    ]
    if not frames:
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated rows for every team.
    return pd.concat(frames, ignore_index=True)
def get_year_by_year_data(outfile):
    """Fetch per-game year-by-year stats for every team and save them.

    Args:
        outfile: Destination passed to ``DataFrame.to_feather``.

    Returns:
        None.  The combined frame (all teams, index reset) is written to
        ``outfile``.
    """
    # (id, full name) pairs for every team.
    roster = [(entry['id'], entry['full_name']) for entry in teams.get_teams()]

    # "PerGame" asks the endpoint for stats normalised by games played.
    request_options = {"per_mode_simple": "PerGame"}

    frames = []
    for team_id, team_name in roster:
        print('fetching data for team ', team_name)
        response = team_stats_endpoint.TeamYearByYearStats(
            team_id=team_id, **request_options)
        frames.append(response.team_stats.get_data_frame())
        # Pause between requests so we don't get throttled.
        time.sleep(1)

    # Stack everything into one frame and write it out.
    combined = pd.concat(frames, ignore_index=True)
    combined.to_feather(outfile)
def scrapeTeamStats():
    """
    Scrapes per-game, regular-season stats year by year for every team
    returned by ``getAllNbaTeams()`` and writes the last five rows of each
    team's table to its own CSV file under the module-level ``filepath``.

    :return: None
    """
    for entry in getAllNbaTeams():
        team_id, team_abbrev = entry['id'], entry['abbreviation']

        # Wait before every request.
        time.sleep(5)
        endpoint = teamyearbyyearstats.TeamYearByYearStats(
            per_mode_simple="PerGame",
            season_type_all_star="Regular Season",
            team_id=team_id)
        all_seasons = endpoint.get_data_frames()[0]

        filename = '{}datasets/team_stats/{}_Stats_By_Year.csv'.format(
            filepath, team_abbrev)

        # Only the last five rows are saved (presumably the five most
        # recent seasons -- assumes the endpoint returns rows oldest first).
        all_seasons.tail(5).to_csv(filename, index=None, header=True)

        print("Finished scraping team stats for {}".format(team_abbrev))
def __init__(self, name):
    """Look up a team by full name and load its year-by-year stats.

    Args:
        name: Name handed to ``teams.find_teams_by_full_name``; the first
            match is used, so an IndexError is raised when nothing matches.
    """
    self.search_name = name

    matches = teams.find_teams_by_full_name(self.search_name)
    self.team = matches[0]
    self.teamid = self.team['id']
    self.teamname = self.team['nickname']

    # First data frame returned by the endpoint for this team.
    frames = teamyearbyyearstats.TeamYearByYearStats(
        self.teamid).get_data_frames()
    self.teamstats = frames[0]
def get_team_active_seasons(selected_team):
    """Return one ``{'label', 'value'}`` option per season of a team.

    Args:
        selected_team: Team id passed to ``TeamYearByYearStats`` as
            ``team_id``.

    Returns:
        A list of dicts, one per entry of the ``YEAR`` column of the first
        data frame, with that entry as both ``'label'`` and ``'value'``.
    """
    endpoint = teamyearbyyearstats.TeamYearByYearStats(team_id=selected_team)
    seasons = endpoint.get_data_frames()[0]['YEAR']
    return [dict(label=season, value=season) for season in seasons]
def team_year_by_year(tid, per_mode):
    """Return display headers and rows for a team's year-by-year stats.

    Args:
        tid: Team id passed to ``TeamYearByYearStats`` as ``team_id``.
        per_mode: Value passed as ``per_mode_simple``.

    Returns:
        A ``(headers, stats)`` tuple.  ``headers`` is a fixed list of column
        titles.  ``stats`` has one list per row of the first result set, in
        which the first three fields are replaced by a single
        ``"<field 1> <field 2>"`` string (presumably city and team name).
    """
    headers = [
        'Team', 'Year', 'GP', 'Wins', 'Losses', 'Win %', 'Conf Rank',
        'Div Rank', 'PO Wins', 'PO Losses', 'Conf Count', 'Div Count',
        'Finals Appearance', 'FGM', 'FGA', 'FG PCT', 'FG3M', 'FG3A',
        'FG3 PCT', 'FTM', 'FTA', 'FT PCT', 'OREB', 'DREB', 'REB', 'AST',
        'PF', 'STL', 'TOV', 'BLK', 'PTS', 'PTS Rank'
    ]

    response = teamyearbyyearstats.TeamYearByYearStats(
        per_mode_simple=per_mode, team_id=tid).get_dict()
    rows = response['resultSets'][0]['rowSet']

    # Drop field 0 and merge fields 1 and 2 into one leading "Team" cell.
    stats = [[row[1] + ' ' + row[2], *row[3:]] for row in rows]
    return headers, stats
# Keep only the fields needed from each team record in `all_teams`.
team_info = {
    'data': [
        {
            'id': team['id'],
            'abbrev': team['abbreviation'],
            'city': team['city'],
        }
        for team in all_teams
    ]
}

# One year-by-year frame per team, in the same order as team_info['data'],
# restricted to seasons labelled "1979-80" or later.
team_resp = {'data': []}
for team_entry in team_info['data']:
    obj = teamyearbyyearstats.TeamYearByYearStats(team_id=team_entry['id'])
    new_resp = obj.get_normalized_dict()
    df = pd.DataFrame.from_dict(new_resp['TeamStats'], orient='columns')
    # YEAR labels are compared as strings, which orders them correctly as
    # long as every label starts with a four-digit year.
    df_1979 = df['YEAR'] >= "1979-80"
    df = df[df_1979]
    team_resp['data'].append(df)

# Per-season tables read from local CSV files.
df1979 = pd.read_csv('./data/season1979.csv')
df1980 = pd.read_csv('./data/season1980.csv')
df1981 = pd.read_csv('./data/season1981.csv')

# Column totals of '3PA' for each season file.
df1979_fg3 = df1979['3PA'].sum()
df1980_fg3 = df1980['3PA'].sum()
df1981_fg3 = df1981['3PA'].sum()

# Column total of 'FGA' for the 1979 file.
df1979_fga = df1979['FGA'].sum()
career = playercareerstats.PlayerCareerStats(player_id='203076')

# Filter out relevant columns
games = games[["GAME_ID", "TEAM_ID", "MATCHUP", "GAME_DATE", "WL", "YEAR"]]

# Find team ids
# NOTE(review): the ids come from `g_df`, not from `games` -- confirm that
# this is the intended source frame.
team_ids = list(g_df["TEAM_ID"].unique())

# Get team stats for all years.  Frames are collected and concatenated once,
# because DataFrame.append was removed in pandas 2.0.
team_stats_frames = []
for t in team_ids:
    print("Getting team stats for {}... ".format(t), end='', flush=True)
    stats = teamyearbyyearstats.TeamYearByYearStats(
        league_id="00",
        per_mode_simple="Totals",
        season_type_all_star="Regular Season",
        team_id=t,
    )
    print("Done!")
    stats_df = stats.team_stats.get_data_frame()
    team_stats_frames.append(stats_df)

# pd.concat rejects an empty list, so fall back to the empty frame the
# original code started from.
team_stats = pd.concat(team_stats_frames) if team_stats_frames else pd.DataFrame()

# Merge the stats into vertical pd of games
games_vertical_stats = pd.merge(left=games,
                                right=team_stats,
                                how='inner',
                                on=["TEAM_ID", "YEAR"])

# Get the first and last match data.  `keep` must be passed by keyword:
# it is keyword-only in pandas 2.0.
dup1 = games_vertical_stats.drop_duplicates('GAME_ID', keep='first')
dup2 = games_vertical_stats.drop_duplicates('GAME_ID', keep='last')

# Drop columns that describe the same data
def download_current_team_stats():
    """
    Downloads year-by-year team stats for every team, cleans them, and saves
    the result in the JSON file "data/team-stats.json".

    Each ``YEAR`` label such as "2011-12" is replaced by its leading year
    plus one (2012), and only rows whose resulting year is at least the
    module-level ``earliest_season`` are kept.  Teams are identified by the
    ``Tm`` column, which holds the team abbreviation.

    Side effects: writes "data/nba-api-team-stats.csv" (all seasons) and
    "data/team-stats.json" (filtered seasons), and sleeps one second before
    each request.
    """
    all_teams = teams.get_teams()

    # Maps the endpoint's TEAM_ID to the team abbreviation ("Tm").
    team_id_pairing = pd.DataFrame(all_teams)[['abbreviation', 'id']].rename(
        columns={'abbreviation': 'Tm', 'id': 'TEAM_ID'})

    stat_columns = [
        "GP",
        "WINS",
        "LOSSES",
        "WIN_PCT",
        "CONF_RANK",
        "DIV_RANK",
        "PO_WINS",
        "PO_LOSSES",
        "CONF_COUNT",
        "DIV_COUNT",
        "NBA_FINALS_APPEARANCE",
        "FGM",
        "FGA",
        "FG_PCT",
        "FG3M",
        "FG3A",
        "FG3_PCT",
        "FTM",
        "FTA",
        "FT_PCT",
        "OREB",
        "DREB",
        "REB",
        "AST",
        "PF",
        "STL",
        "TOV",
        "BLK",
        "PTS",
        "PTS_RANK",
    ]

    season_frames = []
    for t in all_teams:
        time.sleep(1)
        stat_json = json.loads(
            teamyearbyyearstats.TeamYearByYearStats(int(t["id"])).get_json())
        result_set = stat_json["resultSets"][0]
        # Label each frame with its own headers instead of assigning the
        # last response's headers to the combined frame afterwards.
        season_frames.append(
            pd.DataFrame(result_set["rowSet"], columns=result_set["headers"]))

    # Concatenate once; DataFrame.append was removed in pandas 2.0.
    team_df = pd.concat(season_frames, ignore_index=True)

    team_stats_id_merged = pd.merge(team_df, team_id_pairing, on="TEAM_ID")
    # Selecting the wanted columns also discards TEAM_ID / TEAM_CITY /
    # TEAM_NAME; copy so the YEAR assignment below acts on an independent
    # frame.
    team_stats = team_stats_id_merged[["Tm", "YEAR"] + stat_columns].copy()

    # "2011-12" -> 2012 (presumably the calendar year the season ends in).
    team_stats["YEAR"] = (
        team_stats["YEAR"].str.split("-").str[0].astype(int) + 1)

    # NOTE(review): the CSV is written with its index and read straight
    # back.  The round trip looks redundant but is kept so both output
    # files stay exactly as before.
    team_stats.to_csv("data/nba-api-team-stats.csv")
    team_stats = pd.read_csv("data/nba-api-team-stats.csv")
    team_stats = team_stats.drop(columns=["Unnamed: 0"])

    team_stats = team_stats[team_stats.YEAR >= earliest_season]
    team_stats = team_stats.rename(columns={"YEAR": "Year"})
    team_stats.to_json("data/team-stats.json")
def __init__(self, id, per_mode):
    """Load a team's record and its year-by-year stats.

    Args:
        id: Team id, used both for ``teams.find_team_name_by_id`` and as the
            first positional argument of ``TeamYearByYearStats``.
        per_mode: Value passed to the endpoint as ``per_mode_simple``.
    """
    team_record = teams.find_team_name_by_id(id)
    self.dict = team_record
    self.name = team_record['nickname']

    # First data frame returned by the endpoint for this team.
    endpoint = teamyearbyyearstats.TeamYearByYearStats(
        id, per_mode_simple=per_mode)
    self.teamstats = endpoint.get_data_frames()[0]
from nba_api.stats.endpoints import teamgamelog
from nba_api.stats import endpoints
from NBA_Player.shot_charts import ShotCharts
from NBA_Player.statistics import Statistics
from NBA_Player.box_scores import BoxScores

# Explore what these data sets offer (i.e. which variables can be used).
teams_dict = teams.get_teams()
team_details = list(teams_dict)

# League leaders for the 'FGA' stat category.
data = endpoints.leagueleaders.LeagueLeaders(stat_category_abbreviation='FGA')
leaders = data.league_leaders.get_data_frame()

# First team record whose full name is exactly "Los Angeles Lakers".
lakers = [
    candidate for candidate in teams_dict
    if candidate["full_name"] == "Los Angeles Lakers"
][0]
player_dict = players.get_players()

# Lakers year-by-year stats and their 2019-20 regular-season game log.
lakers_id = lakers['id']
lakersStats = teamyearbyyearstats.TeamYearByYearStats(
    team_id=lakers_id).get_data_frames()[0]
lakersRegularSznGameLog = teamgamelog.TeamGameLog(
    team_id=lakers_id,
    season="2019-20",
    season_type_all_star="Regular Season").get_data_frames()[0]

# The linear regression model class takes two variables and draws a linear
# regression graph plotting them against each other:
# model1 = RegressionModel('PTS', 'AST', 'AST', [1, 2, 5, 8, 12])
# model1.draw()

# Creating an NBA Player using class
lebron = NBAPlayer(fullname='LeBron James',
                   playerTeam="Los Angeles Lakers",
                   opponentTeam="Golden State Warriors")
# Checking favourite team: every team record whose nickname is 'Rockets'.
team_dict = teams.get_teams()
rockets = [team for team in team_dict if team['nickname'] == 'Rockets']
print(rockets)

# Obtaining the three-pointer stats.
team_id_arr = [
    '1610612737', '1610612738', '1610612739', '1610612740', '1610612741',
    '1610612742', '1610612743', '1610612744', '1610612745', '1610612746',
    '1610612747', '1610612748', '1610612749', '1610612750', '1610612751',
    '1610612752', '1610612753', '1610612754', '1610612755', '1610612756',
    '1610612757', '1610612758', '1610612759', '1610612760', '1610612761',
    '1610612762', '1610612763', '1610612764', '1610612765', '1610612766'
]

# NOTE(review): this team id is hard-coded and is not taken from `rockets`
# above -- confirm it is the intended team.
singleteam_threept_stats = teamyearbyyearstats.TeamYearByYearStats(
    league_id='00', team_id='1610612765')

# Disabled: the same request for every id in team_id_arr.
#team_threept_stats = []
#for i in range(len(team_id_arr)):
#    team_threept_stats.append(teamyearbyyearstats.TeamYearByYearStats(league_id='00', team_id=team_id_arr[i]))
#print(team_threept_stats)

team_threept_stats_df = singleteam_threept_stats.get_data_frames()[0]

# Share of field-goal attempts that were three-point attempts (a fraction,
# despite the variable name).
percent_of_shots_from_three = team_threept_stats_df[
    'FG3A'] / team_threept_stats_df['FGA']

# Append the new column at the end.  The position is computed instead of
# hard-coded to 34, which raised IndexError whenever the frame had fewer
# than 34 columns.
team_threept_stats_df.insert(len(team_threept_stats_df.columns), "FG3A/FGA",
                             percent_of_shots_from_three)

all_team_three_stats = team_threept_stats_df[['FG3A/FGA', 'FG3A',
                                              'FGA']].copy()