def advanced_season_totals_to_csv(season):
    """Export one season's advanced player totals to a CSV in the working directory.

    The file is named ``<season>_<season + 1>_advanced_player_season_totals.csv``.

    :param season: Integer year; forwarded unchanged as ``season_end_year``.
    """
    next_season = season + 1
    # Format the years into the name: concatenating the integers onto string
    # fragments with ``+`` raises TypeError before any export happens.
    # NOTE(review): ``season`` is sent as the season *end* year yet appears first
    # in the file name - confirm the intended naming.
    output_file_path = "./{}_{}_advanced_player_season_totals.csv".format(
        season, next_season)
    client.players_advanced_season_totals(
        season_end_year=season,
        output_type=OutputType.CSV,
        output_file_path=output_file_path,
    )
    def assert_player_advanced_season_totals_csv(self):
        """Export the configured season as CSV and compare it to the expected file.

        Reads ``self.year``, ``self.output_file_path``,
        ``self.expected_output_file_path`` and ``self.include_combined_values``.
        The comparison is line by line.
        """
        players_advanced_season_totals(
            season_end_year=self.year,
            output_type=OutputType.CSV,
            output_file_path=self.output_file_path,
            include_combined_values=self.include_combined_values,
        )

        with open(self.output_file_path, "r", encoding="utf8") as actual_file:
            with open(self.expected_output_file_path, "r", encoding="utf8") as expected_file:
                actual_lines = actual_file.readlines()
                expected_lines = expected_file.readlines()

        self.assertEqual(actual_lines, expected_lines)
    def assert_player_advanced_season_totals_json(self):
        """Export the configured season as JSON and compare it to the expected file.

        Reads ``self.year``, ``self.output_file_path``,
        ``self.expected_output_file_path`` and ``self.include_combined_values``.
        Both files are parsed, so the comparison ignores formatting differences.
        """
        players_advanced_season_totals(
            season_end_year=self.year,
            output_type=OutputType.JSON,
            output_file_path=self.output_file_path,
            include_combined_values=self.include_combined_values,
        )

        with open(self.output_file_path, "r", encoding="utf8") as actual_file:
            with open(self.expected_output_file_path, "r", encoding="utf8") as expected_file:
                actual_payload = json.load(actual_file)
                expected_payload = json.load(expected_file)

        self.assertEqual(actual_payload, expected_payload)
 def test_last_2018_players_advanced_season_totals_row(self):
     """The row at index 604 of the 2018 advanced totals matches the pinned values."""
     actual_row = players_advanced_season_totals(season_end_year=2018)[604]
     expected_row = {
         "age": 20,
         "assist_percentage": 8.8,
         "block_percentage": 3.0,
         "box_plus_minus": -2.0,
         "defensive_box_plus_minus": -0.3,
         "defensive_rebound_percentage": 20.1,
         "defensive_win_shares": 0.5,
         "free_throw_attempt_rate": 0.418,
         "games_played": 43,
         "is_combined_totals": False,
         "minutes_played": 410,
         "name": "Ivica Zubac",
         "offensive_box_plus_minus": -1.8,
         "offensive_rebound_percentage": 11.8,
         "offensive_win_shares": 0.5,
         "player_efficiency_rating": 15.3,
         "positions": [Position.CENTER],
         "slug": "zubaciv01",
         "steal_percentage": 0.9,
         "team": Team.LOS_ANGELES_LAKERS,
         "three_point_attempt_rate": 0.008,
         "total_rebound_percentage": 16.0,
         "true_shooting_percentage": 0.557,
         "turnover_percentage": 15.3,
         "usage_percentage": 17.6,
         "value_over_replacement_player": 0.0,
         "win_shares": 1.0,
         "win_shares_per_48_minutes": 0.118,
     }
     self.assertEqual(actual_row, expected_row)
示例#5
0
def player_stats():
    """Export the 2020 regular and advanced player season totals to two CSV files.

    Both destinations are fixed absolute Windows paths.
    """
    season_totals_csv = (
        "C:\\Users\\NWHAL\\Documents\\nba_project\\2020_player_season_totals.csv"
    )
    advanced_totals_csv = (
        "C:\\Users\\NWHAL\\Documents\\nba_project\\2020_advanced_player_season_totals.csv"
    )

    print("collecting player season totals")
    client.players_season_totals(
        season_end_year=2020,
        output_type=OutputType.CSV,
        output_file_path=season_totals_csv,
    )

    print("collecting advanced player stats")
    client.players_advanced_season_totals(
        season_end_year=2020,
        output_type=OutputType.CSV,
        output_file_path=advanced_totals_csv,
    )
def get_rookie_advanced_stats(year, rookie_names):
    """Return advanced season totals for the named rookies, keyed by player name.

    Only entries whose ``name`` is in ``rookie_names`` and whose
    ``games_played`` is at least 30 are kept. When a name occurs more than
    once, the last matching entry wins.

    :param year: passed through as ``season_end_year``.
    :param rookie_names: container of player names to keep.
    :return: dict mapping player name to that player's totals entry.
    """
    return {
        entry['name']: entry
        for entry in client.players_advanced_season_totals(season_end_year=year)
        if entry['name'] in rookie_names and entry['games_played'] >= 30
    }
def get_roster_player_list(team):
    """Return the 2020 advanced and regular season-total rows for one team.

    Both tables are exported to ``./csv/`` first and read back with pandas,
    then each is filtered on its own ``team`` column.

    :param team: object exposing ``name``; ``team.name[1].upper()`` is the
        substring matched against the ``team`` column.
    :return: tuple ``(advanced_rows, season_total_rows)`` of DataFrames.
    """
    advanced_csv = "./csv/advanced_players.csv"
    totals_csv = "./csv/season_totals.csv"

    client.players_advanced_season_totals(
        season_end_year=2020,
        output_type=OutputType.CSV,
        output_file_path=advanced_csv)
    client.players_season_totals(
        season_end_year=2020,
        output_type=OutputType.CSV,
        output_file_path=totals_csv)

    df_advanced_stats = pd.read_csv(advanced_csv)
    df_total_season_stats = pd.read_csv(totals_csv)

    # NOTE(review): ``team.name[1]`` takes the second element of ``team.name``
    # (a single character if ``name`` is a plain string) - confirm the expected
    # shape of ``team``.
    team_key = team.name[1].upper()

    players_advanced_roster_stats = df_advanced_stats.loc[
        df_advanced_stats['team'].str.contains(team_key)]
    # Build the mask from the frame being filtered. Reusing the advanced-stats
    # mask here selected rows by the other table's index.
    players_total_roster_stats = df_total_season_stats.loc[
        df_total_season_stats['team'].str.contains(team_key)]

    return players_advanced_roster_stats, players_total_roster_stats
示例#8
0
def plot_usage_vs_points():
    """Scatter-plot usage percentage against average points per game for 2020.

    Advanced totals supply ``usage_percentage``; regular totals supply
    ``points`` and ``games_played``. The two tables are combined by row
    position.
    """
    # Local import: only ``DataFrame`` is known to be in scope in this file.
    from pandas import concat

    def _stack(records):
        # One concat per table. ``DataFrame.append`` was removed in pandas 2.0
        # and re-copied the whole frame on every row.
        frames = [DataFrame(record) for record in records]
        return concat(frames, ignore_index=True) if frames else DataFrame()

    df = _stack(client.players_advanced_season_totals(season_end_year=2020))
    df2 = _stack(client.players_season_totals(season_end_year=2020))

    # NOTE(review): this relies on both endpoints returning players in the
    # same order with the same number of rows - confirm.
    df['avg_points'] = round(df2['points'] / df2['games_played'], 2)
    df.plot(x='usage_percentage', y='avg_points', kind='scatter')
    plt.show()
示例#9
0
 def test_players_advanced_season_totals_json(self):
     """The in-memory JSON for 2018 equals the expected fixture once both are parsed."""
     expected_output_file_path = os.path.join(
         os.path.dirname(__file__),
         "../output/expected/player_advanced_season_totals_2018.json",
     )
     result = players_advanced_season_totals(season_end_year=2018,
                                             output_type=OutputType.JSON)
     # Read the fixture with an explicit encoding so the test does not depend
     # on the platform default; the file-based helpers in this suite do the same.
     with open(expected_output_file_path, "r", encoding="utf8") as expected_output:
         self.assertEqual(
             json.loads(result),
             json.load(expected_output),
         )
示例#10
0
def get_season_advanced():
    """
    Scrapes advanced stats for every season end year from 1950 through 2019.

    Each player entry gets a ``year`` key holding the season end year it was
    fetched for (the entries returned by the client are modified in place).

    :return: DataFrame with one row per player entry per season.
    """
    rows = []
    for year in range(1950, 2020):
        for player in client.players_advanced_season_totals(season_end_year=year):
            player["year"] = year
            rows.append(player)
    # Build the frame once. ``DataFrame.append`` was removed in pandas 2.0 and
    # copied the whole frame for every appended row.
    return pd.DataFrame(rows)
示例#11
0
 def create_player_totals_csv(year_start, year_end, advanced=False):
     """Export one season-totals CSV per year in ``[year_start, year_end]``.

     Files are written as ``stats_<year>.csv`` into ``data/adv_total_stats``
     when ``advanced`` is true, otherwise into ``data/total_stats``. Progress
     is printed after every year.

     :param year_start: first season end year (inclusive).
     :param year_end: last season end year (inclusive).
     :param advanced: export advanced totals instead of regular totals.
     """
     if advanced:
         output_dir = "data/adv_total_stats"
         export = client.players_advanced_season_totals
     else:
         output_dir = "data/total_stats"
         export = client.players_season_totals

     year_count = year_end - year_start + 1
     for i in range(year_start, year_end + 1):
         # Write into the directory that is created here. The export path used
         # to omit the "data/" prefix and so pointed at a directory this
         # function never created.
         os.makedirs(output_dir, exist_ok=True)
         export(
             season_end_year=i,
             output_type=OutputType.CSV,
             output_file_path="{}/stats_{}.csv".format(output_dir, i))
         print(
             "Exported stats for the year {}. {} percent completed".format(
                 i, 100 * ((i - year_start + 1) / year_count)))
示例#12
0
def get_df_advanced_player_stats(year):

    """
    Aggregates the ADVANCED stats of each player for the given year into one
    minutes-weighted row per (slug, name, age, position).

    Side effects: prints the column index of the intermediate frame and pickles
    that intermediate (per-entry, un-aggregated) frame to
    ``c.PICKLE_PATH_ADV_STATS``.

    :param year: Integer Ex.) 2021; passed as ``season_end_year``
    :return: DataFrame with the group-by columns plus one column per advanced stat
    """

    adv = client.players_advanced_season_totals(season_end_year=year)
    
    df_advanced = pd.json_normalize(adv)
    # Derive a single "position" label from the stringified "positions" value:
    # keep the text between the first and second ':' and trim the surrounding
    # quote / bracket characters.
    # NOTE(review): presumably the string looks like "[<Position.X: 'X'>]";
    # an entry with several positions would keep trailing text, and one with no
    # ':' would yield NaN and fail in the map below - confirm against real data.
    df_advanced["position"] = df_advanced["positions"].astype(str).str.split(":").str[1]
    df_advanced["position"] = df_advanced["position"].map(lambda x: x.lstrip(" '").rstrip(">]'"))

    # Columns that identify one output row.
    cols_groupby = ["slug", "name", "age", "position"]
    # Stats that get minutes-weighted and aggregated.
    cols_adv = ['player_efficiency_rating',
       'true_shooting_percentage', 'three_point_attempt_rate',
       'free_throw_attempt_rate', 'offensive_rebound_percentage',
       'defensive_rebound_percentage', 'total_rebound_percentage',
       'assist_percentage', 'steal_percentage', 'block_percentage',
       'turnover_percentage', 'usage_percentage', 'offensive_win_shares',
       'defensive_win_shares', 'win_shares', 'win_shares_per_48_minutes',
       'offensive_box_plus_minus', 'defensive_box_plus_minus',
       'box_plus_minus', 'value_over_replacement_player']

    cols_adv_contrib = [col + "_contrib" for col in cols_adv]

    # Total minutes per group. The "minutues" spelling is kept as is because
    # the column ends up in the pickled frame.
    df_total_minutes = df_advanced.groupby(cols_groupby)["minutes_played"].sum().reset_index()
    df_total_minutes = df_total_minutes.rename(columns = {"minutes_played": "minutues_played_total"})

    # Weight of each entry = its share of the group's total minutes
    # (NaN when the group total is 0).
    df_stats = pd.merge(df_advanced, df_total_minutes, on=cols_groupby)
    df_stats["weight"] = df_stats["minutes_played"] / df_stats["minutues_played_total"]

    print(df_stats.columns)

    # Get contribs
    for col in cols_adv:
        df_stats[col+"_contrib"] = df_stats[col] * df_stats["weight"]

    # Aggregate contributions to get final df
    df_stats_agg = df_stats.groupby(cols_groupby)[cols_adv_contrib].sum().reset_index()

    # Remove "contrib" column name from all contrib columns
    cols_df_stats_agg = [col.replace("_contrib", "") for col in df_stats_agg.columns]
    df_stats_agg.columns = cols_df_stats_agg

    # Persist the intermediate frame (includes weights and *_contrib columns),
    # not the aggregate that is returned.
    df_stats.to_pickle(c.PICKLE_PATH_ADV_STATS)

    return df_stats_agg
def clean_advanced(year):
    """Collapse a season's advanced stats into one minutes-weighted row per player name.

    Each selected numeric column is multiplied by the entry's share of the
    player's total minutes and then summed per ``name``; ``age`` is averaged
    per ``name`` and a constant ``year`` column is added.

    :param year: passed as ``season_end_year``.
    :return: DataFrame indexed by ``name``.
    """
    advanced_stats = client.players_advanced_season_totals(
        season_end_year=year)
    df = pd.DataFrame(advanced_stats)
    # Handle quirk in data where traded players are represented as multiple observations
    df['minutes_played_total'] = df.groupby('name').minutes_played.transform(
        'sum')
    # Share of the player's minutes contributed by this entry (NaN when the total is 0).
    df['proportion'] = df['minutes_played'] / df['minutes_played_total']
    # NOTE(review): the dtype selection runs after the two helper columns were
    # added, so 'proportion' (and 'minutes_played_total' when it matches the
    # 'int' dtype) is part of num_cols: it is scaled in the loop below and
    # appears in the returned frame. The helper columns come last, so the real
    # stat columns are still weighted with the unmodified proportion - confirm
    # whether the helpers are meant to be in the output.
    # NOTE(review): 'int' is platform dependent; where it does not match the
    # frame's integer dtype the remove() calls below raise ValueError - confirm.
    num_cols = list(df.select_dtypes(include=['int', 'float64']))
    unwanted_num_cols = ['age', 'minutes_played', 'games_played']
    for col in unwanted_num_cols:
        num_cols.remove(col)
    for col in num_cols:
        df[col] = df[col] * df['proportion']
    # NOTE(review): grouping by 'name' merges distinct players who share a name.
    df_grouped = df.groupby('name')[num_cols].agg('sum')
    df_grouped['age'] = df.groupby('name')['age'].agg('mean')
    df_grouped['year'] = year
    return df_grouped
 def test_first_2018_players_advanced_season_totals_row(self):
     """The first row of the 2018 advanced totals matches the pinned values."""
     actual_row = players_advanced_season_totals(season_end_year=2018)[0]
     expected_row = {
         "age": 24,
         "assist_percentage": 3.4,
         "block_percentage": 0.6,
         "box_plus_minus": -1.5,
         "defensive_box_plus_minus": 0.4,
         "defensive_rebound_percentage": 8.9,
         "defensive_win_shares": 1.0,
         "free_throw_attempt_rate": 0.158,
         "games_played": 75,
         "is_combined_totals": False,
         "minutes_played": 1134,
         "name": "\u00c1lex Abrines",
         "offensive_box_plus_minus": -1.9,
         "offensive_rebound_percentage": 2.5,
         "offensive_win_shares": 1.3,
         "player_efficiency_rating": 9.0,
         "positions": [Position.SHOOTING_GUARD],
         "slug": "abrinal01",
         "steal_percentage": 1.7,
         "team": Team.OKLAHOMA_CITY_THUNDER,
         "three_point_attempt_rate": 0.759,
         "total_rebound_percentage": 5.6,
         "true_shooting_percentage": 0.567,
         "turnover_percentage": 7.4,
         "usage_percentage": 12.7,
         "value_over_replacement_player": 0.1,
         "win_shares": 2.2,
         "win_shares_per_48_minutes": 0.094,
     }
     self.assertEqual(actual_row, expected_row)
 def test_players_advanced_season_totals_csv_append(self):
     """Exporting the 2018 totals as CSV with the APPEND write option completes without raising."""
     # NOTE(review): the file name says 2019 while the season requested is 2018 - confirm.
     csv_path = "./player_advanced_season_totals_2019.csv"
     players_advanced_season_totals(
         season_end_year=2018,
         output_type=OutputType.CSV,
         output_file_path=csv_path,
         output_write_option=OutputWriteOption.APPEND,
     )
 def test_players_advanced_season_totals_csv(self):
     """Exporting the 2018 totals as CSV completes without raising."""
     # NOTE(review): the file name says 2019 while the season requested is 2018 - confirm.
     csv_path = "./player_advanced_season_totals_2019.csv"
     players_advanced_season_totals(
         season_end_year=2018,
         output_type=OutputType.CSV,
         output_file_path=csv_path,
     )
 def test_players_advanced_season_totals_json(self):
     """Requesting the 2018 totals as JSON returns something other than None."""
     self.assertIsNotNone(
         players_advanced_season_totals(
             season_end_year=2018,
             output_type=OutputType.JSON,
         )
     )
 def test_players_advanced_season_totals(self):
     """Requesting the 2018 totals with default output returns something other than None."""
     self.assertIsNotNone(players_advanced_season_totals(season_end_year=2018))
# -*- coding: utf-8 -*-
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType
import pandas as pd

# Season labels come from the 'seasons' column of the workbook.
seasons_df = pd.read_excel(io='years_nba.xlsx')
seasons_list = seasons_df['seasons'].tolist()

# Keep only the first four characters of every label.
# NOTE(review): that slice is passed as season_end_year below - confirm the
# labels start with the season's end year rather than its start year.
seasons = [season_label[:4] for season_label in seasons_list]

# One advanced-stats CSV per season, written to the working directory.
for nba_season in seasons:
    file_path_thistime = "./{}_advanced_stats.csv".format(nba_season)
    client.players_advanced_season_totals(
        season_end_year=nba_season,
        output_type=OutputType.CSV,
        output_file_path=file_path_thistime,
    )
def _prompt_date_parts():
    """Ask for a date; return ``(raw_text, parts)`` with ``parts`` None when malformed."""
    raw_text = input("\nEnter a date (use this format 1-1-2018): ")
    parts = raw_text.split("-")
    if len(parts) != 3 or not all(part.strip().isdigit() for part in parts):
        return raw_text, None
    return raw_text, parts


def _prompt_season_end_year():
    """Ask for a season end year and return the text exactly as typed."""
    return input("\nEnter season end year: ")


def _export_report(export, file_name, **query):
    """Run one CSV export into ``exported_files/`` and print the absolute path written."""
    # Local import: only ``abspath`` is known to be imported by this file.
    import os

    output_path = "exported_files/" + file_name
    print("Exporting report please wait..........")
    # Make sure the target directory exists so a first run does not fail on it.
    os.makedirs("exported_files", exist_ok=True)
    export(output_type=OutputType.CSV, output_file_path=output_path, **query)
    print("Report exported at: " + abspath(output_path) + "!!\n\n")


def main():
    """Run the interactive report-export menu until the user picks "6" (Exit).

    Options 1 and 4 ask for a date, options 2, 3 and 5 ask for a season end
    year; each writes one CSV into ``exported_files/``. A date that is not three
    dash-separated numbers is rejected with a message instead of ending the
    program with IndexError.

    NOTE(review): the values typed by the user are forwarded to the client as
    strings, as before - confirm the client accepts them.
    """
    print("\n#######################################################################")
    print("#######################################################################")
    print("######################### NBA Report exporter #########################")
    print("#######################################################################")
    print("#######################################################################\n")

    while True:

        print(
            "1. Players box scores by a date\
           \n2. Players season statistics for a season\
           \n3. Players advanced season statistics for a season\
           \n4. All Team box scores by a date\
           \n5. Schedule for a season\
           \n6. Exit"
        )
        report_choice = input("\nPlease select a option: ")

        # Players box scores by a date
        if report_choice == "1":
            input_date, date_parts = _prompt_date_parts()
            if date_parts is None:
                print("Invalid date!!\n\n")
                continue
            _export_report(
                client.player_box_scores,
                "all-player-box-report-" + input_date + ".csv",
                day=date_parts[0],
                month=date_parts[1],
                year=date_parts[2],
            )

        # Players season statistics for a season
        elif report_choice == "2":
            end_year = _prompt_season_end_year()
            _export_report(
                client.players_season_totals,
                "all-player-season-report-" + end_year + ".csv",
                season_end_year=end_year,
            )

        # Players advanced season statistics for a season
        elif report_choice == "3":
            end_year = _prompt_season_end_year()
            _export_report(
                client.players_advanced_season_totals,
                "all-player-advanced-season-report-" + end_year + ".csv",
                season_end_year=end_year,
            )

        # All Team box scores by a date
        elif report_choice == "4":
            input_date, date_parts = _prompt_date_parts()
            if date_parts is None:
                print("Invalid date!!\n\n")
                continue
            _export_report(
                client.team_box_scores,
                "all-team-report-" + input_date + ".csv",
                day=date_parts[0],
                month=date_parts[1],
                year=date_parts[2],
            )

        # Schedule for a season
        elif report_choice == "5":
            end_year = _prompt_season_end_year()
            _export_report(
                client.season_schedule,
                "season-schedule-" + end_year + ".csv",
                season_end_year=end_year,
            )

        # Exit
        elif report_choice == "6":
            print("\n#######################################################################")
            print("################################# Bye #################################")
            print("#######################################################################\n")
            break

        # Error
        else:
            print("Invalid option!!\n\n")
示例#21
0
 def test_2019_player_advanced_season_totals(self):
     """The 2019 advanced totals are returned and contain at least one entry."""
     advanced_totals = client.players_advanced_season_totals(
         season_end_year=2019,
     )
     self.assertIsNotNone(advanced_totals)
     entry_count = len(advanced_totals)
     self.assertTrue(entry_count > 0)
def get_advanced_season_totals(year):
    """Return whatever the client yields for the advanced totals of ``year``.

    :param year: passed as ``season_end_year``.
    """
    advanced_totals = client.players_advanced_season_totals(
        season_end_year=year,
    )
    return advanced_totals
示例#23
0
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType, Team
# Fetch the schedule for season_end_year 2021 and print it.
schedule = client.season_schedule(season_end_year=2021)
print(schedule)

# Advanced player totals for season_end_year 2018, written as CSV.
# NOTE(review): the file name does not mention "advanced" - confirm.
client.players_advanced_season_totals(
    season_end_year=2018,
    output_type=OutputType.CSV,
    output_file_path="./2017_2018_player_season_totals.csv",
)

# Play-by-play for the Boston home game on 2018-10-16, written as CSV.
# NOTE(review): the file name says 10_06 while the call uses day=16 - confirm.
client.play_by_play(
    home_team=Team.BOSTON_CELTICS,
    year=2018,
    month=10,
    day=16,
    output_type=OutputType.CSV,
    output_file_path="./2018_10_06_BOS_PBP.csv",
)

# Regular-season box scores for player "westbru01", season_end_year 2018.
client.regular_season_player_box_scores(
    player_identifier="westbru01",
    season_end_year=2018,
    output_type=OutputType.CSV,
    output_file_path="./2017_2018_russell_westbrook_regular_season_box_scores.csv",
)
示例#24
0
def create_base_df(season_year):
    """Build a per-player frame of 2020 season totals joined with 2019-20 salaries.

    Steps: fetch the season totals, stringify and shorten the position/team
    values, derive shooting percentages and total rebounds, add an
    accent-free name, aggregate per player, join salaries from
    ``nba_beta_salary.csv`` on ``slug`` (players without a salary row are
    dropped) and add points per game as ``ppg``.

    :param season_year: currently unused.
        NOTE(review): the season (2020) and the salary column ('2019-20') are
        hard-coded - confirm whether this parameter should drive them.
    :return: DataFrame with one row per player, without the ``slug`` column.
    """
    from basketball_reference_web_scraper import client
    import pandas as pd
    import unicodedata

    # The advanced totals used to be fetched here as well but were never
    # read; the extra request has been dropped.
    season_stats = client.players_season_totals(season_end_year=2020)
    total_df = pd.DataFrame(season_stats)

    total_df['positions'] = total_df['positions'].astype(str)
    total_df['team'] = total_df['team'].astype(str)

    # Exact stringified position lists mapped to short codes; a value that is
    # not listed here (e.g. a multi-position list) is left unchanged.
    positions = {
        "[<Position.CENTER: 'CENTER'>]": "C",
        "[<Position.SHOOTING_GUARD: 'SHOOTING GUARD'>]": "SG",
        "[<Position.POWER_FORWARD: 'POWER FORWARD'>]": "PF",
        "[<Position.SMALL_FORWARD: 'SMALL FORWARD'>]": "SF",
        "[<Position.POINT_GUARD: 'POINT GUARD'>]": "PG",
        "[<Position.GUARD: 'GUARD'>]": "SG"
    }
    # Applied as regular expressions to every string column of the frame.
    team_sub = {"Team.": "", "_": ' '}

    total_df = total_df.replace(positions).replace(team_sub, regex=True)

    # Percentages are undefined (NaN) for zero attempts; fillna(0) zeroes them.
    total_df = total_df.assign(
        field_goal_percentage=(total_df['made_field_goals'] * 100 /
                               total_df['attempted_field_goals']).round(1),
        three_point_field_goal_percentage=(
            total_df['made_three_point_field_goals'] * 100 /
            total_df['attempted_three_point_field_goals']).round(1),
        free_throw_percentage=(total_df['made_free_throws'] * 100 /
                               total_df['attempted_free_throws']).round(1),
        rebounds=total_df['offensive_rebounds'] +
        total_df['defensive_rebounds']).fillna(0)

    # ASCII-only version of the name with dots removed.
    total_df['no_accents'] = total_df['name'].apply(
        lambda x: unicodedata.normalize('NFD', x).encode(
            'ascii', 'ignore').decode('UTF-8').replace(".", ""))
    # Write through .loc: the chained form ``df.col[mask] = value`` may assign
    # to a temporary copy and leave the frame unchanged.
    total_df.loc[total_df['no_accents'] == 'Taurean Waller-Prince',
                 'no_accents'] = 'Taurean Prince'

    # One row per player: counting stats are summed, the two percentages are
    # averaged across the player's rows, and the last listed team is kept.
    aggregations = {
        'field_goal_percentage': 'mean',
        'free_throw_percentage': 'mean',
        'made_three_point_field_goals': 'sum',
        'made_field_goals': 'sum',
        'made_free_throws': 'sum',
        'games_played': 'sum',
        'attempted_field_goals': 'sum',
        'attempted_free_throws': 'sum',
        'rebounds': 'sum',
        'assists': 'sum',
        'blocks': 'sum',
        'steals': 'sum',
        'turnovers': 'sum',
        'team': 'last'
    }
    total_df = total_df.groupby(['name', 'slug', 'no_accents'],
                                as_index=False).agg(aggregations).drop_duplicates()

    salaries = pd.read_csv("nba_beta_salary.csv", sep=",", engine='python')

    total_df_with_salaries = total_df.join(
        salaries[['slug', '2019-20']].set_index('slug'),
        on='slug').dropna()

    total_df_with_salaries = total_df_with_salaries.drop('slug', axis=1)

    # Points per game rebuilt from makes: 2 per two-point field goal,
    # 3 per three-pointer, 1 per free throw.
    made_threes = total_df_with_salaries['made_three_point_field_goals']
    made_twos = total_df_with_salaries['made_field_goals'] - made_threes
    total_points = (2 * made_twos + 3 * made_threes +
                    total_df_with_salaries['made_free_throws'])
    total_df_with_salaries['ppg'] = (
        total_points / total_df_with_salaries['games_played'])

    return total_df_with_salaries
示例#25
0
 def get_advanced_totals(self, season_year):
     """Return the client's advanced season totals for ``season_year``."""
     return client.players_advanced_season_totals(season_year)
 def test_2018_players_advanced_season_totals_length(self):
     """The 2018 advanced totals contain exactly 605 entries."""
     entry_count = len(players_advanced_season_totals(season_end_year=2018))
     self.assertEqual(entry_count, 605)
 def test_2001_players_advanced_season_totals_csv(self):
     """Exporting the 2001 totals as CSV with the WRITE option completes without raising."""
     csv_path = "./player_advanced_season_totals_2001.csv"
     players_advanced_season_totals(
         season_end_year=2001,
         output_type=OutputType.CSV,
         output_file_path=csv_path,
         output_write_option=OutputWriteOption.WRITE,
     )
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType
import pandas as pd

# Get 2019-2020 advanced and regular season statistics for all players
filename = "data/player_advanced_stats_2020.csv"
client.players_advanced_season_totals(season_end_year=2020,
    output_type=OutputType.CSV, output_file_path=filename)
client.players_season_totals(season_end_year=2020,
    output_type=OutputType.CSV, output_file_path="data/player_stats_2020.csv")

# Merge the two exports side by side, pairing rows by position.
# All three files are managed by the with-statement, so the combined file is
# flushed and closed (the output handle used to be left open).
with open(filename) as advanced, \
        open('data/player_stats_2020.csv') as regular, \
        open('data/player_combined_data_2020.csv', 'w') as newdata:
    reglines = regular.readlines()
    # NOTE(review): zip stops at the shorter file; both exports are assumed to
    # list the same players in the same order - confirm.
    for advanced_line, regular_line in zip(advanced.readlines(), reglines):
        # Insert the field delimiter between the two halves. Without it the
        # last advanced field and the first regular field were fused into one.
        newdata.write(advanced_line.replace('\n', '') + ',' + regular_line)
示例#29
0
# season_end_year can only be 2000 - 2020
# Visit https://github.com/jaebradley/basketball_reference_web_scraper for more info

import json

from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

season_end_year = 2020

total_season_stats = client.players_season_totals(season_end_year)
advanced_stats = client.players_advanced_season_totals(season_end_year)

# Nest each advanced entry under the season-totals entry at the same position,
# and replace the position/team values with their ``.value`` so the result can
# be serialised.
# NOTE(review): entries are paired purely by position in the two lists - confirm
# both calls return the same players in the same order.
for totals_entry, advanced_entry in zip(total_season_stats, advanced_stats):
    totals_entry["advanced_stats"] = advanced_entry
    totals_entry["positions"] = totals_entry["positions"][0].value
    totals_entry["team"] = totals_entry["team"].value
    # The nested copy drops the two keys already present on the outer entry.
    for duplicated_key in ('positions', 'team'):
        del advanced_entry[duplicated_key]

season_stats = json.dumps(total_season_stats)

output_name = './{year}_season_totals.json'.format(year=season_end_year)
with open(output_name, 'w') as outfile:
    json.dump(total_season_stats, outfile)