def test_get_box_scores_from_2001(self):
    """Export the 2001-01-01 player box scores to ``./foo.csv`` as CSV.

    Smoke test: no assertion is made, so it passes whenever the export call
    returns without raising.
    """
    export_options = {
        "output_type": OutputType.CSV,
        "output_file_path": "./foo.csv",
        "output_write_option": OutputWriteOption.WRITE,
    }
    player_box_scores(day=1, month=1, year=2001, **export_options)
def test_output_csv_box_scores_to_file(self):
    """Write the 2001-01-01 player box scores to ``./foo.csv`` in CSV form.

    The file contents are not inspected; the test only requires the client
    call to complete without raising.
    """
    game_date = {"day": 1, "month": 1, "year": 2001}
    client.player_box_scores(
        output_type=OutputType.CSV,
        output_file_path="./foo.csv",
        output_write_option=OutputWriteOption.WRITE,
        **game_date
    )
def player_box():
    """Export daily player box scores for late 2019 and early 2020 to CSV.

    Covers October-December 2019 and January through month ``m`` of 2020.
    One ``player_scores_<year>_<month>_<day>.csv`` file is written per day
    into the hard-coded ``nba_project`` directory.

    Day numbers run 1-31 for every month.  Dates the client rejects with
    ``InvalidDate`` are skipped one at a time, so a single impossible date
    (e.g. November 31) no longer aborts every date after it.  The 2020 pass
    now requests the date being iterated instead of the fixed ``d``/``m``
    pair.

    NOTE(review): ``m`` (last 2020 month to fetch) and ``msg`` are read from
    module scope and are not defined here -- confirm both exist.
    """
    print("retrieving player box scores")
    # hard code hell yeh
    _export_daily_player_box_scores(2019, range(10, 13))
    print(msg)
    _export_daily_player_box_scores(2020, range(1, m + 1))
    print(msg)


def _export_daily_player_box_scores(year, months):
    """Write one CSV of player box scores per day of ``months`` in ``year``."""
    for month in months:
        for day in range(1, 32):
            try:
                client.player_box_scores(
                    day=day,
                    month=month,
                    year=year,
                    output_type=OutputType.CSV,
                    output_file_path=(
                        "C:\\Users\\NWHAL\\Documents\\nba_project\\"
                        "player_scores_{}_{}_{}.csv".format(year, month, day)
                    ),
                )
            except InvalidDate:
                # Only this date is skipped; the remaining days still run.
                continue
def test_get_box_scores_from_2001(self):
    """Export the 2001-01-01 player box scores as CSV next to this test file.

    The output lands in the ``output`` directory beside this module.  No
    assertion is made; the test passes when the export does not raise.
    """
    # NOTE(review): the file name refers to a 2003-10-29 TOR play-by-play
    # although 2001-01-01 box scores are written -- confirm this is intended.
    test_dir = os.path.dirname(__file__)
    csv_path = os.path.join(test_dir, "./output/2003_10_29_TOR_pbp.csv")
    player_box_scores(
        day=1,
        month=1,
        year=2001,
        output_type=OutputType.CSV,
        output_file_path=csv_path,
        output_write_option=OutputWriteOption.WRITE,
    )
def test_2001_01_01_player_box_scores(self):
    """CSV export for 2001-01-01 must match the expected fixture line by line.

    Writes the box scores to ``self.output_file_path`` and compares the
    resulting lines with those of ``self.expected_output_file_path``.
    """
    export_request = {
        "day": 1,
        "month": 1,
        "year": 2001,
        "output_type": OutputType.CSV,
        "output_file_path": self.output_file_path,
        "output_write_option": OutputWriteOption.WRITE,
    }
    player_box_scores(**export_request)

    with open(self.output_file_path, "r", encoding="utf8") as output_file:
        with open(self.expected_output_file_path, "r", encoding="utf8") as expected_output_file:
            written_lines = output_file.readlines()
            expected_lines = expected_output_file.readlines()
    self.assertEqual(written_lines, expected_lines)
def complete_historical_data(self, save=True):
    """Collect daily player box scores from the latest game day back to 1950.

    Method to seed feature store.  Starting from yesterday, walks backwards
    until a day yields box scores, then keeps stepping back one day at a time
    while the year is greater than 1950.  Each day's rows get a ``date``
    column holding that day.

    :param bool save: When true, write the accumulated frame to
        ``nba_ref/box_scores_all_of_them.csv`` each time a calendar year is
        finished and once more at the end.
    :return: All collected box scores, newest day first.
    :rtype: pandas.DataFrame
    """
    csv_path = 'nba_ref/box_scores_all_of_them.csv'
    frames = []
    last_date = date.today() - timedelta(days=1)

    # Find the most recent day that produces box scores.
    while not frames:
        try:
            latest = calc_points(
                pd.DataFrame(
                    client.player_box_scores(day=last_date.day,
                                             month=last_date.month,
                                             year=last_date.year)))
            latest['date'] = last_date
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still stops
            # the search.
            print('No games played on ' + str(last_date))
            last_date = last_date - timedelta(days=1)
        else:
            frames.append(latest)

    # Delete after running correctly (or add some progress bar thing)
    y = last_date.year
    y_check = y
    # NOTE(review): unlike the search above, this loop does not handle days
    # for which the client or calc_points raises -- confirm that is intended.
    while y > 1950:
        last_date = last_date - timedelta(days=1)
        y = last_date.year
        daily = calc_points(
            pd.DataFrame(
                client.player_box_scores(day=last_date.day,
                                         month=last_date.month,
                                         year=y)))
        # Tag only this day's rows; assigning on the accumulated frame would
        # overwrite the date of every earlier row.
        daily['date'] = last_date
        frames.append(daily)
        if y_check != y:
            print(str(y_check) + ' complete.')
            if save:
                pd.concat(frames).to_csv(csv_path)
            y_check = y

    # One concat at the end replaces the removed, quadratic DataFrame.append.
    box_scores = pd.concat(frames)
    if save:
        box_scores.to_csv(csv_path)
    return box_scores
def getLastTwoWeeksAveragePoints(playerObj):
    """Average a player's fantasy points over games in the previous 14 days.

    For each of the last 14 days (yesterday back to 14 days ago) the day's
    box scores are fetched and searched for ``playerObj.name``.  Every game
    found is scored with ``FantasyScoreFromSingleGame``.

    :param playerObj: Object exposing the player's name as ``.name``.
    :return: Total fantasy points divided by games played, or 0 when the
        player appeared in no game (a message is printed in that case).
    """
    playerName = playerObj.name
    totalGamesPlayed = 0
    totalFantasyPoints = 0
    # Read the clock once so every offset is relative to the same instant,
    # even if the loop runs across midnight.
    now = datetime.datetime.now()

    for daysBack in range(1, 15):
        dateToPull = now - datetime.timedelta(daysBack)
        listOfGames = client.player_box_scores(day=dateToPull.day,
                                               month=dateToPull.month,
                                               year=dateToPull.year)
        currentBoxScore = getBoxScoreForPlayerFromLists(playerName,
                                                        listOfGames)
        if currentBoxScore is not None:
            totalGamesPlayed += 1
            totalFantasyPoints += FantasyScoreFromSingleGame(currentBoxScore)

    if totalGamesPlayed == 0:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("{} played 0 games!".format(playerName))
        return 0
    return totalFantasyPoints / totalGamesPlayed
def get_player_box_score(name, logger, date_obj=None, timeout=3):
    """
    Search backwards day by day for a box score belonging to a player.

    The match is a case-insensitive substring test of ``name`` against each
    box score's ``'name'`` entry; the first match on the most recent day wins.

    :param str name: Name (or part of the name) of the player to look for.
    :param logger: Logging object; ``info`` is called for each date tried and
        once more when the search gives up.
    :param datetime.datetime date_obj: First day to search; defaults to today.
    :param int timeout: Maximum number of days to try before giving up.
    :return: ``(box_score, date)`` where ``box_score`` is the matching entry
        or ``None``, and ``date`` is the day the search stopped on.
    :rtype: tuple
    """
    needle = name.lower()
    if date_obj is None:
        date_obj = datetime.datetime.today()

    match = None
    while timeout > 0:
        attempt_label = date_obj.strftime('%y-%m-%d')
        logger.info('Attempting date: %s' % attempt_label)
        daily_scores = client.player_box_scores(day=date_obj.day,
                                                month=date_obj.month,
                                                year=date_obj.year)
        match = next(
            (entry for entry in daily_scores
             if needle in entry['name'].lower()),
            None,
        )
        if match is not None:
            break
        date_obj -= datetime.timedelta(days=1)
        timeout -= 1
    else:
        # Reached only when the day budget ran out without a match.
        logger.info("Timeout reached.")

    return match, date_obj
def pull_boxscores(day, season_year='2019-2020', season_type='regular'):
    """Fetch one day's player box scores and append them to the SQL table.

    Each box score is tagged with the game date and the season labels, then
    reshaped: total rebounds and points are derived, shooting columns are
    renamed to short names, unused columns are dropped, and the team and
    opponent values are replaced by their ``.name``.  The result is appended
    to the ``boxscores`` table through the module-level ``engine`` and its
    shape is printed.

    :param day: Datetime-like object exposing ``day``, ``month``, ``year``
        and ``date()``.
    :param str season_year: Label stored in the ``season_year`` column.
    :param str season_type: Label stored in the ``season_type`` column.
    :return: None.  Nothing is written when the day has no box scores.
    """
    boxscores = client.player_box_scores(day=day.day,
                                         month=day.month,
                                         year=day.year)
    if not boxscores:
        # An empty result has none of the columns used below.
        return

    game_date = day.date().strftime('%Y-%m-%d')
    for item in boxscores:
        item.update({
            "date": game_date,
            "season_year": season_year,
            "season_type": season_type,
        })

    boxscores_df = pd.DataFrame(boxscores)
    boxscores_df['rebounds'] = (boxscores_df['offensive_rebounds'] +
                                boxscores_df['defensive_rebounds'])
    boxscores_df = boxscores_df.rename(columns={
        'attempted_field_goals': 'fga',
        'attempted_free_throws': 'fta',
        'made_three_point_field_goals': 'threes',
        'made_field_goals': 'fgm',
        'made_free_throws': 'ftm',
    })

    # Made field goals include threes, so two-pointers are the remainder.
    boxscores_df['twos'] = boxscores_df['fgm'] - boxscores_df['threes']
    boxscores_df['points'] = (boxscores_df['threes'] * 3 +
                              boxscores_df['twos'] * 2 +
                              boxscores_df['ftm'] * 1)

    boxscores_df = boxscores_df.drop(columns=[
        'attempted_three_point_field_goals', 'defensive_rebounds',
        'offensive_rebounds', 'game_score', 'slug', 'turnovers', 'outcome',
        'twos', 'personal_fouls', 'location',
    ])
    boxscores_df['opponent'] = boxscores_df['opponent'].apply(lambda x: x.name)
    boxscores_df['team'] = boxscores_df['team'].apply(lambda x: x.name)

    boxscores_df.to_sql('boxscores',
                        con=engine,
                        if_exists='append',
                        index=False)
    print(boxscores_df.shape)
def complete_historical_data(self):
    """Collect daily player box scores from the last game day back to 1945.

    Starts from the frame and date returned by ``self.last_gameday()`` and
    steps back one day at a time while the year is greater than 1945.  Each
    day's frame gets a ``date`` column.  The accumulated data is written to
    ``basketball/box_scores_all_of_them.csv`` whenever a calendar year is
    finished and once more at the end.

    :return: All collected box scores, newest day first.
    :rtype: pandas.DataFrame
    """
    csv_path = 'basketball/box_scores_all_of_them.csv'
    first_frame, y, m, d = self.last_gameday()[:4]

    # Frames are gathered in a list and concatenated when needed:
    # DataFrame.append was removed in pandas 2.0 and copied all rows per call.
    frames = [first_frame]
    current = date(y, m, d)

    # Delete after running correctly (or add some progress bar thing)
    y_check = y
    while y > 1945:
        current = current - timedelta(days=1)
        y = current.year
        new = add_calc(
            pd.DataFrame(
                client.player_box_scores(day=current.day,
                                         month=current.month,
                                         year=y)))
        new['date'] = current
        frames.append(new)
        if y_check != y:
            print(str(y_check) + ' complete.')
            pd.concat(frames).to_csv(csv_path)
            y_check = y

    box_scores = pd.concat(frames)
    box_scores.to_csv(csv_path)
    return box_scores
def get_daily_box_scores(date_obj=None, timeout=1):
    """
    Group a day's player box scores by team, searching back if needed.

    Starting at ``date_obj``, the first day on which
    ``get_teams_played_on_date`` reports any teams is used; earlier days are
    tried one at a time until ``timeout`` days have been checked.

    :param datetime.datetime date_obj: First day to search; defaults to today.
    :param int timeout: Number of days to try before giving up (the default
        checks only the starting day).
    :return: ``(team_dict, date)`` -- an ``OrderedDict`` mapping each team to
        the list of its players' box scores (empty when no day qualified),
        and the day the search stopped on.
    :rtype: tuple
    """
    team_dict = OrderedDict()
    if date_obj is None:
        date_obj = datetime.datetime.today()

    one_day = datetime.timedelta(days=1)
    while timeout > 0:
        teams = get_teams_played_on_date(date_obj=date_obj)
        if len(teams) == 0:
            # Nothing played on this day: move to the previous one.
            date_obj -= one_day
            timeout -= 1
            continue

        all_box_scores = client.player_box_scores(day=date_obj.day,
                                                  month=date_obj.month,
                                                  year=date_obj.year)
        team_dict.update((team, []) for team in teams)
        for player in all_box_scores:
            team_dict[player['team'].name].append(player)
        break

    return team_dict, date_obj
def __init__(self, date):
    """Fetch and store the player box scores for one day.

    :param date: Indexable whose items 0, 1 and 2 are the day, month and
        year, in that order.
    """
    self.game_scores = client.player_box_scores(day=date[0],
                                                month=date[1],
                                                year=date[2])
def getBoxScoreForPlayer(playerObj):
    """Return yesterday's box score for a player, or None if there is none.

    A box score matches when its ``'name'`` is contained in
    ``playerObj.name`` or the other way round; the first match is returned.
    """
    wanted = playerObj.name
    yesterday = datetime.datetime.now() - datetime.timedelta(1)
    games = client.player_box_scores(day=yesterday.day,
                                     month=yesterday.month,
                                     year=yesterday.year)
    return next(
        (entry for entry in games
         if entry['name'] in wanted or wanted in entry['name']),
        None,
    )
def getDataLocal(val, d=curd, m=curm, y=cury):
    """Export one kind of report to a JSON file under the local Data folder.

    ``val`` selects the report: 0 player box scores, 1 team box scores,
    2 season schedule, 3 season standings.  The generated file name is also
    appended to the module-level ``finstr``.  Any other ``val`` does nothing.

    :param val: Report selector (0-3).
    :param d: Day, used by the box-score reports.
    :param m: Month, used by the box-score reports.
    :param y: Year; the season end year for schedule and standings.
    :return: ``val`` unchanged.
    """
    global finstr
    data_root = "/Users/pranavkrishnakumar/repos/BeardBot/Data/"

    if val == 0:
        fstr = "_".join([str(d), str(m), str(y), "player_box_scores.json"])
        method_name, folder = "player_box_scores", "player_box_scores/"
        query = {"day": d, "month": m, "year": y}
    elif val == 1:
        fstr = "_".join([str(d), str(m), str(y), "team_box_scores.json"])
        method_name, folder = "team_box_scores", "team_box_scores/"
        query = {"day": d, "month": m, "year": y}
    elif val == 2:
        fstr = str(y) + "_season_schedule.json"
        method_name, folder = "season_schedule", "season_schedule/"
        query = {"season_end_year": y}
    elif val == 3:
        fstr = str(y) + "_season_standings.json"
        method_name, folder = "standings", "season_standings/"
        query = {"season_end_year": y}
    else:
        return val

    # The file name is recorded before the export is attempted.
    finstr = finstr + fstr
    exporter = getattr(client, method_name)
    exporter(output_type=OutputType.JSON,
             output_file_path=data_root + folder + fstr,
             **query)
    return val
def test_output_json_box_scores_to_memory(self):
    """Requesting JSON output without a file path must return a value."""
    request = {"day": 1, "month": 1, "year": 2001,
               "output_type": OutputType.JSON}
    january_first_box_scores = client.player_box_scores(**request)
    self.assertIsNotNone(january_first_box_scores)
def BackToBack(playerObj):
    """Return True when the player appears in yesterday's box scores.

    A box score counts as the player's when its ``'name'`` is contained in
    ``playerObj.name`` or the other way round.
    """
    wanted = playerObj.name
    yesterday = datetime.datetime.now() - datetime.timedelta(1)
    games = client.player_box_scores(day=yesterday.day,
                                     month=yesterday.month,
                                     year=yesterday.year)
    return any(
        entry['name'] in wanted or wanted in entry['name']
        for entry in games
    )
def getData(val, d=curd, m=curm, y=cury):
    """Fetch one kind of report from the client and return it in memory.

    ``val`` selects the report: 0 player box scores, 1 team box scores,
    2 season schedule, 3 season standings.  Any other value returns None.

    :param val: Report selector (0-3).
    :param d: Day, used by the box-score reports.
    :param m: Month, used by the box-score reports.
    :param y: Year; the season end year for schedule and standings.
    """
    global finstr
    if val == 0:
        return client.player_box_scores(day=d, month=m, year=y)
    if val == 1:
        return client.team_box_scores(day=d, month=m, year=y)
    if val == 2:
        return client.season_schedule(season_end_year=y)
    if val == 3:
        return client.standings(season_end_year=y)
    return None
def update(connection, gameday=None):
    """Store every player box score of one game day in the database.

    :param connection: Database connection handed to ``parse_and_input``.
    :param gameday: Day to load, as a ``date`` or a ``YYYY-MM-DD`` string.
        Defaults to yesterday, computed at call time.  The previous default
        expression was evaluated once at import, so a long-running process
        kept loading the same stale day.
    """
    if gameday is None:
        gameday = date.today() - timedelta(1)

    # convert to string and split to individual date elements
    gameday = str(gameday)
    year, month, day = (int(part) for part in gameday.split('-', 2))

    # grab all scores on given date (ints instead of zero-padded strings)
    scores = client.player_box_scores(day=day, month=month, year=year)

    # input each score to DB
    for score in scores:
        parse_and_input(score, connection, gameday)
def get_box_score(self):
    """Load this player's box score for ``self.date`` into ``self.box_score``.

    ``self.date`` is first converted to US/Eastern time (and stored back).
    ``self.box_score`` becomes the list of that day's box scores whose
    ``'name'`` equals ``self.name``.

    :return: True when at least one box score was found, else False.
    """
    self.date = self.date.astimezone(tz('US/Eastern'))
    game_day = self.date
    player_scores = client.player_box_scores(game_day.day, game_day.month,
                                             game_day.year)
    self.box_score = [
        person for person in player_scores if person['name'] == self.name
    ]
    # make sure box_score is not empty
    return bool(self.box_score)
def test_2001_01_01_player_box_scores(self):
    """JSON box scores for 2001-01-01 must equal the expected fixture.

    Both sides are parsed, so the comparison is on decoded data rather than
    on raw text.
    """
    request = {"day": 1, "month": 1, "year": 2001,
               "output_type": OutputType.JSON}
    box_scores = player_box_scores(**request)

    with open(self.expected_output_file_path, "r", encoding="utf8") as expected_output_file:
        actual = json.loads(box_scores)
        expected = json.load(expected_output_file)
    self.assertEqual(actual, expected)
def scrapeBoxscore(userid, dateteam):
    """Build a two-team box score message for one game and push it to a user.

    ``dateteam`` is split on spaces: month abbreviation, day, year, then the
    team words (resolved with ``getTeam``).  The message lists every player
    of the requested team, then every player of its opponent.

    The day token is reduced to its leading digits, so ``"15,"`` gives 15.
    The previous ``[:1]`` slice kept only the first character and turned
    every two-digit day into its first digit.

    :param userid: Recipient passed to ``push_message``.
    :param str dateteam: Text such as ``"<Mon> <day> <year> <team ...>"``.
    :raises KeyError: If the month abbreviation is unknown.
    :raises ValueError: If the day or year cannot be read as a number.
    """
    import re

    month = {
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12
    }
    datelist = dateteam.split(' ')

    day_digits = re.match(r'\d+', datelist[1])
    if day_digits is None:
        raise ValueError(
            "invalid day in date string: {!r}".format(datelist[1]))

    gameMonth = month[datelist[0]]
    gameDay = int(day_digits.group())
    gameYear = int(datelist[2])
    searchTeam = getTeam(datelist[3:])

    players = client.player_box_scores(day=gameDay,
                                       month=gameMonth,
                                       year=gameYear)

    # The opponent is read from the first player of the requested team; it
    # stays empty when that team did not play on the given day.
    opponentTeam = next(
        (player['opponent'].name for player in players
         if player['team'].name == searchTeam),
        "",
    )

    result = (_formatTeamBoxScore(players, searchTeam) +
              _formatTeamBoxScore(players, opponentTeam))
    push_message(userid, result)


def _formatTeamBoxScore(players, teamName):
    """Return the header and one stat block per player of ``teamName``."""
    text = "\U0001f3c0\U0001f3c0\U0001f3c0 {}\n\n".format(teamName)
    for player in players:
        if player['team'].name != teamName:
            continue
        minutes, seconds = divmod(int(player['seconds_played']), 60)
        # Seconds are zero-padded so 12 min 5 s reads "12:05", not "12:5".
        text += ("\U0001F525\U000026f9\U0001F525 {} {}:{:02d}\n"
                 .format(player['name'], minutes, seconds))
        text += ("{} PTS, {} AST, {} REB, {} STL, {} BLK, {} TOV\n\n"
                 .format(score(player),
                         player['assists'],
                         player['offensive_rebounds'] +
                         player['defensive_rebounds'],
                         player['steals'],
                         player['blocks'],
                         player['turnovers']))
    return text
def last_gameday(self):
    """Fetch yesterday's player box scores for seeding the feature store.

    The box scores are wrapped in a DataFrame, passed through ``add_calc``
    and tagged with a ``date`` column holding yesterday's date.

    :return: ``[box_scores, year, month, day]`` for yesterday.
    :rtype: list
    """
    # NOTE(review): this used to sit inside ``while box_scores is None``, but
    # the date never moved and a None result fails on the ``['date']``
    # assignment, so the body could only ever run once.  There is no fallback
    # to an earlier day -- confirm whether one is wanted.
    last_date = date.today() - timedelta(days=1)
    d, m, y = last_date.day, last_date.month, last_date.year
    raw_scores = client.player_box_scores(day=d, month=m, year=y)
    box_scores = add_calc(pd.DataFrame(raw_scores))
    box_scores['date'] = last_date
    return [box_scores, y, m, d]
import calendar
import csv

from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

# Get all player box scores for January 1st, 2017
day112017 = client.player_box_scores(day=1, month=1, year=2017)

# Columns of the output file: the box score fields plus the game date that
# this script adds to each row.
fieldnames = ['slug', 'name', 'team', 'location', 'opponent', 'outcome',
              'seconds_played', 'made_field_goals', 'attempted_field_goals',
              'made_three_point_field_goals',
              'attempted_three_point_field_goals', 'made_free_throws',
              'attempted_free_throws', 'offensive_rebounds',
              'defensive_rebounds', 'assists', 'steals', 'blocks',
              'turnovers', 'personal_fouls', 'game_score', 'game_date']

# newline='' is what the csv module asks for; without it rows can be
# separated by blank lines on some platforms.
with open('box_score_test.csv', mode='w', newline='') as csv_file:
    # Keys outside ``fieldnames`` are ignored rather than raising.
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames,
                            extrasaction='ignore')
    writer.writeheader()

    for months in range(1, 13):
        # Only request days that exist (no February 30th and the like).
        days_in_month = calendar.monthrange(2019, months)[1]
        for days in range(1, days_in_month + 1):
            date = str(months) + "-" + str(days) + "-2019"
            game = client.player_box_scores(day=days, month=months, year=2019)
            # Tag and write every row; the earlier fixed index only ever
            # touched the first row and nothing was written.
            # NOTE(review): writing the rows is the presumed intent of the
            # header/writer set-up above -- confirm.
            for row in game:
                row["game_date"] = date
                writer.writerow(row)
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

# Get all player box scores for January 1st, 2017
day112017 = client.player_box_scores(day=1, month=1, year=2017)

# testing
# One independent, initially empty list per box score column.
(player, team, game_date, location, opponent, outcome, seconds_played,
 made_field_goals, attempted_field_goals, made_three_point_field_goals,
 attempted_three_point_field_goals, made_free_throws,
 attempted_free_throws) = ([] for _ in range(13))

box_score = {}
i = 0

# first = day112017[0]
# player.append(first["name"])
# first = day112017[1]
# player.append(first["name"])
# player.append(first["team"])
def test_get_box_scores(self):
    """Player box scores for 2018-01-01 must come back as a non-None value."""
    self.assertIsNotNone(player_box_scores(day=1, month=1, year=2018))
def main():
    """Run the interactive NBA report exporter menu until the user exits.

    Options 1 and 4 ask for a date, options 2, 3 and 5 for a season end year.
    Each report is exported as CSV into ``exported_files/``.  Dates and years
    are validated and passed to the client as integers; malformed input
    prints a message and returns to the menu instead of crashing the loop.
    """
    print("\n#######################################################################")
    print("#######################################################################")
    print("######################### NBA Report exporter #########################")
    print("#######################################################################")
    print("#######################################################################\n")

    menu = (
        "1. Players box scores by a date"
        "\n2. Players season statistics for a season"
        "\n3. Players advanced season statistics for a season"
        "\n4. All Team box scores by a date"
        "\n5. Schedule for a season"
        "\n6. Exit"
    )

    while True:
        print(menu)
        reportObject = input("\nPlease select a option: ")

        if reportObject == "1":
            # Players box scores by a date
            _exportDatedReport(client.player_box_scores,
                               "all-player-box-report-")
        elif reportObject == "2":
            # Players season statistics for a season
            _exportSeasonReport(client.players_season_totals,
                                "all-player-season-report-")
        elif reportObject == "3":
            # Players advanced season statistics for a season
            _exportSeasonReport(client.players_advanced_season_totals,
                                "all-player-advanced-season-report-")
        elif reportObject == "4":
            # All Team box scores by a date
            _exportDatedReport(client.team_box_scores, "all-team-report-")
        elif reportObject == "5":
            # Schedule for a season
            _exportSeasonReport(client.season_schedule, "season-schedule-")
        elif reportObject == "6":
            # Exit
            print("\n#######################################################################")
            print("################################# Bye #################################")
            print("#######################################################################\n")
            break
        else:
            # Error
            print("Invalid option!!\n\n")


def _exportDatedReport(exporter, filePrefix):
    """Ask for a D-M-YYYY date and export the report for that day."""
    from datetime import date

    inputDate = input("\nEnter a date (use this format 1-1-2018): ")
    try:
        day, month, year = (int(part) for part in inputDate.split("-"))
        date(year, month, day)  # rejects impossible dates such as 31-2-2018
    except ValueError:
        print("Invalid date!!\n\n")
        return
    _exportCsvReport(exporter, filePrefix + inputDate + ".csv",
                     day=day, month=month, year=year)


def _exportSeasonReport(exporter, filePrefix):
    """Ask for a season end year and export the report for that season."""
    endYear = input("\nEnter season end year: ")
    try:
        seasonEndYear = int(endYear)
    except ValueError:
        print("Invalid year!!\n\n")
        return
    _exportCsvReport(exporter, filePrefix + endYear + ".csv",
                     season_end_year=seasonEndYear)


def _exportCsvReport(exporter, fileName, **query):
    """Run one CSV export into ``exported_files/`` and report its location."""
    import os

    outputPath = "exported_files/" + fileName
    # The export cannot create the file if the folder is missing.
    os.makedirs("exported_files", exist_ok=True)
    print("Exporting report please wait..........")
    # Call Export function
    exporter(output_type=OutputType.CSV, output_file_path=outputPath, **query)
    print("Report exported at: " + os.path.abspath(outputPath) + "!!\n\n")
def get_rolling_player_season_stats(players, season):
    """Build per-game rows of rolling player and opponent stats for a season.

    Walks every day of the regular season.  For each tracked player's game it
    records the points scored, whether the game was at home, and the
    opponent's average points allowed relative to the league average, as
    known before that day's games are folded in (-1. when the opponent has
    no games yet).  The player's rolling points-per-game entering each game
    is kept alongside.

    Each player's first ``ROLLING_AVG_CUTOFF`` games are left out of the
    result.  Players with fewer than ``MIN_GAMES`` remaining games, or a
    final average below ``MIN_PPG``, are dropped entirely.

    :param players: Iterable of player identifiers to track.
    :param season: Season end year passed to the schedule lookup.
    :return: One row per retained player game.
    :rtype: pandas.DataFrame
    """
    schedule = client.season_schedule(season_end_year=season)
    start_date, end_date = get_reg_season_start_end_dates(season)

    HOME_GAME = 1
    AWAY_GAME = 0

    #TODO: move to own file
    class rolling_player_stats():
        def __init__(self):
            self.num_games = 0
            # Entry k is the average entering game k; -1. marks "no games".
            self.rolling_avg_pts = [-1.]
            self.game_pts = []
            self.home_not_away = []
            self.opp_allowed_rating = []

        def update_stats(self, points):
            self.game_pts.append(points)
            new_avg_pts = \
                (self.rolling_avg_pts[-1] * self.num_games + points) / (self.num_games + 1)
            self.rolling_avg_pts.append(new_avg_pts)
            self.num_games += 1

    #can be used for both individual and league average stats
    class rolling_team_stats():
        def __init__(self):
            self.num_games = 0
            self.avg_points_allowed = 0.

        def update_stats(self, points):
            self.avg_points_allowed = (
                (self.avg_points_allowed * self.num_games) +
                points) / (self.num_games + 1)
            self.num_games += 1

    player_stats_dict = {player: rolling_player_stats() for player in players}
    team_stats_dict = {}
    avg_team_stats = rolling_team_stats()

    game_number = 0
    num_scheduled_games = len(schedule)
    for ts in pd.date_range(start=start_date, end=end_date, freq='D'):
        date = ts.date()
        print("Processing date: {}".format(date))

        #get all games for this date
        box_scores = client.player_box_scores(day=date.day,
                                              month=date.month,
                                              year=date.year)
        for player_box_score in box_scores:
            player = player_box_score[PLAYER_ID]
            if player not in player_stats_dict:
                continue

            #TODO: put calculations in rolling_player_stats
            player_stats = player_stats_dict[player]
            points = get_total_points(player_box_score)
            player_stats.home_not_away.append(
                HOME_GAME if player_box_score[LOCATION] ==
                data.Location.HOME else AWAY_GAME)

            opposing_team = player_box_score[OPPONENT]
            if opposing_team in team_stats_dict:
                player_stats.opp_allowed_rating.append(
                    team_stats_dict[opposing_team].avg_points_allowed /
                    avg_team_stats.avg_points_allowed)
            else:
                player_stats.opp_allowed_rating.append(-1.)

            player_stats.update_stats(points)

        #update team stats
        # The bound check stops the index running past the schedule once
        # every scheduled game has been consumed.
        while (game_number < num_scheduled_games
               and utils.get_pacific_date(
                   schedule[game_number][START_TIME]) <= date):
            game = schedule[game_number]
            away_team = game[AWAY_TEAM]
            home_team = game[HOME_TEAM]
            if away_team not in team_stats_dict:
                team_stats_dict[away_team] = rolling_team_stats()
            if home_team not in team_stats_dict:
                team_stats_dict[home_team] = rolling_team_stats()

            # Each team "allows" what the other team scored.
            team_stats_dict[away_team].update_stats(game[HOME_TEAM_SCORE])
            team_stats_dict[home_team].update_stats(game[AWAY_TEAM_SCORE])

            #update league avg team stats
            avg_team_stats.update_stats(game[HOME_TEAM_SCORE])
            avg_team_stats.update_stats(game[AWAY_TEAM_SCORE])
            game_number += 1

    #convert to dataframe
    #Don't consider stats prior to this cutoff
    ROLLING_AVG_CUTOFF = 15
    points_data = []
    rolling_avg_pts_data = []
    opp_allowed_rating_data = []
    home_not_away_data = []
    player_id_data = []
    for player_id, player_stats in player_stats_dict.items():
        num_games = player_stats.num_games - ROLLING_AVG_CUTOFF
        if num_games < MIN_GAMES or player_stats.rolling_avg_pts[-1] < MIN_PPG:
            continue

        player_id_data += (num_games * [player_id])
        points_data += (player_stats.game_pts[ROLLING_AVG_CUTOFF:])
        opp_allowed_rating_data += (
            player_stats.opp_allowed_rating[ROLLING_AVG_CUTOFF:])
        home_not_away_data += (player_stats.home_not_away[ROLLING_AVG_CUTOFF:])
        #don't include last/final ppg
        rolling_avg_pts_data += (
            player_stats.rolling_avg_pts[ROLLING_AVG_CUTOFF:-1])

    return pd.DataFrame({
        constants.PLAYER_ID_COL: player_id_data,
        constants.PPG_COL: rolling_avg_pts_data,
        constants.PTS_COL: points_data,
        constants.OPP_ALLOWED_RATING_COL: opp_allowed_rating_data,
        constants.HOME_NOT_AWAY_COL: home_not_away_data
    })
import json
from datetime import date, timedelta
from configparser import ConfigParser

import pika
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

# Publishes yesterday's player box scores to a RabbitMQ exchange.  Broker
# host, exchange and routing key come from the [nba] section of
# nba_app.properties.

CONFIG = ConfigParser()
CONFIG.read('nba_app.properties')

RABBIT_HOST = CONFIG['nba']['RABBIT_HOST']
EXCHANGE = CONFIG['nba']['EXCHANGE']
ROUTING_KEY = CONFIG['nba']['ROUTING_KEY']

YESTERDAY = date.today() - timedelta(days=1)

BOX_SCORE_JSON = client.player_box_scores(day=YESTERDAY.day,
                                          month=YESTERDAY.month,
                                          year=YESTERDAY.year,
                                          output_type=OutputType.JSON)

# NOTE(review): with OutputType.JSON the client presumably already returns a
# JSON string, so json.dumps would encode it a second time -- confirm what
# consumers expect before changing the message format.
MESSAGE = json.dumps(BOX_SCORE_JSON).encode('utf-8')

CONNECTION = pika.BlockingConnection(pika.ConnectionParameters(RABBIT_HOST))
try:
    CHANNEL = CONNECTION.channel()
    CHANNEL.basic_publish(exchange=EXCHANGE,
                          routing_key=ROUTING_KEY,
                          body=MESSAGE)
finally:
    # Close the broker connection even when publishing fails.
    CONNECTION.close()
def test_2001_01_01_player_box_scores_length(self):
    """The 2001-01-01 player box scores must contain exactly 39 entries."""
    game_date = {"day": 1, "month": 1, "year": 2001}
    box_scores = player_box_scores(**game_date)
    entry_count = len(box_scores)
    self.assertEqual(entry_count, 39)
import datetime
from datetime import timedelta

import pandas as pd
from basketball_reference_web_scraper import client

# Downloads every day's player box scores from 1 January of ``start_year``
# up to today and stores them, tagged with their date, in one CSV file.

start_year = 2018
run_date = datetime.date(year=start_year, day=1, month=1)

# Daily frames are gathered in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and copied all rows per call.
daily_frames = []
while run_date <= datetime.date.today():
    df = pd.DataFrame.from_dict(
        client.player_box_scores(day=run_date.day,
                                 month=run_date.month,
                                 year=run_date.year))
    df['date'] = run_date
    daily_frames.append(df)
    run_date = run_date + timedelta(days=1)

stat_df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

# Backslashes are escaped explicitly; the path value itself is unchanged.
stat_df.to_csv('~\\Documents\\BasketballBetting/LatestData.csv')