def everything():
    """Process the regular-season games scheduled around today.

    Requests the schedule from two days before today through tomorrow. For
    every regular-season game (``game_type == "R"``) the per-team directories
    are created if needed and the game's boxscore is passed to ``hit`` and
    ``pitch``. For any other game the game type is printed and the game is
    skipped before any further API request is made for it.
    """
    year = '2021'
    today = datetime.date.today()
    # NOTE(review): the window opens two days back, not one -- confirm intended.
    window_start = today - datetime.timedelta(days=2)
    window_end = today + datetime.timedelta(days=1)
    sched = statsapi.schedule(start_date=window_start, end_date=window_end)

    for game in sched:
        # Decide first: only regular-season games need the boxscore and the
        # two team lookups, so skip the network round-trips for the rest.
        if game['game_type'] != "R":
            print(game['game_type'])
            continue

        gameDate = game["game_date"]
        if game['doubleheader'] != 'N':
            # adds number to the back of the game date if the game is a part
            # of a doubleheader
            gameDate = game["game_date"] + "(" + str(game["game_num"]) + ")"

        boxscore = statsapi.boxscore_data(game["game_id"])
        homeAbbrev = statsapi.get(
            'team', {'teamId': game["home_id"]})['teams'][0]['abbreviation']
        awayAbbrev = statsapi.get(
            'team', {'teamId': game["away_id"]})['teams'][0]['abbreviation']

        createDir(homeAbbrev, year)  # if needed
        createDir(awayAbbrev, year)  # if needed
        hit(homeAbbrev, awayAbbrev, year, gameDate, game, boxscore)
        pitch(homeAbbrev, awayAbbrev, year, gameDate, game, boxscore)
def get_season_game_pks(season: int) -> List[int]:
    """
    Returns a list of gamePks for a given season

    The season's start and end dates are looked up through the 'season'
    endpoint and then used to request the full schedule.

    :param season: int for the year of the season
    :returns list of gamePks as integers, in the order the schedule
        endpoint returns its dates and games
    """
    season_info = mlb.get('season', {
        'sportId': 1,
        'seasonId': season
    })['seasons'][0]
    schedule = mlb.get('schedule', {
        'startDate': season_info['seasonStartDate'],
        'endDate': season_info['seasonEndDate'],
        'sportId': 1
    })
    # The previous implementation reversed this list twice, which is a no-op;
    # the schedule order is returned directly.
    return [
        game['gamePk']
        for day in schedule['dates']
        for game in day['games']
    ]
def test_get_server_error(mocker):
    """A 500 response for the requested URL must raise ``HTTPError``."""
    endpoint = "foo"
    query = {"bar": "baz"}

    # Replace the real endpoint table with the fake one for this test
    mocker.patch.dict("statsapi.ENDPOINTS", fake_dict(), clear=True)
    # Register a canned server-error reply for the URL the call should hit
    responses.add(responses.GET, "http://www.foo.com?bar=baz", status=500)

    with pytest.raises(requests.exceptions.HTTPError):
        statsapi.get(endpoint, query)
def update(self, force=False) -> UpdateStatus:
    """Refresh the cached data and status for this game.

    :param force: refresh even when ``__should_update()`` says it is not time
    :returns: ``UpdateStatus.SUCCESS`` when the game data was fetched,
        ``UpdateStatus.FAIL`` when fetching or reading it raised, and
        ``UpdateStatus.DEFERRED`` when no refresh was attempted
    """
    if not (force or self.__should_update()):
        return UpdateStatus.DEFERRED

    self.starttime = time.time()
    try:
        debug.log("Fetching data for game %s", str(self.game_id))
        self._data = statsapi.get("game", {
            "gamePk": self.game_id,
            "fields": API_FIELDS
        })
        self._status = self._data["gameData"]["status"]

        if self._data["gameData"]["datetime"]["officialDate"] > self.date:
            # this is odd, but if a game is postponed then the 'game'
            # endpoint gets the rescheduled game
            debug.log(
                "Getting game status from schedule for game with strange date!"
            )
            try:
                scheduled = statsapi.get(
                    "schedule", {
                        "gamePk": self.game_id,
                        "sportId": 1,
                        "fields": SCHEDULE_API_FIELDS
                    })
                # next() without a default raises StopIteration when no
                # schedule entry matches self.date; handled just below.
                self._status = next(g["games"][0]["status"]
                                    for g in scheduled["dates"]
                                    if g["date"] == self.date)
            except Exception:
                # Best effort: keep the status taken from the game endpoint.
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # and SystemExit still propagate.
                debug.error("Failed to get game status from schedule")
        return UpdateStatus.SUCCESS
    except Exception:
        debug.exception(
            "Networking Error while refreshing the current game data.")
        return UpdateStatus.FAIL
def test_get_calls_correct_url(mocker):
    """``statsapi.get`` must request the URL built from endpoint and params."""
    expected_url = "http://www.foo.com?bar=baz"

    # Replace the real endpoint table with the fake one for this test
    mocker.patch.dict("statsapi.ENDPOINTS", fake_dict(), clear=True)
    # Stand-in for the requests module so no real HTTP call is made
    requests_double = mocker.patch("statsapi.requests", autospec=True)

    statsapi.get("foo", {"bar": "baz"})

    requests_double.get.assert_called_with(expected_url)
def test_get_invalid_endpoint(mocker):
    """Asking for the endpoint name "bar" must raise ``ValueError``."""
    # Replace the real endpoint table with the fake one for this test
    mocker.patch.dict("statsapi.ENDPOINTS", fake_dict(), clear=True)
    # Stand-in for the requests module so no real HTTP call is made
    mocker.patch("statsapi.requests", autospec=True)

    unknown_endpoint = "bar"
    with pytest.raises(ValueError):
        statsapi.get(unknown_endpoint, {"foo": "baz"})
def get_gamePks(seasons, target_directory=None):
    """
    Takes in a list of seasons as strings representing their year
    e.g. ['2018','2019']

    Queries the MLB API to find gamePks for each season and writes them to
    CSV files, one ``<year>.csv`` per season. Seasons that already have a CSV
    file in the target directory are skipped.

    If a target directory for the gamePks is not specified, a directory
    called 'gamePks' will be added to the current directory.
    """
    if target_directory:
        gamePks_path = target_directory
    else:
        # create a directory to store CSVs
        gamePks_path = os.getcwd() + '/gamePks'
        try:
            os.mkdir(gamePks_path)
        except FileExistsError:
            pass

    # Look only at the top level of the gamePks directory to see if we've
    # already added any seasons (empty when the directory does not exist).
    _, _, filenames = next(walk(gamePks_path), (gamePks_path, [], []))

    # Strip the real ".csv" extension. The former regex '[^.csv]+' was a
    # character class that split names on any of '.', 'c', 's' or 'v'.
    already_added = {
        os.path.splitext(filename)[0]
        for filename in filenames
        if filename.endswith('.csv') and filename[0] in ('1', '2')
    }
    wanted = set(seasons) - already_added

    # query the API to get start dates and end dates for all seasons
    all_seasons = mlb.get('seasons', {'sportId': 1, 'all': True})['seasons']
    # filter out the ones we don't care about right now
    pending = [s for s in all_seasons if s['seasonId'] in wanted]

    gamePks = {}
    for season in pending:
        year = season['seasonId']
        # returns a list of dicts for each date in the range
        # each dict has a 'games' key with a list of dicts for each game in
        # that day as values
        dates = mlb.get('schedule', {
            'sportId': 1,
            'startDate': convert_date(season['seasonStartDate']),
            'endDate': convert_date(season['seasonEndDate'])
        })['dates']

        # for each date, and for each game in that date, get the gamePk
        gamePks[year] = [
            game['gamePk'] for date in dates for game in date['games']
        ]

        # store the gamePks as a single quoted CSV row
        with open(gamePks_path + f"/{year}.csv", 'w', newline='') as myfile:
            wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            wr.writerow(gamePks[year])
def handle(self, *args, **options):
    """Fetch the 2020 standings for both leagues and save a TeamRecord per team.

    Standings are requested for league ids 103 and 104 (held in the original
    code as ``al_standings`` and ``nl_standings``). Both requests are made
    before any record is saved. For every team record in every division a
    ``TeamRecord`` is built, saved, and a confirmation line is printed.
    """
    season = '2020'
    league_ids = (103, 104)

    # Fetch both leagues up front, in the same order as before.
    standings_by_league = [
        statsapi.get('standings', {
            'sportIds': 1,
            'leagueId': league_id,
            'season': season
        }) for league_id in league_ids
    ]

    # One loop replaces the two copy-pasted per-league loops.
    for standings in standings_by_league:
        for division in standings['records']:
            for team_record in division['teamRecords']:
                team_id = team_record['team']['id']
                wins = team_record['wins']
                losses = team_record['losses']
                division_rank = team_record['divisionRank']
                league_rank = team_record['leagueRank']
                # Separate name for the model instance so the API payload
                # being iterated is not shadowed.
                team = Team.objects.get(team_id=team_id)
                record = TeamRecord(team_id_and_season=str(team_id) + season,
                                    team=team,
                                    season=season,
                                    wins=wins,
                                    losses=losses,
                                    division_rank=division_rank,
                                    league_rank=league_rank)
                record.save()
                print("Saved Successfully")
def update(self, force=False) -> UpdateStatus:
    """Refresh the standings (or postseason series) for the parsed date.

    Outside the postseason, division standings are requested and, when
    ``self.wild_cards`` is truthy, wild-card standings are appended. During
    the postseason the series data is requested and stored per league.

    :param force: refresh even when ``__should_update()`` says it is not time
    :returns: ``UpdateStatus.SUCCESS`` when the refresh completed,
        ``UpdateStatus.FAIL`` when it raised, and ``UpdateStatus.DEFERRED``
        when no refresh was attempted
    """
    if not (force or self.__should_update()):
        return UpdateStatus.DEFERRED

    self.date = self.__parse_today()
    debug.log("Refreshing standings for %s", self.date.strftime("%m/%d/%Y"))
    self.starttime = time.time()
    try:
        if not self.is_postseason():
            season_params = {
                "standingsTypes": "regularSeason",
                "leagueId": "103,104",
                "hydrate": "division,team,league",
                "season": self.date.strftime("%Y"),
                "fields": API_FIELDS,
            }
            # Only pin an explicit date when it is not today's date.
            if self.date != datetime.today().date():
                season_params["date"] = self.date.strftime("%m/%d/%Y")

            divisons_data = statsapi.get("standings", season_params)
            self.standings = [
                Division(division_data)
                for division_data in divisons_data["records"]
            ]

            if self.wild_cards:
                # Same query, different standings type.
                season_params["standingsTypes"] = "wildCard"
                wc_data = statsapi.get("standings", season_params)
                self.standings += [
                    Division(data, wc=True) for data in wc_data["records"]
                ]
        else:
            postseason_data = statsapi.get(
                "schedule_postseason_series",
                {
                    "season": self.date.strftime("%Y"),
                    "hydrate": "league,team",
                    "fields": "series,id,gameType,games,description,teams,home,away,team,isWinner,name",
                },
            )
            self.leagues["AL"] = League(postseason_data, "AL")
            self.leagues["NL"] = League(postseason_data, "NL")
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not reported as a failed refresh.
        debug.exception("Failed to refresh standings.")
        return UpdateStatus.FAIL
    else:
        return UpdateStatus.SUCCESS
def get_gamePks(seasons,
                gamePks_path="/Users/schlinkertc/code/mlb_predictions/gamePks"):
    """
    Uses the mlb api schedule to get gamePks and write them to csvs for
    each season.

    Seasons that already have a ``<year>.csv`` file in ``gamePks_path`` are
    skipped. For each remaining season the schedule from 02/01 to 11/30 of
    that year is requested and the game ids are written as a single CSV row.

    :param seasons: iterable of season years as integers
    :param gamePks_path: directory holding the per-season CSV files
    :returns: dict mapping each newly fetched season to its list of game ids
    :raises Exception: anything other than ``ValueError`` raised while
        fetching or writing a season is printed and re-raised
    """
    import statsapi as mlb
    import csv
    import time
    import sys
    from os import walk

    # Look only at the top level of the gamePks directory to find the
    # seasons that we've already added.
    _, _, filenames = next(walk(gamePks_path), (gamePks_path, [], []))

    # Strip the real ".csv" extension and keep purely numeric stems. The
    # former regex '[^.csv]+' was a character class that split names on any
    # of '.', 'c', 's', 'v', and int() could then fail on stray fragments.
    already_added = set()
    for filename in filenames:
        if not filename.endswith('.csv'):
            continue
        stem = filename[:-len('.csv')]
        if stem.isdigit() and stem[0] in ('1', '2'):
            already_added.add(int(stem))

    seasons = list(set(seasons) - already_added)

    gamePks = {}
    for season in seasons:
        # NOTE(review): the result of this request is discarded; it is kept
        # only because it may act as an up-front check -- confirm and remove.
        mlb.get('season', {'sportId': 1, 'seasonId': str(season)})
        try:
            games = mlb.schedule(start_date=f'02/01/{season}',
                                 end_date=f'11/30/{season}',
                                 sportId=1)
            pks = [x['game_id'] for x in games]
            print(pks[0])
            gamePks[season] = pks
            print(len(gamePks))
            with open(gamePks_path + f'/{season}.csv', 'w',
                      newline='') as myfile:
                wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
                wr.writerow(gamePks[season])
        except ValueError as err:
            print(
                f'{season} failed. Error: {err} Waiting 10 seconds before resuming'
            )
            time.sleep(10)
        except Exception:
            print("Unexpected error:", sys.exc_info()[0])
            raise
    return gamePks
def __init__(self, year: int):
    """Load the important dates for ``year`` from the 'season' endpoint.

    When ``year`` is the current year and ``self.season_ends_date`` (as set
    by the first parse) is already in the past, the dates for ``year + 1``
    are fetched and parsed instead. If anything raises, the failure is logged
    and far-future placeholder dates are stored.

    :param year: season year to look up
    """
    try:
        data = statsapi.get("season", {"sportId": 1, "seasonId": year})
        self.__parse_important_dates(data["seasons"][0], year)

        now = datetime.now()
        if year == now.year and self.season_ends_date < now:
            data = statsapi.get("season", {"sportId": 1, "seasonId": year + 1})
            self.__parse_important_dates(data["seasons"][0], year + 1)
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not swallowed during construction.
        debug.exception("Failed to refresh important dates")
        self.playoffs_start_date = datetime(3000, 10, 1)
        self.important_dates = [{"text": "None", "date": datetime(3000, 1, 1), "max_days": 1}]
def test_get_raises_errors(mocker):
    """``statsapi.get`` raises ``ValueError`` for both failure cases below."""
    # Replace the real endpoint table with the fake one for this test
    mocker.patch.dict("statsapi.ENDPOINTS", fake_dict(), clear=True)
    # Stand-in for the requests module so no real HTTP call is made
    requests_double = mocker.patch("statsapi.requests", autospec=True)
    # Every mocked response reports status code 0
    requests_double.get.return_value.status_code = 0

    # case 1: bad status code
    with pytest.raises(ValueError):
        statsapi.get("foo", {"bar": "baz"})

    # case 2: invalid endpoint
    with pytest.raises(ValueError):
        statsapi.get("bar", {"foo": "baz"})
def twoStrikeStats():
    """Count and print Mike Yastrzemski's hits that ended on a two-strike count.

    Walks team 137's schedule from 07/23/2020 to 09/27/2020, fetches each
    game's live feed, and for every non-postponed game inspects his plate
    appearances. The description of each qualifying hit is printed as it is
    found, and the total is printed at the end.
    """
    sched = statsapi.schedule(start_date='07/23/2020',
                              end_date='09/27/2020',
                              team=137)
    twoStrikeHits = 0

    for entry in sched:
        gameId = entry["game_id"]
        game_date = entry["game_date"]  # read but not used further
        game_result = entry["summary"]  # read but not used further
        game_status = entry["status"]

        game = statsapi.get('game', {'gamePk': gameId})
        allPlays = game['liveData']['plays']['allPlays']
        if game_status == 'Postponed':
            continue

        for pa in allPlays:
            atTwoStrikes = False  # assigned but not used further
            pitcherName = pa['matchup']['pitcher']['fullName']  # not used
            hitterName = pa['matchup']['batter']['fullName']
            playEvents = pa['playEvents']
            if hitterName != 'Mike Yastrzemski':
                continue

            resultOfPlay = pa['result']['event']
            resultOfPlayDescription = pa['result']['description']
            # The count on the final event is the count the play ended on
            lastPitchStrikesInCount = playEvents[-1]['count']['strikes']

            if 'Caught' in resultOfPlay or 'Pickoff' in resultOfPlay:
                # NOTE(review): blocks on stdin to show the raw play; the
                # branch this belongs to should be confirmed.
                input(pa)
                continue

            if lastPitchStrikesInCount >= 2:
                if resultOfPlay in ('Single', 'Double', 'Triple', 'Home Run'):
                    print(resultOfPlayDescription)
                    twoStrikeHits += 1

    print(twoStrikeHits)
def player_got_hit_in_game(player_id: int, game_id: int, home_or_away: str) -> bool:
    """
    Generate a training label: did the player record a hit in the given game?

    Looks the player up in the game's boxscore under the given side. Returns
    False when the player is not listed for that side, otherwise whether the
    batting 'hits' value (0 when absent) is greater than zero.

    Parameters
    ----------------
    player_id: int
        The 6-digit ID of a batter, which can be fetched using
        get_player_id_from_name('Hitter Name').

    game_id: int
        The 6-digit ID for a game, can be fetched from statsapi.schedule().

    home_or_away: str
        Indicates whether the player was on the home team or the away team
        for the specified game. Value is either "home" or "away".
    """
    field_filter = 'gameData,teams,teamName,shortName,teamStats,batting,atBats,runs,hits,rbi,strikeOuts,baseOnBalls,leftOnBase,players,boxscoreName,liveData,boxscore,teams,players,id,fullName,batting,avg,ops,era,battingOrder,info,title,fieldList,note,label,value'
    response = statsapi.get('game', {'gamePk': game_id, 'fields': field_filter})

    side_players = response['liveData']['boxscore']['teams'][home_or_away]['players']
    # Boxscore player keys are the numeric id prefixed with "ID"
    player_stats = side_players.get('ID' + str(player_id), False)

    if player_stats:
        return player_stats['stats']['batting'].get('hits', 0) > 0
    return False
def _int_or_null(value):
    """Return ``int(value)``, or the string 'null' when it cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 'null'


def box_upload_get(game_id):
    """Collect score, venue and weather fields for one game.

    :param game_id: gamePk passed to the 'game' endpoint
    :returns: dict with keys ``gameid``, ``home_team_runs``,
        ``away_team_runs``, ``venue_id``, ``weather_category``, ``temp``,
        ``wind_mph`` and ``wind_direction``. Venue and weather values that are
        missing (or, for the numeric ones, not numeric) are reported as the
        string 'null' instead of raising.
    """
    game = mlb.get('game', {'gamePk': game_id})
    teams = game['liveData']['boxscore']['teams']
    weather = game['gameData']['weather']

    # The wind text is split as "<speed> <unit>, <direction>". Previously a
    # missing 'wind' or 'temp' fell back to 'null' and then crashed in
    # int('null') / 'null'.split(',')[1].
    wind_parts = weather.get('wind', '').split(',')
    wind_speed_text = wind_parts[0].split(' ')[0]
    if len(wind_parts) > 1:
        wind_direction = wind_parts[1].strip().lower()
    else:
        wind_direction = 'null'

    return {
        'gameid': game_id,
        'home_team_runs': teams['home']['teamStats']['batting']['runs'],
        'away_team_runs': teams['away']['teamStats']['batting']['runs'],
        'venue_id': game['gameData']['venue'].get('id', 'null'),
        'weather_category': weather.get('condition', 'null').lower(),
        'temp': _int_or_null(weather.get('temp')),
        'wind_mph': _int_or_null(wind_speed_text),
        'wind_direction': wind_direction,
    }
def get_current_season_stats(player_name: str, current_team: str, date: str, season=CURR_SEASON) -> OrderedDict:
    """
    One of the main data retrieval functions. Returns an ordered dictionary
    holding "Name", "ID" and "Team" followed by the player's hitting
    statistics from CURR_SEASON_START through `date`.

    Parameters
    ----------------
    player_name: str
        The name of a player as a string (i.e. "Buster Posey")

    current_team: str
        Stored as-is under the "Team" key of the result.

    date: str
        End date of the byDateRange stats query.

    season:
        Currently unused; kept so existing callers keep working.

    Raises
    ----------------
    ValueError
        If check_pos_player(player_name) is falsy.
    """
    if not check_pos_player(player_name):
        raise ValueError("Player name entered is not a position player")

    player_id = get_player_id_from_name(player_name)
    stats_dict = OrderedDict({"Name": player_name, "ID": player_id, "Team": current_team})

    # Look up the player's current season hitting stats
    stats_hydration = f'stats(group=[hitting],type=[byDateRange],startDate={CURR_SEASON_START},endDate={date},sportId=1)'
    get_player_stats = statsapi.get('person', {'personId': player_id, 'hydrate': stats_hydration})

    # NOTE(review): indexes the first stats group and first split directly;
    # a response without them raises KeyError/IndexError -- confirm callers
    # expect that.
    stats_dict.update(get_player_stats['people'][0]['stats'][0]['splits'][0]['stat'])
    return stats_dict
def build(self, game_id):
    """Fetch a game's live feed and turn it into an enhanced game object.

    The feed's plays are parsed by ``self.game_parser``; the away/home file
    codes and the parsed players are attached; then ``self.game_enhancer``
    is run on the result before it is returned.
    """
    feed = statsapi.get('game', {'gamePk': game_id})
    all_plays = feed['liveData']['plays']['allPlays']
    game = self.game_parser.parse(all_plays)

    game_data = feed['gameData']
    teams = game_data['teams']
    game.away = teams['away']['fileCode']
    game.home = teams['home']['fileCode']
    game.players = self.parse_players(game_data['players'])

    self.game_enhancer.execute(game)
    return game
def team_info():
    """Return a dict mapping team id to a ``Team`` built from its API record.

    Requests the active teams for sport id 1.
    """
    # NOTE: an earlier variant of this request also passed a 'fields' filter
    # ('teams,name,id,division,league'); it is not used here.
    response = statsapi.get('teams', {'sportIds': 1, 'activeStatus': 'Yes'})
    return {entry['id']: Team(entry) for entry in response['teams']}
def hitting_everything2020():
    """Record 2020 batting lines for team 137, one finished game at a time.

    Walks the team's schedule from 07/23/2020 to 9/27/2020. Dates that were
    already processed are tracked in ``Teams/<abbrev>/2020/h_dates.txt`` as
    the text of a dict ``{'dates': [...]}``. For each finished regular-season
    game not yet recorded there, every player on the team's side with a
    non-empty batting line is passed to ``h_add`` (team at home) or ``a_add``
    (team away), and the game date is appended to the file.
    """
    import ast  # local import: only needed to parse the dates file safely

    team = 137
    info = statsapi.get('team', {'teamId': team})
    name = info['teams'][0]['name']
    abbrev = info['teams'][0]['abbreviation']
    dates_file = "Teams/" + abbrev + "/2020/h_dates.txt"

    sched = statsapi.schedule(start_date='07/23/2020',
                              end_date='9/27/2020',
                              team=team)

    for game in sched:
        game_date = game["game_date"]
        # Any value other than 'N' marks a doubleheader; the game number keeps
        # the two games of one day apart (matches the check in `everything`).
        if game['doubleheader'] != 'N':
            game_date = game["game_date"] + "(" + str(game["game_num"]) + ")"

        # Create the tracking file on first use, then read it back.
        if not path.exists(dates_file):
            with open(dates_file, "w") as FILE:
                FILE.write("{'dates':[]}")
        with open(dates_file, "r") as FILE:
            content = FILE.read()

        try:
            # literal_eval only accepts Python literals, unlike eval, which
            # would execute whatever the file contains.
            content_dict = ast.literal_eval(content)
        except (ValueError, SyntaxError) as e:
            print("we got an error ", e)
            print("Database Error ")
            # Without a readable record we cannot tell what was processed;
            # skip this game rather than fail on an undefined/stale dict.
            continue

        finished = game['status'] in ("Final", "Game Over")
        if game_date in content_dict['dates'] or not finished:
            continue
        if game["game_type"] != "R":
            continue

        # Fetch the boxscore only for games that are actually processed.
        scoredata = statsapi.boxscore_data(game["game_id"])
        if game['home_name'] == name:
            side, add = 'home', h_add
        else:
            side, add = 'away', a_add
        side_players = scoredata[side]['players']

        for ID in scoredata['playerInfo']:
            if ID in side_players and side_players[ID]['stats']['batting'] != {}:
                add(game_date, scoredata, ID, abbrev)

        # Serialize before opening so a failure cannot leave a truncated file.
        content_dict['dates'].append(game_date)
        serialized = str(content_dict)
        try:
            with open(dates_file, "w") as f:
                f.write(serialized)
        except OSError as e:
            print("we got an error ", e)
            print("Database Error ")
def collect_game_data(start_date, end_date):
    '''Calls StatsApi to collect game IDs for games played during defined
    period and returns one row per play event of every game.

    Each row holds the game id, a ``pitch_id`` / ``prior_pitch`` pair built
    from the game id, the at-bat index and a per-game running counter, the
    at-bat level columns and the play-event level columns. Columns missing
    from a game's data are filled with NaN. Where a column name exists at
    both levels (``count.balls``, ``count.strikes``) the play-event value is
    the one kept.

    :param start_date: passed to ``statsapi.schedule`` as ``start_date``
    :param end_date: passed to ``statsapi.schedule`` as ``end_date``
    :returns: ``pandas.DataFrame`` built from the collected rows
    '''
    # Column lists are constant, so they are defined once, not once per game.
    all_plays_cols = [
        'about.atBatIndex', 'about.halfInning', 'about.inning', 'count.balls',
        'count.strikes', 'matchup.batSide.code', 'matchup.batter.fullName',
        'matchup.batter.id', 'matchup.pitchHand.code',
        'matchup.splits.menOnBase', 'matchup.pitcher.fullName',
        'matchup.pitcher.id', 'result.eventType'
    ]
    play_events_cols = [
        'count.balls', 'count.strikes', 'details.ballColor',
        'details.call.code', 'details.call.description',
        'details.type.description', 'details.description', 'details.code',
        'details.type.code', 'index', 'pitchData.nastyFactor',
        'pitchData.zone', 'pitchNumber', 'type'
    ]

    schedule = statsapi.schedule(start_date=start_date, end_date=end_date)
    # Read the ids straight from the schedule entries; going through a
    # DataFrame raised KeyError on an empty schedule.
    gamepks = [entry['game_id'] for entry in schedule]

    # Iterates through play-by-play data, normalizes nested .json, and adds
    # data back to columns defined above.
    list_for_final_df = []
    for game in gamepks:
        curr_game = statsapi.get('game_playByPlay', {'gamePk': game})
        curr_plays = curr_game.get('allPlays')
        if not curr_plays:
            # Nothing to normalize for this game.
            continue
        curr_plays_norm = json_normalize(curr_plays)
        at_bat_cols = set(curr_plays_norm.columns)

        # Running event counter, restarted for every game.
        # NOTE(review): counter placement inferred (per play event) -- confirm.
        i = 1
        for _, row in curr_plays_norm.iterrows():
            play_events = json_normalize(row['playEvents'])
            event_cols = set(play_events.columns)
            id_prefix = str(game) + '_' + str(row['about.atBatIndex']) + '_'

            for _, play_events_row in play_events.iterrows():
                game_dict = {
                    'gamepk': game,
                    'pitch_id': id_prefix + str(i),
                    'prior_pitch': id_prefix + str(i - 1),
                }
                for col in all_plays_cols:
                    game_dict[col] = row[col] if col in at_bat_cols else np.nan
                for col in play_events_cols:
                    game_dict[col] = (play_events_row[col]
                                      if col in event_cols else np.nan)
                list_for_final_df.append(game_dict)
                i += 1

    return pd.DataFrame(list_for_final_df)
def pbp():
    """Return the play at index 8 of team 137's first game from 09/01/2019 on.

    The play-by-play feed of the first scheduled game is fetched; the result
    description of every play is collected along the way but not returned.
    """
    first_game = statsapi.schedule(start_date='09/01/2019', team=137)[0]
    gameId = first_game["game_id"]
    game_date = first_game["game_date"]  # read but not used further
    game_result = first_game["summary"]  # read but not used further

    play = statsapi.get('game_playByPlay', {'gamePk': gameId})
    # Collected for inspection only; not part of the return value
    test = [entry['result']['description'] for entry in play['allPlays']]

    return play['allPlays'][8]
def get_pks(season):
    """Return the gamePks of a season, last scheduled game first.

    The season's start and end dates come from the 'season' endpoint and are
    used to request the schedule; the collected ids are returned in reverse
    schedule order.
    """
    season_info = mlb.get('season', {'sportId': 1, 'seasonId': season})
    season = season_info['seasons'][0]

    query = {
        'startDate': season['seasonStartDate'],
        'endDate': season['seasonEndDate'],
        'sportId': 1
    }
    schedule = mlb.get('schedule', query)

    in_schedule_order = [
        game['gamePk']
        for day in schedule['dates']
        for game in day['games']
    ]
    return in_schedule_order[::-1]
def test_get_returns_dictionary(mocker):
    """``statsapi.get`` returns what the response's ``json()`` call returns."""
    # Replace the real endpoint table with the fake one for this test
    mocker.patch.dict("statsapi.ENDPOINTS", fake_dict(), clear=True)
    # Stand-in for the requests module so no real HTTP call is made
    requests_double = mocker.patch("statsapi.requests", autospec=True)
    fake_response = requests_double.get.return_value
    # Every mocked response reports status code 200
    fake_response.status_code = 200

    result = statsapi.get("foo", {"bar": "baz"})

    # The result must be the value produced by the response's json method
    assert result == fake_response.json.return_value
def generateTeam(teamName, year):
    """Build a ``Team`` from the roster entries with status code "A".

    The first result of the team lookup for ``teamName`` and ``year`` supplies
    the team id used to request the roster; a player object is generated for
    each roster entry whose status code is "A".
    """
    matches = statsapi.lookup_team(teamName,
                                   activeStatus="B",
                                   season=year,
                                   sportIds=1)
    teamId = matches[0]['id']
    team = statsapi.get("team_roster", {"teamId": teamId, "season": year})

    roster = [
        generatePlayer("", year, playerId=entry['person']['id'])
        for entry in team['roster']
        if entry['status']['code'] == "A"
    ]
    return Team(teamName, roster)
def get_weather(game_ids):
    """Return a DataFrame with one weather row per game id.

    Each row is the game's ``gameData.weather`` dict from the 'game' endpoint
    with the game id added under the ``pk`` key.
    """
    rows = []
    for pk in game_ids:
        feed = mlb.get('game', {"gamePk": pk})
        conditions = feed['gameData']['weather']
        conditions['pk'] = pk  # tag the row with the game it belongs to
        rows.append(conditions)
    return pd.DataFrame(rows)
def win_prob(games):
    """Graph the home team's win probability for the first game in ``games``.

    Fetches the game's win-probability entries and passes the list of
    ``homeTeamWinProbability`` values, together with their 0-based positions,
    to ``graph``.
    """
    # game_winProbability
    game = games[0]['game_id']
    wp = statsapi.get('game_winProbability', {'gamePk': game})

    wp_play = [entry['homeTeamWinProbability'] for entry in wp]
    index = list(range(len(wp_play)))

    graph(wp_play, index)
def __init__(self, game_id):
    """Load a game's feed and set up team and play containers.

    Stores the raw 'game' response as ``self.boxscore``, builds the home and
    away ``Team`` objects from their id and name, keeps the feed's plays in
    ``self.box_plays``, and starts with empty ``clean_plays`` / ``plays``
    lists before computing the half-inning starts.
    """
    self.game_id = game_id
    self.boxscore = statsapi.get('game', {'gamePk': game_id})

    teams = self.boxscore['gameData']['teams']
    home, away = teams['home'], teams['away']
    self.home_team = Team(home['id'], home['name'])
    self.away_team = Team(away['id'], away['name'])

    self.box_plays = self.boxscore['liveData']['plays']['allPlays']
    self.clean_plays = []
    self.plays = []
    self.half_inning_starts = self.get_half_inning_starts()
def getStatus(gameId):
    """Return the ``abstractGameState`` of a game from the 'game' endpoint."""
    gumbo = statsapi.get("game", {"gamePk": gameId})
    status = gumbo.get('gameData').get('status')
    return status['abstractGameState']
def update_gamePlayerLinks(game):
    """Tag each of the game's player links with the team the player is on.

    For every team in ``game.teams`` the roster is requested using the
    roster inputs of the game plus that team's id. Every entry of
    ``game.game_player_links`` whose ``'player'`` appears on a roster gets
    that team's id stored under ``'teamId'`` (modified in place).
    """
    roster_inputs = get_roster_inputs(game)

    players = {}
    for team in game.teams:
        # Copy per team so one team's id never leaks into the next request.
        # (This used to call get_roster_inputs(call) with an undefined name.)
        params = dict(roster_inputs)
        params.update({"teamId": team['id']})
        roster = mlb.get('team_roster', params)
        players[team['id']] = [x['person']['id'] for x in roster['roster']]

    for link in game.game_player_links:
        for team_id, team_players in players.items():
            if link['player'] in team_players:
                link.update({'teamId': team_id})
def get_h2h_vs_pitcher(batter_id, opponent_id, season=2019):
    """
    Returns a dictionary containing a limited amount of head-to-head batting
    statistics between the hitter (batter_id) and pitcher (opponent_id)
    specified. One of the main data retrieval functions.

    Every key of the result carries an "_h2h" suffix and the keys are sorted
    so the order is the same regardless of the players entered. When the
    response holds no head-to-head split, a dictionary of zeroed stats is
    returned instead.

    Parameters
    ----------------
    batter_id: int
        The 6-digit ID of a batter, which can be fetched using
        get_player_id_from_name('Hitter Name').

    opponent_id: int
        The 6-digit ID of a pitcher, which can be fetched using
        get_player_id_from_name('Pitcher Name').

    season: int
        Season used in the stats query. Defaults to 2019, the value that was
        previously hard-coded.
    """
    hydrate = 'stats(group=[hitting],type=[vsPlayer],opposingPlayerId={},season={},sportId=1)'.format(
        opponent_id, season)
    params = {'personId': batter_id, 'hydrate': hydrate, 'sportId': 1}
    r = statsapi.get('person', params)

    # Look up batting stats versus pitcher; with no matchup data return a
    # dictionary of empty stats. The lookup indexes lists as well as dicts,
    # so a short or empty list (IndexError) is handled like a missing key.
    try:
        batting_stats = r['people'][0]['stats'][1]['splits'][0]['stat']
    except (KeyError, IndexError):
        return OrderedDict({
            'atBats_h2h': 0.0,
            'avg_h2h': 0.0,
            'hits_h2h': 0.0,
            'obp_h2h': 0.0,
            'ops_h2h': 0.0,
            'slg_h2h': 0.0
        })

    placeholders = ("-.--", ".---", "*.**")  # treated as 0.0
    excluded_rates = ('stolenBasePercentage', 'atBatsPerHomeRun')
    always_kept = ('hits', 'atBats')

    # Only get rate stats (string-valued, minus the excluded ones) vs pitcher,
    # plus the two counting stats that are always kept.
    filtered = {
        (k + "_h2h"): (0.0 if v in placeholders else float(v))
        for k, v in batting_stats.items()
        if (isinstance(v, str) and k not in excluded_rates) or k in always_kept
    }

    # Making sure the keys are in the same order regardless of players entered
    return OrderedDict(sorted(filtered.items()))