def game_id(gid):
    """
    Checks a game ID exists in the provided dataset. Will prompt until a valid
    game ID is provided

    Parameters
    ----------
    gid: an integer of a game_id. Any value that int() can convert (for
        example a numeric string) is converted before it is checked

    Returns
    -------
    gid: a validated integer of a game_id
    """

    def _ask_for_int(prompt):
        # Keep asking until the reply parses as an integer, so that a typo
        # does not abort the validation loop with a ValueError
        while True:
            reply = input(prompt)
            try:
                return int(reply)
            except ValueError:
                print(f'{reply!r} is not a whole number.')

    # Load in the schedule information
    games = load.games_data()

    # Get a list of all possible game IDs
    valid_game_ids = games['game_id'].tolist()

    while True:
        # Normalise the candidate to a plain int when possible. Only the
        # errors int() raises for unconvertible input are tolerated; in that
        # case the value is left as-is, fails the membership test below, and
        # the user is asked for a replacement
        try:
            gid = int(gid)
        except (TypeError, ValueError, OverflowError):
            pass

        # If the game ID is a valid game ID, hand it back
        if gid in valid_game_ids:
            return gid

        # If not, force the user to supply a new game ID
        print(f'{gid} is not a valid game ID.')
        week = _ask_for_int('If the week number is known, enter it now, or '
                            'else enter 0 to see a list of all games: ')

        if week != 0:
            # If user knows what week the game took place, allow them to only
            # see games from that week. week_number is not defined in this
            # block; presumably it validates the week -- confirm in its own
            # definition
            week = week_number(week)
            listing = games[games['week'] == week]
            print(f'\nGAMES IN WEEK {week}:\n')
        else:
            # Otherwise, show all games
            listing = games
            print('\nALL GAMES:\n')

        for _, game in listing.iterrows():
            print(f'{game.game_id} -- {game.away} @ {game.home}')

        gid = _ask_for_int('Game ID: ')
def game_id(home, away):
    """
    Finds the game_id of a game between the home and away team. The function
    checks whether or not the teams are valid, and if the teams are reversed,
    will provide the game_id for the meeting of these two teams

    Parameters
    ----------
    home: a string of the home team's code
    away: a string of the away team's code

    Returns
    -------
    desired_game_id: the game_id of the game in which home hosted away or, if
        no such game exists, the game in which away hosted home
    """
    # Validate that the home and away team codes supplied are valid team codes
    home = check.team_code(home)
    away = check.team_code(away)

    # Bring in the schedule information once; nothing in the retry loop below
    # modifies it, so it does not need to be reloaded on every attempt
    games = load.games_data()

    while True:
        # Check if the game existed as supplied
        desired_game = games[(games['home'] == home) &
                             (games['away'] == away)]

        # Exactly one matching row counts as a hit
        if len(desired_game) == 1:
            break

        # Otherwise, alert user that the home team did not host the away team
        print(f'{home} did not host {away}. Checking if {away} hosted '
              f'{home}')

        # Check if away team hosted home team
        desired_game = games[(games['home'] == away) &
                             (games['away'] == home)]

        if len(desired_game) == 1:
            break

        # Neither orientation matched: ask for two new team codes.
        # check.team_code('') presumably prompts the user -- confirm in check
        print(f'{home} and {away} did not play each other in this '
              'dataset')
        home = check.team_code('')
        away = check.team_code('')

    # Once a game has been identified, give back the game ID for the game
    desired_game_id = desired_game['game_id'].iloc[0]

    return desired_game_id
def game_week(gid):
    """
    Looks up the week of the schedule in which a game was played

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    week: the value of the 'week' column for the first schedule row whose
        game_id matches the validated game ID
    """
    # Run the game ID through validation before touching the schedule
    gid = check.game_id(gid)

    # Pull the schedule and flag the rows belonging to this game
    schedule = load.games_data()
    is_this_game = schedule['game_id'] == gid

    # The first matching row carries the week
    matching_weeks = schedule.loc[is_this_game, 'week']

    return matching_weeks.iloc[0]
def game_teams(gid):
    """
    Looks up the two teams that played in a specified game (via game_id)

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    home: the 'home' value of the first schedule row matching the game ID
    away: the 'away' value of the first schedule row matching the game ID
    """
    # Run the game ID through validation before touching the schedule
    gid = check.game_id(gid)

    # Pull the schedule and flag the rows belonging to this game
    schedule = load.games_data()
    is_this_game = schedule['game_id'] == gid

    # Read each team code off the first matching row
    home_code = schedule.loc[is_this_game, 'home'].iloc[0]
    away_code = schedule.loc[is_this_game, 'away'].iloc[0]

    return home_code, away_code
def tracking_and_plays(gid=0, pid=0, tracking=pd.DataFrame(),
                       play=pd.DataFrame()):
    """
    Combines tracking, play-level and schedule data into a single dataframe

    Parameters
    ----------
    gid: an integer of a game_id. 0 means "no game ID supplied"
    pid: an integer of a play_id. 0 means "no play ID supplied"
    tracking: a dataframe of tracking data. When it is empty, tracking data
        is loaded based on gid and pid
    play: a dataframe of play-level data. When it is empty, plays data is
        loaded based on gid and pid

    Returns
    -------
    tracking_and_plays: tracking inner-joined to plays on game_id and
        play_id, then inner-joined to the schedule on game_id, with added
        'offensive_team' and 'defensive_team' columns
    """
    # NOTE: the dataframe defaults are built once when the function is
    # defined; they are only read and rebound here, never modified in place
    gid_given = gid != 0
    pid_given = pid != 0

    if tracking.empty:
        # Only pass the loader the identifiers that were actually supplied
        tracking_args = {}
        if gid_given:
            # A known game pins down the week its tracking data lives in
            tracking_args['gid'] = gid
            tracking_args['week'] = find.game_week(gid)
        if pid_given:
            tracking_args['pid'] = pid
            if not gid_given:
                # Play ID without a game ID: the loader is called with week=0
                tracking_args['week'] = 0

        # With neither ID supplied this is a bare call to the loader
        tracking = load.tracking_data(**tracking_args)

    if play.empty:
        # Same idea for the plays data, which takes no week argument
        play_args = {}
        if gid_given:
            play_args['gid'] = gid
        if pid_given:
            play_args['pid'] = pid

        play = load.plays_data(**play_args)

    # Attach the play-level columns to every tracking row
    merged = pd.merge(tracking, play, how='inner',
                      on=['game_id', 'play_id'])

    # Attach who hosted, who visited, and the week of the game
    schedule = load.games_data()[['game_id', 'home', 'away', 'week']]
    merged = pd.merge(merged, schedule, how='inner', on='game_id')

    # The team in possession is the offense; the defense is whichever of
    # home/away the offense is not
    merged['offensive_team'] = merged['possession_team']
    offense_is_home = merged['offensive_team'] == merged['home']
    merged['defensive_team'] = np.where(offense_is_home,
                                        merged['away'],
                                        merged['home'])

    return merged
def plays_and_games(gid=0, home='', away='', prechecked_gid=False):
    """
    Joins play data to schedule data so each play shows which team ran it
    and against which opponent

    Parameters
    ----------
    gid: an integer of a game_id. 0 means "no game ID supplied"
    home: a string representing the home team's team code
    away: a string representing the away team's team code
    prechecked_gid: a boolean of whether or not the game ID has been
        prechecked

    Returns
    -------
    plays_from_game: a merged dataframe of play and game data, restricted to
        a fixed set of columns

    Notes
    -----
    When either team code is non-empty, the game ID found for the two teams
    replaces whatever gid was passed in
    """
    # A supplied game ID is validated unless the caller already did so
    if gid != 0 and not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    # If at least one team code was given, both codes go through validation
    # (including one left empty) and the matchup decides the game ID
    if not (home == '' and away == ''):
        home = check.team_code(home)
        away = check.team_code(away)
        gid = find.game_id(home, away)
        prechecked_gid = True

    # Plays are loaded with whatever game ID was settled on above; gid is
    # still 0 here if no game ID and no team codes were supplied
    plays = load.plays_data(gid=gid, prechecked_gid=prechecked_gid)

    # Schedule columns needed to label the teams in each play
    schedule_columns = ['game_id', 'home', 'away', 'week']
    schedule = load.games_data(gid, prechecked_gid)[schedule_columns]

    plays = pd.merge(plays, schedule, how='inner', on='game_id')

    # The team in possession is the offense; the defense is whichever of
    # home/away the offense is not
    plays['offensive_team'] = plays['possession_team']
    offense_is_home = plays['offensive_team'] == plays['home']
    plays['defensive_team'] = np.where(offense_is_home,
                                       plays['away'],
                                       plays['home'])

    # Columns handed back to the caller, in output order
    output_columns = [
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code',
        'penalty_player', 'pass_result', 'offensive_play_result',
        'play_result', 'epa', 'is_defensive_pi', 'down_dist_summary',
        'home', 'away', 'offensive_team', 'defensive_team', 'week',
    ]

    return plays[output_columns]
def first_down_line(gid, pid, tracking=pd.DataFrame(), prechecked_gid=False,
                    prechecked_pid=False):
    """
    Finds what yardline is needed to be gained to achieve a first down

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    tracking: a set of tracking information pertaining to a particular play.
        If it is empty (the default), the tracking data for the play is
        loaded
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function

    Returns
    -------
    first_down_yardline: the line of scrimmage plus the yards to go when the
        play direction is 'right', otherwise the line of scrimmage minus the
        yards to go
    """
    if not prechecked_gid:
        # Validate the game ID
        gid = check.game_id(gid)

    if not prechecked_pid:
        # Validate the play ID
        pid = check.play_id(gid, pid)

    # Get the line of scrimmage and number of yards needed to achieve a first
    # down. Both helpers are defined outside this block
    los = line_of_scrimmage(gid, pid)
    distance_to_first = yards_to_go(gid, pid)

    # The schedule is only needed to find the week for loading tracking data,
    # so it is skipped entirely when the caller already supplied tracking
    if tracking.empty:
        # The game ID has been validated (or was declared prechecked) by this
        # point, so the loader is told not to check it again
        games = load.games_data(gid, True)
        week = games.loc[games['game_id'] == gid, 'week'].iloc[0]

        tracking = load.tracking_data(gid, pid, week, prechecked_gid=True,
                                      prechecked_pid=True,
                                      prechecked_week=True)

    # Get the direction of play from the first tracking row. If the play is
    # going right, yards will be added, otherwise they will be subtracted
    play_direction = tracking['play_direction'].iloc[0]

    # Calculate the yardline needed to be gained to achieve a first down
    if play_direction == 'right':
        first_down_yardline = los + distance_to_first
    else:
        first_down_yardline = los - distance_to_first

    return first_down_yardline