def get_penalty_counts(game):
    """
    Gets penalty counts for both the home and the road team of the specified
    game, i.e. how many penalties of each duration have been registered in
    the game's event data.

    :param game: game dictionary providing (at least) the 'season' and
        'game_id' items used to locate the game events file
    :return: dictionary with the keys 'home' and 'road', each holding a
        defaultdict mapping penalty duration to number of penalties
    """
    game_type = get_game_type_from_season_type(game)

    game_events_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_events', str(game['season']),
        str(game_type), "%d.json" % game['game_id'])
    # using a context manager to make sure the file handle is closed again
    with open(game_events_src_path) as events_file:
        events_data = json.load(events_file)

    pen_counts = {'home': defaultdict(int), 'road': defaultdict(int)}

    for period in events_data:
        for event in events_data[period]:
            if event['type'] != 'penalty':
                continue
            # presumably converting seconds to full minutes - TODO confirm
            # unit of 'duration' in event data
            duration = int(event['data']['duration'] / 60)
            # every team indicator other than 'home' is counted for the
            # road team
            home_road = 'home' if event['data']['team'] == 'home' else 'road'
            pen_counts[home_road][duration] += 1

    return pen_counts
def build_interval_tree(game):
    """
    Builds interval tree containing all goalie shifts and penalties from
    current game.

    :param game: game dictionary providing (at least) the 'season' and
        'game_id' items used to locate the game events file
    :return: tuple of the populated interval tree and a dictionary mapping
        the time of each goal to the 'balance' item of that goal event
    """
    game_type = get_game_type_from_season_type(game)

    game_events_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_events', str(game['season']),
        str(game_type), "%d.json" % game['game_id'])
    # using a context manager to make sure the file handle is closed again
    with open(game_events_src_path) as events_file:
        events_data = json.load(events_file)

    # setting up interval tree
    it = intervaltree.IntervalTree()
    # setting up list to contain all end of period times and dictionary to
    # contain all times a goal has been scored
    end_period_times = list()
    goal_times = dict()
    # setting up dictionary for goalie changes
    goalie_changes = {'home': list(), 'road': list()}

    for period in sorted(events_data):
        # adding end time of current (regulation) period to all end of
        # period times
        if period in ('1', '2', '3'):
            end_period_times.append(int(period) * 1200)
        for event in events_data[period]:
            # retrieving type of event first
            event_type = event['type']
            # adding time of overtime end to list of period end times
            if period == 'overtime' and event_type in ('periodEnd', 'goal'):
                end_period_times.append(event['time'])
            # adding time of goal to times a goal has been scored
            if event_type == 'goal':
                goal_times[event['time']] = event['data']['balance']
            # registering goalie changes
            if event_type == 'goalkeeperChange':
                register_goalie_change(event, game, goalie_changes)
            # adding penalties to interval tree
            if event_type == 'penalty':
                create_penalty_interval(event, game, it)

    # (optionally) adding final outgoing goalie change at end of game
    for home_road in goalie_changes:
        # retrieving last registered goalie change for team in current game
        # NOTE(review): raises an IndexError if no goalie change has been
        # registered for a team - confirm that this can't happen
        last_change = goalie_changes[home_road][-1]
        event_team = last_change.team
        player_id = last_change.player_id
        if last_change.type == 'goalie_in':
            # using the maximum time from all period end times as
            # time of game end
            goalie_changes[home_road].append(
                GoalieChange(
                    max(end_period_times), event_team, home_road,
                    'goalie_out', player_id))

    # creating actual goalie shifts from registered goalie changes
    create_goalie_shifts(goalie_changes, game, it)

    return it, goal_times
def get_shootout_stats(game, key, opp_key):
    """
    Gets shootout statistics for specified game and team.

    :param game: game dictionary providing (at least) the 'season' and
        'game_id' items used to locate the game events file
    :param key: indicator ('home' or 'road') of the team to collect
        shootout statistics for
    :param opp_key: indicator of the opposing team (currently not used, all
        attempts not taken by the team itself are attributed to the opponent)
    :return: dictionary with shootout statistics, empty if the game events
        contain no shootout data
    """
    # loading events data
    game_type = get_game_type_from_season_type(game)
    game_events_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_events', str(game['season']),
        str(game_type), "%d.json" % game['game_id'])
    # using a context manager to make sure the file handle is closed again
    with open(game_events_src_path) as events_file:
        events_data = json.load(events_file)

    team_shootout_stats = dict()

    # bailing out early if there was no shootout in current game
    if 'shootout' not in events_data or not events_data['shootout']:
        return team_shootout_stats

    # setting initial values
    for param in [
        'so_rounds', 'so_a', 'so_g', 'so_pctg',
        'opp_so_a', 'opp_so_g', 'opp_so_pctg', 'so_sv_pctg'
    ]:
        team_shootout_stats[param] = 0

    for attempt in events_data['shootout']:
        if attempt['type'] != 'shootout':
            continue
        # dirty way to get indication whether the 'home' or 'road' team
        # was shooting
        so_team = attempt['data']['team'].replace("visitor", "road")
        # re-calculating current shootout round
        team_shootout_stats['so_rounds'] = (
            attempt['data']['order'] + 1) // 2
        # registering shootout data per team
        prefix = 'so' if so_team == key else 'opp_so'
        team_shootout_stats["%s_a" % prefix] += 1
        if attempt['data']['scored']:
            team_shootout_stats["%s_g" % prefix] += 1

    # finally calculating shootout percentages (previously done in the else
    # clause of the loop above which always ran since the loop never breaks)
    attempts = team_shootout_stats['so_a']
    if attempts:
        team_shootout_stats['so_pctg'] = round(
            team_shootout_stats['so_g'] / attempts * 100, 2)
    opp_attempts = team_shootout_stats['opp_so_a']
    if opp_attempts:
        opp_goals = team_shootout_stats['opp_so_g']
        team_shootout_stats['opp_so_pctg'] = round(
            opp_goals / opp_attempts * 100, 2)
        team_shootout_stats['so_sv_pctg'] = round(
            (1 - opp_goals / opp_attempts) * 100, 2)

    return team_shootout_stats
def get_shootout_stats(goalie_dict, game):
    """
    Retrieves shootout stats for specified game goaltender statistics item.

    :param goalie_dict: goaltender game statistics item, needs to provide
        the 'goalie_id' item; it is modified in place
    :param game: game dictionary providing (at least) the 'season' and
        'game_id' items used to locate the game events file
    :return: the specified goaltender statistics item, extended by shootout
        statistics if the goaltender faced at least one shootout attempt
    """
    game_type = get_game_type_from_season_type(game)
    game_events_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_events', str(game['season']),
        str(game_type), "%d.json" % game['game_id'])
    # using a context manager to make sure the file handle is closed again
    with open(game_events_src_path) as events_file:
        game_events = json.load(events_file)

    shootout = game_events.get('shootout', list())

    # retrieving shootout attempts directed towards current goalie
    per_goalie_shootout_attempts = [
        event for event in shootout
        if event['type'] == 'shootout' and
        event['data']['goalkeeper']['playerId'] == goalie_dict['goalie_id']
    ]

    # TODO: not yet sure how to definitely retrieve winning/losing status
    # ('so_w', 'so_l') here since the game-winning goal event in shootout
    # data gives no indication about the goalie allowing it

    # setting initial values
    if per_goalie_shootout_attempts:
        goalie_dict['so_games_played'] = 1
        goalie_dict['so_attempts_a'] = 0
        goalie_dict['so_goals_a'] = 0

    # cumulating shootout statistics
    for attempt in per_goalie_shootout_attempts:
        goalie_dict['so_attempts_a'] += 1
        if attempt['data']['scored']:
            goalie_dict['so_goals_a'] += 1

    # calculating shootout save percentage (previously done in the else
    # clause of the loop above which always ran since the loop never breaks)
    if goalie_dict.get('so_attempts_a'):
        goalie_dict['so_sv_pctg'] = round(
            (1 - goalie_dict['so_goals_a'] / goalie_dict['so_attempts_a']) *
            100, 2)

    return goalie_dict
def check_pp_goals(game, key, opp_key, gsl):
    """
    Checks power play goals retrieved from team stats by looking at power
    play goals in event data. Numbers from event data take precedence.

    :param game: game dictionary providing (at least) the 'season' and
        'game_id' items used to locate the game events file
    :param key: indicator ('home' or 'road') of the current team
    :param opp_key: indicator ('home' or 'road') of the opposing team
    :param gsl: game stat line holding the 'pp_goals' and 'opp_pp_goals'
        items to be checked; it is modified in place
    :return: the (optionally corrected) game stat line
    """
    # loading events data
    game_type = get_game_type_from_season_type(game)
    game_events_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_events', str(game['season']),
        str(game_type), "%d.json" % game['game_id'])
    # using a context manager to make sure the file handle is closed again
    with open(game_events_src_path) as events_file:
        events_data = json.load(events_file)

    pp_goals_from_events = {'home': 0, 'visitor': 0}

    for period in events_data:
        for event in events_data[period]:
            if event['type'] != 'goal':
                continue
            balance = event['data']['balance']
            # skipping penalty shots erroneously identified as power play
            # goals
            if balance == 'PP0':
                continue
            if balance.startswith('PP'):
                pp_goals_from_events[event['data']['team']] += 1

    # event data uses 'visitor' where the remaining code uses 'road'
    pp_goals_from_events['road'] = pp_goals_from_events['visitor']

    pp_goals_discrepancy = False

    if pp_goals_from_events[key] != gsl['pp_goals']:
        pp_goals_discrepancy = True
        gsl['pp_goals'] = pp_goals_from_events[key]
    if pp_goals_from_events[opp_key] != gsl['opp_pp_goals']:
        pp_goals_discrepancy = True
        gsl['opp_pp_goals'] = pp_goals_from_events[opp_key]

    if pp_goals_discrepancy:
        print(
            "\t+ Found and corrected different number of pp goals "
            "retrieved from team stats and game event data")

    return gsl
def get_single_game_player_data(game, shots):
    """
    Retrieves statistics for all players participating in specified game.

    :param game: game dictionary providing (at least) the 'game_id',
        'home_id', 'road_id' and 'season' items used to locate source files
    :param shots: list of shot dictionaries; shots of a single player are
        selected via their 'player_id' item
    :return: list of game stat lines, one for each player of both teams
        whose 'games_played' item is truthy
    """
    game_stat_lines = list()

    game_id = game['game_id']
    home_id = game['home_id']
    road_id = game['road_id']

    game_type = get_game_type_from_season_type(game)
    season = str(game['season'])
    base_dir = CONFIG['base_data_dir']

    home_stats_src_path = os.path.join(
        base_dir, 'game_player_stats', season, str(game_type),
        "%d_%d.json" % (game_id, home_id))
    road_stats_src_path = os.path.join(
        base_dir, 'game_player_stats', season, str(game_type),
        "%d_%d.json" % (game_id, road_id))
    game_events_src_path = os.path.join(
        base_dir, 'game_events', season, str(game_type),
        "%d.json" % game['game_id'])
    faceoffs_src_path = os.path.join(
        base_dir, 'faceoffs', season, str(game_type),
        "%d.json" % game['game_id'])

    # using context managers to make sure file handles are closed again
    with open(home_stats_src_path) as home_stats_file:
        home_stats = json.load(home_stats_file)
    with open(road_stats_src_path) as road_stats_file:
        road_stats = json.load(road_stats_file)
    with open(game_events_src_path) as game_events_file:
        period_events = json.load(game_events_file)
    # faceoff data is optional
    if os.path.isfile(faceoffs_src_path):
        with open(faceoffs_src_path) as faceoffs_file:
            faceoffs = json.load(faceoffs_file)
    else:
        faceoffs = list()

    for home_stat_line in home_stats:
        player_game = retrieve_single_player_game_stats(
            home_stat_line, game, 'home')
        if player_game['games_played']:
            game_stat_lines.append(player_game)

    for road_stat_line in road_stats:
        player_game = retrieve_single_player_game_stats(
            road_stat_line, game, 'away')
        if player_game['games_played']:
            game_stat_lines.append(player_game)

    assistants, scorers_5v5, empty_net_goals = (
        retrieve_assistants_from_event_data(period_events))
    penalties = retrieve_penalties_from_event_data(period_events)

    for gsl in game_stat_lines:
        # retrieving on-ice statistics
        gsl = retrieve_on_ice_stats(gsl, shots)
        # retrieving actual shots
        per_player_game_shots = [
            shot for shot in shots if shot['player_id'] == gsl['player_id']]
        # retrieving shots hitting posts or crossbars (available since 2020)
        gsl['hit_post'] = len([
            shot for shot in per_player_game_shots
            if 'hit_post' in shot and shot['hit_post']])
        # retrieving shots in 5-on-5 play
        shots_5v5 = [
            shot for shot in per_player_game_shots
            if shot['plr_situation'] == '5v5']
        gsl['shots_5v5'] = len(shots_5v5)
        gsl['shots_missed_5v5'] = len([
            shot for shot in shots_5v5 if shot['target_type'] == 'missed'])
        shots_on_goal_5v5 = [
            shot for shot in shots_5v5 if shot['target_type'] == 'on_goal']
        gsl['shots_on_goal_5v5'] = len(shots_on_goal_5v5)
        gsl['goals_5v5'] = len([
            shot for shot in shots_on_goal_5v5 if shot['scored'] is True])

        # retrieving different types of goals using the score diff
        # parameter calculated for each registered shot
        goals = [shot for shot in per_player_game_shots if shot['scored']]
        gsl['go_ahead_g'] = len([
            goal for goal in goals if goal['score_diff'] == 0])
        gsl['tying_g'] = len([
            goal for goal in goals if goal['score_diff'] == -1])
        # clutch goals are goals scored in the last ten minutes of a game
        # or overtime whilst the score difference was one or zero
        gsl['clutch_g'] = len([
            goal for goal in goals
            if abs(goal['score_diff']) <= 1 and goal['time'] >= 3000])
        # garbage goals are goals when the score difference is four or
        # higher
        gsl['blowout_g'] = len([
            goal for goal in goals if abs(goal['score_diff']) >= 4])
        # goals scored whilst the team was losing
        gsl['w_losing_g'] = len([
            goal for goal in goals if goal['score_diff'] < 0])
        # goals scored whilst the team was winning
        gsl['w_winning_g'] = len([
            goal for goal in goals if goal['score_diff'] > 0])

        gsl['goals_5v5_from_events'] = scorers_5v5.get(gsl['player_id'], 0)
        gsl['empty_net_goals'] = empty_net_goals.get(gsl['player_id'], 0)

        # adding assist information to player's game stat line
        if gsl['player_id'] in assistants:
            single_assist_dict = assistants[gsl['player_id']]
            gsl['primary_assists'] = single_assist_dict.get('A1', 0)
            gsl['secondary_assists'] = single_assist_dict.get('A2', 0)
            gsl['pp_assists'] = single_assist_dict.get('PPA', 0)
            gsl['pp_primary_assists'] = single_assist_dict.get('PPA1', 0)
            gsl['pp_secondary_assists'] = single_assist_dict.get('PPA2', 0)
            gsl['pp_points'] += gsl['pp_assists']
            gsl['sh_assists'] = single_assist_dict.get('SHA', 0)
            gsl['sh_points'] += gsl['sh_assists']
            gsl['assists_5v5'] = single_assist_dict.get('5v5A', 0)
            gsl['primary_assists_5v5'] = single_assist_dict.get('5v5A1', 0)
            gsl['secondary_assists_5v5'] = single_assist_dict.get(
                '5v5A2', 0)

        # calculating primary points
        # NOTE(review): relies on assist items being present in the stat
        # line even for players without assists in event data - presumably
        # initialized by retrieve_single_player_game_stats, confirm
        gsl['primary_points'] = gsl['goals'] + gsl['primary_assists']
        gsl['points_5v5'] = gsl['goals_5v5_from_events'] + gsl['assists_5v5']
        gsl['primary_points_5v5'] = (
            gsl['goals_5v5_from_events'] + gsl['primary_assists_5v5'])

        # adding penalty information to player's game stat line
        if gsl['player_id'] in penalties:
            single_penalty_dict = penalties[gsl['player_id']]
            gsl['penalties'] = single_penalty_dict.get('penalties', 0)
            gsl['pim_from_events'] = single_penalty_dict.get('pim', 0)
            for duration in [2, 5, 10, 20]:
                gsl["_%dmin" % duration] = (
                    single_penalty_dict['durations'].get(duration, 0))
            gsl['penalty_shots'] = single_penalty_dict.get('penalty_shots')
            for category in PENALTY_CATEGORIES:
                gsl[category] = single_penalty_dict['categories'].get(
                    category, 0)

        # adding linemate information to player's game stat line
        defense_linemates, forward_linemates, line = get_linemates(gsl, game)
        gsl['line'] = line
        gsl['defense'] = defense_linemates
        gsl['forwards'] = forward_linemates

        # retrieving shots, goals etc. per shot zone
        for shot_zone in [
            'slot', 'left', 'right', 'blue_line', 'neutral_zone',
            'behind_goal'
        ]:
            zone_id = shot_zone.upper()
            shots_from_zone = [
                shot for shot in per_player_game_shots
                if shot['shot_zone'] == zone_id]
            gsl["%s_shots" % shot_zone] = len(shots_from_zone)
            gsl["%s_missed" % shot_zone] = len([
                shot for shot in shots_from_zone
                if shot['target_type'] == 'missed'])
            gsl["%s_blocked" % shot_zone] = len([
                shot for shot in shots_from_zone
                if shot['target_type'] == 'blocked'])
            shots_on_goal_from_zone = [
                shot for shot in shots_from_zone
                if shot['target_type'] == 'on_goal']
            gsl["%s_on_goal" % shot_zone] = len(shots_on_goal_from_zone)
            gsl["%s_goals" % shot_zone] = len([
                shot for shot in shots_on_goal_from_zone if shot['scored']])

        # calculating game score as weighted sum of individual and on-ice
        # statistics
        gsl['game_score'] = round(
            0.75 * gsl['goals'] + 0.7 * gsl['primary_assists'] +
            0.55 * gsl['secondary_assists'] + 0.075 * gsl['shots_on_goal'] +
            0.05 * gsl['blocked_shots'] - 0.15 * gsl['penalties'] +
            0.01 * gsl['faceoffs_won'] - 0.01 * gsl['faceoffs_lost'] +
            0.05 * gsl['on_ice_sh_f'] - 0.05 * gsl['on_ice_sh_a'] +
            0.15 * gsl['on_ice_goals_f'] - 0.15 * gsl['on_ice_goals_a'], 2)

        gsl = retrieve_detailed_faceoff_stats(gsl, faceoffs)

        # adding shootout statistics (if applicable)
        if 'shootout' in period_events and period_events['shootout']:
            gsl = retrieve_shootout_stats(gsl, period_events['shootout'])
        if 'so_gw_goals' not in gsl:
            gsl['so_gw_goals'] = 0

    return game_stat_lines
# retrieving set of games we already have retrieved player stats for registered_games = set([shot['game_id'] for shot in all_shots]) cnt = 0 for game in games[:]: cnt += 1 # skipping already processed games if game['game_id'] in registered_games: continue print("+ Retrieving shots for game %s " % get_game_info(game)) # collecting skater situation for each second of the game and a list # of times when goals has been scored times, goal_times = reconstruct_skater_situation(game) game_type = get_game_type_from_season_type(game) # retrieving raw shot data shots_src_path = os.path.join(CONFIG['base_data_dir'], 'shots', str(game['season']), str(game_type), "%d.json" % game['game_id']) if not os.path.isfile(shots_src_path): print("+ Skipping game since shot data is unavailable") continue shifts_src_path = os.path.join(CONFIG['base_data_dir'], 'shifts', str(game['season']), str(game_type), "%d.json" % game['game_id']) events_src_path = os.path.join(CONFIG['base_data_dir'], 'game_events', str(game['season']), str(game_type),
def get_single_game_team_data(game, grouped_shot_data, pp_sit_data):
    """
    Retrieves statistics for both teams participating in specified game.

    :param game: game dictionary with basic game information, scores,
        officials etc.
    :param grouped_shot_data: dictionary of shot statistics using tuples of
        game id and team abbreviation as keys
    :param pp_sit_data: dictionary with the keys 'home' and 'road', each
        providing 'pp_sits' and 'pp_goals' items for the '5v4', '5v3' and
        '4v3' power play situations
    :return: list with two game stat lines, the home team's one first,
        followed by the road team's one
    """
    game_stat_lines = list()

    game_id = game['game_id']
    home_id = game['home_id']
    road_id = game['road_id']

    game_type = get_game_type_from_season_type(game)

    home_stats_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_team_stats', str(game['season']),
        str(game_type), "%d_%d.json" % (game_id, home_id))
    road_stats_src_path = os.path.join(
        CONFIG['base_data_dir'], 'game_team_stats', str(game['season']),
        str(game_type), "%d_%d.json" % (game_id, road_id))

    # loading raw team game stats (if available), using context managers
    # to make sure file handles are closed again
    raw_stats = dict()
    for home_road, stats_src_path in [
        ('home', home_stats_src_path), ('road', road_stats_src_path)
    ]:
        if os.path.isfile(stats_src_path):
            with open(stats_src_path) as stats_file:
                raw_stats[home_road] = json.load(stats_file)
        else:
            raw_stats[home_road] = dict()

    # counting penalties per team
    penalty_counts = get_penalty_counts(game)

    # defining shot data items to be copied to game stat lines
    shot_zones_to_retain = ('slot', 'left', 'right', 'blue_line')
    shot_situations_to_retain = [
        'shots_ev', 'shots_5v5', 'shots_pp', 'shots_sh',
        'shots_unblocked', 'shots_unblocked_ev', 'shots_unblocked_5v5',
        'shots_unblocked_pp', 'shots_unblocked_sh',
        'shots_on_goal_ev', 'shots_on_goal_5v5', 'shots_on_goal_pp',
        'shots_on_goal_sh', 'goals_5v5', 'hit_post'
    ]

    for key in ['home', 'road']:
        opp_key = 'road' if key == 'home' else 'home'
        game_stat_line = dict()
        # basic game information
        game_stat_line['game_date'] = game['date']
        game_stat_line['weekday'] = game['weekday']
        game_stat_line['season'] = game['season']
        game_stat_line['season_type'] = game['season_type']
        game_stat_line['round'] = game['round']
        game_stat_line['game_id'] = game_id
        game_stat_line['team_id'] = game["%s_id" % key]
        game_stat_line['team'] = game["%s_abbr" % key]
        game_stat_line['opp_team_id'] = game["%s_id" % opp_key]
        game_stat_line['opp_team'] = game["%s_abbr" % opp_key]
        # identifying team's and opposing team's division (if
        # applicable for current season and season type)
        if (game['season'], game['season_type']) in divisions:
            current_divisions = divisions[
                game['season'], game['season_type']]
            game_stat_line['division'] = current_divisions[
                game_stat_line['team']]
            game_stat_line['opp_division'] = current_divisions[
                game_stat_line['opp_team']]
        # TODO: reactivate when schedule game id is available again
        # game_stat_line['schedule_game_id'] = game['schedule_game_id']
        game_stat_line['arena'] = correct_name(game['arena'])
        game_stat_line['attendance'] = game['attendance']
        if game_stat_line['arena'] in capacities:
            game_stat_line['capacity'] = capacities[game_stat_line['arena']]
        else:
            print(
                "\t+ Unable to retrieve capacity for '%s'" %
                game_stat_line['arena'])
            game_stat_line['capacity'] = 0
        # coaches and referees
        if "%s_coach" % key in game:
            game_stat_line['coach'] = correct_name(
                game["%s_coach" % key], game['date'])
            if game_stat_line['coach'] not in coaches:
                print("+ Unknown coach '%s'" % game_stat_line['coach'])
        else:
            print(
                "\t+ No coach information found for %s in game %d" % (
                    game_stat_line['team'], game_id))
            # using combination of game id and team as placeholder
            game_stat_line['coach'] = correct_name(
                "%d_%s" % (game_id, game_stat_line['team']))
            print("\t+ Adjusted to '%s'" % game_stat_line['coach'])
        if "%s_coach" % opp_key in game:
            game_stat_line['opp_coach'] = correct_name(
                game["%s_coach" % opp_key], game['date'])
            if game_stat_line['opp_coach'] not in coaches:
                print("+ Unknown coach '%s'" % game_stat_line['opp_coach'])
        else:
            print(
                "\t+ No opposition coach information found "
                "for %s in game %d" % (game_stat_line['opp_team'], game_id))
            # using combination of game id and team as placeholder
            game_stat_line['opp_coach'] = correct_name(
                "%d_%s" % (game_id, game_stat_line['opp_team']))
            print("\t+ Adjusted to '%s'" % game_stat_line['opp_coach'])
        game_stat_line['ref_1'] = correct_name(game['referee_1'])
        game_stat_line['ref_2'] = correct_name(game['referee_2'])
        game_stat_line['lma_1'] = correct_name(game['linesman_1'])
        game_stat_line['lma_2'] = correct_name(game['linesman_2'])
        # outcomes
        game_stat_line['games_played'] = 1
        game_stat_line['home_road'] = key
        game_stat_line['score'] = game["%s_score" % key]
        game_stat_line['goals'] = game["%s_score" % key]
        game_stat_line['opp_score'] = game["%s_score" % opp_key]
        game_stat_line['opp_goals'] = game["%s_score" % opp_key]
        # optionally correcting game scores
        if game_id in game_score_corrections:
            for team_abbr in game_score_corrections[game_id]:
                if game_stat_line['team'] == team_abbr:
                    game_stat_line['score'] = game_score_corrections[
                        game_id][team_abbr]
                if game_stat_line['opp_team'] == team_abbr:
                    game_stat_line['opp_score'] = game_score_corrections[
                        game_id][team_abbr]
        if game['shootout_game']:
            game_stat_line['game_type'] = 'SO'
        elif game['overtime_game']:
            game_stat_line['game_type'] = 'OT'
        else:
            game_stat_line['game_type'] = ''
        for gsl_key in ['w', 'rw', 'ow', 'sw', 'l', 'rl', 'ol', 'sl']:
            game_stat_line[gsl_key] = 0
        if game_stat_line['score'] > game_stat_line['opp_score']:
            game_stat_line['w'] += 1
            if game['shootout_game']:
                game_stat_line['sw'] += 1
                # the score is one higher than the number of goals scored
                # if the game was won in a shootout
                game_stat_line['goals'] -= 1
            elif game['overtime_game']:
                game_stat_line['ow'] += 1
            else:
                game_stat_line['rw'] += 1
        else:
            game_stat_line['l'] += 1
            if game['shootout_game']:
                game_stat_line['sl'] += 1
                # the opposing score is one higher than the number of goals
                # allowed if the game was lost in a shootout
                game_stat_line['opp_goals'] -= 1
            elif game['overtime_game']:
                game_stat_line['ol'] += 1
            else:
                game_stat_line['rl'] += 1
        game_stat_line['points'] = (
            game_stat_line['rw'] * 3 + game_stat_line['ow'] * 2 +
            game_stat_line['sw'] * 2 + game_stat_line['sl'] * 1 +
            game_stat_line['ol'] * 1)
        # per-period goals
        for period in [1, 2, 3]:
            game_stat_line["goals_%d" % period] = game[
                "%s_goals_%d" % (key, period)]
            game_stat_line["opp_goals_%d" % period] = game[
                "%s_goals_%d" % (opp_key, period)]
        # empty-net and extra-attacker goals
        game_stat_line['en_goals'] = game["%s_en_goals" % key]
        game_stat_line['ea_goals'] = game["%s_ea_goals" % key]
        game_stat_line['opp_en_goals'] = game["%s_en_goals" % opp_key]
        game_stat_line['opp_ea_goals'] = game["%s_ea_goals" % opp_key]
        # situation after 20 and 40 minutes respectively
        for situation in [
            'tied20', 'lead20', 'trail20', 'tied40', 'lead40', 'trail40'
        ]:
            game_stat_line[situation] = False
        if game_stat_line['goals_1'] == game_stat_line['opp_goals_1']:
            game_stat_line['tied20'] = True
        elif game_stat_line['goals_1'] > game_stat_line['opp_goals_1']:
            game_stat_line['lead20'] = True
        else:
            game_stat_line['trail20'] = True
        goals40 = game_stat_line['goals_1'] + game_stat_line['goals_2']
        opp_goals40 = (
            game_stat_line['opp_goals_1'] + game_stat_line['opp_goals_2'])
        if goals40 == opp_goals40:
            game_stat_line['tied40'] = True
        elif goals40 > opp_goals40:
            game_stat_line['lead40'] = True
        else:
            game_stat_line['trail40'] = True
        # scored first?
        if game['first_goal'] == game_stat_line['team']:
            game_stat_line['scored_first'] = True
            game_stat_line['trailed_first'] = False
        elif game['first_goal'] == game_stat_line['opp_team']:
            game_stat_line['scored_first'] = False
            game_stat_line['trailed_first'] = True
        # one-goal, two-goal, three-goal, four-goal-game?
        for goal_game in ['one_goal', 'two_goal', 'three_goal', 'four_goal']:
            game_stat_line[goal_game] = False
        score_diff = abs(
            (game_stat_line['score'] - game_stat_line['en_goals']) -
            (game_stat_line['opp_score'] - game_stat_line['opp_en_goals']))
        # in case the right amount of empty-net goals have been scored, we
        # may end up with a score differential of zero, see game between STR
        # and ING on Mar 3, 2019
        if not score_diff:
            game_stat_line['zero_goal'] = True
        if score_diff == 1:
            game_stat_line['one_goal'] = True
        elif score_diff == 2:
            game_stat_line['two_goal'] = True
        elif score_diff == 3:
            game_stat_line['three_goal'] = True
        elif score_diff > 3:
            game_stat_line['four_goal'] = True

        # retrieving score state time spans for current team
        game_stat_line['time_played'] = game['time_played']
        game_stat_line['tied'] = game['tied']
        game_stat_line['tied_pctg'] = round(
            game['tied'] / game['time_played'] * 100, 2)
        if key == 'home':
            game_stat_line['leading'] = game['home_leading']
            game_stat_line['trailing'] = game['road_leading']
        else:
            game_stat_line['leading'] = game['road_leading']
            game_stat_line['trailing'] = game['home_leading']
        game_stat_line['leading_pctg'] = round(
            game_stat_line['leading'] / game['time_played'] * 100, 2)
        game_stat_line['trailing_pctg'] = round(
            game_stat_line['trailing'] / game['time_played'] * 100, 2)

        # retrieving raw stats for team and opposing team
        for category, raw_category in RAW_STATS_MAPPING:
            game_stat_line[category] = raw_stats[key].get(raw_category, None)
            game_stat_line["opp_%s" % category] = raw_stats[opp_key].get(
                raw_category, None)

        # checking number of power play goals retrieved from team stats with
        # those registered in event data
        game_stat_line = check_pp_goals(game, key, opp_key, game_stat_line)

        # calculating shooting percentages
        if game_stat_line['shots_on_goal']:
            game_stat_line['shot_pctg'] = round(
                game_stat_line['goals'] /
                game_stat_line['shots_on_goal'] * 100., 2)
        else:
            game_stat_line['shot_pctg'] = None
        if game_stat_line['opp_shots_on_goal']:
            game_stat_line['opp_shot_pctg'] = round(
                game_stat_line['opp_goals'] /
                game_stat_line['opp_shots_on_goal'] * 100., 2)
        else:
            game_stat_line['opp_shot_pctg'] = None
        # calculating save percentages
        if game_stat_line['opp_shots_on_goal']:
            game_stat_line['save_pctg'] = round(
                100 - game_stat_line['opp_goals'] /
                game_stat_line['opp_shots_on_goal'] * 100., 2)
        else:
            game_stat_line['save_pctg'] = None
        if game_stat_line['shots_on_goal']:
            game_stat_line['opp_save_pctg'] = round(
                100 - game_stat_line['goals'] /
                game_stat_line['shots_on_goal'] * 100., 2)
        else:
            game_stat_line['opp_save_pctg'] = None
        # calculating pdo values, the opposing team's percentages are
        # available whenever the team's own percentages are since both are
        # based on the same shot on goal numbers
        if (
            game_stat_line['shot_pctg'] is not None and
            game_stat_line['save_pctg'] is not None
        ):
            game_stat_line['pdo'] = round((
                game_stat_line['shot_pctg'] +
                game_stat_line['save_pctg']), 1)
            game_stat_line['opp_pdo'] = round((
                game_stat_line['opp_shot_pctg'] +
                game_stat_line['opp_save_pctg']), 1)
        # calculating power play percentages
        if game_stat_line['pp_opps']:
            game_stat_line['pp_pctg'] = round((
                game_stat_line['pp_goals'] /
                game_stat_line['pp_opps']) * 100., 1)
        else:
            game_stat_line['pp_pctg'] = 0
        if game_stat_line['opp_pp_opps']:
            game_stat_line['opp_pp_pctg'] = round((
                game_stat_line['opp_pp_goals'] /
                game_stat_line['opp_pp_opps']) * 100., 1)
        else:
            game_stat_line['opp_pp_pctg'] = 0
        # calculating penalty killing percentages
        if game_stat_line['sh_opps']:
            game_stat_line['pk_pctg'] = round(
                100 - game_stat_line['opp_pp_goals'] /
                game_stat_line['sh_opps'] * 100., 1)
        else:
            game_stat_line['pk_pctg'] = 0
        if game_stat_line['opp_sh_opps']:
            game_stat_line['opp_pk_pctg'] = round(
                100 - game_stat_line['pp_goals'] /
                game_stat_line['opp_sh_opps'] * 100., 1)
        else:
            game_stat_line['opp_pk_pctg'] = 0
        # calculating even strength goals
        game_stat_line['ev_goals'] = (
            game_stat_line['goals'] -
            game_stat_line['pp_goals'] -
            game_stat_line['sh_goals'])
        game_stat_line['opp_ev_goals'] = (
            game_stat_line['opp_goals'] -
            game_stat_line['opp_pp_goals'] -
            game_stat_line['opp_sh_goals'])
        # faceoffs are treated separately since each of the team game stats
        # datasets only contains the number of won faceoffs and sometimes
        # this one is stored as a string
        game_stat_line['faceoffs_won'] = int(
            raw_stats[key].get('faceOffsWon', 0))
        game_stat_line['faceoffs_lost'] = int(
            raw_stats[opp_key].get('faceOffsWon', 0))
        # calculating overall number of faceoffs and faceoff percentage
        game_stat_line['faceoffs'] = (
            game_stat_line['faceoffs_won'] + game_stat_line['faceoffs_lost'])
        if game_stat_line['faceoffs']:
            game_stat_line['faceoff_pctg'] = round(
                game_stat_line['faceoffs_won'] /
                game_stat_line['faceoffs'] * 100., 1)
        else:
            game_stat_line['faceoff_pctg'] = 0.
        # best players
        game_stat_line['best_plr_id'] = game.get(
            "%s_best_player_id" % key, None)
        game_stat_line['best_plr'] = game.get("%s_best_player" % key, None)
        game_stat_line['opp_best_plr_id'] = game.get(
            "%s_best_player_id" % opp_key, None)
        game_stat_line['opp_best_plr'] = game.get(
            "%s_best_player" % opp_key, None)
        # game-winning-goal
        game_stat_line['gw_goal_team'] = game['gw_goal']
        game_stat_line['gw_goal_player_id'] = game['gw_goal_player_id']
        game_stat_line['gw_goal_first_name'] = game['gw_goal_first_name']
        game_stat_line['gw_goal_last_name'] = game['gw_goal_last_name']

        # retrieving shot data for current game and team
        shot_data = grouped_shot_data.get(
            (game_id, game_stat_line['team']), list())
        for item in shot_data:
            if item.startswith(shot_zones_to_retain):
                # abbreviating shot zone names in item keys
                abbr_item = item
                for zone_key, replacement in SHOT_ZONE_ABBREVIATIONS.items():
                    abbr_item = abbr_item.replace(zone_key, replacement)
                game_stat_line[abbr_item] = shot_data[item]
            elif item in shot_situations_to_retain:
                game_stat_line[item] = shot_data[item]

        # retrieving shots against data for current game and team
        shot_against_data = grouped_shot_data.get(
            (game_id, game_stat_line['opp_team']), list())
        for item in shot_against_data:
            if item.startswith(shot_zones_to_retain):
                # abbreviating shot zone names in item keys
                abbr_item = item
                for zone_key, replacement in SHOT_ZONE_ABBREVIATIONS.items():
                    abbr_item = abbr_item.replace(zone_key, replacement)
                game_stat_line["%s_a" % abbr_item] = shot_against_data[item]
            elif item in shot_situations_to_retain:
                game_stat_line["opp_%s" % item] = shot_against_data[item]

        # calculating share of even strength shots, also guarding against
        # both teams having no even strength shots registered at all
        try:
            game_stat_line['ev_cf_pctg'] = round(
                game_stat_line['shots_ev'] / (
                    game_stat_line['shots_ev'] +
                    game_stat_line['opp_shots_ev']) * 100, 2)
        except (KeyError, ZeroDivisionError):
            print("\t+Unable to calculate even strength shots for percentage")
            game_stat_line['ev_cf_pctg'] = None

        # registering number of penalties per penalty duration
        for penalty_duration in [2, 5, 10, 20]:
            if (
                penalty_counts[key] and
                penalty_duration in penalty_counts[key]
            ):
                game_stat_line["penalty_%d" % penalty_duration] = (
                    penalty_counts[key][penalty_duration])
            else:
                game_stat_line["penalty_%d" % penalty_duration] = 0

        # registering power play situations and goals for team and
        # opposing team
        for pp_sit in ['5v4', '5v3', '4v3']:
            game_stat_line["pp_%s" % pp_sit] = (
                pp_sit_data[key]['pp_sits'][pp_sit])
            game_stat_line["ppg_%s" % pp_sit] = (
                pp_sit_data[key]['pp_goals'][pp_sit])
            game_stat_line["opp_pp_%s" % pp_sit] = (
                pp_sit_data[opp_key]['pp_sits'][pp_sit])
            game_stat_line["opp_ppg_%s" % pp_sit] = (
                pp_sit_data[opp_key]['pp_goals'][pp_sit])

        # registering shootout stats (if applicable)
        shootout_stats = get_shootout_stats(game, key, opp_key)
        if shootout_stats:
            game_stat_line = {**game_stat_line, **shootout_stats}

        game_stat_lines.append(game_stat_line)

    return game_stat_lines