def parse_event(event):
    """
    Turn one play from the json feed into a flat dictionary

    :param event: json of event

    :return: dictionary with the info
    """
    about = event['about']
    play = {
        'period': about['period'],
        'event': str(change_event_name(event['result']['eventTypeId'])),
        'seconds_elapsed': shared.convert_to_seconds(about['periodTime']),
    }

    # No players key -> nothing else is recorded for this play
    if 'players' not in event:
        return play

    involved = event['players']

    # The first listed player is always stored as p1; a non-goalie in that slot is re-written
    # (upper-cased) by the loop below
    first_player = involved[0]['player']
    play['p1_name'] = shared.fix_name(first_player['fullName'])
    play['p1_ID'] = first_player['id']

    for slot, entry in enumerate(involved, start=1):
        if entry['playerType'] == 'Goalie':
            continue
        play['p{}_name'.format(slot)] = shared.fix_name(entry['player']['fullName'].upper())
        play['p{}_ID'.format(slot)] = entry['player']['id']

    # Coordinates aren't always there
    # NOTE(review): this lookup is kept inside the players branch; the collapsed source does not show
    # its nesting level - confirm against the callers.
    try:
        coordinates = event['coordinates']
        x_coord, y_coord = coordinates['x'], coordinates['y']
    except KeyError:
        x_coord = y_coord = ''
    play['xC'] = x_coord
    play['yC'] = y_coord

    return play
def combine_players_lists(json_players, roster_players, game_id):
    """
    Merge the json players (which carry the id's) with the players listed in the roster html

    :param json_players: dict of all players with id's (keyed by name)
    :param roster_players: dict with 'Home' and 'Away' keys for players
    :param game_id: id of game

    :return: dict containing 'Home' and 'Away' keys -> each maps a player's name to his 'id' and 'number'
    """
    combined = {'Home': dict(), 'Away': dict()}

    for venue in ('Home', 'Away'):
        venue_players = combined[venue]

        for player in roster_players[venue]:
            try:
                name = shared.fix_name(player[2])
                player_id = json_players[name]['id']
                venue_players[name] = {'id': player_id, 'number': player[0]}
            except KeyError:
                # This usually means it's the backup goalie (who didn't play) so it's no big deal with them.
                # Anyone else is recorded as missing and kept with a placeholder id.
                if player[1] != 'G':
                    players_missing_ids.extend([player, game_id])
                    venue_players[name] = {'id': 'NA', 'number': player[0]}

    return combined
def parse_event(event):
    """
    Parses a single event when the info is in a json format

    :param event: json of event

    :return: dictionary with the info -> 'period', 'event', 'seconds_elapsed' and, when the play lists
             players, 'p<N>_name'/'p<N>_ID' for each of them plus 'xC'/'yC'
    """
    play = dict()

    play['period'] = event['about']['period']
    play['event'] = str(change_event_name(event['result']['eventTypeId']))
    play['seconds_elapsed'] = shared.convert_to_seconds(event['about']['periodTime'])

    # If there's a players key that means an event occurred on the play.
    if 'players' in event:
        event_players = event['players']

        # The first player is always stored as p1 (even when that player is a goalie)
        play['p1_name'] = shared.fix_name(event_players[0]['player']['fullName'])
        play['p1_ID'] = event_players[0]['player']['id']

        # Goalies are skipped, so the slot number follows the player's position in the json list
        for i, involved in enumerate(event_players):
            if involved['playerType'] != 'Goalie':
                play['p{}_name'.format(i + 1)] = shared.fix_name(involved['player']['fullName'].upper())
                play['p{}_ID'.format(i + 1)] = involved['player']['id']

        # Coordinates aren't always there
        # NOTE(review): this lookup is kept inside the players branch; the collapsed source does not show
        # its nesting level - confirm against the callers.
        try:
            play['xC'] = event['coordinates']['x']
            play['yC'] = event['coordinates']['y']
        except KeyError:
            play['xC'] = ''
            play['yC'] = ''

    return play
def parse_shift(shift):
    """
    Convert one shift entry of the json into a dictionary

    :param shift: json for shift

    :return: dict with shift info (an empty dict when the entry carries an event description)
    """
    first_name = shift['firstName'].strip(' ').upper()
    last_name = shift['lastName'].strip(' ').upper()
    name = shared.fix_name(' '.join((first_name, last_name)))

    shift_dict = {
        'Player': name,
        'Player_Id': shift['playerId'],
        'Period': shift['period'],
        'Team': fix_team_tricode(shift['teamAbbrev']),
    }

    # At the end of the json they list when all the goal events happened. They are the only ones whose
    # eventDescription is not null...so those entries are dropped
    if shift['eventDescription'] is not None:
        return dict()

    shift_dict['Start'] = shared.convert_to_seconds(shift['startTime'])
    shift_dict['End'] = shared.convert_to_seconds(shift['endTime'])
    shift_dict['Duration'] = shared.convert_to_seconds(shift['duration'])

    return shift_dict
def parse_html(html, player_ids, game_id):
    """
    Parse the html shift report into one row per shift

    :param html: cleaned up html (its ``content`` attribute is what gets handed to BeautifulSoup)
    :param player_ids: dict of home and away players
    :param game_id: id of game

    :return: DataFrame with info
    """
    columns = ['Game_Id', 'Player', 'Player_Id', 'Period', 'Team', 'Start', 'End', 'Duration']

    soup = BeautifulSoup(html.content, "lxml")

    teams = get_teams(soup)
    team = teams[0]
    home_team = teams[1]

    td = soup.find_all(True, {'class': ['playerHeading + border', 'lborder + bborder']})

    # The list 'td' is laid out with player name followed by every component of each shift. Each shift
    # contains: shift #, Period, begin, end, and duration. The shift event isn't included.
    players = dict()
    for t in td:
        t = t.get_text()
        if ',' in t:
            # If it has a comma in it we know it's a player's name...so add player to dict
            # Just format the name normally...it's coded as: 'num last_name, first_name'
            heading = t.split(',')
            name = shared.fix_name(' '.join([heading[1].strip(' '), heading[0][2:].strip(' ')]))

            # The number has to come from the raw heading; once the name is cleaned up the leading
            # digits are gone
            players[name] = {'number': heading[0][:2].strip(), 'Shifts': []}
        else:
            # Here we add all the shifts to whatever player we are up to
            players[name]['Shifts'].append(t)

    rows = []
    for key in players:
        # Create a list of lists (each length 5)...corresponds to 5 columns in html shifts
        cells = players[key]['Shifts']
        players[key]['Shifts'] = [cells[i:i + 5] for i in range(0, len(cells), 5)]

        rows.extend(analyze_shifts(shift, key, team, home_team, player_ids) for shift in players[key]['Shifts'])

    # Build the frame once instead of appending per player (DataFrame.append no longer exists in pandas 2).
    # The expected columns come first and are present even when there are no shifts; anything extra that
    # analyze_shifts returned is kept after them.
    df = pd.DataFrame(rows)
    extra_columns = [col for col in df.columns if col not in columns]
    df = df.reindex(columns=columns + extra_columns)

    df['Game_Id'] = str(game_id)

    return df
def get_players_json(json):
    """
    Build the lookup of players for that game

    :param json: gameData section of json

    :return: dict of players -> keys are the name (in uppercase), values hold the player's 'id'
    """
    roster = dict()

    for info in json['players'].values():
        name = shared.fix_name(info['fullName'].upper())

        try:
            player_id = info['id']
        except KeyError:
            print(name, ' is missing an ID number')
            player_id = 'NA'

        roster[name] = {'id': player_id}

    return roster
def parse_event(event, players, home_team, if_plays_in_json, current_score):
    """
    Receives an event and parses it

    :param event: parsed play; indexes read here -> 1: period, 3: time elapsed, 4: event type,
                  5: description, 6: away players on ice, 7: home players on ice
    :param players: players in game ('Home'/'Away' -> name -> info holding the 'id')
    :param home_team: home team, compared against the first word of the description
    :param if_plays_in_json: If the pbp json contains the plays
    :param current_score: current score for both teams ('Home'/'Away'); updated in place on a goal

    :return: list -> [dict with info, current_score]
    """
    team_events = ('GOAL', 'SHOT', 'MISS', 'BLOCK', 'PENL', 'FAC', 'HIT', 'TAKE', 'GIVE')

    event_dict = dict()

    away_players = event[6]
    home_players = event[7]

    try:
        event_dict['Period'] = int(event[1])
    except ValueError:
        event_dict['Period'] = 0

    event_dict['Description'] = event[5]
    event_dict['Event'] = str(event[4])

    if event_dict['Event'] in team_events:
        # Split the description and take the first thing (which is the team)
        event_dict['Ev_Team'] = event[5].split()[0]

        # If it's a goal change the score
        if event[4] == 'GOAL':
            if event_dict['Ev_Team'] == home_team:
                current_score['Home'] += 1
            else:
                current_score['Away'] += 1

    event_dict['Home_Score'] = current_score['Home']
    event_dict['Away_Score'] = current_score['Away']

    # Populate away and home player info (six slots per team, away before home for each slot)
    sides = (('away', 'Away', away_players), ('home', 'Home', home_players))
    for slot in range(1, 7):
        for prefix, venue, on_ice in sides:
            name_key = '{}Player{}'.format(prefix, slot)
            id_key = '{}Player{}_id'.format(prefix, slot)
            try:
                name = shared.fix_name(on_ice[slot - 1][0].upper())
                event_dict[name_key] = name
                event_dict[id_key] = players[venue][name]['id']
            except KeyError:
                # Player is on the ice but isn't in the players dict
                event_dict[id_key] = 'NA'
            except IndexError:
                # Fewer players on the ice than slots
                event_dict[name_key] = ''
                event_dict[id_key] = ''

    # Did this because above method assumes the goalie is at end of player list.
    # A goalie that was already found must not be blanked out by the skaters listed after him, so the
    # empty default is only written until a goalie shows up. Nothing is written for an empty list.
    for _, venue, on_ice in sides:
        goalie_key = '{}_Goalie'.format(venue)
        goalie_id_key = '{}_Goalie_Id'.format(venue)
        goalie_found = False

        for x in on_ice:
            if x[2] == 'G':
                goalie_found = True
                event_dict[goalie_key] = shared.fix_name(x[0].upper())
                try:
                    event_dict[goalie_id_key] = players[venue][event_dict[goalie_key]]['id']
                except KeyError:
                    event_dict[goalie_id_key] = 'NA'
            elif not goalie_found:
                event_dict[goalie_key] = ''
                event_dict[goalie_id_key] = ''

    event_dict['Away_Players'] = len(away_players)
    event_dict['Home_Players'] = len(home_players)

    try:
        home_skaters = event_dict['Home_Players'] - 1 if event_dict['Home_Goalie'] != '' else len(home_players)
        away_skaters = event_dict['Away_Players'] - 1 if event_dict['Away_Goalie'] != '' else len(away_players)
    except KeyError:
        # Getting a key error here means that home/away goalie isn't there..which means home/away players are empty
        home_skaters = 0
        away_skaters = 0

    event_dict['Strength'] = 'x'.join([str(home_skaters), str(away_skaters)])
    event_dict['Ev_Zone'] = which_zone(event[5])

    if 'PENL' in event[4]:
        event_dict['Type'] = get_penalty(event[5])
    else:
        event_dict['Type'] = shot_type(event[5]).upper()

    event_dict['Time_Elapsed'] = str(event[3])
    if event[3] != '':
        event_dict['Seconds_Elapsed'] = shared.convert_to_seconds(event[3])
    else:
        event_dict['Seconds_Elapsed'] = ''

    # I like getting the event players from the json...so only pull them from here when the json has no plays
    if not if_plays_in_json and event_dict['Event'] in team_events:
        event_dict.update(get_event_players(event, players, home_team))  # Add players involved in event

    return [event_dict, current_score]
def populate_players(event_dict, players, away_players, home_players):
    """
    Populate away and home player info (and the number of players listed on each side)

    Writes straight into event_dict:
        - 'awayPlayer<N>' / 'awayPlayer<N>_id' and 'homePlayer<N>' / 'homePlayer<N>_id' for N = 1..6
        - 'Away_Goalie' / 'Away_Goalie_Id' and 'Home_Goalie' / 'Home_Goalie_Id' (left untouched for a side
          whose player list is empty)
        - 'Away_Players' / 'Home_Players' -> length of each player list

    :param event_dict: dict with event info
    :param players: all players in game and info ('Home'/'Away' -> name -> info holding the 'id')
    :param away_players: players for away team (each entry: index 0 is the name, index 2 the position)
    :param home_players: players for home team (same layout)

    :return: None
    """
    sides = (('away', 'Away', away_players), ('home', 'Home', home_players))

    # Six slots per team, away before home for each slot
    for slot in range(1, 7):
        for prefix, venue, on_ice in sides:
            name_key = '{}Player{}'.format(prefix, slot)
            id_key = '{}Player{}_id'.format(prefix, slot)
            try:
                name = shared.fix_name(on_ice[slot - 1][0].upper())
                event_dict[name_key] = name
                event_dict[id_key] = players[venue][name]['id']
            except KeyError:
                # Player is on the ice but isn't in the players dict
                event_dict[id_key] = 'NA'
            except IndexError:
                # Fewer players on the ice than slots
                event_dict[name_key] = ''
                event_dict[id_key] = ''

    # Did this because above method assumes the goalie is at end of player list.
    # A goalie that was already found must not be blanked out by the skaters listed after him, so the
    # empty default is only written until a goalie shows up.
    for _, venue, on_ice in sides:
        goalie_key = '{}_Goalie'.format(venue)
        goalie_id_key = '{}_Goalie_Id'.format(venue)
        goalie_found = False

        for x in on_ice:
            if x[2] == 'G':
                goalie_found = True
                event_dict[goalie_key] = shared.fix_name(x[0].upper())
                try:
                    event_dict[goalie_id_key] = players[venue][event_dict[goalie_key]]['id']
                except KeyError:
                    event_dict[goalie_id_key] = 'NA'
            elif not goalie_found:
                event_dict[goalie_key] = ''
                event_dict[goalie_id_key] = ''

    event_dict['Away_Players'] = len(away_players)
    event_dict['Home_Players'] = len(home_players)