def _get_corsi_timeline_title(season, game):
    """
    Builds the default two-line chart title for corsi timelines.

    The first line names the season, game number and game date; the second line gives the
    road and home teams with their scores, an OT/SO marker when applicable, and the game status.

    :param season: int, the season
    :param game: int, the game

    :return: str, the title (two lines joined by a newline)
    """
    # Only results starting with OT or SO get an explicit marker after the score
    result_prefix = schedules.get_game_result(season, game)[:2]
    extra_time = ' ({0:s})'.format(result_prefix) if result_prefix in ('OT', 'SO') else ''

    heading = 'Shot attempt timeline for {0:d}-{1:s} Game {2:d} ({3:s})'.format(
        int(season),
        str(int(season + 1))[2:],
        int(game),
        schedules.get_game_date(season, game))

    road_name = team_info.team_as_str(schedules.get_road_team(season, game), abbreviation=False)
    road_score = schedules.get_road_score(season, game)
    home_name = team_info.team_as_str(schedules.get_home_team(season, game), abbreviation=False)
    home_score = schedules.get_home_score(season, game)
    status = schedules.get_game_status(season, game)
    scoreline = '{0:s} {1:d} at {2:s} {3:d}{4:s} ({5:s})'.format(
        road_name, road_score, home_name, home_score, extra_time, status)

    return '\n'.join([heading, scoreline])
def _get_cf_for_timeline(season, game, homeroad, granularity='sec'):
    """
    Returns a dataframe with columns for time and cumulative CF

    :param season: int, the season
    :param game: int, the game
    :param homeroad: str, 'H' for home and 'R' for road
    :param granularity: can respond in minutes ('min'), or seconds ('sec'), elapsed in game

    :return: a dataframe with two columns, Time and CumCF
    :raises ValueError: if homeroad is neither 'H' nor 'R'
    """
    if homeroad == 'H':
        teamid = schedules.get_home_team(season, game)
    elif homeroad == 'R':
        teamid = schedules.get_road_team(season, game)
    else:
        raise ValueError("homeroad must be 'H' or 'R', got {0!r}".format(homeroad))

    pbp = parse_pbp.get_parsed_pbp(season, game)
    pbp = manip.filter_for_corsi(pbp)
    pbp = pbp[pbp.Team == teamid]

    # One row per elapsed time unit in the TOI log, left-joined to the team's shot attempts
    maxtime = len(parse_toi.get_parsed_toi(season, game))
    df = pd.DataFrame({'Time': list(range(maxtime))})
    df = df.merge(pbp[['Time']].assign(CF=1), how='left', on='Time')
    df.loc[:, 'CF'] = df.CF.fillna(0)
    df.loc[:, 'CumCF'] = df.CF.cumsum()

    # Now let's shift things down. Right now a shot at 30 secs will mean Time = 0 has CumCF = 1.
    if granularity == 'min':
        df.loc[:, 'Time'] = df.Time // 60
        df = df.groupby('Time').max().reset_index()

    # I want it soccer style, so Time = 0 always has CumCF = 0, and that first shot at 30sec will register for Time=1
    df = pd.concat([pd.DataFrame({'Time': [-1], 'CumCF': [0], 'CF': [0]}), df])
    df.loc[:, 'Time'] = df.Time + 1

    # But because of this, in case of OT or other last-second goals, need to add 1 to the end
    df = pd.concat([df, pd.DataFrame({'Time': [df.Time.max() + 1]})])
    # The appended row only has Time set; carry the previous totals forward into it.
    # (DataFrame.ffill() replaces the deprecated fillna(method='ffill').)
    df = df.ffill()

    # NOTE: every shot could also be plotted twice (before and after it counts) so each chart segment
    # is flat or vertical; that step-style expansion is not applied here.

    df = df.drop('CF', axis=1)
    return df
def _get_game_h2h_chart_title(season, game, homecf_diff=None, totaltoi=None):
    """
    Builds the multi-line title for the H2H chart.

    The first two lines identify the game and its score. A third line summarising the home
    team's 5v5 shot attempt differential is added only when both optional arguments are given.

    :param season: int, the season
    :param game: int, the game
    :param homecf_diff: int. The home team corsi advantage
    :param totaltoi: int. The TOI played so far.

    :return: str, the title lines joined by newlines
    """
    # Note if a game was OT or SO
    result_prefix = schedules.get_game_result(season, game)[:2]
    extra_time = ' ({0:s})'.format(result_prefix) if result_prefix in ('OT', 'SO') else ''

    heading = 'H2H Corsi and TOI for {0:d}-{1:s} Game {2:d}'.format(season, str(season + 1)[2:], game)

    road_name = team_info.team_as_str(schedules.get_road_team(season, game), abbreviation=False)
    road_score = schedules.get_road_score(season, game)
    home_name = team_info.team_as_str(schedules.get_home_team(season, game), abbreviation=False)
    home_score = schedules.get_home_score(season, game)
    status = schedules.get_game_status(season, game)
    scoreline = '{0:s} {1:d} at {2:s} {3:d}{4:s} ({5:s})'.format(
        road_name, road_score, home_name, home_score, extra_time, status)

    lines = [heading, scoreline]

    # Without both the differential and the TOI there is nothing to summarise
    if homecf_diff is None or totaltoi is None:
        return '\n'.join(lines)

    lines.append('{0:s} {1:s} in 5v5 attempts in {2:s}'.format(
        team_info.team_as_str(schedules.get_home_team(season, game)),
        visualization_helper.format_number_with_plus(int(homecf_diff)),
        manip.time_to_mss(int(totaltoi))))
    return '\n'.join(lines)
def get_goals_for_timeline(season, game, homeroad, granularity='sec'):
    """
    Returns a list of goal times

    :param season: int, the season
    :param game: int, the game
    :param homeroad: str, 'H' for home and 'R' for road
    :param granularity: can respond in minutes ('min'), or seconds ('sec'), elapsed in game

    :return: a list of goal times in ascending order. With granularity 'min' the times are the
        play-by-play Time values divided by 60 (so they may be fractional); otherwise they are
        returned unchanged.
    :raises ValueError: if homeroad is neither 'H' nor 'R'
    """
    if homeroad == 'H':
        teamid = schedules.get_home_team(season, game)
    elif homeroad == 'R':
        teamid = schedules.get_road_team(season, game)
    else:
        raise ValueError("homeroad must be 'H' or 'R', got {0!r}".format(homeroad))

    pbp = parse_pbp.get_parsed_pbp(season, game)
    goals = pbp[(pbp.Team == teamid) & (pbp.Event == 'Goal')].sort_values('Time')

    # Convert on the extracted column rather than assigning back into a filtered slice of the
    # play-by-play, which avoids pandas' chained-assignment (SettingWithCopy) hazard.
    times = goals.Time
    if granularity == 'min':
        times = times / 60
    return list(times)
def add_onice_players_to_df(df, focus_team, season, gamecol, player_output='ids'):
    """
    Uses the _Secs column in df, the season, and the gamecol to join onto on-ice players.

    :param df: dataframe. Must contain a _Secs column and the column named by gamecol.
    :param focus_team: str or int, team to focus on. Its players will be listed in first in sheet.
        NOTE(review): the player columns are named by string-formatting/concatenating focus_team,
        so in practice this appears to need to be a str; confirm against callers.
    :param season: int, the season
    :param gamecol: str, the column with game IDs
    :param player_output: str, use 'names' or 'nums' or 'ids'. Currently 'nums' is not supported.

    :return: dataframe with team and opponent players (the _Secs column is dropped)
    """
    toi = teams.get_team_toi(season, focus_team) \
        .rename(columns={'Time': '_Secs'}) \
        .drop_duplicates()
    toi = toi[['Game', '_Secs',
               'Team1', 'Team2', 'Team3', 'Team4', 'Team5', 'Team6',
               'Opp1', 'Opp2', 'Opp3', 'Opp4', 'Opp5', 'Opp6']] \
        .rename(columns={'Game': gamecol})

    # Rename the TeamN columns so they carry the focus team's name instead
    toi = toi.rename(columns={col: '{0:s}{1:s}'.format(focus_team, col[-1])
                              for col in toi.columns if col.startswith('Team')})

    joined = df.merge(toi, how='left', on=['_Secs', gamecol])

    # Find missing games by finding nulls in Opp1.
    # The game IDs live in gamecol (not necessarily a column literally named 'Game').
    missings = set(joined[pd.isnull(joined.Opp1)][gamecol].unique())
    hassome = set(joined[pd.notnull(joined.Opp1)][gamecol].unique())

    for game in missings:
        gameid = int(round(game))
        if game in hassome:
            print('Missing some (not all) data to join on-ice players for {0:d}'.format(gameid))
        else:
            # If I actually do have the TOI (which may not have made it into the team log b/c of missing PBP),
            # then use that
            try:
                gametoi = parse_toi.get_parsed_toi(season, gameid) \
                    .rename(columns={'Time': '_Secs'}) \
                    .drop_duplicates() \
                    .drop(['HomeStrength', 'RoadStrength', 'HG', 'RG'], axis=1)

                # Now that I do, need to switch column names, get players in right format, and join
                hname = team_info.team_as_str(schedules.get_home_team(season, gameid))
                if hname == focus_team:
                    focus_prefix, opp_prefix = 'H', 'R'
                else:
                    focus_prefix, opp_prefix = 'R', 'H'
                gametoi = gametoi.rename(
                    columns={focus_prefix + str(x): focus_team + str(x) for x in range(1, 7)})
                gametoi = gametoi.rename(
                    columns={opp_prefix + str(x): 'Opp' + str(x) for x in range(1, 7)})

                # Tag with the game ID under gamecol so it lines up with the join keys below
                gametoi = gametoi.assign(**{gamecol: gameid})

                joined = helpers.fill_join(joined, gametoi, on=['_Secs', gamecol])
                continue
            except OSError:
                # No parsed TOI available for this game either; fall through to the warning
                pass
            print('Missing all data to join on-ice players for {0:d}'.format(gameid))
        print('Check scrape / parse status and game number')

    # Now convert to names or numbers. 'ids' needs no conversion and 'nums' is not implemented (TODO).
    if player_output == 'names':
        # Assumes the 12 on-ice player columns are the last 12 columns of joined
        for col in joined.columns[-12:]:
            # Replace the whole column rather than writing strings into a numeric column in place
            joined[col] = players.playerlst_as_str(pd.to_numeric(joined[col]))

    return joined.drop('_Secs', axis=1)
def _first_h2h_value(df, hpid, rpid, column):
    """
    Looks up the first value of a column for one home/road player pairing.

    :param df: dataframe with PlayerID1, PlayerID2 and the requested column
    :param hpid: home player ID, matched against PlayerID1
    :param rpid: road player ID, matched against PlayerID2
    :param column: str, name of the column to read

    :return: int, the value from the first matching row, or 0 if the pairing has no rows
    """
    matches = df[(df.PlayerID1 == hpid) & (df.PlayerID2 == rpid)]
    if len(matches) == 0:
        return 0
    return int(matches[column].iloc[0])


def _game_h2h_chart(season, game, corsi, toi, orderh, orderr, numf_h=None, numf_r=None, save_file=None):
    """
    This method actually does the plotting for game_h2h

    :param season: int, the season
    :param game: int, the game
    :param corsi: df of P1, P2, Corsi +/- for P1
    :param toi: df of P1, P2, H2H TOI
    :param orderh: list of float, player order on y-axis, top to bottom
    :param orderr: list of float, player order on x-axis, left to right
    :param numf_h: int. Number of forwards for home team. Used to add horizontal bold line between F and D
    :param numf_r: int. Number of forwards for road team. Used to add vertical bold line between F and D.
    :param save_file: str of file to save the figure to, or None to simply display. Specify 'fig' to
        return the figure instead.

    :return: nothing, or the figure when save_file is 'fig'
    """
    hname = team_info.team_as_str(schedules.get_home_team(season, game), True)
    homename = team_info.team_as_str(schedules.get_home_team(season, game), False)
    rname = team_info.team_as_str(schedules.get_road_team(season, game), True)
    roadname = team_info.team_as_str(schedules.get_road_team(season, game), False)

    fig, ax = plt.subplots(1, figsize=[11, 7])

    # Convert dataframes to coordinates
    horderdf = pd.DataFrame({'PlayerID1': orderh[::-1], 'Y': list(range(len(orderh)))})
    rorderdf = pd.DataFrame({'PlayerID2': orderr, 'X': list(range(len(orderr)))})
    plotdf = toi.merge(corsi, how='left', on=['PlayerID1', 'PlayerID2']) \
        .merge(horderdf, how='left', on='PlayerID1') \
        .merge(rorderdf, how='left', on='PlayerID2')

    # Hist2D of TOI
    # I make the bins a little weird so my coordinates are centered in them. Otherwise, they're all on the edges.
    _, _, _, image = ax.hist2d(x=plotdf.X, y=plotdf.Y,
                               bins=(np.arange(-0.5, len(orderr) + 0.5, 1),
                                     np.arange(-0.5, len(orderh) + 0.5, 1)),
                               weights=plotdf.Min, cmap=plt.cm.summer)

    # Convert IDs to names and label axes and axes ticks
    ax.set_xlabel(roadname)
    ax.set_ylabel(homename)
    xorder = players.playerlst_as_str(orderr)
    yorder = players.playerlst_as_str(orderh)[::-1]  # need to go top to bottom, so reverse order
    ax.set_xticks(range(len(xorder)))
    ax.set_yticks(range(len(yorder)))
    ax.set_xticklabels(xorder, fontsize=10, rotation=45, ha='right')
    ax.set_yticklabels(yorder, fontsize=10)
    ax.set_xlim(-0.5, len(orderr) - 0.5)
    ax.set_ylim(-0.5, len(orderh) - 0.5)

    # Hide the little ticks on the axes by setting their length to 0
    ax.tick_params(axis='both', which='both', length=0)

    # Add dividing lines between rows
    for x in np.arange(0.5, len(orderr) - 0.5, 1):
        ax.plot([x, x], [-0.5, len(orderh) - 0.5], color='k')
    for y in np.arange(0.5, len(orderh) - 0.5, 1):
        ax.plot([-0.5, len(orderr) - 0.5], [y, y], color='k')

    # Add a bold line between F and D.
    if numf_r is not None:
        ax.plot([numf_r - 0.5, numf_r - 0.5], [-0.5, len(orderh) - 0.5], color='k', lw=3)
    if numf_h is not None:
        ax.plot([-0.5, len(orderr) - 0.5], [len(orderh) - numf_h - 0.5, len(orderh) - numf_h - 0.5],
                color='k', lw=3)

    # Colorbar for TOI
    cbar = fig.colorbar(image, pad=0.1)
    cbar.ax.set_ylabel('TOI (min)')

    # Add trademark
    cbar.ax.set_xlabel('Muneeb Alam\n@muneebalamcu', labelpad=20)

    # Add labels for Corsi and circle negatives
    neg_x = []
    neg_y = []
    for y in range(len(orderh)):
        hpid = orderh[len(orderh) - y - 1]
        for x in range(len(orderr)):
            rpid = orderr[x]

            # 0 when the pairing was never on ice together for a corsi event
            cf = _first_h2h_value(corsi, hpid, rpid, 'HomeCorsi')

            if cf == 0:
                cf = '0'
            elif cf > 0:
                cf = '+' + str(cf)  # Easier to pick out positives with plus sign
            else:
                cf = str(cf)
                neg_x.append(x)
                neg_y.append(y)

            ax.annotate(cf, xy=(x, y), ha='center', va='center')

    # Circle negative numbers by making a scatterplot with black edges and transparent faces
    ax.scatter(neg_x, neg_y, marker='o', edgecolors='k', s=200, facecolors='none')

    # Add TOI and Corsi totals at end of rows/columns
    # NOTE(review): totals are divided by 5 here (and by 25 for the title); presumably each 5v5 event
    # appears once per opposing skater in these H2H tables -- confirm against the caller.
    topax = ax.twiny()
    topax.set_xticks(range(len(xorder)))
    rtotals = pd.DataFrame({'PlayerID2': orderr}) \
        .merge(toi[['PlayerID2', 'Secs']].groupby('PlayerID2').sum().reset_index(),
               how='left', on='PlayerID2') \
        .merge(corsi[['PlayerID2', 'HomeCorsi']].groupby('PlayerID2').sum().reset_index(),
               how='left', on='PlayerID2')
    rtotals.loc[:, 'HomeCorsi'] = rtotals.HomeCorsi.fillna(0)
    # Road players see the home differential with the sign flipped
    rtotals.loc[:, 'CorsiLabel'] = rtotals.HomeCorsi.apply(
        lambda x: visualization_helper.format_number_with_plus(-1 * int(x / 5)))
    rtotals.loc[:, 'TOILabel'] = rtotals.Secs.apply(lambda x: manip.time_to_mss(x / 5))
    toplabels = ['{0:s} in {1:s}'.format(x, y)
                 for x, y in zip(list(rtotals.CorsiLabel), list(rtotals.TOILabel))]

    ax.set_xticks(range(len(xorder)))
    topax.set_xticklabels(toplabels, fontsize=6, rotation=45, ha='left')
    topax.set_xlim(-0.5, len(orderr) - 0.5)
    topax.tick_params(axis='both', which='both', length=0)

    rightax = ax.twinx()
    rightax.set_yticks(range(len(yorder)))
    htotals = pd.DataFrame({'PlayerID1': orderh[::-1]}) \
        .merge(toi[['PlayerID1', 'Secs']].groupby('PlayerID1').sum().reset_index(),
               how='left', on='PlayerID1') \
        .merge(corsi[['PlayerID1', 'HomeCorsi']].groupby('PlayerID1').sum().reset_index(),
               how='left', on='PlayerID1')
    htotals.loc[:, 'HomeCorsi'] = htotals.HomeCorsi.fillna(0)
    htotals.loc[:, 'CorsiLabel'] = htotals.HomeCorsi.apply(
        lambda x: visualization_helper.format_number_with_plus(int(x / 5)))
    htotals.loc[:, 'TOILabel'] = htotals.Secs.apply(lambda x: manip.time_to_mss(x / 5))
    rightlabels = ['{0:s} in {1:s}'.format(x, y)
                   for x, y in zip(list(htotals.CorsiLabel), list(htotals.TOILabel))]

    rightax.set_yticks(range(len(yorder)))
    rightax.set_yticklabels(rightlabels, fontsize=6)
    rightax.set_ylim(-0.5, len(orderh) - 0.5)
    rightax.tick_params(axis='both', which='both', length=0)

    # Add brief explanation for the top left cell at the bottom
    explanation = []
    row1name = yorder.iloc[-1]
    col1name = xorder.iloc[0]
    # Fall back to 0 when the top-left pairing has no TOI / corsi rows, matching the per-cell labels above
    timeh2h = _first_h2h_value(toi, orderh[0], orderr[0], 'Secs')
    shoth2h = _first_h2h_value(corsi, orderh[0], orderr[0], 'HomeCorsi')

    explanation.append('The top left cell indicates {0:s} (row 1) faced {1:s} (column 1) for {2:s}.'
                       .format(row1name, col1name, manip.time_to_mss(timeh2h)))
    if shoth2h == 0:
        explanation.append('During that time, {0:s} and {1:s} were even in attempts.'.format(hname, rname))
    elif shoth2h > 0:
        explanation.append('During that time, {0:s} out-attempted {1:s} by {2:d}.'.format(hname, rname, shoth2h))
    else:
        explanation.append('During that time, {1:s} out-attempted {0:s} by {2:d}.'.format(hname, rname, -1 * shoth2h))
    explanation = '\n'.join(explanation)

    # Hacky way to annotate: add this to x-axis label
    ax.set_xlabel(ax.get_xlabel() + '\n\n' + explanation)

    plt.subplots_adjust(bottom=0.27, left=0.17, top=0.82, right=1.0)

    # Add title
    plt.title(_get_game_h2h_chart_title(season, game, corsi.HomeCorsi.sum() / 25, toi.Secs.sum() / 25),
              y=1.1, va='bottom')

    # The window title is set through the figure manager; the canvas-level method no longer exists
    # in current matplotlib. Some backends may not provide a manager.
    window_title = '{0:d} {1:d} H2H.png'.format(season, game)
    manager = plt.gcf().canvas.manager
    if manager is not None:
        manager.set_window_title(window_title)

    if save_file is None:
        plt.show()
    elif save_file == 'fig':
        return plt.gcf()
    else:
        plt.savefig(save_file)
    return None
def game_timeline(season, game, save_file=None):
    """
    Creates a shot attempt timeline as seen on @muneebalamcu

    :param season: int, the season
    :param game: int, the game
    :param save_file: str, specify a valid filepath to save to file. If None, merely shows on screen.
        Specify 'fig' to return the figure

    :return: nothing, or the figure
    """
    hname = team_info.team_as_str(schedules.get_home_team(season, game))
    rname = team_info.team_as_str(schedules.get_road_team(season, game))

    # Everything below is keyed by team name
    cf = {hname: _get_home_cf_for_timeline(season, game),
          rname: _get_road_cf_for_timeline(season, game)}
    pps = {hname: _get_home_adv_for_timeline(season, game),
           rname: _get_road_adv_for_timeline(season, game)}
    gs = {hname: _get_home_goals_for_timeline(season, game),
          rname: _get_road_goals_for_timeline(season, game)}
    cycle_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    colors = {hname: cycle_colors[0], rname: cycle_colors[1]}
    darkercolors = {team: visualization_helper.make_color_darker(hex=col) for team, col in colors.items()}

    # Create two axes. Use bottom (mins) for labeling but top (secs) for plotting
    ax = plt.gca()
    ax2 = ax.twiny()

    # Corsi lines
    for team in cf:
        ax2.plot(cf[team].Time, cf[team].CumCF, label=team, color=colors[team])

    # Label goal counts when scored with diamonds
    for team in gs:
        xs, ys = _goal_times_to_scatter_for_timeline(gs[team], cf[team])
        ax2.scatter(xs, ys, edgecolors='k', marker='D', label='{0:s} goal'.format(team),
                    zorder=3, color=colors[team])

    # Bold lines to separate periods
    _, ymax = ax2.get_ylim()
    for x in range(0, cf[hname].Time.max(), 1200):
        ax2.plot([x, x], [0, ymax], color='k', lw=2)

    # PP highlighting
    # Note that axvspan works in relative coords (0 to 1), so need to divide by ymax
    for team in pps:
        for pptype in pps[team]:
            # One-man advantages use the team's base colour; anything else uses the darker shade
            if pptype[-2:] == '+1':
                colors_to_use = colors
            else:
                colors_to_use = darkercolors
            for i, (start, end) in enumerate(pps[team][pptype]):
                cf_at_time_min = cf[team].loc[cf[team].Time == start].CumCF.max()  # in case there are multiple
                cf_at_time_max = cf[team][cf[team].Time == end].CumCF.max()

                # Only the first span of each type gets a legend entry; every span of a type
                # shares the same colour (later spans previously fell back to the base colour).
                span_kwargs = {}
                if i == 0:
                    span_kwargs['label'] = '{0:s} {1:s}'.format(team, pptype)
                ax2.axvspan(start, end, ymin=cf_at_time_min / ymax, ymax=cf_at_time_max / ymax,
                            alpha=0.5, facecolor=colors_to_use[team], **span_kwargs)

                # Thin strip along the bottom of the chart marking the same interval
                ax2.axvspan(start, end, ymin=0, ymax=0.05, alpha=0.5, facecolor=colors_to_use[team])

    # Set limits
    ax2.set_xlim(0, cf[hname].Time.max())
    ax2.set_ylim(0, ymax)
    ax.set_ylabel('Cumulative CF')
    plt.legend(loc=2, framealpha=0.5, fontsize=8)

    # Ticks every 10 min on bottom axis; none on top axis
    ax.set_xlim(0, cf[hname].Time.max() / 60)
    ax.set_xticks(range(0, cf[hname].Time.max() // 60 + 1, 10))
    ax.set_xlabel('Time elapsed in game (min)')
    ax2.set_xticks([])

    # Set title
    plt.title(_get_corsi_timeline_title(season, game))

    # The window title is set through the figure manager; the canvas-level method no longer exists
    # in current matplotlib. Some backends may not provide a manager.
    window_title = '{0:d} {1:d} TL.png'.format(season, game)
    manager = plt.gcf().canvas.manager
    if manager is not None:
        manager.set_window_title(window_title)

    if save_file is None:
        plt.show()
    elif save_file == 'fig':
        return plt.gcf()
    else:
        plt.savefig(save_file)
    plt.close()
    return None
def on_success(self, data):
    """
    Handles one incoming tweet and, if it asks for a game, replies with the H2H and timeline charts.

    Tweets without text, tweets containing a t.co link, and retweets are ignored. Otherwise the
    season and game are resolved from the text (playoff lookup, then a 4-digit season and 5-digit
    game ID, then a pair of team abbreviations), the data is re-scraped when the schedule looks
    stale, and the charts are regenerated and tweeted when needed. Errors are printed and, where
    appropriate, tweeted back rather than raised.

    :param data: dict, the tweet payload; only the 'text' key is read here

    :return: nothing
    """
    global LAST_UPDATE, SCRAPED_NEW

    if 'text' not in data:
        return

    print(data['text'])

    if r'https://t.co/' in data['text']:
        print('This looks like an image')
        return
    if data['text'][:3] == 'RT ':
        print('This looks like a retweet')
        return

    try:
        if player_cf_graphs(data):
            return

        try:
            season, gameid = games.find_playoff_game(data['text'])
        except ValueError:
            season = None
            gameid = None

        # Trailing space lets the whitespace-delimited regexes match a number at the end of the tweet.
        # Defined up front because both the season and the game lookups need it.
        text = data['text'] + ' '

        # Get season with a 4-digit regex
        if season is None:
            season_match = re.search(r'\s\d{4}\s', text)
            if season_match is not None:
                season = int(season_match.group(0))
                if season < 2015 or season > schedules.get_current_season():
                    tweet_error("Sorry, I don't have data for this season yet", data)
                    print('Invalid season')
                    return
            else:
                season = schedules.get_current_season()

        # Get game with a 5-digit regex
        if gameid is None:
            game_match = re.search(r'\s\d{5}\s', text)
            if game_match is not None:
                gameid = int(game_match.group(0))
                if not schedules.check_valid_game(season, gameid):
                    tweet_error("Sorry, this game ID doesn't look right", data)
                    print('Game ID not right')
                    return

        if gameid is None:
            # Get team names
            parts = data['text'].replace('@h2hbot', '').strip().split(' ')
            found_teams = []
            for part in parts:
                # Letters only: [A-z] would also accept the punctuation between 'Z' and 'a'
                if re.match(r'[A-Za-z]{3}', part.strip()):
                    part = part.upper()
                    if team_info.team_as_id(part) is not None:
                        found_teams.append(part)
            if len(found_teams) == 0:
                print('Think this was a tagged discussion')
                return
            elif len(found_teams) != 2:
                tweet_error("Sorry, I need 2 teams. Found {0:d}. Make sure abbreviations are correct"
                            .format(len(found_teams)), data)
                return

            team1, team2 = found_teams[:2]
            gameid = games.most_recent_game_id(team1, team2)

        h2hfile = 'bot/{0:d}0{1:d}h2h.png'.format(season, gameid)
        tlfile = 'bot/{0:d}0{1:d}tl.png'.format(season, gameid)

        oldstatus = schedules.get_game_status(season, gameid)

        # Scrape only if:
        # Game is in current season AND
        # Game is today, and my schedule says it's "scheduled", OR
        # Game is today, and my schedule doesn't say it's final yet, and it's been at least
        #   5 min since last scrape, OR
        # Game was before today and my schedule doesn't say "final"
        # Update in these cases
        scrapeagain = False
        if season == schedules.get_current_season():
            today = datetime.datetime.now().strftime('%Y-%m-%d')
            gdata = schedules.get_game_data_from_schedule(season, gameid)
            if gdata['Date'] == today:
                if gdata['Status'] == 'Scheduled':
                    scrapeagain = True
                elif gdata['Status'] != 'Final' and \
                        (LAST_UPDATE is None or time.time() - LAST_UPDATE >= 60 * 5):
                    scrapeagain = True
            elif gdata['Date'] < today and gdata['Status'] != 'Final':
                scrapeagain = True
        if scrapeagain:
            autoupdate.autoupdate(season, update_team_logs=False)
            LAST_UPDATE = time.time()
            SCRAPED_NEW = True

        hname = schedules.get_home_team(season, gameid)
        rname = schedules.get_road_team(season, gameid)
        status = schedules.get_game_status(season, gameid)

        # Rebuild the charts when the game was in progress, its status changed, or no timeline file exists yet
        if 'In Progress' in oldstatus or status != oldstatus or not os.path.exists(tlfile):
            try:
                game_timeline.game_timeline(season, gameid, save_file=tlfile)
                game_h2h.game_h2h(season, gameid, save_file=h2hfile)
                tweet_game_images(h2hfile, tlfile, hname, rname, status, data)
                print('Success!')
            except Exception as e:
                # Best effort: report the failure to the requester instead of crashing the stream
                print(data['text'], time.time(), e, e.args)
                tweet_error("Sorry, there was an unknown error while making the charts (cc @muneebalamcu)", data)

    except Exception as e:
        # Top-level boundary for the stream handler: log and keep listening
        print('Unexpected error')
        print(time.time(), data['text'], e, e.args)