def get_team_combo_corsi(season, team, games, n_players=2):
    """
    Gets combo Corsi (shot attempts for and against) for team for specified games.

    The team's play by play is restricted to the requested games, filtered with
    manip.filter_for_five_on_five and manip.filter_for_corsi, joined to on-ice players, and then
    self-merged n_players times so that every event yields one row per ordered group of players.

    :param season: int, the season
    :param team: int or str, team
    :param games: int or iterable of int, games
    :param n_players: int, at least 1. E.g. 1 gives you single-player Corsi, 2 gives you 2-player
        group Corsi, 3 makes 3-player groups, etc

    :return: dataframe with PlayerID1..PlayerID<n_players>, CF and CA, as returned by
        manip.convert_to_all_combos
    :raises ValueError: if n_players is less than 1
    """
    # Validate first: with n_players < 1 the merge loop below never runs and there is nothing to group.
    if n_players < 1:
        raise ValueError('n_players must be at least 1, got {0!r}'.format(n_players))

    if helpers.check_number(games):
        games = [games]
    teamid = team_info.team_as_id(team)

    corsi = teams.get_team_pbp(season, team)
    corsi = corsi.assign(_Secs=corsi.Time) \
        .merge(pd.DataFrame({'Game': games}), how='inner', on='Game') \
        .pipe(manip.filter_for_five_on_five) \
        .pipe(manip.filter_for_corsi) \
        [['Game', 'Time', 'Team', '_Secs']] \
        .pipe(onice.add_onice_players_to_df, focus_team=team, season=season, gamecol='Game')

    # Keep only the focus team's first five player slots.
    # NOTE(review): slot 6 is presumably the goalie -- confirm against the TOI file layout.
    cols_to_drop = ['Opp{0:d}'.format(i) for i in range(1, 7)]
    cols_to_drop.append('{0:s}6'.format(team_info.team_as_str(team)))
    corsi = corsi.drop(cols_to_drop, axis=1) \
        .pipe(helpers.melt_helper, id_vars=['Game', 'Time', 'Team'], var_name='P', value_name='PlayerID') \
        .drop('P', axis=1)

    # Self-join once per group member: each event ends up with every ordered n-tuple of on-ice players.
    event_keys = ['Game', 'Time', 'Team']
    groupcols = ['PlayerID{0:d}'.format(i) for i in range(1, n_players + 1)]
    combos = None
    for col in groupcols:
        renamed = corsi.rename(columns={'PlayerID': col})
        if combos is None:
            combos = renamed
        else:
            combos = combos.merge(renamed, how='inner', on=event_keys)

    # Assign CF and CA: the Team column holds the team credited with the event.
    is_for = combos.Team == teamid
    combos = combos.assign(CF=is_for.astype(int), CA=(~is_for).astype(int)) \
        .drop(event_keys, axis=1)

    # Group by players and count
    grouped = combos.groupby(groupcols, as_index=False).sum()

    # Convert to all columns
    # NOTE(review): presumably fills in missing player combinations with 0 -- confirm in manip.
    return manip.convert_to_all_combos(grouped, 0, *groupcols)
def find_recent_games(team1, team2=None, limit=1):
    """
    Convenience lookup of the latest games in the current season that are no longer "Scheduled"
    (i.e. in progress or final) for one team, or for a matchup of two teams.

    :param team1: str, a team
    :param team2: str, a team (optional). If given, only games involving both teams are kept.
    :param limit: How many games to return

    :return: df with relevant schedule rows, highest game number first
    """

    def involving(frame, team):
        # Rows where the given team is either the home or the road side
        tid = team_info.team_as_id(team)
        return frame[(frame.Home == tid) | (frame.Road == tid)]

    current = schedules.get_current_season()
    recent = schedules.get_season_schedule(current)
    recent = recent[recent.Status != "Scheduled"]

    recent = involving(recent, team1)
    if team2 is not None:
        recent = involving(recent, team2)

    newest_first = recent.sort_values('Game', ascending=False)
    return newest_first.iloc[:limit, :]
def find_recent_games(team1, team2=None, limit=1, season=None):
    """
    Convenience lookup of the latest games dated today or earlier for one team, or for a matchup
    of two teams.

    :param team1: str, a team
    :param team2: str, a team (optional). If given, only games involving both teams are kept.
    :param limit: How many games to return
    :param season: int, the season. Defaults to the current season.

    :return: df with relevant schedule rows, highest game number first
    """

    def involving(frame, team):
        # Rows where the given team is either the home or the road side
        tid = team_info.team_as_id(team)
        return frame[(frame.Home == tid) | (frame.Road == tid)]

    if season is None:
        season = schedules.get_current_season()
    recent = schedules.get_season_schedule(season)

    # Filter on the date instead of Status != "Scheduled": the status check doesn't work if
    # data hasn't been updated. Dates are compared as YYYY-MM-DD strings.
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    recent = recent[recent.Date <= today]

    recent = involving(recent, team1)
    if team2 is not None:
        recent = involving(recent, team2)

    newest_first = recent.sort_values('Game', ascending=False)
    return newest_first.iloc[:limit, :]
def get_road_team(season, game, returntype='id'):
    """
    Looks up the road team for this game in the schedule.

    :param season: int, the season
    :param game: int, the game
    :param returntype: str, 'id' (case-insensitive) or 'name'. Anything other than 'id' gives
        the string form.

    :return: the road team converted with team_info.team_as_id or team_info.team_as_str,
        depending on returntype
    """
    road_team = get_game_data_from_schedule(season, game)['Road']
    wants_id = returntype.lower() == 'id'
    convert = team_info.team_as_id if wants_id else team_info.team_as_str
    return convert(road_team)
def filter_5v5_for_team(df, **kwargs):
    """
    This method filters the given dataframe for the given team, if specified

    :param df: dataframe. Must have a TeamID column when a team is given.
    :param kwargs: relevant one is team (anything team_info.team_as_id accepts)

    :return: dataframe, filtered for the specified team. If no team keyword is given, df itself
        is returned unchanged.
    """
    if 'team' not in kwargs:
        return df

    # Coerce to a plain int before formatting: the "d" format code rejects floats.
    # NOTE(review): assumes team_as_id can hand back a non-int numeric -- other callers in this
    # codebase wrap it in int() the same way.
    teamid = int(team_info.team_as_id(kwargs['team']))
    return df.query("TeamID == {0:d}".format(teamid))
def _add_onice_players_to_df(df, focus_team, season, gamecol, player_output):
    """
    Uses the _Secs column in df and the season to join onto on-ice players.

    The focus team's player columns are named <team abbreviation>1 through <team abbreviation>6
    (using team_info.team_as_str, so the names are the same whether focus_team was given as an
    int or a str); the opponent's are Opp1 through Opp6.

    :param df: dataframe. Needs _Secs and Game columns; _Secs is dropped from the result.
    :param focus_team: str or int, team to focus on. Its players will be listed in first in sheet.
    :param season: int, the season
    :param gamecol: str, the column with game IDs. NOTE: currently unused -- the join is always
        on the column named 'Game'.
    :param player_output: str, use 'names' or 'nums'. Currently only 'names' is supported.

    :return: dataframe with team and opponent players
    """
    teamname = team_info.team_as_str(focus_team)

    team_cols = ['Team{0:d}'.format(i) for i in range(1, 7)]
    opp_cols = ['Opp{0:d}'.format(i) for i in range(1, 7)]
    player_cols = team_cols + opp_cols

    toi = teams.get_team_toi(season, focus_team).rename(columns={'Time': '_Secs'})
    # Copy so the column assignments below act on our own frame, not a view of the TOI data
    toi = toi[['Game', '_Secs'] + player_cols].copy()

    # Now convert to names or numbers
    for col in player_cols:
        toi[col] = players.playerlst_as_str(toi[col])
    if player_output == 'nums':
        pass  # TODO

    # Rename columns: Team1 -> e.g. WSH1. Uses the team's string form rather than the raw
    # focus_team argument, which may be an int.
    toi = toi.rename(columns={col: '{0:s}{1:s}'.format(teamname, col[-1]) for col in team_cols})

    joined = df.merge(toi, how='left', on=['_Secs', 'Game']).drop('_Secs', axis=1)
    return joined
def update_player_log_file(playerids, seasons, games, teams, statuses):
    """
    Updates the player log file with given players. The player log file notes which players played in which games
    and whether they were scratched or played.

    Scalar arguments are broadcast to the length of playerids before the new rows are built.

    :param playerids: int or str or list of int
    :param seasons: int, the season, or list of int the same length as playerids
    :param games: int, the game, or list of int the same length as playerids
    :param teams: str or int, the team, or list of int the same length as playerids
    :param statuses: str, or list of str the same length as playerids

    :return: nothing
    """
    # Change everything to lists first if need be
    if isinstance(playerids, (int, str)):
        playerids = [player_as_id(playerids)]
    n_rows = len(playerids)

    if helpers.check_number(seasons):
        seasons = [seasons] * n_rows
    if helpers.check_number(games):
        games = [games] * n_rows
    if helpers.check_types(teams):
        teams = [team_info.team_as_id(teams)] * n_rows
    if isinstance(statuses, str):
        statuses = [statuses] * n_rows

    df = pd.DataFrame({
        'ID': playerids,  # Player ID
        'Team': teams,  # Team
        'Status': statuses,  # P for played, S for scratch.
        'Season': seasons,  # Season
        'Game': games  # Game
    })

    # Read the existing log once and reuse it for both the check and the append
    existing = get_player_log_file()
    if len(existing) == 1:
        # In this case, the only entry is our original entry for Ovi, that sets the datatypes properly
        write_player_log_file(df)
    else:
        write_player_log_file(pd.concat([existing, df]))
def get_team_schedule(season=None, team=None, startdate=None, enddate=None):
    """
    Gets the schedule for given team in given season. Or if startdate and enddate are specified, searches between
    those dates. If season and startdate (and/or enddate) are specified, searches that season between those dates.

    Games whose Status is "Scheduled" are always excluded. When searching by dates only (no
    season), the result has an extra Season column; the single-season result does not.

    :param season: int, the season
    :param team: int or str, the team
    :param startdate: str, YYYY-MM-DD (inclusive)
    :param enddate: str, YYYY-MM-DD (inclusive)

    :return: dataframe
    :raises ValueError: if season is not given and startdate or enddate is missing
    """
    if season is not None:
        df = get_season_schedule(season).query('Status != "Scheduled"')
        if startdate is not None:
            df = df.query('Date >= "{0:s}"'.format(startdate))
        if enddate is not None:
            df = df.query('Date <= "{0:s}"'.format(enddate))
        tid = team_info.team_as_id(team)
        return df[(df.Home == tid) | (df.Road == tid)]

    # TODO handle case when only team and startdate, or only team and enddate, are given
    if startdate is None or enddate is None:
        raise ValueError('Specify a season, or both startdate and enddate')

    startseason = helpers.infer_season_from_date(startdate)
    endseason = helpers.infer_season_from_date(enddate)

    dflst = []
    for yr in range(startseason, endseason + 1):
        # The single-season call already drops "Scheduled" games
        df = get_team_schedule(yr, team).assign(Season=yr)
        # Only the boundary seasons can contain games outside the requested date range
        if yr == startseason:
            df = df.query('Date >= "{0:s}"'.format(startdate))
        if yr == endseason:
            df = df.query('Date <= "{0:s}"'.format(enddate))
        dflst.append(df)
    return pd.concat(dflst)
def team_score_shot_rate_scatter(team, startseason, endseason=None, save_file=None):
    """
    Plots 5v5 CF60 against CA60 by score state. For each score state from trailing by 3 to
    leading by 3 (larger margins are folded into +/- 3), an arrow is drawn from the median across
    all teams to the given team's rates, labelled with the score state.

    :param team: str or int, team
    :param startseason: int, the starting season (inclusive)
    :param endseason: int, the ending season (inclusive). Defaults to startseason.
    :param save_file: str, file to save the figure to. If None, the figure is shown instead.

    :return: nothing
    """
    if endseason is None:
        endseason = startseason

    def clamp(state):
        # reduce to +/- 3
        return max(min(3, state), -3)

    def rates_at(frame, state):
        # (CF60, CA60) of the first row of frame at this score state
        rows = frame.query('ScoreState == {0:d}'.format(state))
        return rows.CF60.iloc[0], rows.CA60.iloc[0]

    season_frames = [manip.team_5v5_shot_rates_by_score(yr)
                     for yr in range(startseason, endseason + 1)]
    rates = pd.concat(season_frames)
    rates.loc[:, 'ScoreState'] = rates.ScoreState.apply(clamp)
    rates = rates.drop('Game', axis=1)
    rates = rates.groupby(['Team', 'ScoreState'], as_index=False).sum()
    rates.loc[:, 'CF60'] = rates.CF * 3600 / rates.Secs
    rates.loc[:, 'CA60'] = rates.CA * 3600 / rates.Secs

    # get medians
    rate_cols = rates[['ScoreState', 'CF60', 'CA60', 'Secs']]
    medians = rate_cols.groupby('ScoreState', as_index=False).median()

    # filter for own team
    teamid = int(team_info.team_as_id(team))
    teamdf = rates.query('Team == {0:d}'.format(teamid))

    statelabels = {0: 'Tied'}
    for margin in range(1, 4):
        statelabels[margin] = 'Lead {0:d}'.format(margin)
        statelabels[-margin] = 'Trail {0:d}'.format(margin)

    for state in range(-3, 4):
        teamx, teamy = rates_at(teamdf, state)
        leaguex, leaguey = rates_at(medians, state)

        # The label sits halfway along the arrow, rotated to follow it
        midx = (leaguex + teamx) / 2
        midy = (leaguey + teamy) / 2
        rot = _calculate_label_rotation(leaguex, leaguey, teamx, teamy)

        plt.annotate('', xy=(teamx, teamy), xytext=(leaguex, leaguey), xycoords='data',
                     arrowprops={'arrowstyle': '-|>'})
        plt.annotate(statelabels[state], xy=(midx, midy), ha="center", va="center",
                     xycoords='data', size=8, rotation=rot,
                     bbox=dict(boxstyle="round", fc="w", alpha=0.9))

    # White markers at the arrow endpoints
    plt.scatter(medians.CF60.values, medians.CA60.values, s=100, color='w')
    plt.scatter(teamdf.CF60.values, teamdf.CA60.values, s=100, color='w')

    vhelper.add_good_bad_fast_slow()
    plt.xlabel('CF60')
    plt.ylabel('CA60')
    plt.title(_team_score_shot_rate_scatter_title(team, startseason, endseason))

    if save_file is not None:
        plt.savefig(save_file)
    else:
        plt.show()
def get_dpair_shot_rates(team, startdate, enddate):
    """
    Gets CF/60 and CA/60 by defenseman duo (5v5 only) for this team between given range of dates

    :param team: int or str, team
    :param startdate: str, start date
    :param enddate: str, end date (inclusive)

    :return: dataframe with PlayerID1, PlayerID2, CF, CA, TOI (in secs), CF/60 and CA/60. Only
        pairs where both players are listed with Pos == "D" in the player IDs file are kept.
    """
    startseason, endseason = [helper.infer_season_from_date(x) for x in (startdate, enddate)]
    h2h_keys = ['Game', 'Team1', 'Team2']

    dflst = []
    for season in range(startseason, endseason + 1):
        games_played = schedules.get_team_games(season, team, startdate, enddate)
        # NOTE(review): presumably the range of regular season and playoff game IDs -- confirm
        games_played = [g for g in games_played if 20001 <= g <= 30417]

        toi = manip.get_game_h2h_toi(season, games_played).rename(columns={'Secs': 'TOI'})
        cf = manip.get_game_h2h_corsi(season, games_played, 'cf').rename(columns={'HomeCorsi': 'CF'})
        ca = manip.get_game_h2h_corsi(season, games_played, 'ca').rename(columns={'HomeCorsi': 'CA'})

        # TOI, CF, and CA have columns designating which team--H or R
        # Use schedule to find appropriate ones to filter for
        sch = schedules.get_team_schedule(season, team, startdate, enddate)
        sch = helper.melt_helper(sch[['Game', 'Home', 'Road']],
                                 id_vars='Game', var_name='HR', value_name='Team')
        sch = sch.query('Team == {0:d}'.format(int(team_info.team_as_id(team))))
        # 'Home' / 'Road' -> 'H' / 'R'. Both players of a pair must be on this team's side.
        side = sch.HR.apply(lambda x: x[0])
        sch = sch.assign(Team1=side, Team2=side).drop(['Team', 'HR'], axis=1)

        toi = toi.merge(sch, how='inner', on=h2h_keys)
        cf = cf.merge(sch, how='inner', on=h2h_keys)
        ca = ca.merge(sch, how='inner', on=h2h_keys)

        # CF and CA from home perspective, so switch if necessary.
        # The mask must come from cfca itself: after the outer merge its rows no longer line up
        # with those of cf or ca.
        cfca = cf.merge(ca, how='outer', on=['Game', 'PlayerID1', 'PlayerID2', 'Team1', 'Team2'])
        is_road = cfca.Team1 == 'R'
        home_cf = cfca.CF.copy()
        home_ca = cfca.CA.copy()
        cfca.loc[is_road, 'CF'] = home_ca[is_road]
        cfca.loc[is_road, 'CA'] = home_cf[is_road]
        cfca = cfca.drop(['Team1', 'Team2'], axis=1)

        toi = toi.drop(['Team1', 'Team2', 'Min'], axis=1)
        joined = toi.merge(cfca, how='outer', on=['PlayerID1', 'PlayerID2', 'Game']) \
            .assign(Season=season)
        dflst.append(joined)

    df = pd.concat(dflst) \
        .groupby(['PlayerID1', 'PlayerID2'], as_index=False).sum()
    df.loc[:, 'CF60'] = df.CF * 3600 / df.TOI
    df.loc[:, 'CA60'] = df.CA * 3600 / df.TOI

    # Inner-join against the list of defensemen on both sides of the pair
    defensemen = players.get_player_ids_file().query('Pos == "D"')[['ID']]
    df = df.merge(defensemen.rename(columns={'ID': 'PlayerID1'}), how='inner', on='PlayerID1') \
        .merge(defensemen.rename(columns={'ID': 'PlayerID2'}), how='inner', on='PlayerID2')

    return df
def on_success(self, data):
    """
    Handles one incoming stream message: works out which game a tweet is asking about, refreshes
    data if the stored game status looks stale, then makes and tweets the timeline and
    head-to-head charts.

    The game is identified, in order, by games.find_playoff_game, then by a 4-digit season and a
    5-digit game ID in the text, and finally by two team abbreviations (most recent game between
    them). Tweets containing a t.co link and retweets are ignored. Any unexpected error is printed
    rather than raised.

    :param data: dict, the message. Only processed if it has a 'text' key.

    :return: nothing
    """
    global LAST_UPDATE, SCRAPED_NEW

    if 'text' not in data:
        return

    tweet = data['text']
    print(tweet)

    if r'https://t.co/' in tweet:
        print('This looks like an image')
        return
    if tweet[:3] == 'RT ':
        print('This looks like a retweet')
        return

    try:
        if player_cf_graphs(data):
            return

        try:
            season, gameid = games.find_playoff_game(tweet)
        except ValueError:
            season = None
            gameid = None

        # Both regexes below need whitespace on each side of the number, so pad the end.
        # Defined up front because the game ID search needs it even when the season is known.
        text = tweet + ' '

        # Get season with a 4-digit regex
        if season is None:
            season_match = re.search(r'\s\d{4}\s', text)
            if season_match is not None:
                season = int(season_match.group(0))
                if season < 2015 or season > schedules.get_current_season():
                    tweet_error("Sorry, I don't have data for this season yet", data)
                    print('Invalid season')
                    return
            else:
                season = schedules.get_current_season()

        # Get game with a 5-digit regex
        if gameid is None:
            game_match = re.search(r'\s\d{5}\s', text)
            if game_match is not None:
                gameid = int(game_match.group(0))
                if not schedules.check_valid_game(season, gameid):
                    tweet_error("Sorry, this game ID doesn't look right", data)
                    print('Game ID not right')
                    return

        if gameid is None:
            # Get team names
            parts = tweet.replace('@h2hbot', '').strip().split(' ')
            teams = []
            for part in parts:
                # Words starting with three letters are candidate team abbreviations
                if re.match(r'[A-Za-z]{3}', part.strip()):
                    part = part.upper()
                    if team_info.team_as_id(part) is not None:
                        teams.append(part)

            if len(teams) == 0:
                print('Think this was a tagged discussion')
                return
            elif len(teams) != 2:
                tweet_error("Sorry, I need 2 teams. Found {0:d}. Make sure abbreviations are correct"
                            .format(len(teams)), data)
                return

            team1, team2 = teams[:2]
            gameid = games.most_recent_game_id(team1, team2)

        h2hfile = 'bot/{0:d}0{1:d}h2h.png'.format(season, gameid)
        tlfile = 'bot/{0:d}0{1:d}tl.png'.format(season, gameid)

        oldstatus = schedules.get_game_status(season, gameid)

        # Scrape only if:
        # Game is in current season AND
        # Game is today, and my schedule says it's "scheduled", OR
        # Game is today, and my schedule doesn't say it's final yet, and it's been at least
        #   5 min since last scrape, OR
        # Game was before today and my schedule doesn't say "final"
        # Update in these cases
        scrapeagain = False
        if season == schedules.get_current_season():
            today = datetime.datetime.now().strftime('%Y-%m-%d')
            gdata = schedules.get_game_data_from_schedule(season, gameid)
            if gdata['Date'] == today:
                if gdata['Status'] == 'Scheduled':
                    scrapeagain = True
                elif gdata['Status'] != 'Final' and \
                        (LAST_UPDATE is None or time.time() - LAST_UPDATE >= 60 * 5):
                    scrapeagain = True
            elif gdata['Date'] < today and gdata['Status'] != 'Final':
                scrapeagain = True

        if scrapeagain:
            autoupdate.autoupdate(season, update_team_logs=False)
            LAST_UPDATE = time.time()
            SCRAPED_NEW = True

        hname = schedules.get_home_team(season, gameid)
        rname = schedules.get_road_team(season, gameid)
        status = schedules.get_game_status(season, gameid)

        # Charts are made and tweeted only if the game was in progress, its status changed, or
        # there is no timeline image on disk yet
        if 'In Progress' in oldstatus or status != oldstatus or not os.path.exists(tlfile):
            try:
                game_timeline.game_timeline(season, gameid, save_file=tlfile)
                game_h2h.game_h2h(season, gameid, save_file=h2hfile)
                tweet_game_images(h2hfile, tlfile, hname, rname, status, data)
                print('Success!')
            except Exception as e:
                print(data['text'], time.time(), e, e.args)
                tweet_error("Sorry, there was an unknown error while making the charts (cc @muneebalamcu)",
                            data)

    except Exception as e:
        # Top-level boundary of the stream callback: report and carry on with the next tweet
        print('Unexpected error')
        print(time.time(), data['text'], e, e.args)