Exemplo n.º 1
0
def get_team_combo_corsi(season, team, games, n_players=2):
    """
    Gets combo Corsi (CF and CA counts per group of on-ice teammates) for team for specified games.

    Events come from the team's play-by-play, filtered with manip.filter_for_five_on_five and
    manip.filter_for_corsi.

    :param season: int, the season
    :param team: int or str, team
    :param games: int or iterable of int, games
    :param n_players: int. E.g. 1 gives you player Corsi, 2 gives you 2-player group Corsi, 3 makes 3-player
        groups, etc. Must be at least 1.

    :return: dataframe, the output of manip.convert_to_all_combos applied to one row per
        (PlayerID1, ..., PlayerIDn) group with summed CF and CA
    :raises ValueError: if n_players is less than 1
    """

    if n_players < 1:
        raise ValueError('n_players must be at least 1, got {0}'.format(n_players))

    if helpers.check_number(games):
        games = [games]

    teamid = team_info.team_as_id(team)
    teamname = team_info.team_as_str(team)

    # Corsi events in the requested games, with on-ice players joined on
    pbp = teams.get_team_pbp(season, team)
    events = pbp.assign(_Secs=pbp.Time) \
        .merge(pd.DataFrame({'Game': games}), how='inner', on='Game') \
        .pipe(manip.filter_for_five_on_five) \
        .pipe(manip.filter_for_corsi)
    events = events[['Game', 'Time', 'Team', '_Secs']] \
        .pipe(onice.add_onice_players_to_df, focus_team=team, season=season, gamecol='Game')

    # Keep only the focus team's first five on-ice slots, then go long: one row per (event, player)
    cols_to_drop = ['Opp{0:d}'.format(i) for i in range(1, 7)] + ['{0:s}6'.format(teamname)]
    long_events = events.drop(cols_to_drop, axis=1) \
        .pipe(helpers.melt_helper, id_vars=['Game', 'Time', 'Team'], var_name='P', value_name='PlayerID') \
        .drop('P', axis=1)

    # Self-join n_players - 1 times so each event row carries an n-player group
    eventkeys = ['Game', 'Time', 'Team']
    groupcols = ['PlayerID{0:d}'.format(i + 1) for i in range(n_players)]
    combos = long_events.rename(columns={'PlayerID': groupcols[0]})
    for playercol in groupcols[1:]:
        combos = combos.merge(long_events.rename(columns={'PlayerID': playercol}), how='inner', on=eventkeys)

    # Assign CF and CA: an event by the focus team is a CF, anything else is a CA
    is_for = combos.Team == teamid
    combos = combos.assign(CF=is_for.astype(int), CA=(~is_for).astype(int)) \
        .drop(eventkeys, axis=1)

    # Group by players and count
    grouped = combos.groupby(groupcols, as_index=False).sum()

    # Convert to all combos
    return manip.convert_to_all_combos(grouped, 0, *groupcols)
Exemplo n.º 2
0
def find_recent_games(team1, team2=None, limit=1):
    """
    Convenience lookup of the latest games in the current season that are no longer marked "Scheduled"
    (i.e. in progress or final) for the given team, or for the matchup when two teams are given.

    :param team1: str, a team
    :param team2: str, a team (optional). If given, only games involving both teams are kept.
    :param limit: How many games to return

    :return: df with relevant rows, sorted by Game descending
    """
    current = schedules.get_current_season()
    full_schedule = schedules.get_season_schedule(current)
    started = full_schedule[full_schedule.Status != "Scheduled"]

    team_ids = [team_info.team_as_id(team1)]
    if team2 is not None:
        team_ids.append(team_info.team_as_id(team2))

    # Each pass keeps only games where the team is either home or road
    for tid in team_ids:
        involved = (started.Home == tid) | (started.Road == tid)
        started = started[involved]

    newest_first = started.sort_values('Game', ascending=False)
    return newest_first.iloc[:limit, :]
Exemplo n.º 3
0
def find_recent_games(team1, team2=None, limit=1, season=None):
    """
    Convenience lookup of the latest games dated today or earlier for the given team, or for the
    matchup when two teams are given.

    :param team1: str, a team
    :param team2: str, a team (optional). If given, only games involving both teams are kept.
    :param limit: How many games to return
    :param season: int, the season. Defaults to the current season.

    :return: df with relevant rows, sorted by Game descending
    """
    if season is None:
        season = schedules.get_current_season()
    full_schedule = schedules.get_season_schedule(season)

    # Filter on date rather than Status != "Scheduled": Status is stale if data hasn't been updated
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    candidates = full_schedule[full_schedule.Date <= today]

    team_ids = [team_info.team_as_id(team1)]
    if team2 is not None:
        team_ids.append(team_info.team_as_id(team2))

    # Each pass keeps only games where the team is either home or road
    for tid in team_ids:
        involved = (candidates.Home == tid) | (candidates.Road == tid)
        candidates = candidates[involved]

    newest_first = candidates.sort_values('Game', ascending=False)
    return newest_first.iloc[:limit, :]
Exemplo n.º 4
0
def get_road_team(season, game, returntype='id'):
    """
    Looks up the road team for a game in the schedule.

    :param season: int, the season
    :param game: int, the game
    :param returntype: str, 'id' (case-insensitive) for the team ID; any other value gives the team name

    :return: the result of team_info.team_as_id or team_info.team_as_str, depending on returntype
    """
    road_team = get_game_data_from_schedule(season, game)['Road']
    wants_id = returntype.lower() == 'id'
    convert = team_info.team_as_id if wants_id else team_info.team_as_str
    return convert(road_team)
Exemplo n.º 5
0
def filter_5v5_for_team(df, **kwargs):
    """
    This method filters the given dataframe for the given team, if specified

    :param df: dataframe. Needs a TeamID column when a team is specified.

    :param kwargs: relevant one is team (anything team_info.team_as_id accepts); other keys are ignored

    :return: dataframe, filtered for the specified team, or df itself when no team is given
    """

    if 'team' not in kwargs:
        return df

    # Coerce to int: the {0:d} format code rejects a float team ID
    teamid = int(team_info.team_as_id(kwargs['team']))
    return df.query("TeamID == {0:d}".format(teamid))
Exemplo n.º 6
0
def _add_onice_players_to_df(df, focus_team, season, gamecol, player_output):
    """
    Uses the _Secs column in df and the season to join onto on-ice players from the focus team's TOI log.

    The focus team's player columns are named with the team's string form (team_info.team_as_str) followed
    by the slot number 1-6; opponent columns are Opp1-Opp6. The _Secs column is dropped from the result.

    :param df: dataframe with Game and _Secs columns
    :param focus_team: str or int, team to focus on. Its players will be listed in first in sheet.
    :param season: int, the season
    :param gamecol: str, the column with game IDs. NOTE: currently unused; the join is always on 'Game'.
    :param player_output: str, use 'names' or 'nums'. Currently only 'names' is supported.

    :return: dataframe with team and opponent players
    """

    # Use the string form for column names so an int focus_team works too
    teamname = team_info.team_as_str(focus_team)

    team_cols = ['Team{0:d}'.format(i) for i in range(1, 7)]
    opp_cols = ['Opp{0:d}'.format(i) for i in range(1, 7)]
    player_cols = team_cols + opp_cols

    toi = teams.get_team_toi(season, focus_team).rename(columns={'Time': '_Secs'})
    # Copy so the column assignments below don't write into a slice of the TOI frame
    toi = toi[['Game', '_Secs'] + player_cols].copy()

    # Now convert to names or numbers
    for col in player_cols:
        toi[col] = players.playerlst_as_str(toi[col])
    # TODO: support player_output == 'nums'

    # Rename columns: Team1..Team6 -> <teamname>1..<teamname>6
    toi = toi.rename(columns={col: '{0:s}{1:s}'.format(teamname, col[-1]) for col in team_cols})

    return df.merge(toi, how='left', on=['_Secs', 'Game']).drop('_Secs', axis=1)
Exemplo n.º 7
0
def update_player_log_file(playerids, seasons, games, teams, statuses):
    """
    Updates the player log file with given players. The player log file notes which players played in which games
    and whether they were scratched or played.

    Scalar arguments are repeated to the length of playerids. If the existing log has exactly one row,
    that row is treated as the dtype-setting placeholder and is replaced; otherwise the new rows are appended.

    :param playerids: int or str or list of int
    :param seasons: int, the season, or list of int the same length as playerids
    :param games: int, the game, or list of int the same length as playerids
    :param teams: str or int, the team, or list of int the same length as playerids
    :param statuses: str, or list of str the same length as playerids

    :return: nothing
    """

    def _repeat(value):
        # Broadcast a scalar to one entry per player
        return [value] * len(playerids)

    # Change everything to lists first if need be
    if isinstance(playerids, (int, str)):
        playerids = [player_as_id(playerids)]
    if helpers.check_number(seasons):
        seasons = _repeat(seasons)
    if helpers.check_number(games):
        games = _repeat(games)
    if helpers.check_types(teams):
        teams = _repeat(team_info.team_as_id(teams))
    if isinstance(statuses, str):
        statuses = _repeat(statuses)

    new_rows = pd.DataFrame({
        'ID': playerids,  # Player ID
        'Team': teams,  # Team
        'Status': statuses,  # P for played, S for scratch.
        'Season': seasons,  # Season
        'Game': games,  # Game
    })

    # Read the existing log once and reuse it for both the check and the append
    existing = get_player_log_file()
    if len(existing) == 1:
        # In this case, the only entry is our original entry for Ovi, that sets the datatypes properly
        write_player_log_file(new_rows)
    else:
        write_player_log_file(pd.concat([existing, new_rows]))
0
def get_team_schedule(season=None, team=None, startdate=None, enddate=None):
    """
    Gets the games for given team in given season that are not marked "Scheduled". Or if startdate and
    enddate are specified, searches between those dates. If season and startdate (and/or enddate) are
    specified, searches that season between those dates.

    :param season: int, the season
    :param team: int or str, the team
    :param startdate: str, YYYY-MM-DD
    :param enddate: str, YYYY-MM-DD

    :return: dataframe. When searching by dates only, a Season column is added. Returns None if season
        is not given and startdate or enddate is missing.
    """
    after_start = 'Date >= "{0:s}"'
    before_end = 'Date <= "{0:s}"'

    # Single-season search, optionally narrowed by either date bound
    if season is not None:
        sched = get_season_schedule(season).query('Status != "Scheduled"')
        if startdate is not None:
            sched = sched.query(after_start.format(startdate))
        if enddate is not None:
            sched = sched.query(before_end.format(enddate))
        tid = team_info.team_as_id(team)
        is_home = sched.Home == tid
        is_road = sched.Road == tid
        return sched[is_home | is_road]

    # TODO handle case when only team and startdate, or only team and enddate, are given
    if startdate is None or enddate is None:
        return None

    # Date-range search: stitch together every season the range touches
    firstseason = helpers.infer_season_from_date(startdate)
    lastseason = helpers.infer_season_from_date(enddate)
    pieces = []
    for yr in range(firstseason, lastseason + 1):
        piece = get_team_schedule(yr, team) \
            .query('Status != "Scheduled"') \
            .assign(Season=yr)
        # Only the boundary seasons need trimming
        if yr == firstseason:
            piece = piece.query(after_start.format(startdate))
        if yr == lastseason:
            piece = piece.query(before_end.format(enddate))
        pieces.append(piece)
    return pd.concat(pieces)
Exemplo n.º 9
0
def team_score_shot_rate_scatter(team,
                                 startseason,
                                 endseason=None,
                                 save_file=None):
    """
    Plots CF60 against CA60 by score state (capped at +/- 3) for the given team. For each score state an
    arrow is drawn from the median across teams to this team's value, labelled with the score state.

    :param team: str or int, team
    :param startseason: int, the starting season (inclusive)
    :param endseason: int, the ending season (inclusive). Defaults to startseason.
    :param save_file: str or None. If None, the figure is shown; otherwise it is saved to this path.

    :return: nothing
    """

    if endseason is None:
        endseason = startseason

    df = pd.concat([
        manip.team_5v5_shot_rates_by_score(season)
        for season in range(startseason, endseason + 1)
    ])

    df.loc[:, 'ScoreState'] = df.ScoreState.apply(
        lambda x: max(min(3, x), -3))  # reduce to +/- 3
    df = df.drop('Game', axis=1) \
        .groupby(['Team', 'ScoreState'], as_index=False) \
        .sum()
    df.loc[:, 'CF60'] = df.CF * 3600 / df.Secs
    df.loc[:, 'CA60'] = df.CA * 3600 / df.Secs

    # get medians across teams for each score state
    medians = df[['ScoreState', 'CF60', 'CA60', 'Secs']] \
        .groupby('ScoreState', as_index=False) \
        .median()

    # filter for own team
    teamdf = df.query('Team == {0:d}'.format(int(team_info.team_as_id(team))))

    statelabels = {0: 'Tied'}
    for margin in range(1, 4):
        statelabels[margin] = 'Lead {0:d}'.format(margin)
        statelabels[-margin] = 'Trail {0:d}'.format(margin)

    for state in range(-3, 4):
        statequery = 'ScoreState == {0:d}'.format(state)
        teamxy = teamdf.query(statequery)
        leaguexy = medians.query(statequery)
        if teamxy.empty or leaguexy.empty:
            # No data in this score state (e.g. the team never led by 3), so there is no arrow to draw
            continue

        teamx = teamxy.CF60.iloc[0]
        teamy = teamxy.CA60.iloc[0]
        leaguex = leaguexy.CF60.iloc[0]
        leaguey = leaguexy.CA60.iloc[0]

        # The label sits at the midpoint of the arrow, rotated to follow it
        midx = (leaguex + teamx) / 2
        midy = (leaguey + teamy) / 2
        rot = _calculate_label_rotation(leaguex, leaguey, teamx, teamy)

        # Arrow from league median to team value
        plt.annotate('',
                     xy=(teamx, teamy),
                     xytext=(leaguex, leaguey),
                     xycoords='data',
                     arrowprops={'arrowstyle': '-|>'})
        plt.annotate(statelabels[state],
                     xy=(midx, midy),
                     ha="center",
                     va="center",
                     xycoords='data',
                     size=8,
                     rotation=rot,
                     bbox=dict(boxstyle="round", fc="w", alpha=0.9))

    # White markers at both ends of each arrow
    plt.scatter(medians.CF60.values, medians.CA60.values, s=100, color='w')
    plt.scatter(teamdf.CF60.values, teamdf.CA60.values, s=100, color='w')

    vhelper.add_good_bad_fast_slow()

    plt.xlabel('CF60')
    plt.ylabel('CA60')

    plt.title(_team_score_shot_rate_scatter_title(team, startseason,
                                                  endseason))

    if save_file is None:
        plt.show()
    else:
        plt.savefig(save_file)
0
def get_dpair_shot_rates(team, startdate, enddate):
    """
    Gets CF/60 and CA/60 by defenseman duo (5v5 only) for this team between given range of dates

    :param team: int or str, team
    :param startdate: str, start date
    :param enddate: str, end date (inclusive)

    :return: dataframe with PlayerID1, PlayerID2, CF, CA, TOI (in secs), CF60 and CA60 (plus whatever other
        numeric columns survive the groupby sum), restricted to pairs where both players have Pos == "D"
    """
    startseason, endseason = [
        helper.infer_season_from_date(x) for x in (startdate, enddate)
    ]
    teamid = int(team_info.team_as_id(team))
    pairkeys = ['Game', 'Team1', 'Team2']

    dflst = []
    for season in range(startseason, endseason + 1):
        games_played = schedules.get_team_games(season, team, startdate,
                                                enddate)
        # Keep game IDs in [20001, 30417] only (presumably regular season and playoffs -- TODO confirm)
        games_played = [g for g in games_played if 20001 <= g <= 30417]
        toi = manip.get_game_h2h_toi(
            season, games_played).rename(columns={'Secs': 'TOI'})
        cf = manip.get_game_h2h_corsi(season, games_played,
                                      'cf').rename(columns={'HomeCorsi': 'CF'})
        ca = manip.get_game_h2h_corsi(season, games_played,
                                      'ca').rename(columns={'HomeCorsi': 'CA'})

        # TOI, CF, and CA have columns designating which team--H or R
        # Use schedule to find appropriate ones to filter for
        sch = schedules.get_team_schedule(season, team, startdate, enddate)
        sch = helper.melt_helper(sch[['Game', 'Home', 'Road']],
                                 id_vars='Game',
                                 var_name='HR',
                                 value_name='Team')
        sch = sch.query('Team == {0:d}'.format(teamid))
        # 'Home' -> 'H', 'Road' -> 'R'; requiring Team1 == Team2 == this side keeps teammate pairs only
        side = sch.HR.apply(lambda x: x[0])
        sch = sch.assign(Team1=side, Team2=side).drop(['Team', 'HR'], axis=1)

        toi = toi.merge(sch, how='inner', on=pairkeys)
        cf = cf.merge(sch, how='inner', on=pairkeys)
        ca = ca.merge(sch, how='inner', on=pairkeys)

        # CF and CA from home perspective, so switch if necessary
        cfca = cf.merge(
            ca,
            how='outer',
            on=['Game', 'PlayerID1', 'PlayerID2', 'Team1', 'Team2'])
        # The mask must come from cfca itself: cf and ca are indexed differently from the merged frame
        on_road = cfca.Team1 == 'R'
        home_cf = cfca.CF.copy()
        home_ca = cfca.CA.copy()
        cfca.loc[on_road, 'CF'] = home_ca[on_road]
        cfca.loc[on_road, 'CA'] = home_cf[on_road]

        cfca = cfca.drop(['Team1', 'Team2'], axis=1)
        toi = toi.drop(['Team1', 'Team2', 'Min'], axis=1)

        joined = toi.merge(cfca, how='outer', on=['PlayerID1', 'PlayerID2', 'Game']) \
            .assign(Season=season)
        dflst.append(joined)

    # Total over all games and seasons, then convert to per-60 rates (TOI is in seconds)
    df = pd.concat(dflst) \
        .groupby(['PlayerID1', 'PlayerID2'], as_index=False).sum()
    df.loc[:, 'CF60'] = df.CF * 3600 / df.TOI
    df.loc[:, 'CA60'] = df.CA * 3600 / df.TOI

    # Inner-join against defensemen on both player columns to keep D-D pairs only
    defensemen = players.get_player_ids_file().query('Pos == "D"')[['ID']]
    df = df.merge(defensemen.rename(columns={'ID': 'PlayerID1'}), how='inner', on='PlayerID1') \
        .merge(defensemen.rename(columns={'ID': 'PlayerID2'}), how='inner', on='PlayerID2')

    return df
Exemplo n.º 11
0
    def on_success(self, data):
        """
        Handles an incoming tweet: works out which game is being asked about, refreshes data if needed,
        and replies with the head-to-head and timeline charts.

        Tweets containing a t.co link or starting with 'RT ' are ignored. The season is taken from
        games.find_playoff_game, else from a standalone 4-digit number, else the current season. The game
        is taken from games.find_playoff_game, else from a standalone 5-digit number, else from the most
        recent game between the two team abbreviations found in the text.

        :param data: dict, the tweet payload. Only handled when it has a 'text' key.

        :return: nothing
        """
        global LAST_UPDATE, SCRAPED_NEW

        if 'text' in data:
            print(data['text'])

            if r'https://t.co/' in data['text']:
                print('This looks like an image')
                return
            if data['text'][:3] == 'RT ':
                print('This looks like a retweet')
                return

            try:
                if player_cf_graphs(data):
                    return

                try:
                    season, gameid = games.find_playoff_game(data['text'])
                except ValueError:
                    season = None
                    gameid = None

                # Trailing space lets the \s-delimited patterns below match a number at the end of the tweet.
                # Defined up front because both the season and the game lookup use it.
                text = data['text'] + ' '

                # Get season with a 4-digit regex
                if season is None:
                    season_match = re.search(r'\s\d{4}\s', text)
                    if season_match is not None:
                        season = int(season_match.group(0))
                        if season < 2015 or season > schedules.get_current_season():
                            tweet_error("Sorry, I don't have data for this season yet", data)
                            print('Invalid season')
                            return
                    else:
                        season = schedules.get_current_season()

                # Get game with a 5-digit regex
                if gameid is None:
                    game_match = re.search(r'\s\d{5}\s', text)
                    if game_match is not None:
                        gameid = int(game_match.group(0))
                        if not schedules.check_valid_game(season, gameid):
                            tweet_error("Sorry, this game ID doesn't look right", data)
                            print('Game ID not right')
                            return

                if gameid is None:
                    # Get team names
                    parts = data['text'].replace('@h2hbot', '').strip().split(' ')
                    found_teams = []
                    for part in parts:
                        # Candidate if it starts with three letters; team_info decides if it is a real team
                        if re.match(r'[A-Za-z]{3}', part.strip()):
                            part = part.upper()
                            if team_info.team_as_id(part) is not None:
                                found_teams.append(part)
                    if len(found_teams) == 0:
                        print('Think this was a tagged discussion')
                        return
                    elif len(found_teams) != 2:
                        tweet_error("Sorry, I need 2 teams. Found {0:d}. Make sure abbreviations are correct"
                                    .format(len(found_teams)), data)
                        return

                    team1, team2 = found_teams[:2]
                    gameid = games.most_recent_game_id(team1, team2)

                h2hfile = 'bot/{0:d}0{1:d}h2h.png'.format(season, gameid)
                tlfile = 'bot/{0:d}0{1:d}tl.png'.format(season, gameid)

                oldstatus = schedules.get_game_status(season, gameid)

                # Scrape only if:
                # Game is in current season AND
                # Game is today, and my schedule says it's "scheduled", OR
                # Game is today, and my schedule doesn't say it's final yet, and it's been at least
                #   5 min since last scrape, OR
                # Game was before today and my schedule doesn't say "final"
                # Update in these cases
                scrapeagain = False
                if season == schedules.get_current_season():
                    today = datetime.datetime.now().strftime('%Y-%m-%d')
                    gdata = schedules.get_game_data_from_schedule(season, gameid)
                    if gdata['Date'] == today:
                        if gdata['Status'] == 'Scheduled':
                            scrapeagain = True
                        elif gdata['Status'] != 'Final' and \
                                (LAST_UPDATE is None or time.time() - LAST_UPDATE >= 60 * 5):
                            scrapeagain = True
                    elif gdata['Date'] < today and gdata['Status'] != 'Final':
                        scrapeagain = True
                if scrapeagain:
                    autoupdate.autoupdate(season, update_team_logs=False)
                    LAST_UPDATE = time.time()
                    SCRAPED_NEW = True

                hname = schedules.get_home_team(season, gameid)
                rname = schedules.get_road_team(season, gameid)
                status = schedules.get_game_status(season, gameid)

                # Redraw and tweet when the game was live, its status changed, or no timeline image exists yet
                if 'In Progress' in oldstatus or status != oldstatus or not os.path.exists(tlfile):
                    try:
                        game_timeline.game_timeline(season, gameid, save_file=tlfile)
                        game_h2h.game_h2h(season, gameid, save_file=h2hfile)
                        tweet_game_images(h2hfile, tlfile, hname, rname, status, data)
                        print('Success!')
                    except Exception as e:
                        print(data['text'], time.time(), e, e.args)
                        tweet_error("Sorry, there was an unknown error while making the charts (cc @muneebalamcu)",
                                    data)

            except Exception as e:
                # Top-level boundary for the stream callback: log and carry on
                print('Unexpected error')
                print(time.time(), data['text'], e, e.args)