def get_5v5_df_start_end(**kwargs):
    """
    Builds one 5v5 player log spanning every season touched by the requested date range.

    The date range is resolved by get_startdate_enddate_from_kwargs, so any keyword that function understands
    may be passed here. Each season's log is joined to that season's schedule to get game dates, trimmed to the
    range (both ends inclusive), and tagged with a Season column.

    :param kwargs: forwarded unchanged to get_startdate_enddate_from_kwargs

    :return: dataframe, the concatenated logs ordered by game date (the Date column itself is dropped)
    """

    startdate, enddate = get_startdate_enddate_from_kwargs(**kwargs)
    first_season = helper.infer_season_from_date(startdate)
    last_season = helper.infer_season_from_date(enddate)

    season_logs = []
    for season in range(first_season, last_season + 1):
        log = manip.get_5v5_player_log(season)
        game_dates = schedules.get_season_schedule(season)[['Game', 'Date']]

        # Left join keeps every log row; rows with no schedule match get a null Date
        # and fall out in the range filter below.
        dated = log.merge(game_dates, how='left', on='Game')
        in_range = (dated.Date >= startdate) & (dated.Date <= enddate)
        season_logs.append(dated[in_range].assign(Season=season))

    # Order by date, not game ID: when games are rescheduled the game IDs are not chronological.
    combined = pd.concat(season_logs).sort_values(['Date'])
    return combined.drop('Date', axis=1)
# Example #2
# 0
def get_team_schedule(season=None, team=None, startdate=None, enddate=None):
    """
    Gets the games involving the given team, excluding any whose Status is "Scheduled".

    If season is given, only that season is searched, optionally narrowed by startdate and/or enddate.
    If season is not given but both startdate and enddate are, every season from the one containing startdate
    through the one containing enddate is searched and a Season column is added to the result.
    In every other case (no season and at least one of the two dates missing) the function returns None.

    :param season: int, the season
    :param team: int or str, the team
    :param startdate: str, YYYY-MM-DD
    :param enddate: str, YYYY-MM-DD

    :return: dataframe, or None when the arguments do not identify a range to search
    """
    # TODO handle case when only team and startdate, or only team and enddate, are given
    if season is not None:
        sched = get_season_schedule(season).query('Status != "Scheduled"')
        if startdate is not None:
            sched = sched.query('Date >= "{0:s}"'.format(startdate))
        if enddate is not None:
            sched = sched.query('Date <= "{0:s}"'.format(enddate))
        tid = team_info.team_as_id(team)
        involves_team = (sched.Home == tid) | (sched.Road == tid)
        return sched[involves_team]

    if startdate is None or enddate is None:
        # Not enough information to pick a range (see TODO above).
        return None

    first_season = helpers.infer_season_from_date(startdate)
    last_season = helpers.infer_season_from_date(enddate)

    pieces = []
    for yr in range(first_season, last_season + 1):
        piece = get_team_schedule(yr, team)
        piece = piece.query('Status != "Scheduled"').assign(Season=yr)
        # Only the boundary seasons can contain games outside the requested dates.
        if yr == first_season:
            piece = piece.query('Date >= "{0:s}"'.format(startdate))
        if yr == last_season:
            piece = piece.query('Date <= "{0:s}"'.format(enddate))
        pieces.append(piece)
    return pd.concat(pieces)
def get_enddate_from_kwargs(**kwargs):
    """
    Resolves the end date (str, YYYY-MM-DD) implied by kwargs.

    If enddate is given it is returned as-is. Otherwise a season is picked from the first of these that is
    present: endseason, startseason, season, the season inferred from startdate, or else the current season.
    The result is June 21 of the year after that season, capped at today's date.
    """

    if 'enddate' in kwargs:
        return kwargs['enddate']

    if 'endseason' in kwargs:
        final_season = kwargs['endseason']
    elif 'startseason' in kwargs:
        final_season = kwargs['startseason']
    elif 'season' in kwargs:
        final_season = kwargs['season']
    elif 'startdate' in kwargs:
        final_season = helper.infer_season_from_date(kwargs['startdate'])
    else:
        final_season = schedules.get_current_season()

    today = datetime.datetime.now().strftime('%Y-%m-%d')
    season_close = '{0:d}-06-21'.format(final_season + 1)
    # Both values are zero-padded ISO dates, so comparing the strings compares the dates.
    return min(season_close, today)
# Example #4
# 0
def get_fline_shot_rates(team, startdate, enddate):
    """
    Gets CF/60 and CA/60 by forward line (three-player combination) for this team between given range of dates

    Time on ice and Corsi counts are fetched per season for three-player combinations, summed over all seasons
    in the range, and then restricted to combinations in which all three players have a position other than "D".

    :param team: int or str, team
    :param startdate: str, start date
    :param enddate: str, end date (inclusive)

    :return: dataframe with PlayerID1, PlayerID2, PlayerID3, CF, CA, TOI (in secs), CF60 and CA60.
        NOTE: the Season column is added before the groupby, so it is summed along with the other columns.
    """
    # TODO this method is so slow

    first_season = helper.infer_season_from_date(startdate)
    last_season = helper.infer_season_from_date(enddate)
    trio_cols = ['PlayerID1', 'PlayerID2', 'PlayerID3']

    per_season = []
    for season in range(first_season, last_season + 1):
        all_games = schedules.get_team_games(season, team, startdate, enddate)
        # NOTE(review): the ID window presumably keeps regular-season and playoff games only -- confirm.
        games = [g for g in all_games if g >= 20001 and g <= 30417]

        line_toi = combos.get_team_combo_toi(season, team, games, n_players=3)
        line_toi = line_toi.rename(columns={'Secs': 'TOI'})

        line_corsi = combos.get_team_combo_corsi(season, team, games, n_players=3)

        # Outer join so a trio present in only one of the two tables is still kept.
        merged = line_toi.merge(line_corsi, how='outer', on=trio_cols)
        per_season.append(merged.assign(Season=season))

    totals = pd.concat(per_season).groupby(trio_cols, as_index=False).sum()
    # TOI is in seconds, so scale counts to a per-60-minute rate.
    totals = totals.assign(CF60=totals.CF * 3600 / totals.TOI,
                           CA60=totals.CA * 3600 / totals.TOI)

    # Inner-join each player slot against the non-defensemen so only all-forward trios survive.
    forward_ids = players.get_player_ids_file().query('Pos != "D"')[['ID']]
    for slot in trio_cols:
        totals = totals.merge(forward_ids.rename(columns={'ID': slot}), how='inner', on=slot)

    return totals
def get_startdate_enddate_from_kwargs(**kwargs):
    """
    Returns (startdate, enddate) as YYYY-MM-DD strings resolved from kwargs.

    enddate comes from get_enddate_from_kwargs. startdate is taken from the first of these that applies:
    last_n_days before enddate, the startdate kwarg, September 15 of startseason, September 15 of season,
    or else September 15 of the season three before the one containing enddate.
    """

    enddate = get_enddate_from_kwargs(**kwargs)

    if 'last_n_days' in kwargs:
        end_dt = datetime.datetime(*map(int, enddate.split('-')))
        start_dt = end_dt - datetime.timedelta(days=kwargs['last_n_days'])
        return start_dt.strftime('%Y-%m-%d'), enddate

    if 'startdate' in kwargs:
        return kwargs['startdate'], enddate

    if 'startseason' in kwargs:
        first_season = kwargs['startseason']
    elif 'season' in kwargs:
        first_season = kwargs['season']
    else:
        first_season = helper.infer_season_from_date(enddate) - 3

    return '{0:d}-09-15'.format(first_season), enddate
# Example #6
# 0
def get_dpair_shot_rates(team, startdate, enddate):
    """
    Gets CF/60 and CA/60 by defenseman duo (5v5 only) for this team between given range of dates

    For each season in the range, head-to-head TOI and Corsi tables are fetched, reduced to pairs in which both
    players are on this team's side of the game, re-expressed from this team's perspective, and then summed per
    pair over all games and seasons. Finally only pairs where both players have position "D" are kept.

    :param team: int or str, team
    :param startdate: str, start date
    :param enddate: str, end date (inclusive)

    :return: dataframe with PlayerID1, PlayerID2, CF, CA, TOI (in secs), CF60 and CA60.
        NOTE: Game and Season are still present when the per-pair sum is taken, so they come back summed too.
    """
    startseason, endseason = [
        helper.infer_season_from_date(x) for x in (startdate, enddate)
    ]
    side_keys = ['Game', 'Team1', 'Team2']

    dflst = []
    for season in range(startseason, endseason + 1):
        games_played = schedules.get_team_games(season, team, startdate,
                                                enddate)
        # NOTE(review): the ID window presumably keeps regular-season and playoff games only -- confirm.
        games_played = [g for g in games_played if 20001 <= g <= 30417]
        toi = manip.get_game_h2h_toi(
            season, games_played).rename(columns={'Secs': 'TOI'})
        cf = manip.get_game_h2h_corsi(season, games_played,
                                      'cf').rename(columns={'HomeCorsi': 'CF'})
        ca = manip.get_game_h2h_corsi(season, games_played,
                                      'ca').rename(columns={'HomeCorsi': 'CA'})

        # TOI, CF, and CA have columns designating which team--H or R
        # Use schedule to find appropriate ones to filter for
        sch = schedules.get_team_schedule(season, team, startdate, enddate)
        sch = helper.melt_helper(sch[['Game', 'Home', 'Road']],
                                 id_vars='Game',
                                 var_name='HR',
                                 value_name='Team')
        sch = sch.query('Team == {0:d}'.format(int(
            team_info.team_as_id(team))))
        # 'Home'/'Road' -> 'H'/'R'. Built with assign rather than writing into the
        # query result via .loc, which would be a chained assignment on a filtered frame.
        side = sch.HR.apply(lambda x: x[0])
        sch = sch.assign(Team1=side, Team2=side).drop(['Team', 'HR'], axis=1)

        # Inner joins keep only rows where both players are on this team's side.
        toi = toi.merge(sch, how='inner', on=side_keys)
        cf = cf.merge(sch, how='inner', on=side_keys)
        ca = ca.merge(sch, how='inner', on=side_keys)

        # CF and CA from home perspective, so switch if necessary
        cfca = cf.merge(
            ca,
            how='outer',
            on=['Game', 'PlayerID1', 'PlayerID2', 'Team1', 'Team2'])
        # The mask must come from cfca itself: cfca is an outer merge with its own index,
        # so a mask built from cf or ca can be misaligned with it (or not alignable at all).
        is_road = cfca.Team1 == 'R'
        home_cf = cfca.CF
        home_ca = cfca.CA
        cfca = cfca.assign(CF=home_cf.where(~is_road, home_ca),
                           CA=home_ca.where(~is_road, home_cf))

        cfca = cfca.drop(['Team1', 'Team2'], axis=1)
        toi = toi.drop(['Team1', 'Team2', 'Min'], axis=1)

        joined = toi.merge(cfca, how='outer', on=['PlayerID1', 'PlayerID2', 'Game']) \
            .assign(Season=season)
        dflst.append(joined)

    df = pd.concat(dflst) \
        .groupby(['PlayerID1', 'PlayerID2'], as_index=False).sum()
    # TOI is in seconds, so scale counts to a per-60-minute rate.
    df.loc[:, 'CF60'] = df.CF * 3600 / df.TOI
    df.loc[:, 'CA60'] = df.CA * 3600 / df.TOI

    # Inner-join both player slots against the defensemen so only D-D pairs survive.
    defensemen = players.get_player_ids_file().query('Pos == "D"')[['ID']]
    df = df.merge(defensemen.rename(columns={'ID': 'PlayerID1'}), how='inner', on='PlayerID1') \
        .merge(defensemen.rename(columns={'ID': 'PlayerID2'}), how='inner', on='PlayerID2')

    return df