Exemplo n.º 1
0
def team_preprocessing():
    """
    Load the team all-situations game log and prepare it for modelling.

    Steps, in order:
      1. Read ``./data/teams_all_sits.csv``.
      2. Pass the Team, Opponent and Venue columns through
         ``helpers.fix_team`` (presumably normalises team names -- confirm
         against helpers).
      3. Keep only the columns listed in ``cols``.
      4. Build an integer ``game_id`` as ``<Season>0<Game.ID>`` and drop
         ``Game.ID``.
      5. Sort ascending by ``game_id``.
      6. Merge outcomes via ``helpers.merge_outcomes``.
      7. Keep only rows where ``Team == Venue`` (the home team's row).

    :return DataFrame
    """
    cols = [
        "Team", "Season", "Game.ID", "Date", "Opponent", "Venue", "TOI", "GF",
        'GA'
    ]

    # Get Game Data for Teams
    df = pd.read_csv("./data/teams_all_sits.csv")
    for team_col in ['Team', "Opponent", "Venue"]:
        df = helpers.fix_team(df, team_col)

    # Explicit copy: a column is added below, and that write should land on
    # an independent frame rather than on a selection of the full one.
    df = df[cols].copy()

    # Get "correct" game id: Season, a literal "0", then Game.ID, as one
    # integer. Built column-wise so no Python function runs per row; int64 is
    # spelled out so the width does not depend on the platform's default int.
    df['game_id'] = (
        df['Season'].astype(str) + "0" + df['Game.ID'].astype(str)
    ).astype('int64')
    df = df.drop(['Game.ID'], axis=1)

    # Sort by game_id...lowest is first
    df = df.sort_values(by=['game_id'])

    # Merge in game outcomes:
    df = helpers.merge_outcomes(df)

    # Only keep games from the home team perspective (Team equals Venue)
    df = df[df['Team'] == df['Venue']]

    return df
Exemplo n.º 2
0
def get_pp_data():
    """
    Load the team power play CSV and return its key columns plus the stat
    columns renamed with a "_pp" suffix.

    Reads ``../projection_data/teams/teams_pp.csv``, passes the Team column
    through ``helpers.fix_team``, orders the rows by Season, Game.ID and
    Team, and adds ``wshF`` computed as ``(CF - GF) * .2 + GF``. The index
    labels of the returned frame are converted to strings.

    :return: DataFrame of data
    """
    key_cols = ["Team", "Season", "Game.ID", "Date"]
    stat_cols = ["TOI", 'GF', 'FF', 'xGF', 'CF', 'wshF']

    raw = pd.read_csv("../projection_data/teams/teams_pp.csv")
    pp = helpers.fix_team(raw, "Team").sort_values(
        by=['Season', 'Game.ID', 'Team'])

    # wshF must exist before the column selection below
    gf = pp['GF']
    pp['wshF'] = ((pp['CF'] - gf) * .2 + gf)

    # Only the stat columns get the suffix; the key columns keep their names
    suffixed = {}
    for stat in stat_cols:
        suffixed[stat] = stat + "_pp"

    return pp[key_cols + stat_cols].rename(index=str, columns=suffixed)
Exemplo n.º 3
0
def get_all_sits_data():
    """
    Load and combine the skater All Situations JSON files.

    For every season in ``range(2007, 2018)`` and for both 'forwards' and
    'defensemen', reads
    ``../projection_data/skaters/{pos}_all_sits_{season}.json``, takes its
    ``'data'`` entry and builds a DataFrame restricted to ``cols``. The
    frames are concatenated, the team/opponent/venue columns are passed
    through ``helpers.fix_team``, ``toi_on`` is cast to float and renamed to
    ``toi_on_all``, and ``game_id`` is rewritten as the string
    ``<season>0<game_id>``. The index labels of the returned frame are
    strings (and are not unique, since the per-file indexes are kept).

    Prints one progress line per file read.

    :return: Processed All Situations DataFrame
    """
    cols = [
        'player', 'player_id', 'season', 'game_id', 'date', 'position', 'team',
        'opponent', 'venue', 'toi_on', 'goals', 'a1', 'a2', 'icors', 'iblocks',
        'pend', 'pent', 'ifac_win', 'ifac_loss', 'games'
    ]

    # Get All Data
    dfs = []
    for season in range(2007, 2018):
        for pos in ['forwards', 'defensemen']:
            print("all", pos, season)
            path = "../projection_data/skaters/{}_all_sits_{}.json".format(
                pos, season)
            # JSON text is UTF-8; state it so decoding does not depend on the
            # platform's default encoding.
            with open(path, encoding="utf-8") as file:
                dfs.append(pd.DataFrame(json.load(file)['data'], columns=cols))

    # Combine All Forward and Defensemen Data
    df = pd.concat(dfs)

    # Run every team-valued column through the shared team-name fixer
    for team_col in ['team', "opponent", "venue"]:
        df = helpers.fix_team(df, team_col)

    # Force a float dtype (presumably the JSON stores it as text or mixed
    # types -- confirm against the data files)
    df['toi_on'] = df['toi_on'].astype(float)

    # Change over some names for merging
    df = df.rename(index=str, columns={"toi_on": "toi_on_all"})

    # Get the correct game_id: season, a literal "0", then the raw game id.
    # Built column-wise so no Python function runs per row of the combined
    # frame; the result stays a string, as before.
    df['game_id'] = df['season'].astype(str) + "0" + df['game_id'].astype(str)

    return df
Exemplo n.º 4
0
def get_all_sits_data():
    """
    Load the team All Situations CSV and return the identifying columns
    together with the TOI, PENT and PEND columns renamed with an "_all"
    suffix.

    Reads ``./data/teams_all_sits.csv``, orders the rows by Season, Game.ID
    and Team, then passes the Team, Opponent and Venue columns through
    ``helpers.fix_team``. The index labels of the returned frame are
    converted to strings.

    :return: DataFrame of data
    """
    renamed_stats = {
        "TOI": "TOI_all",
        "PENT": "PENT_all",
        "PEND": "PEND_all",
    }
    wanted = [
        "Team", "Season", "Game.ID", "Date", "Opponent", "Venue", "TOI_all",
        "PENT_all", "PEND_all"
    ]

    # The sort runs on the team names as they appear in the file, i.e.
    # before fix_team touches them.
    frame = pd.read_csv("./data/teams_all_sits.csv").sort_values(
        by=['Season', 'Game.ID', 'Team'])

    for column in ('Team', "Opponent", "Venue"):
        frame = helpers.fix_team(frame, column)

    frame = frame.rename(index=str, columns=renamed_stats)

    return frame[wanted]