from multiprocessing import Pool, cpu_count

import numpy as np
import pandas as pd


def find_missing_player_observations(player_results_df):
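    """Identify games a player missed relative to his team's schedule.

    Work is split across a multiprocessing pool; `chunks`,
    `create_team_lagged_games`, and `identify_first_sequence_games_missed`
    are helpers assumed to be defined elsewhere in this module.
    """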
    player_gid = player_results_df[['player', 'gid', 'team', 'year',
                                    'wk']].rename(columns={'gid': 'gid_player'})

    team_lag_games = create_team_lagged_games(player_results_df)

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    player_gid = player_gid.sort_values(['team', 'year', 'player'], ascending=True)

    player_input_list = []
    for data_chunk in chunks(player_gid['player'].unique(), use_cpu):
        temp_player_df = player_gid[player_gid['player'].isin(data_chunk)]
        player_input_list.append([temp_player_df,
                                  team_lag_games[team_lag_games[
                                      'team'].isin(temp_player_df['team'].unique())]])

    df_pool = pool.map(identify_first_sequence_games_missed, player_input_list)
    player_missing_games = pd.concat(df_pool).drop(['gid(t-1)',
                                                    'gid_player',
                                                    'wk',
                                                    'year'], axis=1).fillna(0)

    pool.close()
    pool.join()

    return player_missing_games


def assign_lagged_out_vars(player_results_df, n_games_):
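    """Attach lagged '*_out' (player-out) indicator variables to player results.

    Lags are built per team in parallel; `chunks` and
    `create_stat_out_lag_variables` are assumed to be defined elsewhere
    in this module.
    """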

    df = player_results_df.copy()
    out_game_stats = [col for col in df.columns if '_out' in col]

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    df = df[['team', 'gid'] + out_game_stats].drop_duplicates()
    unique_teams = list(df['team'].unique())
    df = df.sort_values('gid', ascending=True)
    df = df.reset_index(drop=True)

    team_input_list = []
    for data_chunk in chunks(unique_teams, use_cpu):
        team_input_list.append([df[df['team'].isin(data_chunk)],
                                data_chunk, out_game_stats,
                                n_games_])

    df_pool = pool.map(create_stat_out_lag_variables, team_input_list)
    df_concat = pd.concat(df_pool)

    pool.close()
    pool.join()

    player_results_df = pd.merge(player_results_df,
                                 df_concat,
                                 on=['team', 'gid'],
                                 how='left')

    return player_results_df


# Example #3

def calculate_opp_lags(def_sos_merged, n_games_):
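    """Build lagged opponent (defensive strength-of-schedule) variables.

    Teams are processed in parallel chunks; `chunks` and
    `create_stat_defopp_lag_variables` are assumed to be defined elsewhere
    in this module.
    """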

    def_sos_index_cols = ['year', 'wk', 'team']
    def_sos_cols = [col for col in def_sos_merged.columns
                    if col not in def_sos_index_cols]

    # NOTE: a column-scaling step (scale_cols over def_sos_cols) was
    # disabled here.

    df = def_sos_merged.copy()

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    df = df.sort_values(['year', 'wk'], ascending=True)
    unique_teams = list(df['team'].unique())

    team_input_list = []
    for data_chunk in chunks(unique_teams, use_cpu):
        team_input_list.append([
            df[df['team'].isin(data_chunk)], data_chunk, def_sos_cols,
            def_sos_index_cols, n_games_
        ])

    df_pool = pool.map(create_stat_defopp_lag_variables, team_input_list)
    df_concat = pd.concat(df_pool)

    pool.close()
    pool.join()

    # prefix every stat column (everything past the three leading index
    # columns) so the opponent-lag features are identifiable downstream
    def_cols = df_concat.columns[3:]
    df_concat = df_concat.rename(
        columns={col: 'opp_var_' + col for col in def_cols})

    return df_concat


# Example #4

def identify_nl_plays_pool(pbp_df):
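    """Resolve plays whose location ('loc') is coded 'NL' (not listed).

    The flagged rows are re-mapped in parallel via `identify_nl_plays` and
    the module-level `detail_loc_mapping`, both assumed to be defined
    elsewhere in this module.
    """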

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    nl_index = pbp_df[pbp_df['loc'] == 'NL'].index

    loc_nl_list = []
    for nl_chunk in chunks(nl_index, use_cpu):
        loc_nl_list.append([pbp_df[pbp_df.index.isin(nl_chunk)],
                            detail_loc_mapping])

    df_pool = pool.map(identify_nl_plays, loc_nl_list)
    df_concat = pd.concat(df_pool).fillna(0)

    pool.close()
    pool.join()

    # stack the untouched rows back together with the re-mapped 'NL' rows
    pbp_df = pd.concat((pbp_df[~pbp_df.index.isin(nl_index)],
                        df_concat), axis=0).sort_index()

    return pbp_df


# Example #5

def normalize_def_stats(def_sos_merged, def_cols, year_wk_dict):
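    """Normalize defensive stats by location, one (year, week) chunk per worker.

    `chunks` and `normalize_locs` are assumed to be defined elsewhere in
    this module.
    """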

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    def_sos_normalized_input = []
    for year_wk_chunk in chunks(year_wk_dict.keys(), use_cpu):
        yr_wk_values = [year_wk_dict[a] for a in year_wk_chunk]
        year_wk_chunk_dict = dict(zip(year_wk_chunk, yr_wk_values))
        def_sos_normalized_input.append([
            def_sos_merged[def_cols + ['team', 'wk', 'year']],
            year_wk_chunk_dict
        ])

    df_pool = pool.map(normalize_locs, def_sos_normalized_input)
    df_concat = pd.concat(df_pool)  # .fillna(0)

    pool.close()
    pool.join()

    return df_concat


# Example #6

def calculate_def_window_sos_pool(pbp_data_team_def, year_wk_dict,
                                  team_def_stats):
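    """Compute windowed defensive strength-of-schedule stats in parallel.

    Each worker handles a chunk of (year, week) keys; `chunks` and
    `calculate_def_window_sos` are assumed to be defined elsewhere in this
    module.
    """
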
    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    def_sos_input = []
    for year_wk_chunk in chunks(year_wk_dict.keys(), use_cpu):
        yr_wk_values = [year_wk_dict[a] for a in year_wk_chunk]
        year_wk_chunk_dict = dict(zip(year_wk_chunk, yr_wk_values))
        # yr_list = list(np.unique([a.split('_')[0] for a in year_wk_chunk]))

        def_sos_input.append(
            [pbp_data_team_def, year_wk_chunk_dict, team_def_stats])

    df_pool = pool.map(calculate_def_window_sos, def_sos_input)
    df_concat = pd.concat(df_pool).fillna(0)

    pool.close()
    pool.join()

    return df_concat


# Example #7

def calculate_def_ftp_window_sos_pool(pbp_data,
                                      player_results_df,
                                      year_wk_dict,
                                      ftps_stats,
                                      groupby_off_cols,
                                      index_cols,
                                      rename_cols,
                                      start_yr=2010):
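    """Compute and normalize windowed defensive fantasy-point SOS features.

    Play-by-play fantasy-point differentials are aggregated to the
    team/opponent/location level, then run through two parallel passes:
    `calculate_def_ftp_window_sos` followed by `normalize_locs` (both
    assumed to be defined elsewhere in this module).
    """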

    posd_cols = [col for col in player_results_df.columns if 'posd_' in col]

    pbp_data_team = pd.merge(
        pbp_data[['gid', 'seas_wk', 'player', 'loc', 'dk_diff', 'fd_diff']],
        player_results_df[['gid', 'player', 'team', 'opp'] + posd_cols],
        on=['gid', 'player'],
        how='left').groupby(['gid', 'seas_wk', 'loc', 'team',
                             'opp'])[['dk_diff',
                                      'fd_diff']].sum().reset_index()

    pbp_data_team = pbp_data_team[(pbp_data_team['seas_wk'] >= start_yr - 1)]
    # split seas_wk (e.g. 2010.12) into integer year and week columns;
    # right-padding restores the trailing zero that float formatting drops
    # (week 10 prints as 2010.1 and must be read back as '2010.10')
    pbp_data_team[['year', 'wk']] = (
        pbp_data_team['seas_wk'].astype(str)
        .str.pad(7, side='right', fillchar='0')
        .str.split('.', expand=True).astype(int))

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    def_sos_input = []
    for year_wk_chunk in chunks(year_wk_dict.keys(), use_cpu):
        yr_wk_values = [year_wk_dict[a] for a in year_wk_chunk]
        year_wk_chunk_dict = dict(zip(year_wk_chunk, yr_wk_values))
        #yr_list = list(np.unique([a.split('_')[0] for a in year_wk_chunk]))

        def_sos_input.append([
            pbp_data_team, year_wk_chunk_dict, ftps_stats, groupby_off_cols,
            index_cols, rename_cols, posd_cols
        ])

    df_pool = pool.map(calculate_def_ftp_window_sos, def_sos_input)
    df_concat = pd.concat(df_pool)  # .fillna(0)

    pool.close()
    pool.join()

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    def_sos_normalized_input = []
    for year_wk_chunk in chunks(year_wk_dict.keys(), use_cpu):
        yr_wk_values = [year_wk_dict[a] for a in year_wk_chunk]
        year_wk_chunk_dict = dict(zip(year_wk_chunk, yr_wk_values))
        def_sos_normalized_input.append([df_concat, year_wk_chunk_dict])

    df_pool = pool.map(normalize_locs, def_sos_normalized_input)
    df_concat = pd.concat(df_pool)  # .fillna(0)

    pool.close()
    pool.join()

    return df_concat


# Example #8

def calculate_exp_points(pbp_data, n_periods_):
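    """Compute expected fantasy points by play state, smooth, and aggregate.

    Expected DraftKings/FanDuel points are estimated per (down,
    yards-to-go, field-position) state in parallel, merged back onto the
    play-by-play data, smoothed across seasons, and summed to the
    player/game level. `chunks`, `calc_exp_state_pts_by_year`, and
    `smooth_exp_states_values` are assumed to be defined elsewhere in this
    module.
    """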

    use_cpu = cpu_count()
    pool = Pool(use_cpu)

    unique_states = pbp_data[['dwn', 'ytg',
                              'yfog']].drop_duplicates().reset_index(drop=True)

    unique_states = unique_states.sort_values('dwn', ascending=False)
    unique_states_list = []

    for state_chunk in chunks(unique_states, use_cpu):
        dwn_list = list(state_chunk['dwn'].unique())
        unique_states_list.append([pbp_data[(pbp_data['dwn'].isin(dwn_list)) &
                                            (pbp_data['dk_ftps_field_bin'] >= -.10)],
                                   state_chunk, n_periods_])

    df_pool = pool.map(calc_exp_state_pts_by_year, unique_states_list)
    df_concat = pd.concat(df_pool).fillna(0)

    pool.close()
    pool.join()

    df_concat = pd.merge(pbp_data,
                         df_concat,
                         left_on=['seas_wk', 'dwn', 'ytg', 'yfog', 'loc'],
                         right_on=['seas_wk', 'dwn_state',
                                   'ytg_state', 'yfog_state', 'loc'],
                         how='left')

    df_concat.rename(columns={'expected_dk_ftps_field_bin': 'dk_expected',
                              'expected_fd_ftps_field_bin': 'fd_expected',
                              'dk_ftps_field_bin': 'dk_actual',
                              'fd_ftps_field_bin': 'fd_actual',
                              'value': 'player',
                              }, inplace=True)

    df_concat = df_concat[df_concat['seas_wk'] >
                          (2000 + (n_periods_ / 21)) - 2]

    df_concat.loc[df_concat['dk_expected'] < 0, 'dk_expected'] = 0

    seas_list = list(df_concat['seas_wk'].unique())
    unique_states_agg = unique_states.drop('yfog', axis=1)
    unique_states_agg.loc[unique_states_agg['ytg'] > 15., 'ytg'] = 15.
    unique_states_agg = unique_states_agg.drop_duplicates().reset_index(drop=True)

    smooth_exp_state_pts = []
    for seas_chunk in chunks(seas_list[21:], use_cpu):
        # assumed intent: give each worker its seasons plus a trailing
        # 21-period window of history for smoothing
        min_seas_idx = seas_list.index(np.min(seas_chunk))
        smooth_exp_state_pts.append(
            [df_concat[df_concat['seas_wk'] >= seas_list[min_seas_idx - 21]],
             unique_states_agg, seas_chunk, seas_list])

    pool = Pool(use_cpu)

    df_pool = pool.map(smooth_exp_states_values, smooth_exp_state_pts)
    df_concat = pd.concat(df_pool)

    pool.close()
    pool.join()

    df_concat.loc[:, 'dk_diff'] = (df_concat['dk_actual'] -
                                   df_concat['dk_expected_smoothed'])
    df_concat.loc[:, 'fd_diff'] = (df_concat['fd_actual'] -
                                   df_concat['fd_expected_smoothed'])

    df_concat = df_concat.drop(['yfog_state', 'ytg_state', 'dwn_state',
                                'dk_expected', 'fd_expected',
                                'dk_expected_mean', 'fd_expected_mean'], axis=1)

    df_concat.rename(columns={'dk_expected_smoothed': 'dk_expected',
                              'fd_expected_smoothed': 'fd_expected'}, inplace=True)

    df_concat = df_concat.groupby(['player', 'gid',
                                   'seas_wk', 'loc'])[['dk_expected', 'fd_expected',
                                                       'dk_actual', 'fd_actual',
                                                       'dk_diff', 'fd_diff']].sum().reset_index()

    return df_concat
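

# NOTE: every pool-builder above relies on a `chunks` helper that is not
# shown in this listing. A minimal sketch consistent with how it is called
# (splitting a DataFrame, array, list, or dict-keys view into `n` roughly
# equal pieces for the worker pool) -- an assumption, not the original
# implementation:
def chunks(data, n):
    if isinstance(data, pd.DataFrame):
        # np.array_split partitions a DataFrame row-wise into n pieces
        return np.array_split(data, n)
    data = list(data)
    size = -(-len(data) // n)  # ceiling division
    return [data[i:i + size] for i in range(0, len(data), size)]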