def retrieve_data():
    """Build the training features and target for the 'pts_scored' model.

    Four feature collections ('defensive_stats', 'offensive_stats',
    'possessions', 'target') are pulled, passed through ``hfa_patch`` and
    inner-joined on their index. Offensive columns that also appear in the
    (already patched) defensive frame are dropped before patching. The
    joined frame is restricted to the index values returned by
    ``pull_data.pull_train_index`` and to rows that also exist in the points
    data, then joined with the points data.

    Returns:
        tuple: ``(X, Y)`` where ``X`` holds the hard-coded model feature
        columns and ``Y`` is the 'pts' column, both taken from the same
        frame after ``reset_index()``.
    """
    target_name = 'pts_scored'
    points = pull_data.pull_pts('offensive', update_dbs.mysql_client())

    # NOTE(review): ``mongodb_client`` is passed uncalled while
    # ``mysql_client`` is called -- presumably pull_model_features invokes
    # it itself; confirm against pull_data.
    frames = {}
    for collection in ('defensive_stats', 'offensive_stats', 'possessions',
                       'target'):
        frame = pull_data.pull_model_features(target_name, collection,
                                              update_dbs.mongodb_client)
        if collection == 'offensive_stats':
            # Keep only the columns the defensive frame does not provide.
            defensive_columns = set(frames['defensive_stats'])
            frame = frame[[col for col in frame
                           if col not in defensive_columns]]
        frames[collection] = hfa_patch(frame, update_dbs.mysql_client())

    features = (frames['defensive_stats']
                .join(frames['offensive_stats'], how='inner')
                .join(frames['possessions'], how='inner')
                .join(frames['target'], how='inner'))
    # The per-collection frames are no longer needed once joined.
    del frames, frame

    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    in_training = features.index.isin(train_index)
    features = features.loc[in_training]

    # Keep only rows that have a points entry, without adding its columns.
    feature_columns = list(features)
    features = features.join(points, how='inner')[feature_columns]

    data = features.join(points, how='inner').reset_index()
    Y = data['pts']
    X = data[[
        'expected_pts_pg_for',
        '75_g_HAspread_for_floor-percentage',
        'pregame_pts_pg_for',
        'expected_poss_pg_for',
        'expected_ppp_for',
        '50_game_avg_15_g_HAweight_allow_assist--per--turnover-ratio',
        '75_g_HAspread_allow_points-per-game',
        '100_g_HAspread_allow_block-pct',
        'pregame_poss_pg_for',
        '10_game_avg_30_g_HAweight_allow_personal-foul-pct',
        'expected_turnovers-per-possession_for',
        'expected_offensive-rebounding-pct_for',
        '30_g_HAspread_for_floor-percentage',
        'expected_ftm-per-100-possessions_for',
        'expected_effective-field-goal-pct_for',
        'pregame_effective-field-goal-pct_for',
        '100_g_HAspread_allow_assist--per--turnover-ratio',
        '30_g_HAspread_allow_floor-percentage',
        '10_game_avg_30_g_HAweight_allow_two-point-rate',
        '5_game_avg_50_g_HAweight_for_points-per-game`/`possessions-per-game',
        '10_game_avg_50_g_Tweight_for_effective-field-goal-pct',
        '30_game_avg_5_g_Tweight_for_points-per-game`/`possessions-per-game',
    ]]
    return X, Y
# Example #2
# 0
def raw_data():
    """Build a frame of team features, negated opponent features and points.

    Steps:
      1. Pull, patch and inner-join the four feature collections for
         'pts_scored', keep only rows whose index is in
         ``validation_index`` and that have a points entry (team side).
      2. Do the same pull/patch/join for 'pts_allowed' (opponent side,
         no index filtering).
      3. Read the ``gamedata`` table and map each ``str(date) + team`` key
         to ``str(date) + opponent`` (spaces replaced by underscores), then
         re-key the opponent frame with that mapping.
      4. Multiply the opponent frame by -1 and prefix its columns with '-'.
      5. Join opponent, team and points data; turn +/-inf and the string
         'NULL' into NaN and drop every row containing a NaN.

    Returns:
        pandas.DataFrame: the cleaned, combined frame.

    Raises:
        KeyError: if an opponent-frame index value has no entry in the
            ``gamedata`` mapping.
    """
    collections = ('defensive_stats', 'offensive_stats', 'possessions',
                   'target')

    def joined_features(target_name):
        """Pull, patch and inner-join the feature collections."""
        # NOTE(review): ``mongodb_client`` is passed uncalled while
        # ``mysql_client`` is called -- presumably pull_model_features
        # invokes it itself; confirm against pull_data.
        frames = {}
        for collection in collections:
            frame = pull_data.pull_model_features(target_name, collection,
                                                  update_dbs.mongodb_client)
            if collection == 'offensive_stats':
                # Keep only columns the defensive frame does not provide.
                defensive_columns = set(frames['defensive_stats'])
                frame = frame[[col for col in frame
                               if col not in defensive_columns]]
            frames[collection] = hfa_patch(frame, update_dbs.mysql_client())
        return (frames['defensive_stats']
                .join(frames['offensive_stats'], how='inner')
                .join(frames['possessions'], how='inner')
                .join(frames['target'], how='inner'))

    # NOTE(review): ``validation_index`` is not defined in this function;
    # it is assumed to be a module-level name -- confirm it exists.
    team_features = joined_features('pts_scored')
    team_features = team_features.loc[
        team_features.index.isin(validation_index)]
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    # Keep only rows that have a points entry, without adding its columns.
    team_data = team_features.join(y_data, how='inner')[list(team_features)]

    opponent_data = joined_features('pts_allowed')

    # NOTE(review): the connection itself is left open because SOURCE does
    # not show whether mysql_client() returns a fresh or shared connection.
    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()
    try:
        cursor.execute('SELECT * from gamedata;')
        rows = cursor.fetchall()
    finally:
        # Always release the cursor, even if the query fails.
        cursor.close()

    switch = pd.DataFrame(
        rows, columns=['teamname', 'date', 'opponent', 'location'])
    # Map each team's game key to the key of its opponent in the same game.
    idx_switch = {
        str(date) + team.replace(' ', '_'):
            str(date) + opponent.replace(' ', '_')
        for team, date, opponent, _location
        in switch.itertuples(index=False, name=None)
    }

    # Re-key opponent rows so they line up with the team they played.
    opponent_data['idx'] = [idx_switch[key] for key in opponent_data.index]
    opponent_data = opponent_data.set_index('idx')
    opponent_data *= -1
    opponent_data = opponent_data.rename(columns=lambda name: '-' + name)

    data = opponent_data.join(team_data)
    data = data.join(y_data, how='inner')
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.replace('NULL', np.nan)
    return data.dropna(how='any')