def update(name, data):  # name, data = 'offensive_preds', update_df
    """Bulk-insert the rows of ``data`` into MySQL table ``name``.

    ``data`` is a DataFrame whose index strings encode a date in the first
    10 characters and an underscore-separated team name in the remainder
    (e.g. ``'2018-01-01Duke_Blue_Devils'``).  Rows are written as multi-row
    ``INSERT`` statements, committed in batches of 500.

    Fixes vs. the original:
      * the final flush no longer runs on an empty batch (which produced
        the invalid statement ``INSERT INTO t VALUES ;``);
      * the bare ``except: pass`` is narrowed to ``Exception`` and the
        error is printed instead of silently discarded.

    NOTE(review): values are interpolated into the SQL string rather than
    parameterized; safe only because ``data`` comes from our own pipeline.
    """
    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()

    def _flush(rows):
        # Execute one multi-row INSERT and commit.  FK checks are toggled
        # off around the insert, mirroring the original behavior.
        statement = 'INSERT INTO %s VALUES %s;' % (name, ', '.join(rows))
        cursor.execute('SET foreign_key_checks = 0;')
        try:
            cursor.execute(statement)
            cnx.commit()
        except Exception as exc:
            # Best-effort insert: report the failure but keep going.
            print('insert into %s failed: %s' % (name, exc))
        cursor.execute('SET foreign_key_checks = 1;')

    batch = []
    for idx, entry in zip(list(data.index), np.array(data)):
        # idx[:10] is the game date, the rest is the team name with
        # underscores standing in for spaces.
        date = '"' + idx[:10] + '"'
        tname = '"' + idx[10:].replace('_', ' ') + '"'
        values = [tname, date] + [str(each) for each in entry]
        batch.append('(' + ', '.join(values) + ')')
        if len(batch) == 500:
            _flush(batch)
            batch = []
    if batch:  # flush the final partial batch, if any
        _flush(batch)
def retrieve_data():
    """Build the offensive points-scored training set.

    Pulls defensive/offensive/possession/target feature frames from
    MongoDB, applies the home/away sign patch to each, inner-joins them,
    restricts to the training index, and returns ``(X, Y)`` where ``X``
    is the hand-picked feature subset and ``Y`` is the ``'pts'`` column.

    NOTE(review): relies on module-level ``pull_data``, ``update_dbs`` and
    ``hfa_patch`` being in scope — confirm against the enclosing file.
    """
    y_val = 'pts_scored'
    # Target: points scored by the offense.
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    def_data = pull_data.pull_model_features(y_val, 'defensive_stats', update_dbs.mongodb_client)
    def_data = hfa_patch(def_data, update_dbs.mysql_client())
    off_data = pull_data.pull_model_features(y_val, 'offensive_stats', update_dbs.mongodb_client)
    # Drop offensive columns that duplicate defensive ones before joining.
    off_feats = [i for i in list(off_data) if i not in list(def_data)]
    off_data = off_data[off_feats]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())
    poss_data = pull_data.pull_model_features(y_val, 'possessions', update_dbs.mongodb_client)
    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
    tar_data = pull_data.pull_model_features(y_val, 'target', update_dbs.mongodb_client)
    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
    # Inner-join all feature groups on the shared date+team index.
    x_data = def_data.join(off_data, how='inner')
    x_data = x_data.join(poss_data, how='inner')
    x_data = x_data.join(tar_data, how='inner')
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    x_data = x_data.loc[x_data.index.isin(train_index)]
    # Keep only rows that have a target value (join then reselect columns).
    x_data = x_data.join(y_data, how='inner')[list(x_data)]
    # Release intermediate frames.
    def_data = None
    off_data = None
    poss_data = None
    tar_data = None
    data = x_data.join(y_data, how='inner')
    data = data.reset_index()
    Y = data['pts']
    # Hand-selected feature list for the offensive points model.
    x_feats = [
        'expected_pts_pg_for', '75_g_HAspread_for_floor-percentage',
        'pregame_pts_pg_for', 'expected_poss_pg_for', 'expected_ppp_for',
        '50_game_avg_15_g_HAweight_allow_assist--per--turnover-ratio',
        '75_g_HAspread_allow_points-per-game',
        '100_g_HAspread_allow_block-pct', 'pregame_poss_pg_for',
        '10_game_avg_30_g_HAweight_allow_personal-foul-pct',
        'expected_turnovers-per-possession_for',
        'expected_offensive-rebounding-pct_for',
        '30_g_HAspread_for_floor-percentage',
        'expected_ftm-per-100-possessions_for',
        'expected_effective-field-goal-pct_for',
        'pregame_effective-field-goal-pct_for',
        '100_g_HAspread_allow_assist--per--turnover-ratio',
        '30_g_HAspread_allow_floor-percentage',
        '10_game_avg_30_g_HAweight_allow_two-point-rate',
        '5_game_avg_50_g_HAweight_for_points-per-game`/`possessions-per-game',
        '10_game_avg_50_g_Tweight_for_effective-field-goal-pct',
        '30_game_avg_5_g_Tweight_for_points-per-game`/`possessions-per-game'
    ]
    X = data[x_feats]
    return X, Y
def save():
    """Fit and persist the classification models (winner / line / ou).

    For every stored model spec in ``saved_models.stored_models`` whose
    pickle is not yet on disk, fits the scaler and model on the training
    data and dumps both to ``model_storage``.

    NOTE(review): ``hfa_patch`` and ``raw_data`` are reconstructed here as
    nested helpers because the surrounding data flow is continuous —
    confirm nesting against the original file layout.
    """
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    random.seed(86)
    random.shuffle(train_index)

    def hfa_patch(x, cnx):
        # Flip the sign of home/away-spread features so home rows are
        # positive and away rows negative, using the odds table to decide
        # which side of each game was at home.
        print('Running HFA Patch')
        keep_stats = []
        patch_stats = []
        for stat in list(x):
            try:
                # Columns containing '_HAspread_' need the sign patch;
                # split(...)[1] raises IndexError for the rest.
                stat.split('_HAspread_')[1]
                patch_stats.append(stat)
            except IndexError:
                keep_stats.append(stat)
        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
        cursor.close()
        loc_adj = {}
        # location == 0 means t1 (favorite) was home; build a +1/-1 flag
        # keyed by date+teamname.
        for d, t1, t2, l in np.array(patch):
            if l == 0:
                loc_adj[str(d) + t1.replace(' ', '_')] = 1
                loc_adj[str(d) + t2.replace(' ', '_')] = -1
            else:
                loc_adj[str(d) + t1.replace(' ', '_')] = -1
                loc_adj[str(d) + t2.replace(' ', '_')] = 1
        patch = None
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0), how='left')
        # Negate away rows, keep home rows, then drop the flag column (1).
        away_data = patch_data[patch_data[1] == -1]
        away_data *= -1
        home_data = patch_data[patch_data[1] == 1]
        patch_data = home_data.append(away_data)
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        # Assemble the full raw feature matrix: team-side features joined
        # with the opponent's features (sign-flipped, '-'-prefixed) and
        # the points target, restricted to the training index.
        def_data = pull_data.pull_model_features('pts_scored', 'defensive_stats', update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_scored', 'offensive_stats', update_dbs.mongodb_client)
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_scored', 'possessions', update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_scored', 'target', update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        x_data = x_data.join(tar_data, how='inner')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None
        # Same pull for the 'pts_allowed' side -> opponent features.
        def_data = pull_data.pull_model_features('pts_allowed', 'defensive_stats', update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_allowed', 'offensive_stats', update_dbs.mongodb_client)
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_allowed', 'possessions', update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_allowed', 'target', update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        opponent_data = x_data.join(tar_data, how='inner')
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None
        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        query = 'SELECT * from gamedata;'
        cursor.execute(query)
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
        idx_switch = {}
        # Map each team's game index to its opponent's index for that game.
        for t, d, o, l in np.array(switch):
            idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_')
        idx = []
        for idxx in opponent_data.index:
            idx.append(idx_switch[idxx])
        opponent_data['idx'] = idx
        opponent_data = opponent_data.set_index('idx')
        # Opponent features are negated and prefixed with '-'.
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i for i in list(opponent_data)})
        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())
    # Feature matrices per target ('winner'/'line'/'ou') and per feature
    # kind ('+pts' = score-based, 'raw' = full raw feature matrix).
    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }
    # Matching target vectors (same joins, selecting the label column).
    all_y_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner')['outcome'],
            'raw': raw_x.join(y_wl, how='inner')['outcome'],
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner')['line'],
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner')['line'],
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner')['ou'],
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')['ou'],
        },
    }
    # Free the source frames; only the joined dicts are needed below.
    raw_x = None
    x_score = None
    y_wl = None
    x_ou = None
    y_ou = None
    y_line = None
    x_line = None
    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    # Keras models are handled elsewhere.
                    continue
                if not os.path.isfile(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name))):
                    print('...storing %s' % (model_name))
                    model = model_details['model']
                    scale = model_details['scale']
                    # Fit scaler first, persist it, then fit the model on
                    # the scaled features.
                    scale.fit(
                        all_x_data[sort][kind][model_details['features']])
                    joblib.dump(
                        scale,
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))
                    model.fit(
                        scale.transform(
                            all_x_data[sort][kind][model_details['features']]),
                        np.ravel(all_y_data[sort][kind]))
                    joblib.dump(
                        model,
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name)))
                    print('Stored %s' % (model_name))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
def save():
    """Fit and persist the regression models (pace / ppp / share).

    Part 1: for offense/defense x pace/ppp, fit scaler + model on training
    rows and dump both as ``<y>_<x>_regression_{scaler,model}.pkl``.
    Part 2: fit the 'share' models ('+pts' and 'keras') on the raw feature
    matrix joined with the share data.

    NOTE(review): ``hfa_patch`` and ``raw_data`` are reconstructed as
    nested helpers because the surrounding data flow is continuous —
    confirm nesting against the original file layout.
    """
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    for x_vals in ['offense', 'defense']:
        for y_val in ['pace', 'ppp']:
            if y_val == 'ppp':
                data = pull_data.ppp(update_dbs.mysql_client(), x_vals)
                y_data = data[[y_val]]
                x_feats = list(data)
                x_feats.remove(y_val)
                x_data = data[x_feats]
                data = x_data.join(y_data, how='inner')
                data = data.loc[data.index.isin(train_index)]
                x_data = data[x_feats]
                y_data = data[[y_val]]
            elif y_val == 'pace':
                # Pace target lives in the 'possessions' column.
                data = pull_data.pace(update_dbs.mysql_client(), x_vals)
                y_data = data[['possessions']]
                x_feats = list(data)
                x_feats.remove('possessions')
                x_data = data[x_feats]
                data = x_data.join(y_data, how='inner')
                data = data.loc[data.index.isin(train_index)]
                x_data = data[x_feats]
                y_data = data[['possessions']]
            if not os.path.isfile(
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_model.pkl' % (y_val, x_vals))):
                print('Loading %s_%s' % (x_vals, y_val))
                model = saved_models.stored_models[x_vals][y_val]['model']
                scale = saved_models.stored_models[x_vals][y_val]['scale']
                scale.fit(x_data[saved_models.stored_models[x_vals][y_val]
                                 ['features']])
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
                model.fit(
                    scale.transform(x_data[saved_models.stored_models[x_vals]
                                           [y_val]['features']]),
                    np.ravel(y_data))
                joblib.dump(
                    model,
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_model.pkl' % (y_val, x_vals)))
                print('Stored %s_%s' % (x_vals, y_val))

    def hfa_patch(x, cnx):
        # Flip home/away-spread feature signs using the odds table's
        # home/away flag (see '_HAspread_' columns).
        print('Running HFA Patch')
        keep_stats = []
        patch_stats = []
        for stat in list(x):
            try:
                stat.split('_HAspread_')[1]
                patch_stats.append(stat)
            except IndexError:
                keep_stats.append(stat)
        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
        cursor.close()
        loc_adj = {}
        # location == 0: favorite was home -> +1; underdog -> -1.
        for d, t1, t2, l in np.array(patch):
            if l == 0:
                loc_adj[str(d) + t1.replace(' ', '_')] = 1
                loc_adj[str(d) + t2.replace(' ', '_')] = -1
            else:
                loc_adj[str(d) + t1.replace(' ', '_')] = -1
                loc_adj[str(d) + t2.replace(' ', '_')] = 1
        patch = None
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0), how='left')
        away_data = patch_data[patch_data[1] == -1]
        away_data *= -1
        home_data = patch_data[patch_data[1] == 1]
        patch_data = home_data.append(away_data)
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        # Team-side features + negated opponent features + points target,
        # restricted to the training index; see comments inline.
        def_data = pull_data.pull_model_features('pts_scored', 'defensive_stats', update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_scored', 'offensive_stats', update_dbs.mongodb_client)
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_scored', 'possessions', update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_scored', 'target', update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        x_data = x_data.join(tar_data, how='inner')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None
        def_data = pull_data.pull_model_features('pts_allowed', 'defensive_stats', update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_allowed', 'offensive_stats', update_dbs.mongodb_client)
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_allowed', 'possessions', update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_allowed', 'target', update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        opponent_data = x_data.join(tar_data, how='inner')
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None
        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        query = 'SELECT * from gamedata;'
        cursor.execute(query)
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
        idx_switch = {}
        # team-game index -> opponent-game index.
        for t, d, o, l in np.array(switch):
            idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_')
        idx = []
        for idxx in opponent_data.index:
            idx.append(idx_switch[idxx])
        opponent_data['idx'] = idx
        opponent_data = opponent_data.set_index('idx')
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i for i in list(opponent_data)})
        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    data = raw_data()
    x_data_stable = pull_data.share(update_dbs.mysql_client())
    data = data.join(x_data_stable, how='inner')
    data = data.reset_index()
    x_vals = 'share'
    for y_val in ['+pts', 'keras']:
        # NOTE(review): existence checks use (x_vals, y_val) but dumps use
        # (y_val, x_vals) — filename order is inconsistent; verify intent.
        if not os.path.isfile(
                os.path.join(model_storage,
                             '%s_%s_regression_model.pkl' %
                             (x_vals, y_val))) and not os.path.isfile(
                                 os.path.join(
                                     model_storage,
                                     '%s_%s_regression_model.h5' %
                                     (x_vals, y_val))):
            print('Loading %s_%s' % (x_vals, y_val))
            model = saved_models.stored_models[x_vals][y_val]['model']
            scale = saved_models.stored_models[x_vals][y_val]['scale']
            scale.fit(
                data[saved_models.stored_models[x_vals][y_val]['features']])
            joblib.dump(
                scale,
                os.path.join(model_storage,
                             '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
            model.fit(
                scale.transform(data[saved_models.stored_models[x_vals][y_val]
                                     ['features']]), np.ravel(data['share']))
            if y_val != 'keras':
                joblib.dump(
                    model,
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_model.pkl' % (y_val, x_vals)))
            else:
                # Keras estimators are saved as HDF5 via the wrapped model.
                model.model.save(
                    os.path.join(model_storage,
                                 '%s_%s_regression_model.h5' %
                                 (y_val, x_vals)))
            print('Stored %s_%s' % (x_vals, y_val))
away_data = patch_data[patch_data[1]==-1] away_data *= -1 home_data = patch_data[patch_data[1]==1] patch_data = home_data.append(away_data) del patch_data[1] x = patch_data.join(keep_data) print('Completed HFA Patch') return x for x_vals in ['defensive_stats', 'offensive_stats', 'full-team', 'all', 'possessions', 'target']: for y_val in ['pts_scored', 'pts_allowed']: #for each in [('all', 'pts_allowed'), ('full-team', 'pts_allowed'), ('offensive_stats', 'pts_scored'), ('all', 'pts_scored')]: #for each in [('target', 'pts_allowed'), ('target', 'pts_scored')]: # x_vals, y_val = each if x_vals == 'possessions': y_data = pull_data.pull_possessions(y_val, update_dbs.mysql_client()) x_data = pull_data.pull_model_features(y_val, x_vals, update_dbs.mongodb_client) x_data = hfa_patch(x_data, update_dbs.mysql_client()) train_index = pull_data.pull_train_index(update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how = 'inner')['possessions'] x_data = x_data.join(y_data, how = 'inner')[list(x_data)] elif x_vals in ['target', 'defensive_stats', 'offensive_stats', 'full-team', 'all']: y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client()) if x_vals == 'full-team': def_data = pull_data.pull_model_features(y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features(y_val, 'offensive_stats', update_dbs.mongodb_client)
sys.path.insert(-1, os.path.join(cur_path, 'model_conf')) sys.path.insert(-1, os.path.join(cur_path, 'db_utils')) sys.path.insert(-1, os.path.join(cur_path, 'model_tuning')) output_folder = os.path.join(cur_path, 'model_results') model_storage = os.path.join(cur_path, 'saved_models') import numpy as np import pull_data import update_dbs import random import pandas as pd import saved_models from sklearn.externals import joblib future_index = pull_data.future_idx(update_dbs.mysql_client()) random.seed(86) def hfa_patch(x, cnx): print('Running HFA Patch') keep_stats = [] patch_stats = [] for stat in list(x): try: stat.split('_HAspread_')[1] patch_stats.append(stat) except IndexError: keep_stats.append(stat) patch_data = x[patch_stats]
def raw_data():
    """Build the raw validation feature matrix.

    Joins team-side pts_scored features with negated, '-'-prefixed
    opponent (pts_allowed) features and the points target, restricted to
    ``validation_index``.

    NOTE(review): ``validation_index`` and ``hfa_patch`` are free
    variables resolved from the enclosing scope — confirm they are
    defined where this function lives.
    """
    def_data = pull_data.pull_model_features('pts_scored', 'defensive_stats', update_dbs.mongodb_client)
    def_data = hfa_patch(def_data, update_dbs.mysql_client())
    off_data = pull_data.pull_model_features('pts_scored', 'offensive_stats', update_dbs.mongodb_client)
    # Drop offensive columns that duplicate defensive ones.
    off_feats = [i for i in list(off_data) if i not in list(def_data)]
    off_data = off_data[off_feats]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())
    poss_data = pull_data.pull_model_features('pts_scored', 'possessions', update_dbs.mongodb_client)
    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
    tar_data = pull_data.pull_model_features('pts_scored', 'target', update_dbs.mongodb_client)
    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
    x_data = def_data.join(off_data, how='inner')
    x_data = x_data.join(poss_data, how='inner')
    x_data = x_data.join(tar_data, how='inner')
    # Validation rows only (unlike the training variant of this helper).
    x_data = x_data.loc[x_data.index.isin(validation_index)]
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    team_data = x_data.join(y_data, how='inner')[list(x_data)]
    def_data = None
    off_data = None
    poss_data = None
    tar_data = None
    # Same pull for pts_allowed -> opponent-side features.
    def_data = pull_data.pull_model_features('pts_allowed', 'defensive_stats', update_dbs.mongodb_client)
    def_data = hfa_patch(def_data, update_dbs.mysql_client())
    off_data = pull_data.pull_model_features('pts_allowed', 'offensive_stats', update_dbs.mongodb_client)
    off_feats = [i for i in list(off_data) if i not in list(def_data)]
    off_data = off_data[off_feats]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())
    poss_data = pull_data.pull_model_features('pts_allowed', 'possessions', update_dbs.mongodb_client)
    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
    tar_data = pull_data.pull_model_features('pts_allowed', 'target', update_dbs.mongodb_client)
    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
    x_data = def_data.join(off_data, how='inner')
    x_data = x_data.join(poss_data, how='inner')
    opponent_data = x_data.join(tar_data, how='inner')
    def_data = None
    off_data = None
    poss_data = None
    tar_data = None
    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()
    query = 'SELECT * from gamedata;'
    cursor.execute(query)
    switch = pd.DataFrame(cursor.fetchall(),
                          columns=['teamname', 'date', 'opponent', 'location'])
    idx_switch = {}
    # Map each team-game index to the opponent's index for the same game.
    for t, d, o, l in np.array(switch):
        idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_')
    idx = []
    for idxx in opponent_data.index:
        idx.append(idx_switch[idxx])
    opponent_data['idx'] = idx
    opponent_data = opponent_data.set_index('idx')
    # Opponent features are negated and '-'-prefixed.
    opponent_data *= -1
    opponent_data = opponent_data.rename(
        columns={i: '-' + i for i in list(opponent_data)})
    data = opponent_data.join(team_data)
    data = data.join(y_data, how='inner')
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.replace('NULL', np.nan)
    data = data.dropna(how='any')
    return data
sys.path.insert(-1, os.path.join(cur_path, 'model_conf')) sys.path.insert(-1, os.path.join(cur_path, 'db_utils')) sys.path.insert(-1, os.path.join(cur_path, 'model_tuning')) output_folder = os.path.join(cur_path, 'model_results') features_folder = os.path.join(cur_path, 'feature_dumps') model_storage = os.path.join(cur_path, 'saved_models') import numpy as np import pull_data import update_dbs import random import saved_models import pandas as pd from sklearn.model_selection import cross_validate, StratifiedKFold train_index = pull_data.pull_train_index(update_dbs.mysql_client()) random.seed(86) random.shuffle(train_index) derived_data = {} def hfa_patch(x, cnx): print('Running HFA Patch') keep_stats = [] patch_stats = [] for stat in list(x): try: stat.split('_HAspread_')[1] patch_stats.append(stat) except IndexError: keep_stats.append(stat)
def update():
    """Generate derived line/ou predictions for new games and store them.

    For each bet type ('line', 'ou'): find game indices missing from the
    ``<type>_preds`` table, build rolling betting features, run every
    saved model's predictions (plus PCA/TSVD components), and hand the
    assembled frame to ``add_derived.update``.

    Fix vs. the original: the saved scaler was applied with
    ``fit_transform``, which refits the scaler on the *prediction* data
    and discards the training-time scaling the saved models expect.  It
    now uses ``transform``, matching the evaluation code elsewhere in the
    project.
    """
    for x_vals in ['line', 'ou']:
        train_index = pull_data.update_idx(update_dbs.mysql_client(),
                                           '%s_preds' % (x_vals))
        if len(train_index) == 0:
            # Nothing new to predict for this bet type.
            continue
        update_df = pd.DataFrame()
        update_df['idx'] = train_index
        update_df = update_df.set_index('idx')
        y_val = 'result'
        print('Loading rolling betting stats')
        x_data_stable = vegas_watson.rolling_vegas(x_vals)
        print('... Loaded rolling betting stats')
        x_data_stable = x_data_stable.loc[x_data_stable.index.isin(
            train_index)]
        # Drop the label column; only features are fed to the models.
        x_cols = list(x_data_stable)
        x_cols.remove(y_val)
        x_data_stable = x_data_stable[x_cols]
        for model_name, model_details in saved_models.stored_models[y_val][
                x_vals].items():
            if os.path.isfile(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                print('Loading %s Values' % (model_name))
                model = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name)))
                scale = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name)))
                # BUG FIX: apply the already-fitted scaler (transform), do
                # not refit it on prediction data (fit_transform).
                preds = model.predict(
                    scale.transform(
                        x_data_stable[model_details['features']]))
                indy_pred = pd.DataFrame()
                indy_pred[model_name + '_' + x_vals] = preds
                indy_pred['idx'] = list(x_data_stable.index)
                indy_pred = indy_pred.set_index('idx')
                update_df = update_df.join(indy_pred, how='inner')
                print('Loaded %s' % (model_name))
        for model_name in ['PCA', "TSVD"]:
            if os.path.isfile(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                print('Loading %s Values' % (model_name))
                model = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name)))
                if x_vals == 'ou':
                    feats = [
                        '10_game_avg', '15_game_avg', '50_game_avg',
                        '30_game_avg', 'streak', '5_game_avg', '3_game_avg'
                    ]
                elif x_vals == 'line':
                    feats = ['10_game_avg', 'ha', 'streak', '50_game_avg']
                # NOTE(review): fit_transform refits the saved decomposition
                # on new data; kept as-is to preserve existing behavior,
                # but transform() may be intended here too — verify.
                preds = model.fit_transform(x_data_stable[feats])
                indy_pred = pd.DataFrame()
                indy_pred['idx'] = list(x_data_stable.index)
                indy_pred[model_name + '_' + x_vals] = preds
                indy_pred = indy_pred.set_index('idx')
                update_df = update_df.join(indy_pred, how='inner')
                print('Loaded %s' % (model_name))
        if x_vals == 'line':
            update_df = update_df[[
                'PCA_line', 'TSVD_line', 'lasso_line', 'lightgbm_line',
                'ridge_line'
            ]]
            add_derived.update('%s_preds' % (x_vals), update_df)
        elif x_vals == 'ou':
            update_df = update_df[[
                'PCA_ou', 'TSVD_ou', 'lasso_ou', 'lightgbm_ou', 'ridge_ou'
            ]]
            add_derived.update('%s_preds' % (x_vals), update_df)
def rolling_vegas(result):
    """Build rolling against-the-line / over-under features per team-game.

    Walks the odds table in order, maintaining per-team histories of
    over/under outcomes (``aou_data``), against-the-line outcomes
    (``atl_data``) and current streaks.  For each game where both teams
    have history, records rolling averages, home/away flag, streak and
    the game's result (+1/-1).

    :param result: 'line' or 'ou' — selects which feature frame to return.
    :returns: DataFrame indexed by date+teamname strings.
    """
    odds = pull_data.pull_odds_data(update_dbs.mysql_client())
    teams = bb_odds.teamnames
    aou_data = {}       # per-team over/under outcome history (+1/-1)
    atl_data = {}       # per-team against-the-line outcome history (+1/-1)
    aou_streak = {}     # current o/u streak (sign = direction)
    atl_streak = {}     # current ATL streak (sign = direction)
    vegas_data_line = {}
    vegas_data_ou = {}
    for team in teams:
        aou_data[team] = []
        atl_data[team] = []
        aou_streak[team] = 0
        atl_streak[team] = 0
    for date, fav, dog, line, overunder, favscore, dogscore, homeaway in np.array(
            odds[[
                'date', 'fav', 'dog', 'line', 'overunder', 'fav-score',
                'dog-score', 'ha'
            ]]):
        # --- favorite's feature row (only once it has some history) ---
        if len(aou_data[fav]) > 0 and len(atl_data[fav]) > 0:
            vegas_data_line[str(date) + fav.replace(' ', '_')] = {}
            vegas_data_ou[str(date) + fav.replace(' ', '_')] = {}
            for n_games in [3, 5, 10, 15, 30, 50]:
                vegas_data_ou[str(date) + fav.replace(' ', '_')][
                    '%s_game_avg' % (n_games)] = np.mean(
                        aou_data[fav][-n_games:])
                if n_games in [10, 50]:
                    # Line features only use the 10- and 50-game windows.
                    vegas_data_line[str(date) + fav.replace(' ', '_')][
                        '%s_game_avg' % (n_games)] = np.mean(
                            atl_data[fav][-n_games:])
            # homeaway == 0 means the favorite was home.
            if homeaway == 0:
                vegas_data_line[str(date) + fav.replace(' ', '_')]['ha'] = 1
            elif homeaway == 1:
                vegas_data_line[str(date) + fav.replace(' ', '_')]['ha'] = 0
            vegas_data_ou[str(date) +
                          fav.replace(' ', '_')]['streak'] = aou_streak[fav]
            vegas_data_line[str(date) +
                            fav.replace(' ', '_')]['streak'] = atl_streak[fav]
            # Labels: over hit (+1) / under (-1); favorite covered (+1) or
            # not (-1).  Note: line is stored such that fav covers when
            # favscore + line > dogscore.
            if favscore + dogscore > overunder:
                vegas_data_ou[str(date) + fav.replace(' ', '_')]['result'] = 1
            else:
                vegas_data_ou[str(date) + fav.replace(' ', '_')]['result'] = -1
            if favscore + line > dogscore:
                vegas_data_line[str(date) +
                                fav.replace(' ', '_')]['result'] = 1
            else:
                vegas_data_line[str(date) +
                                fav.replace(' ', '_')]['result'] = -1
        # --- underdog's feature row (mirror of the favorite's) ---
        if len(aou_data[dog]) > 0 and len(atl_data[dog]) > 0:
            vegas_data_line[str(date) + dog.replace(' ', '_')] = {}
            vegas_data_ou[str(date) + dog.replace(' ', '_')] = {}
            for n_games in [3, 5, 10, 15, 30, 50]:
                vegas_data_ou[str(date) + dog.replace(' ', '_')][
                    '%s_game_avg' % (n_games)] = np.mean(
                        aou_data[dog][-n_games:])
                if n_games in [10, 50]:
                    vegas_data_line[str(date) + dog.replace(' ', '_')][
                        '%s_game_avg' % (n_games)] = np.mean(
                            atl_data[dog][-n_games:])
            if homeaway == 0:
                vegas_data_line[str(date) + dog.replace(' ', '_')]['ha'] = 0
            elif homeaway == 1:
                vegas_data_line[str(date) + dog.replace(' ', '_')]['ha'] = 1
            vegas_data_ou[str(date) +
                          dog.replace(' ', '_')]['streak'] = aou_streak[dog]
            vegas_data_line[str(date) +
                            dog.replace(' ', '_')]['streak'] = atl_streak[dog]
            if favscore + dogscore > overunder:
                vegas_data_ou[str(date) + dog.replace(' ', '_')]['result'] = 1
            else:
                vegas_data_ou[str(date) + dog.replace(' ', '_')]['result'] = -1
            # ATL label is inverted for the underdog.
            if favscore + line > dogscore:
                vegas_data_line[str(date) +
                                dog.replace(' ', '_')]['result'] = -1
            else:
                vegas_data_line[str(date) +
                                dog.replace(' ', '_')]['result'] = 1
        # --- update rolling histories and streaks with this game ---
        if dogscore + favscore < overunder:
            # Under: both teams log -1; streaks flip or extend downward.
            aou_data[fav].append(-1)
            aou_data[dog].append(-1)
            if aou_streak[fav] > 0:
                aou_streak[fav] = -1
            else:
                aou_streak[fav] -= 1
            if aou_streak[dog] > 0:
                aou_streak[dog] = -1
            else:
                aou_streak[dog] -= 1
        elif dogscore + favscore > overunder:
            # Over: both teams log +1.  (Pushes change nothing.)
            aou_data[fav].append(1)
            aou_data[dog].append(1)
            if aou_streak[fav] < 0:
                aou_streak[fav] = 1
            else:
                aou_streak[fav] += 1
            if aou_streak[dog] < 0:
                aou_streak[dog] = 1
            else:
                aou_streak[dog] += 1
        if (favscore - dogscore) + line > 0:
            # Favorite covered the line.
            atl_data[fav].append(1)
            atl_data[dog].append(-1)
            if atl_streak[fav] < 0:
                atl_streak[fav] = 1
            else:
                atl_streak[fav] += 1
            if atl_streak[dog] > 0:
                atl_streak[dog] = -1
            else:
                atl_streak[dog] -= 1
        elif (favscore - dogscore) + line < 0:
            # Underdog covered.
            atl_data[fav].append(-1)
            atl_data[dog].append(1)
            if atl_streak[dog] < 0:
                atl_streak[dog] = 1
            else:
                atl_streak[dog] += 1
            if atl_streak[fav] > 0:
                atl_streak[fav] = -1
            else:
                atl_streak[fav] -= 1
        # Cap histories at the 50 most recent games.
        for source in (atl_data, aou_data):
            for tm in (fav, dog):
                if len(source[tm]) > 50:
                    source[tm] = source[tm][-50:]
    if result == 'line':
        vegas_data_line = pd.DataFrame.from_dict(vegas_data_line)
        vegas_data_line = vegas_data_line.T
        return vegas_data_line
    if result == 'ou':
        vegas_data_ou = pd.DataFrame.from_dict(vegas_data_ou)
        vegas_data_ou = vegas_data_ou.T
        return vegas_data_ou
# Module-level setup for the winner ('+pts') derived-data tuning script:
# load the score-based feature matrix and the win/loss labels.
derived_folder = os.path.join(cur_path, 'derived_data')
import pull_data
import update_dbs
import pandas as pd
import numpy as np
import log_tuning
import lgclass_tuning
import linsvc_tuning
import knn_tuning
import feature_lists
import rbfsvc_tuning
import polysvc_tuning
import random
train_index = pull_data.pull_train_index(update_dbs.mysql_client())
#cnx = update_dbs.mysql_client()
random.seed(86)  # fixed seed so the shuffled train index is reproducible
random.shuffle(train_index)
derived_data = {}
x_vals = 'points'
y_val = '+pts'
x_data_stable = pull_data.score(update_dbs.mysql_client())
# Feature columns exclude the target and the possession bookkeeping cols.
x_cols = list(x_data_stable)
x_cols.remove('+pts')
x_cols.remove('+possessions')
x_cols.remove('-possessions')
y_data_stable = pull_data.pull_wl(update_dbs.mysql_client())
# Align labels with feature rows via inner join; 'outcome' is the W/L label.
alldata = y_data_stable.join(x_data_stable, how = 'inner')
y_data = alldata['outcome']
# Module-level setup for the line derived-data tuning script: load the
# score-based features plus the line predictions and the line labels.
derived_folder = os.path.join(cur_path, 'derived_data')
import pull_data
import update_dbs
import pandas as pd
import numpy as np
import log_tuning
import lgclass_tuning
import linsvc_tuning
import knn_tuning
import feature_lists
import rbfsvc_tuning
import polysvc_tuning
import random
train_index = pull_data.pull_train_index(update_dbs.mysql_client())
#cnx = update_dbs.mysql_client()
random.seed(86)  # fixed seed so the shuffled train index is reproducible
random.shuffle(train_index)
derived_data = {}
x_vals = 'points'
y_val = 'line'
x_data_stable = pull_data.score(update_dbs.mysql_client())
line_preds = pull_data.line_preds(update_dbs.mysql_client())
x_data_stable = x_data_stable.join(line_preds, how='inner')
# Feature columns exclude the target and the possession bookkeeping cols.
x_cols = list(x_data_stable)
x_cols.remove('+pts')
x_cols.remove('+possessions')
x_cols.remove('-possessions')
y_data = pull_data.line_wl(update_dbs.mysql_client())
cur_path = os.path.abspath(os.path.join(cur_path, os.pardir)) sys.path.insert(-1, os.path.join(cur_path, 'model_conf')) sys.path.insert(-1, os.path.join(cur_path, 'db_utils')) sys.path.insert(-1, os.path.join(cur_path, 'model_tuning')) model_storage = os.path.join(cur_path, 'saved_models') import pull_data import update_dbs import saved_models from sklearn.externals import joblib import vegas_watson import numpy as np from sklearn.decomposition import PCA, TruncatedSVD import pickle train_index = pull_data.pull_train_index(update_dbs.mysql_client()) def save(): for x_vals in ['line', 'ou']: y_val = 'result' print('Loading rolling betting stats') x_data_stable = vegas_watson.rolling_vegas(x_vals) print('... Loaded rolling betting stats') x_data_stable = x_data_stable.loc[x_data_stable.index.isin(train_index)] y_data = x_data_stable[[y_val]] x_cols = list(x_data_stable) x_cols.remove(y_val) x_data_stable = x_data_stable[x_cols] for model_name, model_details in saved_models.stored_models[y_val][x_vals].items(): if not os.path.isfile(os.path.join(model_storage, '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))): print('Loading %s Values'%(model_name))
'20_game_avg_30_g_HAweight_for_defensive-rebounds-per-game', '-20_game_avg_50_g_Tweight_for_floor-percentage', '20_game_avg_10_g_HAweight_for_possessions-per-game', '-20_game_avg_50_g_Tweight_allow_points-per-game', '-100_g_HAspread_allow_assist--per--turnover-ratio', '-10_game_avg_10_g_HAweight_allow_points-per-game', '75_g_HAspread_allow_percent-of-points-from-3-pointers', '-15_g_HAspread_allow_block-pct', '-20_game_avg_25_g_Tweight_allow_possessions-per-game', '-10_game_avg_15_g_HAweight_allow_defensive-rebounds-per-game', '-20_game_avg_50_g_HAweight_allow_defensive-efficiency', '50_game_avg_50_g_HAweight_for_assists-per-game', '-30_game_avg_25_g_Tweight_allow_points-per-game', '-25_g_HAspread_allow_possessions-per-game' ] y_data = pull_data.ou_wl(update_dbs.mysql_client()) ou_preds = pull_data.ou_preds(update_dbs.mysql_client()) all_data = data.join(y_data, how='inner') all_data = all_data.join(ou_preds, how='inner') y_data = np.ravel(all_data[['ou']]) for pred in list(ou_preds): x_cols.append(pred) x_data_stable = all_data[x_cols] #import linsvc_tuning #import lgclass_tuning #import log_tuning #import knn_tuning x_vals = 'raw' y_val = 'ou'
def sklearn_preds():
    """Score every persisted sklearn classifier and dump per-game predictions to CSV.

    For each target ('ou', 'winner', 'line') and each feature set ('raw', '+pts'),
    loads the pickled model + scaler from ``model_storage``, predicts class
    probabilities for every game, and writes one ``<target>_results.csv`` per
    target into ``output_folder`` with a prediction and confidence column per model.

    Relies on module-level names defined elsewhere in this file/project:
    ``raw_data``, ``pull_data``, ``update_dbs``, ``saved_models``, ``joblib``,
    ``model_storage``, ``output_folder``.
    """
    # Assemble the two feature universes: model-derived raw features and
    # raw features augmented with predicted scores ('+pts').
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())
    # Inner joins restrict every X frame to games where the label exists, so
    # X rows and Y rows below are index-aligned by construction.
    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }
    all_y_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner')['outcome'],
            'raw': raw_x.join(y_wl, how='inner')['outcome'],
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner')['line'],
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner')['line'],
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner')['ou'],
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')['ou'],
        },
    }
    # Release the large source frames; only the joined dicts are needed now.
    raw_x = None
    x_score = None
    y_wl = None
    x_ou = None
    y_ou = None
    y_line = None
    x_line = None
    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        # One output frame per betting target, indexed by game id ('idx').
        outcomes = pd.DataFrame()
        # outcomes[sort] = np.ravel(all_y_data[sort]['raw'])
        outcomes['idx'] = list(all_y_data[sort]['raw'].index)
        outcomes = outcomes.set_index('idx')
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][kind].items():
                # Keras models are scored by a separate routine (keras_preds).
                if model_name == 'keras':
                    continue
                # Only evaluate models that have actually been persisted.
                if os.path.isfile(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name))):
                    print('Evaluating %s ' % (model_name))
                    model = joblib.load(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name)))
                    scale = joblib.load(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))
                    # Apply the training-time scaler, then get class probabilities.
                    preds = model.predict_proba(
                        scale.transform(
                            all_x_data[sort][kind][model_details['features']]))
                    model_outcome = pd.DataFrame()
                    winner = []
                    confidence = []
                    # Binary decision: pick the higher-probability class and keep
                    # its probability as the confidence.
                    for game in preds:
                        if game[0] > game[1]:
                            winner.append(0)
                            confidence.append(game[0])
                        else:
                            winner.append(1)
                            confidence.append(game[1])
                    # print('Accuracy: %s' % (accuracy_score(np.ravel(all_y_data[sort][kind]), winner)))
                    # print('Log Loss: %s' % (log_loss(np.ravel(all_y_data[sort][kind]), preds)))
                    model_outcome['idx'] = list(all_x_data[sort][kind][
                        model_details['features']].index)
                    model_outcome['%s_%s_prediction' % (kind, model_name)] = winner
                    model_outcome['%s_%s_confidence' % (kind, model_name)] = confidence
                    model_outcome = model_outcome.set_index('idx')
                    # Inner join keeps only games every model so far has scored.
                    outcomes = outcomes.join(model_outcome, how='inner')
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
        outcomes.to_csv(os.path.join(output_folder, '%s_results.csv' % (sort)))
cur_path = os.getcwd() while cur_path.split('/')[-1] != 'bb_preds': cur_path = os.path.abspath(os.path.join(cur_path, os.pardir)) sys.path.insert(-1, os.path.join(cur_path, 'model_conf')) sys.path.insert(-1, os.path.join(cur_path, 'db_utils')) sys.path.insert(-1, os.path.join(cur_path, 'model_tuning')) output_folder = os.path.join(cur_path, 'model_results') model_storage = os.path.join(cur_path, 'saved_models') import pandas as pd import pull_data import update_dbs import numpy as np vegas_data = pull_data.pull_odds_data(update_dbs.mysql_client()) #stored_results = {} #for sort in ['ou', 'winner', 'line']: def moneyline_analysis(): ml_data = vegas_data[[ 'fav-ml', 'dog-ml', 'fav_idx', 'dog_idx', 'fav-score', 'dog-score' ]] ml_data = ml_data.dropna(how='any') vegas_target_1 = 'fav-ml' vegas_target_2 = 'dog-ml' print('------ vegas: money-line') data = pd.read_csv(os.path.join(output_folder, 'winner_results.csv')) data = data.set_index('idx')
def save():
    """Fit and persist the point-prediction models (scaler + model per config).

    Iterates over every (y_val, x_vals) combination declared in
    ``saved_models.stored_models``, builds the training matrix for that
    combination, fits the configured scaler and model, and dumps both to
    ``model_storage``. Already-persisted models are skipped.

    Relies on module-level names defined elsewhere: ``pull_data``,
    ``update_dbs``, ``saved_models``, ``joblib``, ``model_storage``.
    """
    def hfa_patch(x, cnx):
        """Flip home/away-spread features so they are expressed from the home side.

        Columns containing '_HAspread_' are multiplied by +1 for home rows and
        -1 for away rows (based on the oddsdata table); all other columns pass
        through unchanged.
        """
        print('Running HFA Patch')
        keep_stats = []
        patch_stats = []
        for stat in list(x):
            # EAFP: split(...)[1] raises IndexError when the marker is absent,
            # cleanly separating patchable columns from pass-through columns.
            try:
                stat.split('_HAspread_')[1]
                patch_stats.append(stat)
            except IndexError:
                keep_stats.append(stat)
        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
        cursor.close()
        # Map each game-team key (date + team name with underscores) to +1
        # (home) or -1 (away). homeaway == 0 appears to mean t1 is home —
        # NOTE(review): confirm against the oddsdata schema.
        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            if l == 0:
                loc_adj[str(d) + t1.replace(' ', '_')] = 1
                loc_adj[str(d) + t2.replace(' ', '_')] = -1
            else:
                loc_adj[str(d) + t1.replace(' ', '_')] = -1
                loc_adj[str(d) + t2.replace(' ', '_')] = 1
        patch = None
        # Join the +/-1 flag in as column 1 (integer column name from the
        # two-column dict frame), then negate the away rows.
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0), how='left')
        away_data = patch_data[patch_data[1] == -1]
        away_data *= -1
        home_data = patch_data[patch_data[1] == 1]
        patch_data = home_data.append(away_data)
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('...Completed HFA Patch')
        return x
    for y_val in ['pts_scored', 'pts_allowed']:
        for x_vals in [
                'defensive_stats', 'offensive_stats', 'full-team', 'all',
                'possessions', 'target'
        ]:
            # Skip combinations that have no stored model configuration.
            if x_vals in ['defensive_stats', 'offensive_stats'
                          ] and y_val == 'pts_allowed':
                continue
            if x_vals in ['full-team', 'defensive_stats'
                          ] and y_val == 'pts_scored':
                continue
            if x_vals == 'possessions':
                # Target is possessions per game rather than points-per-possession.
                y_data = pull_data.pull_possessions(y_val,
                                                    update_dbs.mysql_client())
                x_data = pull_data.pull_model_features(
                    y_val, x_vals, update_dbs.mongodb_client)
                x_data = hfa_patch(x_data, update_dbs.mysql_client())
                train_index = pull_data.pull_train_index(
                    update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                # Inner joins align X and Y on the shared game index.
                y_data = x_data.join(y_data, how='inner')['possessions']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]
            elif x_vals in [
                    'target', 'defensive_stats', 'offensive_stats',
                    'full-team', 'all'
            ]:
                # Target is points per possession ('ppp').
                y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())
                if x_vals == 'full-team':
                    # Defensive features plus the offensive features not
                    # already present on the defensive side.
                    def_data = pull_data.pull_model_features(
                        y_val, 'defensive_stats', update_dbs.mongodb_client)
                    def_data = hfa_patch(def_data, update_dbs.mysql_client())
                    off_data = pull_data.pull_model_features(
                        y_val, 'offensive_stats', update_dbs.mongodb_client)
                    off_feats = [
                        i for i in list(off_data) if i not in list(def_data)
                    ]
                    off_data = off_data[off_feats]
                    off_data = hfa_patch(off_data, update_dbs.mysql_client())
                    x_data = def_data.join(off_data, how='inner')
                    train_index = pull_data.pull_train_index(
                        update_dbs.mysql_client())
                    x_data = x_data.loc[x_data.index.isin(train_index)]
                    y_data = x_data.join(y_data, how='inner')['ppp']
                    x_data = x_data.join(y_data, how='inner')[list(x_data)]
                    off_data = None
                    def_data = None
                elif x_vals == 'all':
                    # Union of defensive, (deduplicated) offensive,
                    # possessions and target feature groups.
                    def_data = pull_data.pull_model_features(
                        y_val, 'defensive_stats', update_dbs.mongodb_client)
                    def_data = hfa_patch(def_data, update_dbs.mysql_client())
                    off_data = pull_data.pull_model_features(
                        y_val, 'offensive_stats', update_dbs.mongodb_client)
                    off_feats = [
                        i for i in list(off_data) if i not in list(def_data)
                    ]
                    off_data = off_data[off_feats]
                    off_data = hfa_patch(off_data, update_dbs.mysql_client())
                    poss_data = pull_data.pull_model_features(
                        y_val, 'possessions', update_dbs.mongodb_client)
                    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
                    tar_data = pull_data.pull_model_features(
                        y_val, 'target', update_dbs.mongodb_client)
                    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
                    x_data = def_data.join(off_data, how='inner')
                    x_data = x_data.join(poss_data, how='inner')
                    x_data = x_data.join(tar_data, how='inner')
                    train_index = pull_data.pull_train_index(
                        update_dbs.mysql_client())
                    x_data = x_data.loc[x_data.index.isin(train_index)]
                    y_data = x_data.join(y_data, how='inner')['ppp']
                    x_data = x_data.join(y_data, how='inner')[list(x_data)]
                    def_data = None
                    off_data = None
                    poss_data = None
                    tar_data = None
                elif x_vals in ['target', 'defensive_stats', 'offensive_stats']:
                    x_data = pull_data.pull_model_features(
                        y_val, x_vals, update_dbs.mongodb_client)
                    x_data = hfa_patch(x_data, update_dbs.mysql_client())
                    train_index = pull_data.pull_train_index(
                        update_dbs.mysql_client())
                    x_data = x_data.loc[x_data.index.isin(train_index)]
                    y_data = x_data.join(y_data, how='inner')['ppp']
                    x_data = x_data.join(y_data, how='inner')[list(x_data)]
            for model_name, model_details in saved_models.stored_models[y_val][
                    x_vals].items():
                # Only (re)train models that are not yet on disk.
                if not os.path.isfile(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                    print('Loading %s Values' % (model_name))
                    model = model_details['model']
                    scale = model_details['scale']
                    # Fit the scaler first, persist it, then fit the model on
                    # the scaled features — this order is what update()/preds
                    # rely on when they reload the pair.
                    scale.fit(x_data[model_details['features']])
                    joblib.dump(
                        scale,
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name)))
                    model.fit(
                        scale.transform(x_data[model_details['features']]),
                        np.ravel(y_data))
                    joblib.dump(
                        model,
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name)))
                    print('Stored %s' % (model_name))
def update():
    """Score new games with the persisted point-prediction models and store results.

    For each side ('pts_scored' → offensive_preds, 'pts_allowed' →
    defensive_preds), pulls the games that still need predictions
    (``pull_data.update_idx``), rebuilds each feature matrix, loads the
    corresponding pickled model + scaler from ``model_storage``, predicts, and
    hands the assembled prediction frame to ``add_derived.update``.

    Relies on module-level names defined elsewhere: ``pull_data``,
    ``update_dbs``, ``saved_models``, ``hfa_patch``, ``joblib``,
    ``add_derived``, ``model_storage``.
    """
    for y_val in ['pts_scored', 'pts_allowed']:
        # Games not yet present in the derived-predictions table.
        if y_val == 'pts_scored':
            train_index = pull_data.update_idx(update_dbs.mysql_client(),
                                               'offensive_preds')
        if y_val == 'pts_allowed':
            train_index = pull_data.update_idx(update_dbs.mysql_client(),
                                               'defensive_preds')
        update_df = pd.DataFrame()
        if len(train_index) == 0:
            # Nothing new to score for this side.
            continue
        update_df['idx'] = train_index
        update_df = update_df.set_index('idx')
        for x_vals in [
                'defensive_stats', 'offensive_stats', 'full-team', 'all',
                'possessions', 'target'
        ]:
            # Skip combinations that have no stored model configuration.
            if x_vals in ['defensive_stats', 'offensive_stats'
                          ] and y_val == 'pts_allowed':
                continue
            if x_vals in ['full-team', 'defensive_stats'
                          ] and y_val == 'pts_scored':
                continue
            if x_vals == 'possessions':
                # Target is possessions per game.
                y_data = pull_data.pull_possessions(y_val,
                                                    update_dbs.mysql_client())
                x_data = pull_data.pull_model_features(
                    y_val, x_vals, update_dbs.mongodb_client)
                x_data = hfa_patch(x_data, update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                # Inner joins align X and Y on the shared game index.
                y_data = x_data.join(y_data, how='inner')['possessions']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]
            elif x_vals in [
                    'target', 'defensive_stats', 'offensive_stats',
                    'full-team', 'all'
            ]:
                # Target is points per possession ('ppp').
                y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())
                if x_vals == 'full-team':
                    # Defensive features plus the offensive features not
                    # already present on the defensive side.
                    def_data = pull_data.pull_model_features(
                        y_val, 'defensive_stats', update_dbs.mongodb_client)
                    def_data = hfa_patch(def_data, update_dbs.mysql_client())
                    off_data = pull_data.pull_model_features(
                        y_val, 'offensive_stats', update_dbs.mongodb_client)
                    off_feats = [
                        i for i in list(off_data) if i not in list(def_data)
                    ]
                    off_data = off_data[off_feats]
                    off_data = hfa_patch(off_data, update_dbs.mysql_client())
                    x_data = def_data.join(off_data, how='inner')
                    x_data = x_data.loc[x_data.index.isin(train_index)]
                    y_data = x_data.join(y_data, how='inner')['ppp']
                    x_data = x_data.join(y_data, how='inner')[list(x_data)]
                    off_data = None
                    def_data = None
                elif x_vals == 'all':
                    # Union of defensive, (deduplicated) offensive,
                    # possessions and target feature groups.
                    def_data = pull_data.pull_model_features(
                        y_val, 'defensive_stats', update_dbs.mongodb_client)
                    def_data = hfa_patch(def_data, update_dbs.mysql_client())
                    off_data = pull_data.pull_model_features(
                        y_val, 'offensive_stats', update_dbs.mongodb_client)
                    off_feats = [
                        i for i in list(off_data) if i not in list(def_data)
                    ]
                    off_data = off_data[off_feats]
                    off_data = hfa_patch(off_data, update_dbs.mysql_client())
                    poss_data = pull_data.pull_model_features(
                        y_val, 'possessions', update_dbs.mongodb_client)
                    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
                    tar_data = pull_data.pull_model_features(
                        y_val, 'target', update_dbs.mongodb_client)
                    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
                    x_data = def_data.join(off_data, how='inner')
                    x_data = x_data.join(poss_data, how='inner')
                    x_data = x_data.join(tar_data, how='inner')
                    x_data = x_data.loc[x_data.index.isin(train_index)]
                    y_data = x_data.join(y_data, how='inner')['ppp']
                    x_data = x_data.join(y_data, how='inner')[list(x_data)]
                    def_data = None
                    off_data = None
                    poss_data = None
                    tar_data = None
                elif x_vals in ['target', 'defensive_stats', 'offensive_stats']:
                    x_data = pull_data.pull_model_features(
                        y_val, x_vals, update_dbs.mongodb_client)
                    x_data = hfa_patch(x_data, update_dbs.mysql_client())
                    x_data = x_data.loc[x_data.index.isin(train_index)]
                    y_data = x_data.join(y_data, how='inner')['ppp']
                    x_data = x_data.join(y_data, how='inner')[list(x_data)]
            for model_name, model_details in saved_models.stored_models[y_val][
                    x_vals].items():
                # Only score with models that have actually been persisted.
                if os.path.isfile(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                    print('Loading %s Values' % (model_name))
                    model = joblib.load(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name)))
                    scale = joblib.load(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name)))
                    # BUGFIX: use transform(), not fit_transform(). The scaler
                    # was fitted on the training data when the model was saved;
                    # re-fitting it on this small update batch would normalize
                    # the features with the wrong statistics and corrupt the
                    # predictions.
                    preds = model.predict(
                        scale.transform(x_data[model_details['features']]))
                    indy_pred = pd.DataFrame()
                    # Column naming mirrors the selection lists below:
                    # offensive_stats (and defensive full-team) predictions are
                    # stored under the '_team' suffix.
                    if x_vals == 'offensive_stats':
                        indy_pred[model_name + '_team'] = preds
                    elif y_val == 'pts_allowed' and x_vals == 'full-team':
                        indy_pred[model_name + '_team'] = preds
                    else:
                        indy_pred[model_name + '_' + x_vals] = preds
                    indy_pred['idx'] = list(x_data.index)
                    indy_pred = indy_pred.set_index('idx')
                    update_df = update_df.join(indy_pred, how='inner')
                    print('Loaded %s' % (model_name))
        # Keep only the model columns the downstream table expects, in its
        # column order, then insert.
        if y_val == 'pts_scored':
            update_df = update_df[[
                'lightgbm_team', 'linsvm_team', 'linsvm_all', 'ridge_all',
                'lasso_possessions', 'lightgbm_possessions',
                'linsvm_possessions', 'lightgbm_target', 'linsvm_target',
                'ridge_target', 'lasso_target'
            ]]
            add_derived.update('offensive_preds', update_df)
        elif y_val == 'pts_allowed':
            update_df = update_df[[
                'lightgbm_all', 'ridge_all', 'lasso_team', 'lightgbm_team',
                'linsvm_team', 'ridge_team', 'lasso_possessions',
                'lightgbm_possessions', 'ridge_possessions', 'lasso_target',
                'lightgbm_target'
            ]]
            add_derived.update('defensive_preds', update_df)
'30_g_HAspread_allow_free-throw-rate', '-100_g_HAspread_for_defensive-efficiency', 'pregame_turnovers-per-possession_for', '-50_g_HAspread_for_personal-fouls-per-game', '75_g_HAspread_for_defensive-efficiency', '100_g_HAspread_for_defensive-efficiency', '75_g_HAspread_allow_points-per-game', '-75_g_HAspread_allow_floor-percentage', '30_g_HAspread_for_floor-percentage', 'expected_ftm-per-100-possessions_for', '-75_g_HAspread_allow_defensive-efficiency', '-50_g_HAspread_allow_points-per-game`/`possessions-per-game', '-50_game_avg_30_g_Tweight_allow_fta-per-fga', '-50_g_HAspread_for_assist--per--turnover-ratio', '-10_g_HAspread_allow_ftm-per-100-possessions'] y_data = pull_data.pull_wl(update_dbs.mysql_client()) all_data = data.join(y_data, how = 'inner') y_data = np.ravel(all_data[['outcome']]) x_data_stable = all_data[x_cols] import linsvc_tuning import lgclass_tuning import log_tuning import knn_tuning x_vals = 'raw' y_val = 'winner' # x_data = x_data_stable result = lgclass_tuning.execute(y_val, x_vals, X_data = x_data, Y_data = y_data)
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning')) output_folder = os.path.join(cur_path, 'model_results') features_folder = os.path.join(cur_path, 'feature_dumps') model_storage = os.path.join(cur_path, 'saved_models') import numpy as np import pull_data import update_dbs import random import saved_models import pandas as pd from keras.models import load_model from sklearn.externals import joblib validation_index = pull_data.pull_validation_index(update_dbs.mysql_client()) random.seed(86) def hfa_patch(x, cnx): print('Running HFA Patch') keep_stats = [] patch_stats = [] for stat in list(x): try: stat.split('_HAspread_')[1] patch_stats.append(stat) except IndexError: keep_stats.append(stat) patch_data = x[patch_stats]
import os, sys try: # if running in CLI cur_path = os.path.abspath(__file__) except NameError: # if running in IDE cur_path = os.getcwd() while cur_path.split('/')[-1] != 'bb_preds': cur_path = os.path.abspath(os.path.join(cur_path, os.pardir)) sys.path.insert(-1, os.path.join(cur_path, 'model_conf')) sys.path.insert(-1, os.path.join(cur_path, 'db_utils')) sys.path.insert(-1, os.path.join(cur_path, 'model_tuning')) derived_folder = os.path.join(cur_path, 'derived_data') import pandas as pd import update_dbs import numpy as np cnx = update_dbs.mysql_client() for name in ['offensive_preds', 'defensive_preds']: data = pd.read_csv('%s.csv' % (name)) cursor = cnx.cursor() insertlist = [] continuance = 0 for entry in np.array(data): insert = list(entry) idx = insert[0] date = '"' + idx[:10] + '"' tname = '"' + idx[10:].replace('_', ' ') + '"' insert = insert[1:] sql_insert = [] sql_insert.append(tname) sql_insert.append(date) for each in insert:
def keras_preds():
    """Append Keras-model predictions to the per-target results CSVs.

    Builds the raw feature matrix augmented with predicted scores and the
    vegas line/over-under columns, loads the saved Keras regression model and
    scaler for each target ('winner', 'ou', 'line'), predicts, and joins a
    raw_keras_prediction / raw_keras_confidence column pair onto the existing
    ``<target>_results.csv`` written by sklearn_preds.

    Relies on module-level names defined elsewhere: ``raw_data``,
    ``pull_data``, ``update_dbs``, ``saved_models``, ``load_model``,
    ``joblib``, ``model_storage``, ``output_folder``.
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    raw_x = raw_x.join(x_score, how='inner')
    line = pull_data.pull_odds_data(update_dbs.mysql_client())
    # Explode the per-game odds rows into per-team rows: the favorite gets the
    # quoted line/ou and the underdog gets its negation.
    idx = []
    gameline = []
    line_data = line[['fav_idx', 'dog_idx', 'line']]
    for fix, dix, ln in np.array(line_data):
        idx.append(fix)
        idx.append(dix)
        gameline.append(ln)
        gameline.append(ln * -1)
    linedata = pd.DataFrame()
    linedata['idx'] = idx
    linedata['vegas_line'] = gameline
    linedata = linedata.set_index('idx')
    idx = []
    gameou = []
    ou_data = line[['fav_idx', 'dog_idx', 'overunder']]
    for fix, dix, ou in np.array(ou_data):
        idx.append(fix)
        idx.append(dix)
        gameou.append(ou)
        gameou.append(ou * -1)
    oudata = pd.DataFrame()
    oudata['idx'] = idx
    oudata['vegas_ou'] = gameou
    oudata = oudata.set_index('idx')
    raw_x = raw_x.join(oudata, how='inner')
    raw_x = raw_x.join(linedata, how='inner')
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())
    # Inner joins restrict features to games with labels, keeping X and Y
    # index-aligned.
    all_x_data = {
        'winner': {
            'raw': raw_x.join(y_wl, how='inner')
        },
        'line': {
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner')
        },
        'ou': {
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')
        },
    }
    all_y_data = {
        'winner': {
            'raw': raw_x.join(y_wl, how='inner')['outcome']
        },
        'line': {
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner')['line']
        },
        'ou': {
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')['ou']
        },
    }
    # Release the large source frames; only the joined dicts are needed now.
    raw_x = None
    x_score = None
    y_wl = None
    x_ou = None
    y_ou = None
    y_line = None
    x_line = None
    random.seed(86)
    for sort in ['keras']:
        print('... starting %s' % (sort))
        for kind in ['winner', 'ou', 'line']:
            # Read the CSV produced by sklearn_preds and extend it in place.
            results = pd.read_csv(
                os.path.join(output_folder, '%s_results.csv' % (kind)))
            results = results.set_index('idx')
            print('... starting %s' % (kind))
            X = all_x_data[kind]['raw']
            save_index = list(X.index)
            X = X.reset_index()
            X = X[saved_models.stored_models[kind]['raw'][sort]['features']]
            Y = all_y_data[kind]['raw']
            Y = Y.reset_index()
            if kind != 'winner':
                Y = Y[kind]
            else:
                Y = Y['outcome']
            print('...loading %s' % (kind))
            model = load_model(
                os.path.join(model_storage,
                             '%s_%s_regression_model.h5' % (kind, sort)))
            scale = joblib.load(
                os.path.join(model_storage,
                             '%s_%s_regression_scaler.pkl' % (kind, sort)))
            preds = model.predict(scale.transform(X))
            winner = []
            confidence = []
            # Threshold the single sigmoid-style output at 0.5; confidence is
            # the distance-from-0 probability of the chosen class.
            # NOTE(review): assumes model output is in [0, 1] — confirm the
            # network's final activation.
            for game in preds:
                if game[0] < .5:
                    winner.append(0)
                    confidence.append(1 - game[0])
                else:
                    winner.append(1)
                    confidence.append(game[0])
            model_outcome = pd.DataFrame()
            model_outcome['idx'] = save_index
            model_outcome['raw_keras_prediction'] = winner
            model_outcome['raw_keras_confidence'] = confidence
            model_outcome = model_outcome.set_index('idx')
            results = results.join(model_outcome, how='inner')
            results.to_csv(
                os.path.join(output_folder, '%s_results.csv' % (kind)))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
'75_g_HAspread_for_defensive-efficiency', '1_game_avg_10_g_HAweight_for_points-per-game', '-50_game_avg_30_g_Tweight_allow_block-pct', '25_g_HAspread_for_possessions-per-game', '-5_game_avg_10_g_Tweight_allow_possessions-per-game', '100_g_HAspread_for_defensive-efficiency', '-10_game_avg_50_g_Tweight_for_assists-per-game', '-20_game_avg_15_g_Tweight_allow_extra-chances-per-game', 'pregame_ppp_for', '-expected_effective-field-goal-pct_allowed', '-5_game_avg_50_g_HAweight_allow_possessions-per-game', '-10_g_HAspread_allow_points-per-game`/`possessions-per-game', '-50_game_avg_15_g_Tweight_allow_blocks-per-game', '-50_game_avg_50_g_HAweight_for_offensive-rebounding-pct', '-20_game_avg_50_g_Tweight_for_block-pct'] y_data = pull_data.line_wl(update_dbs.mysql_client()) all_data = data.join(y_data, how = 'inner') line_preds = pull_data.line_preds(update_dbs.mysql_client()) all_data = all_data.join(line_preds, how = 'inner') y_data = np.ravel(all_data[['line']]) for pred in list(line_preds): x_cols.append(pred) x_data_stable = all_data[x_cols] import linsvc_tuning import lgclass_tuning #import log_tuning #import knn_tuning x_vals = 'raw' y_val = 'line'