def retrieve_data():
    """Build the feature matrix and target for the points-scored model.

    Pulls the defensive, offensive, possession and target feature sets for
    ``'pts_scored'``, runs each through ``hfa_patch``, inner-joins them,
    keeps only rows whose index label is in the training index, and joins
    the offensive points data onto the result.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is the joined frame restricted to the
        fixed list of feature columns below and ``Y`` is its ``'pts'``
        column, both taken after ``reset_index()``.
    """
    target_name = 'pts_scored'
    points = pull_data.pull_pts('offensive', update_dbs.mysql_client())

    defense = hfa_patch(
        pull_data.pull_model_features(target_name, 'defensive_stats',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())

    # Offensive columns that already exist on the defensive frame are
    # dropped so the inner join below has no overlapping column names.
    offense = pull_data.pull_model_features(target_name, 'offensive_stats',
                                            update_dbs.mongodb_client)
    defense_columns = list(defense)
    offense = offense[[col for col in list(offense)
                       if col not in defense_columns]]
    offense = hfa_patch(offense, update_dbs.mysql_client())

    possessions = hfa_patch(
        pull_data.pull_model_features(target_name, 'possessions',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())
    target = hfa_patch(
        pull_data.pull_model_features(target_name, 'target',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())

    features = defense.join(offense, how='inner')
    features = features.join(possessions, how='inner')
    features = features.join(target, how='inner')

    training_rows = pull_data.pull_train_index(update_dbs.mysql_client())
    features = features.loc[features.index.isin(training_rows)]

    # Keep only rows that also have points data, without keeping the
    # points columns themselves at this stage.
    feature_columns = list(features)
    features = features.join(points, how='inner')[feature_columns]

    # The per-source frames are no longer needed; release them before the
    # final join.
    del defense, offense, possessions, target

    data = features.join(points, how='inner').reset_index()

    x_feats = [
        'expected_pts_pg_for',
        '75_g_HAspread_for_floor-percentage',
        'pregame_pts_pg_for',
        'expected_poss_pg_for',
        'expected_ppp_for',
        '50_game_avg_15_g_HAweight_allow_assist--per--turnover-ratio',
        '75_g_HAspread_allow_points-per-game',
        '100_g_HAspread_allow_block-pct',
        'pregame_poss_pg_for',
        '10_game_avg_30_g_HAweight_allow_personal-foul-pct',
        'expected_turnovers-per-possession_for',
        'expected_offensive-rebounding-pct_for',
        '30_g_HAspread_for_floor-percentage',
        'expected_ftm-per-100-possessions_for',
        'expected_effective-field-goal-pct_for',
        'pregame_effective-field-goal-pct_for',
        '100_g_HAspread_allow_assist--per--turnover-ratio',
        '30_g_HAspread_allow_floor-percentage',
        '10_game_avg_30_g_HAweight_allow_two-point-rate',
        '5_game_avg_50_g_HAweight_for_points-per-game`/`possessions-per-game',
        '10_game_avg_50_g_Tweight_for_effective-field-goal-pct',
        '30_game_avg_5_g_Tweight_for_points-per-game`/`possessions-per-game',
    ]
    X = data[x_feats]
    Y = data['pts']
    return X, Y
def _joined_patched_features(y_val):
    """Pull, HFA-patch and inner-join the four feature sources for *y_val*.

    The sources are ``'defensive_stats'``, ``'offensive_stats'`` (minus any
    column already present on the defensive frame), ``'possessions'`` and
    ``'target'``, joined on their index in that order.
    """
    def_data = hfa_patch(
        pull_data.pull_model_features(y_val, 'defensive_stats',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())

    off_data = pull_data.pull_model_features(y_val, 'offensive_stats',
                                             update_dbs.mongodb_client)
    # Build the lookup once instead of once per offensive column.
    def_columns = set(def_data)
    off_data = off_data[[col for col in off_data if col not in def_columns]]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())

    poss_data = hfa_patch(
        pull_data.pull_model_features(y_val, 'possessions',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())
    tar_data = hfa_patch(
        pull_data.pull_model_features(y_val, 'target',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())

    joined = def_data.join(off_data, how='inner')
    joined = joined.join(poss_data, how='inner')
    return joined.join(tar_data, how='inner')


def raw_data():
    """Return team features, negated opponent features and points per game.

    The ``'pts_scored'`` features are restricted to the training index and
    to rows that have offensive points data.  The ``'pts_allowed'`` features
    are re-indexed from each row's own key to the key that ``gamedata``
    maps it to (``str(date) + opponent``), multiplied by -1 and renamed
    with a leading ``'-'``.  The two frames and the points data are then
    joined, infinities and the string ``'NULL'`` become NaN, and any row
    containing a NaN is dropped.

    Returns:
        pandas.DataFrame: the cleaned, joined frame.

    Raises:
        KeyError: if an opponent-feature index label has no entry in
            ``gamedata``.
    """
    x_data = _joined_patched_features('pts_scored')
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    x_data = x_data.loc[x_data.index.isin(train_index)]
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    team_data = x_data.join(y_data, how='inner')[list(x_data)]
    del x_data

    opponent_data = _joined_patched_features('pts_allowed')

    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()
    try:
        cursor.execute('SELECT * from gamedata;')
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
    finally:
        # The cursor was previously left open; close it even on failure.
        cursor.close()

    # Map "<date><team>" -> "<date><opponent>" (spaces become underscores).
    idx_switch = {}
    for t, d, o, l in np.array(switch):
        idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_')

    opponent_data['idx'] = [idx_switch[key] for key in opponent_data.index]
    opponent_data = opponent_data.set_index('idx')
    opponent_data *= -1
    opponent_data = opponent_data.rename(
        columns={col: '-' + col for col in list(opponent_data)})

    data = opponent_data.join(team_data)
    data = data.join(y_data, how='inner')
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.replace('NULL', np.nan)
    return data.dropna(how='any')
# Extend the import search path with the project's helper directories.
# NOTE(review): `sys`, `os` and `cur_path` are not defined in the visible
# part of the file -- presumably imported/assigned above; confirm.
# `insert(-1, ...)` places each entry just before the current last element
# of sys.path, not at the very end.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

# Directory paths derived from cur_path.
output_folder = os.path.join(cur_path, 'model_results')
features_folder = os.path.join(cur_path, 'feature_dumps')
model_storage = os.path.join(cur_path, 'saved_models')

# These imports come after the sys.path edits above; the project-local
# modules (pull_data, update_dbs, saved_models) presumably live in the
# directories just added, so the order must be kept -- TODO confirm.
import numpy as np
import pull_data
import update_dbs
import random
import saved_models
import pandas as pd
from sklearn.model_selection import cross_validate, StratifiedKFold

# Pull the training index at import time and shuffle it in place with a
# fixed seed so the order is reproducible between runs.
train_index = pull_data.pull_train_index(update_dbs.mysql_client())
random.seed(86)
random.shuffle(train_index)
derived_data = {}


def hfa_patch(x, cnx):
    """Sort the column names of *x* by whether they contain '_HAspread_'.

    NOTE(review): as visible here the function stops after the
    classification loop -- it never uses ``cnx``, discards both lists and
    implicitly returns None.  Other ``hfa_patch`` definitions in this file
    continue past this point, so this copy looks truncated; confirm
    against the original file.
    """
    print('Running HFA Patch')
    keep_stats = []
    patch_stats = []
    for stat in list(x):
        try:
            # Indexing [1] raises IndexError when the marker is absent,
            # which routes the column to keep_stats.
            stat.split('_HAspread_')[1]
            patch_stats.append(stat)
        except IndexError:
            keep_stats.append(stat)
def save():
    """Fit and pickle every stored classifier that is not on disk yet.

    For each bet type (``'ou'``, ``'winner'``, ``'line'``) and feature kind
    (``'raw'``, ``'+pts'``), every entry of
    ``saved_models.stored_models[sort][kind]`` except ``'keras'`` is fitted
    on its scaled feature subset and written to ``model_storage`` as
    ``<sort>_<kind>_<name>_scaler.pkl`` / ``<sort>_<kind>_<name>_model.pkl``.
    Models whose ``_model.pkl`` file already exists are skipped.
    """
    # NOTE(review): nothing below reads this shuffled list (raw_data pulls
    # its own copy); it is kept so the sequence of calls is unchanged.
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    random.seed(86)
    random.shuffle(train_index)

    def hfa_patch(x, cnx):
        """Negate the '_HAspread_' columns of *x* for rows tagged -1.

        Each ``oddsdata`` row tags its favorite and underdog with +1/-1
        (favorite gets +1 when ``homeaway == 0``, otherwise -1) under the
        key ``str(oddsdate) + team`` with spaces replaced by underscores.
        Rows of *x* whose index label has no tag are dropped.

        Args:
            x: DataFrame of features indexed by those keys.
            cnx: open database connection providing ``cursor()``.

        Returns:
            DataFrame with the patched spread columns first, followed by
            the untouched columns.
        """
        print('Running HFA Patch')
        patch_stats = [stat for stat in x if '_HAspread_' in stat]
        keep_stats = [stat for stat in x if '_HAspread_' not in stat]
        keep_data = x[keep_stats]

        cursor = cnx.cursor()
        try:
            cursor.execute(
                'Select oddsdate, favorite, underdog, homeaway from oddsdata;')
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            t1_sign = 1 if l == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = t1_sign
            loc_adj[str(d) + t2.replace(' ', '_')] = -t1_sign

        # Explicit column labels keep this working when oddsdata is empty.
        location = pd.DataFrame(list(loc_adj.items()),
                                columns=[0, 1]).set_index(0)
        patch_data = x[patch_stats].join(location, how='left')
        home_data = patch_data[patch_data[1] == 1]
        # Multiply into a new frame rather than in place on a filtered slice.
        away_data = patch_data[patch_data[1] == -1] * -1
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Join team features, negated opponent features and points data.

        Returns:
            DataFrame with infinities and ``'NULL'`` turned into NaN and
            every row containing a NaN dropped.
        """

        def load_features(y_val):
            # Pull, patch and inner-join the four feature sources.
            def_data = hfa_patch(
                pull_data.pull_model_features(y_val, 'defensive_stats',
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client())
            off_data = pull_data.pull_model_features(
                y_val, 'offensive_stats', update_dbs.mongodb_client)
            def_columns = set(def_data)
            off_data = off_data[[c for c in off_data if c not in def_columns]]
            off_data = hfa_patch(off_data, update_dbs.mysql_client())
            poss_data = hfa_patch(
                pull_data.pull_model_features(y_val, 'possessions',
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client())
            tar_data = hfa_patch(
                pull_data.pull_model_features(y_val, 'target',
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client())
            joined = def_data.join(off_data, how='inner')
            joined = joined.join(poss_data, how='inner')
            return joined.join(tar_data, how='inner')

        x_data = load_features('pts_scored')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        del x_data

        opponent_data = load_features('pts_allowed')

        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        try:
            cursor.execute('SELECT * from gamedata;')
            switch = pd.DataFrame(
                cursor.fetchall(),
                columns=['teamname', 'date', 'opponent', 'location'])
        finally:
            cursor.close()

        # Map "<date><team>" -> "<date><opponent>".
        idx_switch = {}
        for t, d, o, l in np.array(switch):
            idx_switch[str(d) + t.replace(' ', '_')] = (
                str(d) + o.replace(' ', '_'))

        opponent_data['idx'] = [idx_switch[k] for k in opponent_data.index]
        opponent_data = opponent_data.set_index('idx')
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={c: '-' + c for c in list(opponent_data)})

        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        return data.dropna(how='any')

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    bases = {'+pts': x_score, 'raw': raw_x}
    all_x_data = {
        'winner': {
            kind: base.join(y_wl, how='inner')
            for kind, base in bases.items()
        },
        'line': {
            kind: base.join(y_line, how='inner').join(x_line, how='inner')
            for kind, base in bases.items()
        },
        'ou': {
            kind: base.join(y_ou, how='inner').join(x_ou, how='inner')
            for kind, base in bases.items()
        },
    }
    # The targets are columns of the frames built above, so read them from
    # there instead of repeating every join.
    target_column = {'winner': 'outcome', 'line': 'line', 'ou': 'ou'}
    all_y_data = {}
    for sort, frames in all_x_data.items():
        all_y_data[sort] = {
            kind: frame[target_column[sort]] for kind, frame in frames.items()
        }

    # Drop the source frames; only the joined copies are needed from here.
    del bases, raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line

    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            stored = saved_models.stored_models[sort][kind]
            for model_name, model_details in stored.items():
                if model_name == 'keras':
                    continue
                model_path = os.path.join(
                    model_storage,
                    '%s_%s_%s_model.pkl' % (sort, kind, model_name))
                if os.path.isfile(model_path):
                    continue
                print('...storing %s' % (model_name))
                model = model_details['model']
                scale = model_details['scale']
                features = all_x_data[sort][kind][model_details['features']]
                scale.fit(features)
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))
                model.fit(scale.transform(features),
                          np.ravel(all_y_data[sort][kind]))
                joblib.dump(model, model_path)
                print('Stored %s' % (model_name))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
def save():
    """Fit and persist the pace, ppp and share regressors not yet on disk.

    Two groups of models are handled:

    * For ``x_vals`` in ``('offense', 'defense')`` and ``y_val`` in
      ``('pace', 'ppp')``: data comes from ``pull_data.pace`` /
      ``pull_data.ppp``, is restricted to the training index, and the
      model and scaler from ``saved_models.stored_models[x_vals][y_val]``
      are written as ``<y_val>_<x_vals>_regression_{scaler,model}.pkl``.
    * For ``x_vals == 'share'`` and ``y_val`` in ``('+pts', 'keras')``: data
      is ``raw_data()`` joined with ``pull_data.share``; the keras model is
      saved as ``.h5`` via ``model.model.save``, the other as ``.pkl``.

    A model is skipped when its output file already exists.
    """
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())

    # y_val -> (loader, name of the target column in the loaded frame)
    loaders = {
        'ppp': (pull_data.ppp, 'ppp'),
        'pace': (pull_data.pace, 'possessions'),
    }
    for x_vals in ['offense', 'defense']:
        for y_val in ['pace', 'ppp']:
            model_path = os.path.join(
                model_storage,
                '%s_%s_regression_model.pkl' % (y_val, x_vals))
            # Check first so no data is pulled for an existing model.
            if os.path.isfile(model_path):
                continue

            loader, target = loaders[y_val]
            data = loader(update_dbs.mysql_client(), x_vals)
            x_feats = list(data)
            x_feats.remove(target)
            # Same self-join as before: feature columns first, target last.
            data = data[x_feats].join(data[[target]], how='inner')
            data = data.loc[data.index.isin(train_index)]
            x_data = data[x_feats]
            y_data = data[[target]]

            print('Loading %s_%s' % (x_vals, y_val))
            details = saved_models.stored_models[x_vals][y_val]
            model = details['model']
            scale = details['scale']
            features = x_data[details['features']]
            scale.fit(features)
            joblib.dump(
                scale,
                os.path.join(
                    model_storage,
                    '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
            model.fit(scale.transform(features), np.ravel(y_data))
            joblib.dump(model, model_path)
            print('Stored %s_%s' % (x_vals, y_val))

    def hfa_patch(x, cnx):
        """Negate the '_HAspread_' columns of *x* for rows tagged -1.

        Each ``oddsdata`` row tags its favorite and underdog with +1/-1
        (favorite gets +1 when ``homeaway == 0``, otherwise -1) under the
        key ``str(oddsdate) + team`` with spaces replaced by underscores.
        Rows of *x* whose index label has no tag are dropped.

        Args:
            x: DataFrame of features indexed by those keys.
            cnx: open database connection providing ``cursor()``.

        Returns:
            DataFrame with the patched spread columns first, followed by
            the untouched columns.
        """
        print('Running HFA Patch')
        patch_stats = [stat for stat in x if '_HAspread_' in stat]
        keep_stats = [stat for stat in x if '_HAspread_' not in stat]
        keep_data = x[keep_stats]

        cursor = cnx.cursor()
        try:
            cursor.execute(
                'Select oddsdate, favorite, underdog, homeaway from oddsdata;')
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            t1_sign = 1 if l == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = t1_sign
            loc_adj[str(d) + t2.replace(' ', '_')] = -t1_sign

        # Explicit column labels keep this working when oddsdata is empty.
        location = pd.DataFrame(list(loc_adj.items()),
                                columns=[0, 1]).set_index(0)
        patch_data = x[patch_stats].join(location, how='left')
        home_data = patch_data[patch_data[1] == 1]
        # Multiply into a new frame rather than in place on a filtered slice.
        away_data = patch_data[patch_data[1] == -1] * -1
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Join team features, negated opponent features and points data.

        Returns:
            DataFrame with infinities and ``'NULL'`` turned into NaN and
            every row containing a NaN dropped.
        """

        def load_features(y_val):
            # Pull, patch and inner-join the four feature sources.
            def_data = hfa_patch(
                pull_data.pull_model_features(y_val, 'defensive_stats',
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client())
            off_data = pull_data.pull_model_features(
                y_val, 'offensive_stats', update_dbs.mongodb_client)
            def_columns = set(def_data)
            off_data = off_data[[c for c in off_data if c not in def_columns]]
            off_data = hfa_patch(off_data, update_dbs.mysql_client())
            poss_data = hfa_patch(
                pull_data.pull_model_features(y_val, 'possessions',
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client())
            tar_data = hfa_patch(
                pull_data.pull_model_features(y_val, 'target',
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client())
            joined = def_data.join(off_data, how='inner')
            joined = joined.join(poss_data, how='inner')
            return joined.join(tar_data, how='inner')

        x_data = load_features('pts_scored')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        del x_data

        opponent_data = load_features('pts_allowed')

        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        try:
            cursor.execute('SELECT * from gamedata;')
            switch = pd.DataFrame(
                cursor.fetchall(),
                columns=['teamname', 'date', 'opponent', 'location'])
        finally:
            cursor.close()

        # Map "<date><team>" -> "<date><opponent>".
        idx_switch = {}
        for t, d, o, l in np.array(switch):
            idx_switch[str(d) + t.replace(' ', '_')] = (
                str(d) + o.replace(' ', '_'))

        opponent_data['idx'] = [idx_switch[k] for k in opponent_data.index]
        opponent_data = opponent_data.set_index('idx')
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={c: '-' + c for c in list(opponent_data)})

        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        return data.dropna(how='any')

    data = raw_data()
    x_data_stable = pull_data.share(update_dbs.mysql_client())
    data = data.join(x_data_stable, how='inner')
    data = data.reset_index()

    x_vals = 'share'
    for y_val in ['+pts', 'keras']:
        # The files are written as "<y_val>_<x_vals>_..."; the existence
        # check previously formatted the name the other way round, so it
        # never matched and every run refitted both models.
        stem = '%s_%s_regression_model' % (y_val, x_vals)
        pkl_path = os.path.join(model_storage, stem + '.pkl')
        h5_path = os.path.join(model_storage, stem + '.h5')
        if os.path.isfile(pkl_path) or os.path.isfile(h5_path):
            continue

        print('Loading %s_%s' % (x_vals, y_val))
        details = saved_models.stored_models[x_vals][y_val]
        model = details['model']
        scale = details['scale']
        features = data[details['features']]
        scale.fit(features)
        joblib.dump(
            scale,
            os.path.join(model_storage,
                         '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
        model.fit(scale.transform(features), np.ravel(data['share']))
        if y_val != 'keras':
            joblib.dump(model, pkl_path)
        else:
            model.model.save(h5_path)
        print('Stored %s_%s' % (x_vals, y_val))
def save():
    """Fit and pickle the per-target regressors that are not on disk yet.

    Iterates ``y_val`` over ``('pts_scored', 'pts_allowed')`` and ``x_vals``
    over the feature groups.  ``'possessions'`` models are trained against
    the ``'possessions'`` column from ``pull_data.pull_possessions``; every
    other group is trained against the ``'ppp'`` column from
    ``pull_data.pull_ppp``.  For each entry of
    ``saved_models.stored_models[y_val][x_vals]`` without an existing
    ``<y_val>_<x_vals>_<name>_model.pkl`` in ``model_storage``, the scaler
    and model are fitted and dumped with joblib.

    The two skip rules mean ``'pts_scored'`` trains on offensive_stats, all,
    possessions and target; ``'pts_allowed'`` trains on full-team, all,
    possessions and target; ``'defensive_stats'`` is never trained alone.
    """

    def hfa_patch(x, cnx):
        """Negate the '_HAspread_' columns of *x* for rows tagged -1.

        Each ``oddsdata`` row tags its favorite and underdog with +1/-1
        (favorite gets +1 when ``homeaway == 0``, otherwise -1) under the
        key ``str(oddsdate) + team`` with spaces replaced by underscores.
        Rows of *x* whose index label has no tag are dropped.

        Args:
            x: DataFrame of features indexed by those keys.
            cnx: open database connection providing ``cursor()``.

        Returns:
            DataFrame with the patched spread columns first, followed by
            the untouched columns.
        """
        print('Running HFA Patch')
        patch_stats = [stat for stat in x if '_HAspread_' in stat]
        keep_stats = [stat for stat in x if '_HAspread_' not in stat]
        keep_data = x[keep_stats]

        cursor = cnx.cursor()
        try:
            cursor.execute(
                'Select oddsdate, favorite, underdog, homeaway from oddsdata;')
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            t1_sign = 1 if l == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = t1_sign
            loc_adj[str(d) + t2.replace(' ', '_')] = -t1_sign

        # Explicit column labels keep this working when oddsdata is empty.
        location = pd.DataFrame(list(loc_adj.items()),
                                columns=[0, 1]).set_index(0)
        patch_data = x[patch_stats].join(location, how='left')
        home_data = patch_data[patch_data[1] == 1]
        # Multiply into a new frame rather than in place on a filtered slice.
        away_data = patch_data[patch_data[1] == -1] * -1
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('...Completed HFA Patch')
        return x

    def patched(y_val, source):
        # Pull one feature source and run it through the HFA patch.
        return hfa_patch(
            pull_data.pull_model_features(y_val, source,
                                          update_dbs.mongodb_client),
            update_dbs.mysql_client())

    def load_features(y_val, x_vals):
        # Assemble the feature frame for a feature group.  'full-team' is
        # defense + offense; 'all' adds possessions and target on top.
        if x_vals not in ('full-team', 'all'):
            return patched(y_val, x_vals)
        def_data = patched(y_val, 'defensive_stats')
        off_data = pull_data.pull_model_features(
            y_val, 'offensive_stats', update_dbs.mongodb_client)
        # Drop offensive columns that already exist on the defensive frame.
        def_columns = set(def_data)
        off_data = off_data[[c for c in off_data if c not in def_columns]]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        frame = def_data.join(off_data, how='inner')
        if x_vals == 'all':
            frame = frame.join(patched(y_val, 'possessions'), how='inner')
            frame = frame.join(patched(y_val, 'target'), how='inner')
        return frame

    # Pulled once here instead of once per (y_val, x_vals) combination.
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())

    for y_val in ['pts_scored', 'pts_allowed']:
        for x_vals in [
                'defensive_stats', 'offensive_stats', 'full-team', 'all',
                'possessions', 'target'
        ]:
            if (x_vals in ['defensive_stats', 'offensive_stats']
                    and y_val == 'pts_allowed'):
                continue
            if (x_vals in ['full-team', 'defensive_stats']
                    and y_val == 'pts_scored'):
                continue

            if x_vals == 'possessions':
                target = 'possessions'
                y_data = pull_data.pull_possessions(
                    y_val, update_dbs.mysql_client())
            else:
                target = 'ppp'
                y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())

            x_data = load_features(y_val, x_vals)
            x_data = x_data.loc[x_data.index.isin(train_index)]
            # Align features and target on their shared index labels.
            y_data = x_data.join(y_data, how='inner')[target]
            x_data = x_data.join(y_data, how='inner')[list(x_data)]

            stored = saved_models.stored_models[y_val][x_vals]
            for model_name, model_details in stored.items():
                model_path = os.path.join(
                    model_storage,
                    '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))
                if os.path.isfile(model_path):
                    continue
                print('Loading %s Values' % (model_name))
                model = model_details['model']
                scale = model_details['scale']
                features = x_data[model_details['features']]
                scale.fit(features)
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name)))
                model.fit(scale.transform(features), np.ravel(y_data))
                joblib.dump(model, model_path)
                print('Stored %s' % (model_name))