def retrieve_data():
    """Build the feature matrix and target vector for the 'pts_scored' model.

    Pulls the defensive, offensive, possession and target feature groups,
    passes each through ``hfa_patch``, inner-joins them on their index, keeps
    only rows whose index appears in the training index, and attaches the
    points returned by ``pull_data.pull_pts('offensive', ...)``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` contains the hand-picked feature columns
        listed at the bottom of this function and ``Y`` is the ``'pts'``
        column; both come from the same joined frame after ``reset_index()``.
    """
    target = 'pts_scored'
    points = pull_data.pull_pts('offensive', update_dbs.mysql_client())

    defense = hfa_patch(
        pull_data.pull_model_features(target, 'defensive_stats',
                                      update_dbs.mongodb_client),
        update_dbs.mysql_client())

    # Offensive columns already present in the defensive frame are dropped
    # before patching.
    offense = pull_data.pull_model_features(target, 'offensive_stats',
                                            update_dbs.mongodb_client)
    defense_columns = list(defense)
    offense = offense[[
        column for column in list(offense) if column not in defense_columns
    ]]
    offense = hfa_patch(offense, update_dbs.mysql_client())

    # Pull and patch the remaining groups first, then join everything.
    frames = [defense, offense]
    for group in ('possessions', 'target'):
        frames.append(
            hfa_patch(
                pull_data.pull_model_features(target, group,
                                              update_dbs.mongodb_client),
                update_dbs.mysql_client()))

    features = frames[0]
    for frame in frames[1:]:
        features = features.join(frame, how='inner')
    del frames, defense, offense

    train_keys = pull_data.pull_train_index(update_dbs.mysql_client())
    features = features.loc[features.index.isin(train_keys)]

    # Restrict the features to rows that also have a points value, then
    # attach the points themselves.
    feature_columns = list(features)
    features = features.join(points, how='inner')[feature_columns]
    joined = features.join(points, how='inner').reset_index()

    Y = joined['pts']
    selected = [
        'expected_pts_pg_for',
        '75_g_HAspread_for_floor-percentage',
        'pregame_pts_pg_for',
        'expected_poss_pg_for',
        'expected_ppp_for',
        '50_game_avg_15_g_HAweight_allow_assist--per--turnover-ratio',
        '75_g_HAspread_allow_points-per-game',
        '100_g_HAspread_allow_block-pct',
        'pregame_poss_pg_for',
        '10_game_avg_30_g_HAweight_allow_personal-foul-pct',
        'expected_turnovers-per-possession_for',
        'expected_offensive-rebounding-pct_for',
        '30_g_HAspread_for_floor-percentage',
        'expected_ftm-per-100-possessions_for',
        'expected_effective-field-goal-pct_for',
        'pregame_effective-field-goal-pct_for',
        '100_g_HAspread_allow_assist--per--turnover-ratio',
        '30_g_HAspread_allow_floor-percentage',
        '10_game_avg_30_g_HAweight_allow_two-point-rate',
        '5_game_avg_50_g_HAweight_for_points-per-game`/`possessions-per-game',
        '10_game_avg_50_g_Tweight_for_effective-field-goal-pct',
        '30_game_avg_5_g_Tweight_for_points-per-game`/`possessions-per-game',
    ]
    X = joined[selected]
    return X, Y
Пример #2
0
def raw_data():
    """Assemble the raw team-vs-opponent frame with the points target.

    Steps, all joined on the frames' shared index:

    1. Build the 'pts_scored' feature frame (defensive, offensive,
       possessions and target groups, each passed through ``hfa_patch``),
       keep only rows in the training index and rows that have a points
       value.
    2. Build the same four-group frame for 'pts_allowed', re-key every row
       to the key of its opponent using the ``gamedata`` table, negate all
       values and prefix every column name with '-'.
    3. Left-join the team frame onto the opponent frame, inner-join the
       points, turn +/-inf and the string 'NULL' into NaN and drop every row
       that contains a NaN.

    Returns:
        pandas.DataFrame: the cleaned, joined frame described above.

    Raises:
        KeyError: if an index key of the 'pts_allowed' frame has no matching
            ``str(date) + teamname`` entry in ``gamedata``.
    """

    def _feature_frame(y_val):
        """Inner-join the four HFA-patched feature groups pulled for y_val."""
        def_data = pull_data.pull_model_features(y_val, 'defensive_stats',
                                                 update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features(y_val, 'offensive_stats',
                                                 update_dbs.mongodb_client)
        # Keep only offensive columns the defensive frame does not already
        # have (DataFrame.join raises on overlapping names without a suffix).
        def_cols = set(def_data)
        off_data = off_data[[c for c in list(off_data) if c not in def_cols]]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features(y_val, 'possessions',
                                                  update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features(y_val, 'target',
                                                 update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        frame = def_data.join(off_data, how='inner')
        frame = frame.join(poss_data, how='inner')
        return frame.join(tar_data, how='inner')

    # --- team side -------------------------------------------------------
    x_data = _feature_frame('pts_scored')
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    x_data = x_data.loc[x_data.index.isin(train_index)]
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    # Inner join only to restrict rows; the points column itself is added at
    # the very end.
    team_data = x_data.join(y_data, how='inner')[list(x_data)]
    del x_data

    # --- opponent side ---------------------------------------------------
    opponent_data = _feature_frame('pts_allowed')

    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()
    try:
        cursor.execute('SELECT * from gamedata;')
        # NOTE(review): SELECT * is assumed to return exactly these four
        # columns in this order; the DataFrame constructor fails otherwise.
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
    finally:
        # Release the cursor even if the query fails, as hfa_patch does.
        cursor.close()

    # Map "<date><team>" -> "<date><opponent>" (spaces become underscores).
    idx_switch = {
        str(d) + t.replace(' ', '_'): str(d) + o.replace(' ', '_')
        for t, d, o, _location in np.array(switch)
    }
    opponent_data['idx'] = [idx_switch[key] for key in opponent_data.index]
    opponent_data = opponent_data.set_index('idx')
    opponent_data *= -1
    opponent_data = opponent_data.rename(
        columns={i: '-' + i
                 for i in list(opponent_data)})

    # --- combine and clean ----------------------------------------------
    data = opponent_data.join(team_data)
    data = data.join(y_data, how='inner')
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.replace('NULL', np.nan)
    data = data.dropna(how='any')
    return data
Пример #3
0
# Put the project's helper directories on the import path (presumably so the
# bare-name imports of pull_data / update_dbs / saved_models below resolve).
# `sys`, `os` and `cur_path` are expected to be defined above this point.
# NOTE(review): list.insert(-1, ...) places each entry just *before* the
# current last element of sys.path rather than at the end — confirm that this
# ordering is intended.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

# Directories (relative to cur_path) for results, feature dumps and
# persisted models.
output_folder = os.path.join(cur_path, 'model_results')
features_folder = os.path.join(cur_path, 'feature_dumps')
model_storage = os.path.join(cur_path, 'saved_models')

import numpy as np
import pull_data
import update_dbs
import random
import saved_models
import pandas as pd
from sklearn.model_selection import cross_validate, StratifiedKFold
# Module-level side effect: the training index is pulled from MySQL at import
# time and shuffled in place with a fixed seed, so the shuffled order is the
# same on every run.
train_index = pull_data.pull_train_index(update_dbs.mysql_client())
random.seed(86)
random.shuffle(train_index)
# Empty module-level dict; nothing in the code visible here writes to it.
derived_data = {}


def hfa_patch(x, cnx):
    # Partition the column names of `x` into those that contain the
    # '_HAspread_' marker (patch_stats) and all others (keep_stats).
    # NOTE(review): the body visible here ends after this loop, so as written
    # the function returns None and never uses `cnx` — it looks truncated;
    # confirm against the full source.
    print('Running HFA Patch')
    keep_stats = []
    patch_stats = []
    for stat in list(x):
        try:
            # split() yields a single-element list when the marker is absent,
            # so indexing [1] raises IndexError for non-HAspread columns.
            stat.split('_HAspread_')[1]
            patch_stats.append(stat)
        except IndexError:
            keep_stats.append(stat)
Пример #4
0
def save():
    """Fit and persist the stored 'ou', 'winner' and 'line' models.

    For every target in ('ou', 'winner', 'line') and every feature kind in
    ('raw', '+pts'), each entry of ``saved_models.stored_models[sort][kind]``
    (except the one named 'keras') whose ``<sort>_<kind>_<name>_model.pkl``
    file does not yet exist in ``model_storage`` is trained: its scaler is
    fitted on the entry's feature columns and dumped with joblib, then the
    model is fitted on the scaled features and dumped as well.

    Returns:
        None. The results are the files written to ``model_storage``.
    """
    # NOTE(review): the shuffled training index is not used anywhere in this
    # function. It is kept so the database read and the seeded global RNG
    # are exercised exactly as before; remove once confirmed unnecessary.
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    random.seed(86)
    random.shuffle(train_index)

    def hfa_patch(x, cnx):
        """Negate the '_HAspread_' columns on rows flagged as away.

        A location flag per ``str(date) + team`` key (spaces in team names
        replaced by underscores) is built from the ``oddsdata`` table: when
        ``homeaway`` is 0 the favorite gets 1 and the underdog -1, otherwise
        the other way round. The flag is left-joined onto the '_HAspread_'
        columns of ``x`` by index.

        Args:
            x: DataFrame whose index is expected to use the key format above.
            cnx: open database connection used to read ``oddsdata``.

        Returns:
            DataFrame holding the '_HAspread_' columns (rows flagged 1
            first, then the sign-flipped rows flagged -1) joined with the
            remaining columns of ``x``. Rows without a flag are dropped.
        """
        print('Running HFA Patch')
        columns = list(x)
        patch_stats = [stat for stat in columns if '_HAspread_' in stat]
        keep_stats = [stat for stat in columns if '_HAspread_' not in stat]

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        try:
            cursor.execute(
                'Select oddsdate, favorite, underdog, homeaway from oddsdata;')
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, loc_flag in np.array(patch):
            favorite_sign = 1 if loc_flag == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = favorite_sign
            loc_adj[str(d) + t2.replace(' ', '_')] = -favorite_sign
        del patch

        # The joined flag lands in a column labelled 1 (column 0 of the
        # two-column frame becomes the index).
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0),
                                     how='left')
        # Plain multiplication instead of in-place `*=` on a slice avoids
        # pandas' SettingWithCopyWarning; the flag column is negated too but
        # is deleted right below.
        away_data = patch_data[patch_data[1] == -1] * -1
        home_data = patch_data[patch_data[1] == 1]
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Assemble the raw team-vs-opponent frame with the points target.

        The 'pts_scored' feature frame (restricted to the training index and
        to rows with a points value) is left-joined onto the negated,
        '-'-prefixed 'pts_allowed' frame after that frame has been re-keyed
        to each row's opponent via ``gamedata``. Points are inner-joined,
        +/-inf and 'NULL' become NaN and rows containing NaN are dropped.

        Raises:
            KeyError: if an opponent-frame key is missing from ``gamedata``.
        """

        def _feature_frame(y_val):
            """Inner-join the four HFA-patched feature groups for y_val."""
            def_data = pull_data.pull_model_features(
                y_val, 'defensive_stats', update_dbs.mongodb_client)
            def_data = hfa_patch(def_data, update_dbs.mysql_client())
            off_data = pull_data.pull_model_features(
                y_val, 'offensive_stats', update_dbs.mongodb_client)
            # Keep only offensive columns not already in the defensive frame.
            def_cols = set(def_data)
            off_data = off_data[[
                c for c in list(off_data) if c not in def_cols
            ]]
            off_data = hfa_patch(off_data, update_dbs.mysql_client())
            poss_data = pull_data.pull_model_features(
                y_val, 'possessions', update_dbs.mongodb_client)
            poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
            tar_data = pull_data.pull_model_features(
                y_val, 'target', update_dbs.mongodb_client)
            tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
            frame = def_data.join(off_data, how='inner')
            frame = frame.join(poss_data, how='inner')
            return frame.join(tar_data, how='inner')

        x_data = _feature_frame('pts_scored')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        del x_data

        opponent_data = _feature_frame('pts_allowed')

        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        try:
            cursor.execute('SELECT * from gamedata;')
            switch = pd.DataFrame(
                cursor.fetchall(),
                columns=['teamname', 'date', 'opponent', 'location'])
        finally:
            # Release the cursor even on failure, as hfa_patch does.
            cursor.close()

        # Map "<date><team>" -> "<date><opponent>".
        idx_switch = {
            str(d) + t.replace(' ', '_'): str(d) + o.replace(' ', '_')
            for t, d, o, _location in np.array(switch)
        }
        opponent_data['idx'] = [
            idx_switch[key] for key in opponent_data.index
        ]
        opponent_data = opponent_data.set_index('idx')
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i
                     for i in list(opponent_data)})

        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line,
                                                           how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }

    # The targets are columns of the frames joined above, so read them from
    # there instead of recomputing every join a second time.
    target_column = {'winner': 'outcome', 'line': 'line', 'ou': 'ou'}
    all_y_data = {}
    for sort, frames in all_x_data.items():
        all_y_data[sort] = {
            kind: frame[target_column[sort]]
            for kind, frame in frames.items()
        }

    # Drop the references to the source frames; only the joined copies are
    # needed from here on.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line
    random.seed(86)

    for sort in ['ou', 'winner', 'line']:
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    continue
                model_path = os.path.join(
                    model_storage,
                    '%s_%s_%s_model.pkl' % (sort, kind, model_name))
                if os.path.isfile(model_path):
                    # Already trained and stored on an earlier run.
                    continue
                print('...storing %s' % (model_name))

                model = model_details['model']
                scale = model_details['scale']
                features = all_x_data[sort][kind][model_details['features']]

                scale.fit(features)
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))

                model.fit(scale.transform(features),
                          np.ravel(all_y_data[sort][kind]))
                joblib.dump(model, model_path)

                print('Stored %s' % (model_name))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
Пример #5
0
def save():
    """Fit and persist the pace/ppp regressors and the 'share' regressors.

    Part 1: for every side in ('offense', 'defense') and every target in
    ('pace', 'ppp'), train and store the scaler and model found in
    ``saved_models.stored_models[side][target]`` unless
    ``<target>_<side>_regression_model.pkl`` already exists.

    Part 2: for the 'share' target, train and store the '+pts' model (joblib
    ``.pkl``) and the 'keras' model (``.h5`` via ``model.model.save``) on the
    raw team-vs-opponent frame joined with ``pull_data.share``.

    Returns:
        None. The results are the files written to ``model_storage``.
    """

    def hfa_patch(x, cnx):
        """Negate the '_HAspread_' columns on rows flagged as away.

        A location flag per ``str(date) + team`` key (spaces in team names
        replaced by underscores) is built from the ``oddsdata`` table: when
        ``homeaway`` is 0 the favorite gets 1 and the underdog -1, otherwise
        the other way round. The flag is left-joined onto the '_HAspread_'
        columns of ``x`` by index.

        Args:
            x: DataFrame whose index is expected to use the key format above.
            cnx: open database connection used to read ``oddsdata``.

        Returns:
            DataFrame holding the '_HAspread_' columns (rows flagged 1
            first, then the sign-flipped rows flagged -1) joined with the
            remaining columns of ``x``. Rows without a flag are dropped.
        """
        print('Running HFA Patch')
        columns = list(x)
        patch_stats = [stat for stat in columns if '_HAspread_' in stat]
        keep_stats = [stat for stat in columns if '_HAspread_' not in stat]

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        try:
            cursor.execute(
                'Select oddsdate, favorite, underdog, homeaway from oddsdata;')
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, loc_flag in np.array(patch):
            favorite_sign = 1 if loc_flag == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = favorite_sign
            loc_adj[str(d) + t2.replace(' ', '_')] = -favorite_sign
        del patch

        # The joined flag lands in a column labelled 1 (column 0 of the
        # two-column frame becomes the index).
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0),
                                     how='left')
        # Plain multiplication instead of in-place `*=` on a slice avoids
        # pandas' SettingWithCopyWarning; the flag column is negated too but
        # is deleted right below.
        away_data = patch_data[patch_data[1] == -1] * -1
        home_data = patch_data[patch_data[1] == 1]
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Assemble the raw team-vs-opponent frame with the points target.

        The 'pts_scored' feature frame (restricted to the training index and
        to rows with a points value) is left-joined onto the negated,
        '-'-prefixed 'pts_allowed' frame after that frame has been re-keyed
        to each row's opponent via ``gamedata``. Points are inner-joined,
        +/-inf and 'NULL' become NaN and rows containing NaN are dropped.

        Raises:
            KeyError: if an opponent-frame key is missing from ``gamedata``.
        """

        def _feature_frame(y_val):
            """Inner-join the four HFA-patched feature groups for y_val."""
            def_data = pull_data.pull_model_features(
                y_val, 'defensive_stats', update_dbs.mongodb_client)
            def_data = hfa_patch(def_data, update_dbs.mysql_client())
            off_data = pull_data.pull_model_features(
                y_val, 'offensive_stats', update_dbs.mongodb_client)
            # Keep only offensive columns not already in the defensive frame.
            def_cols = set(def_data)
            off_data = off_data[[
                c for c in list(off_data) if c not in def_cols
            ]]
            off_data = hfa_patch(off_data, update_dbs.mysql_client())
            poss_data = pull_data.pull_model_features(
                y_val, 'possessions', update_dbs.mongodb_client)
            poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
            tar_data = pull_data.pull_model_features(
                y_val, 'target', update_dbs.mongodb_client)
            tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
            frame = def_data.join(off_data, how='inner')
            frame = frame.join(poss_data, how='inner')
            return frame.join(tar_data, how='inner')

        x_data = _feature_frame('pts_scored')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        del x_data

        opponent_data = _feature_frame('pts_allowed')

        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        try:
            cursor.execute('SELECT * from gamedata;')
            switch = pd.DataFrame(
                cursor.fetchall(),
                columns=['teamname', 'date', 'opponent', 'location'])
        finally:
            # Release the cursor even on failure, as hfa_patch does.
            cursor.close()

        # Map "<date><team>" -> "<date><opponent>".
        idx_switch = {
            str(d) + t.replace(' ', '_'): str(d) + o.replace(' ', '_')
            for t, d, o, _location in np.array(switch)
        }
        opponent_data['idx'] = [
            idx_switch[key] for key in opponent_data.index
        ]
        opponent_data = opponent_data.set_index('idx')
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i
                     for i in list(opponent_data)})

        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    # ------------------------------------------------------------------
    # Part 1: pace / ppp regressors per side.
    # ------------------------------------------------------------------
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    for x_vals in ['offense', 'defense']:
        for y_val in ['pace', 'ppp']:
            model_path = os.path.join(
                model_storage,
                '%s_%s_regression_model.pkl' % (y_val, x_vals))
            if os.path.isfile(model_path):
                # Already stored; skip the data pull as well.
                continue

            # The two targets differ only in the pull function and in the
            # name of the target column.
            if y_val == 'ppp':
                data = pull_data.ppp(update_dbs.mysql_client(), x_vals)
                target = y_val
            else:
                data = pull_data.pace(update_dbs.mysql_client(), x_vals)
                target = 'possessions'
            x_feats = list(data)
            x_feats.remove(target)
            data = data[x_feats].join(data[[target]], how='inner')
            data = data.loc[data.index.isin(train_index)]
            x_data = data[x_feats]
            y_data = data[[target]]

            print('Loading %s_%s' % (x_vals, y_val))
            stored = saved_models.stored_models[x_vals][y_val]
            model = stored['model']
            scale = stored['scale']
            features = x_data[stored['features']]
            scale.fit(features)
            joblib.dump(
                scale,
                os.path.join(
                    model_storage,
                    '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
            model.fit(scale.transform(features), np.ravel(y_data))
            joblib.dump(model, model_path)
            print('Stored %s_%s' % (x_vals, y_val))

    # ------------------------------------------------------------------
    # Part 2: 'share' regressors.
    # ------------------------------------------------------------------
    x_vals = 'share'

    def _share_paths(y_val):
        """Return the (.pkl, .h5) model paths exactly as they are written."""
        stem = os.path.join(model_storage,
                            '%s_%s_regression_model' % (y_val, x_vals))
        return stem + '.pkl', stem + '.h5'

    # The previous existence check built the name as (x_vals, y_val) while
    # the files are written as (y_val, x_vals), so it never matched and the
    # models were retrained on every run. Check the name that is written.
    pending = [
        y_val for y_val in ['+pts', 'keras']
        if not any(os.path.isfile(path) for path in _share_paths(y_val))
    ]
    if not pending:
        return

    data = raw_data()
    x_data_stable = pull_data.share(update_dbs.mysql_client())
    data = data.join(x_data_stable, how='inner')
    data = data.reset_index()
    for y_val in pending:
        print('Loading %s_%s' % (x_vals, y_val))
        pkl_path, h5_path = _share_paths(y_val)

        stored = saved_models.stored_models[x_vals][y_val]
        model = stored['model']
        scale = stored['scale']
        features = data[stored['features']]

        scale.fit(features)
        joblib.dump(
            scale,
            os.path.join(model_storage,
                         '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
        model.fit(scale.transform(features), np.ravel(data['share']))
        if y_val != 'keras':
            joblib.dump(model, pkl_path)
        else:
            # The keras entry is saved through its inner `.model` attribute
            # in HDF5 format instead of being pickled.
            model.model.save(h5_path)

        print('Stored %s_%s' % (x_vals, y_val))
Пример #6
0
def save():
    """Fit and persist the stored 'pts_scored' / 'pts_allowed' models.

    For every target and feature-set combination that is not skipped below,
    the matching features are pulled, passed through ``hfa_patch``,
    restricted to the training index and aligned with the target
    ('possessions' for the possessions feature set, 'ppp' otherwise). Each
    entry of ``saved_models.stored_models[y_val][x_vals]`` whose
    ``<y_val>_<x_vals>_<name>_model.pkl`` file is missing from
    ``model_storage`` then gets its scaler and model fitted and dumped with
    joblib.

    Returns:
        None. The results are the files written to ``model_storage``.
    """

    def hfa_patch(x, cnx):
        """Negate the '_HAspread_' columns on rows flagged as away.

        A location flag per ``str(date) + team`` key (spaces in team names
        replaced by underscores) is built from the ``oddsdata`` table: when
        ``homeaway`` is 0 the favorite gets 1 and the underdog -1, otherwise
        the other way round. The flag is left-joined onto the '_HAspread_'
        columns of ``x`` by index.

        Args:
            x: DataFrame whose index is expected to use the key format above.
            cnx: open database connection used to read ``oddsdata``.

        Returns:
            DataFrame holding the '_HAspread_' columns (rows flagged 1
            first, then the sign-flipped rows flagged -1) joined with the
            remaining columns of ``x``. Rows without a flag are dropped.
        """
        print('Running HFA Patch')
        columns = list(x)
        patch_stats = [stat for stat in columns if '_HAspread_' in stat]
        keep_stats = [stat for stat in columns if '_HAspread_' not in stat]

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        try:
            cursor.execute(
                'Select oddsdate, favorite, underdog, homeaway from oddsdata;')
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, loc_flag in np.array(patch):
            favorite_sign = 1 if loc_flag == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = favorite_sign
            loc_adj[str(d) + t2.replace(' ', '_')] = -favorite_sign
        del patch

        # The joined flag lands in a column labelled 1 (column 0 of the
        # two-column frame becomes the index).
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0),
                                     how='left')
        # Plain multiplication instead of in-place `*=` on a slice avoids
        # pandas' SettingWithCopyWarning; the flag column is negated too but
        # is deleted right below.
        away_data = patch_data[patch_data[1] == -1] * -1
        home_data = patch_data[patch_data[1] == 1]
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('...Completed HFA Patch')
        return x

    def _patched(y_val, group):
        """Pull one feature group for y_val and run it through hfa_patch."""
        frame = pull_data.pull_model_features(y_val, group,
                                              update_dbs.mongodb_client)
        return hfa_patch(frame, update_dbs.mysql_client())

    def _team_features(y_val):
        """Inner-join the patched defensive and offensive feature groups."""
        def_data = _patched(y_val, 'defensive_stats')
        off_data = pull_data.pull_model_features(y_val, 'offensive_stats',
                                                 update_dbs.mongodb_client)
        # Keep only offensive columns not already in the defensive frame.
        def_cols = set(def_data)
        off_data = off_data[[c for c in list(off_data) if c not in def_cols]]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        return def_data.join(off_data, how='inner')

    for y_val in ['pts_scored', 'pts_allowed']:
        for x_vals in [
                'defensive_stats', 'offensive_stats', 'full-team', 'all',
                'possessions', 'target'
        ]:
            # NOTE(review): together these two filters skip
            # 'defensive_stats' for both targets, so that feature set is
            # never trained here — confirm that this is intended.
            if x_vals in ['defensive_stats', 'offensive_stats'
                          ] and y_val == 'pts_allowed':
                continue
            if x_vals in ['full-team', 'defensive_stats'
                          ] and y_val == 'pts_scored':
                continue

            # Work out which models still need storing before pulling any
            # data, so fully stored combinations cost no database reads.
            pending = [
                (model_name, model_details) for model_name, model_details in
                saved_models.stored_models[y_val][x_vals].items()
                if not os.path.isfile(
                    os.path.join(
                        model_storage, '%s_%s_%s_model.pkl' %
                        (y_val, x_vals, model_name)))
            ]
            if not pending:
                continue

            if x_vals == 'possessions':
                target = 'possessions'
                y_data = pull_data.pull_possessions(y_val,
                                                    update_dbs.mysql_client())
                x_data = _patched(y_val, x_vals)
            else:
                target = 'ppp'
                y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())
                if x_vals == 'full-team':
                    x_data = _team_features(y_val)
                elif x_vals == 'all':
                    x_data = _team_features(y_val)
                    poss_data = _patched(y_val, 'possessions')
                    tar_data = _patched(y_val, 'target')
                    x_data = x_data.join(poss_data, how='inner')
                    x_data = x_data.join(tar_data, how='inner')
                    del poss_data, tar_data
                else:
                    # 'target', 'offensive_stats' (and 'defensive_stats',
                    # were it not skipped above): a single feature group.
                    x_data = _patched(y_val, x_vals)

            # Common tail: restrict to the training index, then align the
            # features and the target on the rows both of them have.
            train_index = pull_data.pull_train_index(
                update_dbs.mysql_client())
            x_data = x_data.loc[x_data.index.isin(train_index)]
            y_data = x_data.join(y_data, how='inner')[target]
            x_data = x_data.join(y_data, how='inner')[list(x_data)]

            for model_name, model_details in pending:
                print('Loading %s Values' % (model_name))

                model = model_details['model']
                scale = model_details['scale']
                features = x_data[model_details['features']]

                scale.fit(features)
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage, '%s_%s_%s_scaler.pkl' %
                        (y_val, x_vals, model_name)))

                model.fit(scale.transform(features), np.ravel(y_data))
                joblib.dump(
                    model,
                    os.path.join(
                        model_storage, '%s_%s_%s_model.pkl' %
                        (y_val, x_vals, model_name)))

                print('Stored %s' % (model_name))