示例#1
0

for mod_num, (mod_name) in enumerate(os.listdir(final_model_folder)):
    progress(mod_num + 1, tot_mods, mod_name)  
    if mod_name == '.DS_Store':
        continue
    model_path = os.listdir(os.path.join(final_model_folder, mod_name))
    model = load(os.path.join(final_model_folder, mod_name, model_path[0]))
    model.fit(X0, Y0)
    model_preds = model.predict_proba(X2)
    model_preds = [i[1] for i in model_preds]
#    mod_preds.rename(columns = {0: mod_name}, inplace = True)
    pred_df[mod_name] = model_preds


vegas_preds = pg_query(PSQL.client, "SELECT * from ufc.winner_consensus_odds;")
vegas_preds.columns = ['bout_id', 'fighter_id', 'VEGAS']
vegas_preds.set_index(['bout_id', 'fighter_id'], inplace = True)
vegas_pred_df = pred_df.join(vegas_preds).dropna()


vegas_pred_df.to_csv(os.path.join(cur_path, 'data', 'pred_data_winner_ens_tuning.csv'))




train_data = pd.read_csv(os.path.join(cur_path, 'data', 'winner_data_validation.csv'))
train_data = train_data.sort_values('bout_id').set_index(['bout_id', 'fighter_id'])
X_train = train_data[[i for i in list(train_data) if i != 'winner']]
Y_train = train_data['winner'].apply(lambda x: x if x == 1 else 0)
示例#2
0
def update_avg_data():
    print('Updating Average Stats')

    stats = pull_stats()
    missing_bouts = pg_query(
        PSQL.client,
        "select b.bout_id from ufc.bouts b full join ufc.avg_stats avs on b.bout_id = avs.bout_id join ufc.fights f on f.fight_id = b.fight_id where avs.bout_id is NULL and date < NOW() + INTERVAL '1 day'"
    )
    fight_dates = pg_query(PSQL.client,
                           "SELECT fight_id, date from ufc.fights;")
    fight_dates = {k: v for k, v in fight_dates.values}
    tot_bouts = len(missing_bouts)
    for bout_num, (bout_id) in enumerate(missing_bouts.values):
        progress(bout_num + 1, tot_bouts)
        bout_info = pg_query(
            PSQL.client,
            "select bfx.bout_id, fighter_id, opponent_id, fight_id from ufc.bout_fighter_xref bfx join ufc.bouts b on bfx.bout_id = b.bout_id where bfx.bout_id = '%s';"
            % (bout_id[0]))
        if len(bout_info) != 2:
            continue
        bout_info.columns = [
            'bout_id', 'fighter_id', 'opponent_id', 'fight_id'
        ]
        for bout, fighter, opponent, fight in bout_info.values:
            fight_date = fight_dates[fight]
            f_stats = {}
            ostat = stats.loc[(stats['fighter_id'] == fighter)
                              & (stats['fight_date'] < fight_date)]
            ostat.sort_values('fight_date', inplace=True)
            dstat = stats.loc[(stats['opponent_id'] == fighter)
                              & (stats['fight_date'] < fight_date)]
            dstat.sort_values('fight_date', inplace=True)
            if len(dstat) != len(ostat):
                raise ValueError()
            if (dstat[['bout_id', 'fight_date']].values !=
                    ostat[['bout_id', 'fight_date']].values).any():
                raise ValueError()
            if len(ostat) == 0:
                continue
            f_stats[bout] = {'fight_date': fight_date, 'opponent_id': opponent}
            for col in cols:
                f_stats[bout]['avg_o_' + col] = ostat[col].mean()
            for col in cols:
                f_stats[bout]['avg_d_' + col] = dstat['d_' + col].mean()
            if len(f_stats.keys()) == 0:
                continue
            avg_data = pd.DataFrame.from_dict(f_stats).T

            for avg_d_bodyssa, avg_d_bodysss, avg_d_clinssa, avg_d_clinsss, avg_d_disssa,\
             avg_d_dissss,avg_d_gndssa, avg_d_gndsss,avg_d_headssa,avg_d_headsss,avg_d_kd,avg_d_legssa,avg_d_legsss,\
             avg_d_pas,avg_d_rev,avg_d_ssa,avg_d_sss,avg_d_sub,avg_d_tda,avg_d_tds,\
             avg_d_tsa, avg_d_tss, avg_o_bodyssa, avg_o_bodysss, avg_o_clinssa, avg_o_clinsss, avg_o_disssa,\
             avg_o_dissss,avg_o_gndssa,avg_o_gndsss,avg_o_headssa,avg_o_headsss,avg_o_kd,avg_o_legssa,avg_o_legsss,\
             avg_o_pas,avg_o_rev,avg_o_ssa,avg_o_sss,avg_o_sub,avg_o_tda,avg_o_tds,avg_o_tsa,avg_o_tss,\
             fight_date,opponent_id in avg_data.values:

                script = "INSERT INTO ufc.avg_stats(\
                	fighter_id, bout_id, avg_o_kd, avg_o_ssa, avg_o_sss, avg_o_tsa, avg_o_tss, avg_o_sub, avg_o_pas, avg_o_rev,\
                    avg_o_headssa, avg_o_headsss, avg_o_bodyssa, avg_o_bodysss, avg_o_legssa, avg_o_legsss, avg_o_disssa, avg_o_dissss,\
                    avg_o_clinssa, avg_o_clinsss, avg_o_gndssa, avg_o_gndsss, avg_o_tda, avg_o_tds, avg_d_kd, avg_d_ssa, avg_d_sss,\
                    avg_d_tsa, avg_d_tss, avg_d_sub, avg_d_pas, avg_d_rev, avg_d_headssa, avg_d_headsss, avg_d_bodyssa, avg_d_bodysss,\
                    avg_d_legssa, avg_d_legsss, avg_d_disssa, avg_d_dissss, avg_d_clinssa, avg_d_clinsss, avg_d_gndssa, avg_d_gndsss, avg_d_tda, avg_d_tds)\
                	VALUES ('%s', '%s', %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f,\
                    %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f,\
                    %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f);"                                                                                  % (fighter, bout, avg_d_bodyssa, avg_d_bodysss, \
                    avg_d_clinssa, avg_d_clinsss, avg_d_disssa, avg_d_dissss,avg_d_gndssa, avg_d_gndsss,avg_d_headssa,avg_d_headsss,\
                    avg_d_kd,avg_d_legssa,avg_d_legsss, avg_d_pas,avg_d_rev,avg_d_ssa,avg_d_sss,avg_d_sub,avg_d_tda,avg_d_tds, avg_d_tsa, avg_d_tss, \
                    avg_o_bodyssa, avg_o_bodysss, avg_o_clinssa, avg_o_clinsss, avg_o_disssa, avg_o_dissss,avg_o_gndssa,avg_o_gndsss,\
                    avg_o_headssa,avg_o_headsss,avg_o_kd,avg_o_legssa,avg_o_legsss, avg_o_pas,avg_o_rev,avg_o_ssa,avg_o_sss,\
                    avg_o_sub,avg_o_tda,avg_o_tds,avg_o_tsa,avg_o_tss)
                pg_insert(PSQL.client, script)
示例#3
0
def update_adj_data():
    print('Updating Adjusted Stats')
    stats = pull_stats()
    avg_data = pull_avg_data()
    missing_bouts = pg_query(
        PSQL.client,
        "select b.bout_id from ufc.bouts b full join ufc.adj_stats avs on b.bout_id = avs.bout_id join ufc.fights f on f.fight_id = b.fight_id where avs.bout_id is NULL and date < NOW() + INTERVAL '1 day'"
    )
    fight_dates = pg_query(PSQL.client,
                           "SELECT fight_id, date from ufc.fights;")
    fight_dates = {k: v for k, v in fight_dates.values}
    tot_bouts = len(missing_bouts)
    for bout_num, (bout_id) in enumerate(missing_bouts.values):
        #        if bout_id == 'bf719cf83cab229b':
        #            asdfasdf
        progress(bout_num + 1, tot_bouts)
        bout_info = pg_query(
            PSQL.client,
            "select bfx.bout_id, fighter_id, opponent_id, fight_id from ufc.bout_fighter_xref bfx join ufc.bouts b on bfx.bout_id = b.bout_id where bfx.bout_id = '%s';"
            % (bout_id[0]))
        if len(bout_info) != 2:
            continue
        bout_info.columns = [
            'bout_id', 'fighter_id', 'opponent_id', 'fight_id'
        ]
        for bout, fighter, opponent, fight in bout_info.values:
            fight_date = fight_dates[fight]
            f_stats = {}
            f_avgs = avg_data.loc[(avg_data['fighter_id'] == fighter)
                                  & (avg_data['fight_date'] == fight_date)]
            f_avgs.sort_values('fight_date', inplace=True)
            o_avgs = avg_data.loc[(avg_data['opponent_id'] == fighter)
                                  & (avg_data['fight_date'] == fight_date)]
            o_avgs.sort_values('fight_date', inplace=True)
            f_stats = stats.loc[(stats['fighter_id'] == fighter)
                                & (stats['fight_date'] == fight_date)]
            f_stats.sort_values('fight_date', inplace=True)
            common_bouts = set([
                j for j in [
                    i for i in f_avgs['bout_id'].values
                    if i in o_avgs['bout_id'].values
                ] if j in f_stats['bout_id'].values
            ])
            f_avgs = f_avgs.loc[f_avgs['bout_id'].apply(
                lambda x: True if x in common_bouts else False)].reset_index(
                    drop=True)
            o_avgs = o_avgs.loc[o_avgs['bout_id'].apply(
                lambda x: True if x in common_bouts else False)].reset_index(
                    drop=True)
            f_stats = f_stats.loc[f_stats['bout_id'].apply(
                lambda x: True if x in common_bouts else False)].reset_index(
                    drop=True)
            adj_stats = f_avgs[['bout_id', 'fight_date', 'opponent_id']]
            for col in cols:
                adj_stats['adj_d_' + col] = (
                    f_stats['d_' + col] / o_avgs['avg_o_' + col]
                ).apply(lambda x: x if x == x and x not in [np.inf, -np.inf]
                        else 1) * f_stats['d_' + col]
                adj_stats['adj_o_' +
                          col] = (f_stats[col] / o_avgs['avg_d_' + col]).apply(
                              lambda x: x if x == x and x not in
                              [np.inf, -np.inf] else 1) * f_stats[col]
            if len(adj_stats) == 0:
                continue
            for bout_id,fight_date,opponent_id, adj_d_bodyssa, adj_d_bodysss,\
                adj_d_clinssa,adj_d_clinsss,adj_d_disssa,adj_d_dissss,\
                adj_d_gndssa,adj_d_gndsss,adj_d_headssa,adj_d_headsss,adj_d_kd,adj_d_legssa,\
                adj_d_legsss,adj_d_pas,adj_d_rev,adj_d_ssa,adj_d_sss,adj_d_sub,adj_d_tda,adj_d_tds,\
                adj_d_tsa,adj_d_tss,adj_o_bodyssa,adj_o_bodysss,adj_o_clinssa,adj_o_clinsss,adj_o_disssa,\
                adj_o_dissss,adj_o_gndssa,adj_o_gndsss,adj_o_headssa,adj_o_headsss,adj_o_kd,adj_o_legssa,\
                adj_o_legsss,adj_o_pas,adj_o_rev,adj_o_ssa,adj_o_sss,adj_o_sub,adj_o_tda,adj_o_tds,\
                adj_o_tsa,adj_o_tss in adj_stats.values:

                script = "INSERT INTO ufc.adj_stats(fighter_id, bout_id, adj_d_bodyssa, adj_d_bodysss,\
                        adj_d_clinssa, adj_d_clinsss, adj_d_disssa, adj_d_dissss, adj_d_gndssa, adj_d_gndsss,\
                        adj_d_headssa, adj_d_headsss, adj_d_kd, adj_d_legssa, adj_d_legsss, adj_d_pas, \
                        adj_d_rev, adj_d_ssa, adj_d_sss, adj_d_sub, adj_d_tda, adj_d_tds, adj_d_tsa, adj_d_tss,\
                        adj_o_bodyssa, adj_o_bodysss, adj_o_clinssa, adj_o_clinsss, adj_o_disssa, adj_o_dissss, \
                        adj_o_gndssa, adj_o_gndsss, adj_o_headssa, adj_o_headsss, adj_o_kd, adj_o_legssa, \
                        adj_o_legsss, adj_o_pas, adj_o_rev, adj_o_ssa, adj_o_sss, adj_o_sub, adj_o_tda, \
                        adj_o_tds, adj_o_tsa, adj_o_tss) VALUES ('%s', '%s', %.5f, %.5f, %.5f, %.5f, %.5f, \
                        %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, \
                        %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, \
                        %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f);"                                                                                        % (fighter, \
                    bout_id, adj_d_bodyssa, adj_d_bodysss,adj_d_clinssa,adj_d_clinsss,adj_d_disssa,adj_d_dissss,\
                    adj_d_gndssa,adj_d_gndsss,adj_d_headssa,adj_d_headsss,adj_d_kd,adj_d_legssa,\
                    adj_d_legsss,adj_d_pas,adj_d_rev,adj_d_ssa,adj_d_sss,adj_d_sub,adj_d_tda,adj_d_tds,\
                    adj_d_tsa,adj_d_tss,adj_o_bodyssa,adj_o_bodysss,adj_o_clinssa,adj_o_clinsss,adj_o_disssa,\
                    adj_o_dissss,adj_o_gndssa,adj_o_gndsss,adj_o_headssa,adj_o_headsss,adj_o_kd,adj_o_legssa,\
                    adj_o_legsss,adj_o_pas,adj_o_rev,adj_o_ssa,adj_o_sss,adj_o_sub,adj_o_tda,adj_o_tds,\
                    adj_o_tsa,adj_o_tss)
                pg_insert(PSQL.client, script)
示例#4
0
import os, sys
try:  # if running in CLI
    cur_path = os.path.abspath(__file__)
except NameError:  # if running in IDE
    cur_path = os.getcwd()

while cur_path.split('/')[-1] != 'ufc':
    cur_path = os.path.abspath(os.path.join(cur_path, os.pardir))
sys.path.insert(1, os.path.join(cur_path, 'lib', 'python3.7', 'site-packages'))

from _connections import db_connection
from db.pop_psql import pg_drop, pg_query

PSQL = db_connection('psql')
bouts = pg_query(
    PSQL.client,
    "select b.bout_id from ufc.bouts b join ufc.fights f on f.fight_id = b.fight_id where date > '1-1-2019';"
)

'1218cba28d4dce94' in [i[0] for i in bouts.values]
for bout in bouts.values:
    pg_drop(
        PSQL.client,
        "DELETE from ufc.avg_stats avgs where avgs.bout_id = '%s';" %
        (bout[0]))
    pg_drop(
        PSQL.client,
        "DELETE from ufc.adj_stats avgs where avgs.bout_id = '%s';" %
        (bout[0]))
    pg_drop(
        PSQL.client,
        "DELETE from ufc.adj_avg_stats avgs where avgs.bout_id = '%s';" %