Exemplo n.º 1
0
def sim_scheduled_games(sch_game_dicts, n):
    """
    Simulate every scheduled game and tabulate the results per trifecta.

    For each game in `sch_game_dicts` whose enhanced player list is available,
    `sc.runGame_get_WPS_frequencies` is run with `n` as its last argument and
    the result is stored under the game name.  The collected results are
    written to `fh.prediction_dict_file_path` and also merged (on the index)
    with a table that lists the first, second and third entry of every
    trifecta returned by `CombosAndPermuts().get_trifectas()`.

    Parameters:
        sch_game_dicts: dict mapping game name -> scheduled game data.
        n: value passed to the simulator and used as the divisor for every
           game column of the returned table (presumably the number of
           simulated games -- confirm against `sc`).

    Returns:
        (full_df, prediction_dict) where `full_df` is the merged DataFrame with
        each game column divided by `n`, and `prediction_dict` holds the raw,
        undivided simulator output per game.
    """
    trifectas = CombosAndPermuts().get_trifectas()
    # Index labels are the three trifecta entries concatenated, e.g. "123".
    trifecta_labels = [str(t[0]) + str(t[1]) + str(t[2]) for t in trifectas]
    wps_table = pd.DataFrame(
        data={
            'First': [t[0] for t in trifectas],
            'Second': [t[1] for t in trifectas],
            'Third': [t[2] for t in trifectas],
        },
        index=trifecta_labels,
    )

    prediction_dict = {}
    for game_name, game_data in sch_game_dicts.items():
        # Fetched per game, exactly as before; not hoisted because it is not
        # visible here whether the simulator mutates the dict.
        pwin_dict = rds.get_pwin_dict()

        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            # No usable player list for this game: it gets no prediction.
            continue

        player_list = [':'.join(ut.sorted_and_lowered(player_dict['name']))
                       for player_dict in enh_player_list]
        prediction_dict[game_name] = sc.runGame_get_WPS_frequencies(
            player_list, pwin_dict, n)

    pred_dict_df = pd.DataFrame.from_dict(prediction_dict)
    full_df = pd.merge(wps_table, pred_dict_df,
                       left_index=True, right_index=True)

    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)

    # Games that were skipped above have no column in the merged table.
    # Check for that explicitly instead of swallowing every exception.
    for game_name in sch_game_dicts:
        if game_name in full_df.columns:
            full_df[game_name] = full_df[game_name] / n

    return full_df, prediction_dict
Exemplo n.º 2
0
def get_pos_freq_actuals(prediction_dict, res_games_dict):
    """
    Build and persist the "actual results" counterpart of `prediction_dict`.

    The result has the same game names and the same inner keys as
    `prediction_dict`, with every count set to 0 except the key formed by
    concatenating the first WIN, PLACE and SHOW entries of the matching
    result game, which is set to 1.  The result game is looked up in
    `res_games_dict` under the game name with "sch" replaced by "res";
    games without a result keep all-zero counts.

    Nothing is computed when `fh.prediction_actual_results_dict_file_path`
    already exists.

    Parameters:
        prediction_dict: dict mapping game name -> dict keyed by outcome
            string.  It is NOT modified.
        res_games_dict: dict mapping result game name -> dict with 'WIN',
            'PLACE' and 'SHOW' sequences.

    Returns:
        The new results dict, or None when the results file already exists.

    Raises:
        KeyError: if an actual outcome is not one of the keys predicted for
            that game.
    """
    if fh.file_exists(fh.prediction_actual_results_dict_file_path):
        return None

    # Build fresh inner dicts.  The previous `prediction_dict.copy()` was a
    # shallow copy, so zeroing its values also wiped the caller's predictions.
    res_freq_dict = dict(
        (game_name, dict.fromkeys(game_data.keys(), 0))
        for game_name, game_data in prediction_dict.items()
    )

    for game_name in prediction_dict:
        res_dict = res_games_dict.get(game_name.replace("sch", "res"))
        if res_dict is None:
            continue
        top_1 = res_dict['WIN'][0]
        top_2 = res_dict['PLACE'][0]
        top_3 = res_dict['SHOW'][0]
        top_123 = str(top_1) + str(top_2) + str(top_3)
        res_freq_dict[game_name][top_123] += 1

    fh.write_dict(fh.prediction_actual_results_dict_file_path, res_freq_dict)
    return res_freq_dict
Exemplo n.º 3
0
def simulate_scheduled_games(sch_game_dicts, ji, n = 12):
    """
    Run `sc.runGame_get_WPSL_percentages` for every scheduled game.

    Games for which `get_enhanced_player_list` returns None are skipped.
    The collected results are written to `fh.prediction_dict_file_path`.

    Parameters:
        sch_game_dicts: dict mapping game name -> scheduled game data.
        ji: unused; kept only so existing callers keep working.  The old body
            incremented it locally, which had no effect outside the function.
        n: value forwarded to the simulator (default 12).

    Returns:
        dict mapping game name -> simulator output for that game.
    """
    prediction_dict = {}
    for game_name, game_data in sch_game_dicts.items():
        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            continue

        # Each entry is identified by its normalised names joined with ':'.
        player_list = [':'.join(ut.sorted_and_lowered(player_dict['name']))
                       for player_dict in enh_player_list]
        prediction_dict[game_name] = sc.runGame_get_WPSL_percentages(
            player_list, n)

    # TODO: find a way to order this dictionary by starting post.
    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)
    return prediction_dict
Exemplo n.º 4
0
def get_player_season_records():
    """
    Return the season records of all players for the top-level data directory.

    The records are read from `fh.player_season_records_file_path` when that
    file exists.  Otherwise they are built with
    `create_player_season_records()` and written to that path before being
    returned.

    The result is a dictionary keyed by player name, e.g.:
    {eggy:richard [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 3]]}
    """
    fh.set_up_paths(DATA_DIR)

    if not fh.file_exists(fh.player_season_records_file_path):
        # First run for this data directory: build the records and cache them.
        season_records = create_player_season_records()
        fh.write_dict(fh.player_season_records_file_path, season_records)
        return season_records

    return fh.read_dict(fh.player_season_records_file_path)
Exemplo n.º 5
0
def get_prediction_actual_results_dict(prediction_dict, res_game_dicts):
    """
    Build and persist the actual per-player results for every predicted game.

    The output mirrors the layout of the prediction dict, e.g.
    ma-01-01-1996_res_Milford_14
    [{'aragues:lasa': [0, 1, 0, 0]}, {'azpiri:zabala': [1, 0, 0, 0]},
     {'acin:beitia': [0, 0, 0, 0]}, ... one entry per player in the game]

    The result game is looked up in `res_game_dicts` under the predicted game
    name with "sch" replaced by "res"; games without a result are left out.
    The finished dict is written to
    `fh.prediction_actual_results_dict_file_path`.

    Parameters:
        prediction_dict: dict mapping game name -> sequence of single-key
            dicts whose key is a ':'-separated player name.
        res_game_dicts: dict mapping result game name -> result data accepted
            by `dp.get_player_game_result`.

    Returns:
        dict mapping game name -> list of {player key: actual result}.
    """
    actual_results_dict = {}

    for game_name, game_data in prediction_dict.items():
        res_dict = res_game_dicts.get(game_name.replace("sch", "res"))
        if res_dict is None:
            # No result recorded for this game.
            continue

        actual_results_dict_data = []
        for player_entry in game_data:
            # Only the first key of each entry is used, as before.
            players = next(iter(player_entry)).split(":")
            # Look the result up before normalising the names, preserving the
            # original evaluation order.
            result = dp.get_player_game_result(res_dict, players)
            player_key = ':'.join(ut.sorted_and_lowered(players))
            actual_results_dict_data.append({player_key: result})
        actual_results_dict[game_name] = actual_results_dict_data

    fh.write_dict(fh.prediction_actual_results_dict_file_path,
                  actual_results_dict)
    return actual_results_dict
Exemplo n.º 6
0
def simulate_scheduled_games_freq(sch_game_dicts, n = 12):
    """
    Run `sc.runGame_get_WPS_frequencies` for every scheduled game.

    Games for which `get_enhanced_player_list` returns None are skipped.
    The collected results are written to `fh.prediction_dict_file_path`.

    Parameters:
        sch_game_dicts: dict mapping game name -> scheduled game data.
        n: value forwarded to the simulator (default 12).

    Returns:
        dict mapping game name -> simulator output for that game.
    """
    prediction_dict = {}

    for game_name, game_data in sch_game_dicts.items():
        # Fetched per game, exactly as before; not hoisted because it is not
        # visible here whether the simulator mutates the dict.
        pwin_dict = rds.get_pwin_dict()

        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            continue

        # Each entry is identified by its normalised names joined with ':'.
        player_list = [':'.join(ut.sorted_and_lowered(player_dict['name']))
                       for player_dict in enh_player_list]
        prediction_dict[game_name] = sc.runGame_get_WPS_frequencies(
            player_list, pwin_dict, n)

    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)
    return prediction_dict
Exemplo n.º 7
0
def _load_pwin_dict(data_dir, points_played_threshold):
    """Point `rds`/`fh` at `data_dir`, set the threshold and return its pwin dict."""
    rds.DATA_DIR = data_dir
    fh.set_up_paths(rds.DATA_DIR)
    # Presumably forces the pwin dict to be rebuilt for the new threshold
    # rather than reused from an earlier run -- confirm against clear_dir/rds.
    clear_dir(fh.data_summaries_path)
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = points_played_threshold
    return rds.get_pwin_dict()


def run_correlations(dir_list=None, thresholds=None, low_threshold=20):
    """
    Correlate pwin values between pairs of data directories.

    For every (before, after) directory pair and every threshold, the pwin
    dict of the "before" directory is built with that threshold and the pwin
    dict of the "after" directory with `low_threshold`.  The Pearson
    correlation is computed over the keys present in both dicts.  Fewer than
    five shared keys yields a correlation and p-value of 0.

    The results are written to jaialai/analysis/corr_result.txt and
    corr_result.xls, under the parent of this module's directory, and a
    timing summary is printed.

    Parameters:
        dir_list: iterable of (before_dir, after_dir) name pairs, each name of
            the form 'correl_<period>'.  Defaults to
            [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')].
        thresholds: iterable of points-played thresholds applied to the
            "before" directory.  Defaults to [100, 500].
        low_threshold: threshold applied to the "after" directory.

    Note: `rds.DATA_DIR`, `rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD` and the `fh`
    paths are left pointing at the last directory processed.
    """
    start_time = time.time()
    if dir_list is None:
        dir_list = [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]
    if thresholds is None:
        thresholds = [100, 500]

    results = {}
    for before_dir, after_dir in dir_list:
        for threshold in thresholds:
            pwin_before = _load_pwin_dict(before_dir, threshold)
            pwin_after = _load_pwin_dict(after_dir, low_threshold)

            # Pair the values by shared key.  The old code stored the pairs in
            # a dict keyed by the "before" value, which silently dropped every
            # player whose "before" value equalled another player's.
            shared_keys = [key for key in pwin_before if key in pwin_after]
            x = np.array([pwin_before[key] for key in shared_keys])
            y = np.array([pwin_after[key] for key in shared_keys])

            if len(shared_keys) < 5:
                # Too few samples for a meaningful correlation.
                res = (0, 0)
            else:
                res = pearsonr(x, y)

            run_key = str(before_dir) + '_' + after_dir + '_' + str(threshold)
            results[run_key] = [res[0], res[1], len(shared_keys), threshold,
                                before_dir.split('_')[1],
                                after_dir.split('_')[1]]

    df = pd.DataFrame(results).transpose()
    curr_path = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    fh.write_dict(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.txt'), results)
    if results:
        # An empty frame has no columns to rename.
        df.columns = ['corr', 'p value', 'number players', 'threshold', 'before', 'after']
    df.to_excel(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.xls'))

    print("To complete %.0f correlations took %.2f minutes " % (len(results), (time.time() - start_time)/60))
Exemplo n.º 8
0
def run_experiment():
    """
    Run the full betting experiment and persist one record per run.

    For every replicate / gamma / serve-advantage combination this removes any
    existing prediction-actual-results file, reruns the data setup, simulates
    the scheduled games, builds the bet list with the configured thresholds,
    records the actual outcomes and computes the returns.  Each run's settings
    and returns are stored in a dict keyed by
    "<gamma>_<serve advantage>_<replicate>", and the whole collection is
    written to `fh.experiment_results_file_path` at the end.

    Progress is reported through `logging.info` with elapsed minutes, and each
    run's record plus a final timing summary are printed.
    """
    replicates = 5
    gamma_range = [0.6]
    serve_adv_range = [-0.05]
    n = 100000
    wps_threshold = 0.3
    trifecta_threshold = 1
    quiniela_threshold = 1
    exacta_threshold = 1
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = 140

    start_time = time.time()

    def minutes_elapsed():
        # Minutes since the experiment started.
        return (time.time() - start_time)/60

    experiment_dict = {}
    logging.info('starting at %s', minutes_elapsed())
    fh.set_up_paths(DATA_DIR)
    rds.DATA_DIR = DATA_DIR
    logging.info('run_data_setup() at %s', minutes_elapsed())

    for replicate in range(1, replicates + 1):
        logging.info('Replicate : %s at %s', replicate, minutes_elapsed())
        for gamma in gamma_range:
            logging.info('gamma : %s at %s', gamma, minutes_elapsed())
            psp.gamma = gamma
            for serve_adv in serve_adv_range:
                # Start each run without a stale actual-results file so that
                # get_pos_freq_actuals regenerates it.
                if fh.file_exists(fh.prediction_actual_results_dict_file_path):
                    os.remove(fh.prediction_actual_results_dict_file_path)
                logging.info('removing files at %s', minutes_elapsed())
                rds.run_data_setup()
                logging.info('run_data_setup at %s', minutes_elapsed())

                logging.info('serve adv : %s at %s', serve_adv, minutes_elapsed())
                psp.doublesServerAdvantage = serve_adv
                sch_game_dicts = set_up_sch_game_dicts(DATA_DIR)
                logging.info('set_up_sch_game_dicts complete at %s', minutes_elapsed())
                res_game_dicts = set_up_res_games_dicts(DATA_DIR, sch_game_dicts)

                wps_prob_table, prediction_dict = sim_scheduled_games(sch_game_dicts, n)
                logging.info('serve adv : sim_scheduled_games complete at %s', minutes_elapsed())

                bet_table = BetTable(wps_prob_table)
                bet_table.WPS_THRESHOLD = wps_threshold
                bet_table.EXACTA_THRESHOLD = exacta_threshold
                bet_table.QUINIELA_THRESHOLD = quiniela_threshold
                bet_table.TRIFECTA_THRESHOLD = trifecta_threshold
                bet_table.create_bet_list(wps_prob_table)
                logging.info('serve adv : bet_table.create_bet_list complete at %s', minutes_elapsed())

                get_pos_freq_actuals(prediction_dict, res_game_dicts)
                logging.info('get_pos_freq_actuals complete at %s', minutes_elapsed())

                my_stakes, my_gross_return, my_return = bet_table.get_returns(prediction_dict, res_game_dicts)

                # Filled key by key, in the original order, so that iteration
                # and the written file keep the same ordering.
                run_dict = {}
                run_dict['rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD'] = rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD
                run_dict['rds.DATA_DIR'] = rds.DATA_DIR
                run_dict['DATA_DIR'] = DATA_DIR
                run_dict['n'] = n
                run_dict['psp.doublesServerAdvantage'] = psp.doublesServerAdvantage
                run_dict['psp.gamma'] = psp.gamma
                run_dict['my_stakes'] = my_stakes
                run_dict['my_gross_return'] = my_gross_return
                run_dict['replicate'] = replicate
                run_dict['my_return'] = my_return
                run_dict['WPS_THRESHOLD'] = bet_table.WPS_THRESHOLD
                run_dict['EXACTA_THRESHOLD'] = bet_table.EXACTA_THRESHOLD
                run_dict['QUINIELA_THRESHOLD'] = bet_table.QUINIELA_THRESHOLD
                run_dict['TRIFECTA_THRESHOLD'] = bet_table.TRIFECTA_THRESHOLD

                run_key = str(psp.gamma) + '_' + str(psp.doublesServerAdvantage) + '_' + str(replicate)
                experiment_dict[run_key] = run_dict

                # Single-argument print(...) emits the same text as the former
                # comma-separated print statements under Python 2.
                print('')
                for setting, value in run_dict.items():
                    print('%s %s' % (setting, value))
                print('%s %s' % ('out ', run_dict['my_stakes']))
                print('%s %s' % ('in ', run_dict['my_gross_return']))
                print('%s %s' % ('NET : ', run_dict['my_return']))
                logging.info('instance completed at %s', minutes_elapsed())

    fh.write_dict(fh.experiment_results_file_path, experiment_dict)
    num_trials = len(experiment_dict)
    print("To complete %.0f replicates of %.0f runs took %.2f minutes at n = %.0f over %.0f games" % (replicates, num_trials, minutes_elapsed(), n, len(prediction_dict)))