Пример #1
0
def set_up_sch_game_dicts(DATA_DIR):
    """
    Build a dictionary of game dictionaries from every scheduled-games file.

    DATA_DIR is handed to fh.set_up_paths() before the scheduled file paths
    are requested from fh.

    Returns a dict keyed by game name, e.g. 'ma-01-01-1998_sch_Milford_1'.
    Each value is the game dictionary produced by dp.convert_str_to_dict(),
    which (per the original example) looks like:
        {'POS-1': ['Ara'], 'POS-1-ID': [4], ..., 'POS-8': ['Eggy'],
         'POS-8-ID': [2], 'POS-SUB': ['Altuna'], 'POS-SUB-ID': [15],
         'S/D': 'Doubles', 'GAME': 1, 'GAME-COUNT': 15, 'POINTS': 7,
         'DATE': '01/01/1998', 'DAY': 'Thursday', 'FRONTON': 'Milford',
         'ABSOLUTE-DATE': 35795}
    If two game strings yield the same name, the later one replaces the
    earlier one.
    """
    fh.set_up_paths(DATA_DIR)
    games_by_name = {}
    for sched_path in fh.get_scheduled_file_paths():
        # Each file is read into a list, then regrouped into one string per game.
        file_lines = fh.file_to_list(sched_path)
        for game_text in ut.format_file_list_to_game_string(file_lines):
            game_name, game_dict = dp.convert_str_to_dict(game_text, sched_path)
            games_by_name[game_name] = game_dict
    # NOTE: games with known substitutions are not removed here; that step
    # was only ever sketched as a placeholder.
    return games_by_name
Пример #2
0
def set_up_res_games_dicts(DATA_DIR, sch_games_dicts):
    """
    Build the results-game dictionaries that correspond to scheduled games.

    Every results file reported by fh.get_results_file_paths() is parsed
    into game dictionaries (via dp.convert_str_to_dict).  Only results whose
    name matches a scheduled game are kept: a scheduled name is mapped to
    its results name by replacing "sch" with "res".

    DATA_DIR        -- directory handed to fh.set_up_paths().
    sch_games_dicts -- dict of scheduled games keyed by name, as returned by
                       set_up_sch_game_dicts(); only its keys are used.

    Returns a dict {results_game_name: results_game_dict}.
    """
    fh.set_up_paths(DATA_DIR)

    # Parse every results file into one flat name -> game-dict mapping.
    all_results_games_dict = {}
    for results_file_path in fh.get_results_file_paths():
        results_list = fh.file_to_list(results_file_path)
        for game_event_string in ut.format_file_list_to_game_string(results_list):
            dict_name, game_dict = dp.convert_str_to_dict(game_event_string, results_file_path)
            all_results_games_dict[dict_name] = game_dict

    # Keep only the results that relate to a scheduled game.
    fin_all_results_games_dict = {}
    for sch_name in sch_games_dicts:
        # NOTE(review): replace() rewrites every "sch" in the name, not just
        # the '_sch_' marker -- confirm no other part of a name contains "sch".
        res_name = sch_name.replace("sch", "res")
        res_game = all_results_games_dict.get(res_name)
        if res_game is not None:
            fin_all_results_games_dict[res_name] = res_game

    return fin_all_results_games_dict
Пример #3
0
def get_roster_list():
    """
    Return the roster list for the module-level DATA_DIR.

    Takes no arguments; the data directory comes from the module-level
    DATA_DIR, which is handed to fh.set_up_paths().

    If fh.roster_list_file exists, it is read, each entry is also split on
    ':' and appended to the same list, and the names derived by
    dp.get_names_from_list() are filtered to those not already in that
    combined list.  Otherwise the roster is created with
    create_roster_list() and written to fh.roster_list_file.

    Per the original documentation the result is a list of lists holding
    each player and each 2 player team:
    [['player1'],['player2','player3'], ....]
    """
    fh.set_up_paths(DATA_DIR)

    if not fh.file_exists(fh.roster_list_file):
        created_roster = create_roster_list()
        fh.write_list_of_lists_file(created_roster, fh.roster_list_file)
        return created_roster

    entries = fh.file_to_list(fh.roster_list_file)
    # The split forms are built in full before being appended, so the list
    # is never extended while it is being iterated.
    split_entries = [raw_entry.split(':') for raw_entry in entries]
    entries.extend(split_entries)

    candidate_names = dp.get_names_from_list(entries)
    return [name for name in candidate_names if name not in entries]
Пример #4
0
def run_data_setup():
    """
    Run every data-summary step for the module-level DATA_DIR.

    After pointing fh at DATA_DIR, calls get_roster_list(),
    get_player_season_records() and get_pwin_dict() in that order and
    discards their return values; the calls are made for the summary files
    those functions create when they are missing.
    """
    # NOTE(review): an earlier comment mentioned copying name_excludes and
    # the post position points factors from constant_data_summaries; no such
    # copy is performed in this function -- confirm whether it is still needed.
    fh.set_up_paths(DATA_DIR)
    for build_step in (get_roster_list, get_player_season_records, get_pwin_dict):
        build_step()
Пример #5
0
def get_player_season_records():
    """
    Return the player season records for the module-level DATA_DIR.

    When fh.player_season_records_file_path exists the records are read
    from it; otherwise they are built with create_player_season_records()
    and written to that path before being returned.

    Per the original documentation the result maps each player (or team)
    to a list of eight four-element lists, e.g.
    {eggy:richard [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                   [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 3]]}
    """
    fh.set_up_paths(DATA_DIR)

    # Fast path: a previously written summary file is simply loaded.
    if fh.file_exists(fh.player_season_records_file_path):
        return fh.read_dict(fh.player_season_records_file_path)

    fresh_records = create_player_season_records()
    fh.write_dict(fh.player_season_records_file_path, fresh_records)
    return fresh_records
Пример #6
0
def create_pwin_dict():
    """
    Compute a points-won ratio (p_win) for every player with enough play.

    For each player in get_player_season_records(), every frequency in the
    season record is weighted by the matching entry of
    fh.get_point_factors(): element [0] of the entry contributes to the
    points won, element [1] to the points played.  The factor table is
    indexed as [post position][position within the record row].

    Returns {player: points_won / points_played}, containing only players
    whose points played exceed PWIN_DICT_POINTS_PLAYED_THRESHOLD.
    """
    # May eliminate this afterwards
    fh.set_up_paths(DATA_DIR)
    points_factors = fh.get_point_factors()
    season_records = get_player_season_records()

    ratio_by_player = {}
    for player, record in season_records.items():
        won_total = 0
        played_total = 0
        for post_idx, finish_counts in enumerate(record):
            for finish_idx, count in enumerate(finish_counts):
                factor_pair = points_factors[post_idx][finish_idx]
                won_total += count * factor_pair[0]
                played_total += count * factor_pair[1]
        # Players at or below the threshold are left out of the result.
        if played_total > PWIN_DICT_POINTS_PLAYED_THRESHOLD:
            # NOTE(review): under Python 2 this truncates if both totals are
            # ints -- presumably the factors are floats; confirm.
            ratio_by_player[player] = won_total/played_total
    return ratio_by_player
Пример #7
0
def run_correlations(dir_list=None, thresholds=None, low_threshold=20):
    """
    Correlate players' p_win between pairs of data directories.

    For every (first_dir, second_dir) pair and every threshold:
      * the data summaries of first_dir are cleared and its p_win dict is
        built with rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = threshold;
      * the data summaries of second_dir are cleared and its p_win dict is
        built with rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = low_threshold;
      * the Pearson correlation is computed over the players present in
        both dicts (reported as (0, 0) when fewer than 5 players are shared).

    dir_list      -- list of (first_dir, second_dir) name pairs; each name
                     must contain an underscore because the part after the
                     first '_' is reported.  Defaults to
                     [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')].
    thresholds    -- thresholds applied to the first directory.  Defaults to
                     [100, 500].
    low_threshold -- threshold applied to the second directory (default 20).

    The results are written with fh.write_dict() to
    <parent of this file's directory>/jaialai/analysis/corr_result.txt and
    as a spreadsheet to corr_result.xls in the same folder.  Returns None.

    Side effects: rds.DATA_DIR, rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD and
    the fh paths are left at the values used by the last iteration.
    """
    start_time = time.time()
    if dir_list is None:
        dir_list = [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]
    if thresholds is None:
        thresholds = [100, 500]
    results_dir = {}

    for dir_pair in dir_list:
        for threshold in thresholds:
            rds.DATA_DIR = dir_pair[0]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = threshold
            pwin_dict_1 = rds.get_pwin_dict()

            rds.DATA_DIR = dir_pair[1]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = low_threshold
            pwin_dict_2 = rds.get_pwin_dict()

            # Pair the values player by player.  Keying a dict on the first
            # p_win (as was done before) merged players who share the same
            # p_win, dropping data points and under-reporting the count.
            shared_players = [key for key in pwin_dict_1 if key in pwin_dict_2]
            x = np.array([pwin_dict_1[key] for key in shared_players])
            y = np.array([pwin_dict_2[key] for key in shared_players])

            num = len(x)
            if num < 5:
                # Too few shared players for a meaningful correlation.
                res = (0, 0)
            else:
                res = pearsonr(x, y)

            result_key = str(dir_pair[0]) + '_' + dir_pair[1] + '_' + str(threshold)
            results_dir[result_key] = [res[0], res[1], num, threshold,
                                       dir_pair[0].split('_')[1],
                                       dir_pair[1].split('_')[1]]

    # One column per result key, transposed so that each result is a row.
    df = pd.DataFrame(results_dir)
    df = df.transpose()
    # Output goes under the parent of the directory containing this file.
    curr_path = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    fh.write_dict(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.txt'), results_dir)
    df.columns = ['corr', 'p value', 'number players', 'threshold', 'before', 'after']
    df.to_excel(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.xls'))

    # Parenthesised single-argument form prints the same text under Python 2.
    print("To complete %.0f correlations took %.2f minutes " % (len(results_dir), (time.time() - start_time)/60))
Пример #8
0
def trial_calculate_quadratic_loss_function():
    """
    Point the fh paths at the module-level DATA_DIR, then run
    calculate_quadratic_loss_function().  Its return value is discarded.
    """
    fh.set_up_paths(DATA_DIR)
    calculate_quadratic_loss_function()
Пример #9
0
def run_experiment():
    """
    Run the simulate-and-bet experiment for every combination of
    replicate, gamma and serve advantage, and save the per-run results.

    Each run:
      * deletes fh.prediction_actual_results_dict_file_path if it exists
        and calls rds.run_data_setup();
      * sets psp.gamma and psp.doublesServerAdvantage;
      * builds the scheduled and results game dictionaries for DATA_DIR;
      * calls sim_scheduled_games(sch_game_dicts, n);
      * builds a BetTable from the resulting probability table, sets its
        four thresholds and creates the bet list;
      * calls get_pos_freq_actuals() and bet_table.get_returns();
      * stores the settings and returns in a run dictionary under the key
        '<gamma>_<serve advantage>_<replicate>' and prints it.

    All run dictionaries are written with fh.write_dict() to
    fh.experiment_results_file_path and a timing summary is printed.
    Returns None.

    Side effects: sets rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD, rds.DATA_DIR,
    psp.gamma and psp.doublesServerAdvantage at module level.
    """
    start_time = time.time()
    replicates = 5
    # A wider sweep was used previously: gamma_range = [0.2,0.4,0.6,0.8,1.0,1.2]
    gamma_range = [0.6]
    serve_adv_range = [-0.05]    
    # n is passed as the second argument of sim_scheduled_games()
    # (presumably the number of simulations per game -- confirm).
    n = 100000
    WPS_THRESHOLD = 0.3
    TRIFECTA_THRESHOLD = 1
    QUINIELA_THRESHOLD = 1
    EXACTA_THRESHOLD = 1
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = 140
    
    
    # NOTE: start_time is reset here, so every elapsed time below is measured
    # from this point rather than from the first assignment above.
    start_time = time.time()
    #ilf = 0
    experiment_dict = {}
    logging.info('starting at %s', (time.time() - start_time)/60)    
    fh.set_up_paths(DATA_DIR)
       


    
    # Keep the data-setup module pointed at the same directory as this module.
    rds.DATA_DIR = DATA_DIR

    # NOTE: despite the message, run_data_setup() is not called here; it is
    # called inside the loop below, once per run.
    logging.info('run_data_setup() at %s', (time.time() - start_time)/60)    
    for replicate in range(1,replicates+1):
        logging.info('Replicate : %s at %s', replicate, (time.time() - start_time)/60)
        for gamma in gamma_range:
            logging.info('gamma : %s at %s', gamma, (time.time() - start_time)/60)    
            psp.gamma = gamma
            for serve_adv in serve_adv_range:
                # Remove the stored prediction/actual results so this run
                # does not start from a file left by a previous run.
                if fh.file_exists(fh.prediction_actual_results_dict_file_path):
                    os.remove(fh.prediction_actual_results_dict_file_path)
                # The pwin dict file is deliberately left in place:
#                if fh.file_exists(fh.pwin_dict_file_path):
#                    os.remove(fh.pwin_dict_file_path) 
                logging.info('removing files at %s', (time.time() - start_time)/60) 
                rds.run_data_setup()
                logging.info('run_data_setup at %s', (time.time() - start_time)/60)                 
                
                
                logging.info('serve adv : %s at %s', serve_adv, (time.time() - start_time)/60)                
                # Settings and outcomes of this single run.
                run_dict = {}
                psp.doublesServerAdvantage = serve_adv
                sch_game_dicts = set_up_sch_game_dicts(DATA_DIR)
                logging.info('set_up_sch_game_dicts complete at %s', (time.time() - start_time)/60) 
                res_game_dicts = set_up_res_games_dicts(DATA_DIR, sch_game_dicts)
                
                # Filtering out games with known substitutions is disabled:
                #sch_game_dicts, res_game_dicts = remove_know_sub_games(sch_game_dicts, res_game_dicts)
    
    
                wps_prob_table, prediction_dict = sim_scheduled_games(sch_game_dicts, n)
                logging.info('serve adv : sim_scheduled_games complete at %s',(time.time() - start_time)/60)                 
                # The thresholds are set as attributes before the bet list is created.
                bet_table = BetTable(wps_prob_table)
                bet_table.WPS_THRESHOLD = WPS_THRESHOLD
                bet_table.EXACTA_THRESHOLD = EXACTA_THRESHOLD
                bet_table.QUINIELA_THRESHOLD = QUINIELA_THRESHOLD
                bet_table.TRIFECTA_THRESHOLD = TRIFECTA_THRESHOLD
                bet_table.create_bet_list(wps_prob_table)
                logging.info('serve adv : bet_table.create_bet_list complete at %s',(time.time() - start_time)/60)               
                
                # Return value is discarded; called for its effect only.
                get_pos_freq_actuals(prediction_dict, res_game_dicts) 
                logging.info('get_pos_freq_actuals complete at %s',(time.time() - start_time)/60)    
                
                my_stakes, my_gross_return, my_return = bet_table.get_returns(prediction_dict, res_game_dicts)
    
                # The loss-function metrics (qlf / ilf) are currently not computed:
                #qlf,ilf = calculate_quadratic_loss_function()
                #ilf = calculate_ilf(n)
                run_dict['rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD'] = rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD
                run_dict['rds.DATA_DIR'] = rds.DATA_DIR
                run_dict['DATA_DIR'] = DATA_DIR
                run_dict['n'] = n   
                run_dict['psp.doublesServerAdvantage'] = psp.doublesServerAdvantage
                run_dict['psp.gamma'] = psp.gamma
                #run_dict['ilf'] = ilf
                #run_dict['qlf'] = qlf
                run_dict['my_stakes'] = my_stakes ; run_dict['my_gross_return'] = my_gross_return
                run_dict['replicate'] = replicate
                run_dict['my_return'] = my_return
                run_dict['WPS_THRESHOLD'] = bet_table.WPS_THRESHOLD
                run_dict['EXACTA_THRESHOLD'] = bet_table.EXACTA_THRESHOLD
                run_dict['QUINIELA_THRESHOLD'] = bet_table.QUINIELA_THRESHOLD
                run_dict['TRIFECTA_THRESHOLD'] = bet_table.TRIFECTA_THRESHOLD
                # Key format: '<gamma>_<serve advantage>_<replicate>'.
                experiment_dict[str(psp.gamma) + '_' + str(psp.doublesServerAdvantage)+ '_' + str(replicate)] = run_dict
                print
                for k,v in run_dict.items():
                    print k, v
                print 'out ', run_dict['my_stakes']
                print 'in ' , run_dict['my_gross_return']
                print 'NET : ' , run_dict['my_return']
                logging.info('instance completed at %s', (time.time() - start_time)/60)             
    fh.write_dict(fh.experiment_results_file_path,experiment_dict)
    # One entry per (gamma, serve advantage, replicate) combination.
    num_trials = len(experiment_dict.keys())
    # prediction_dict here is the one left over from the last run of the loop.
    print "To complete %.0f replicates of %.0f runs took %.2f minutes at n = %.0f over %.0f games" %(replicates, num_trials,(time.time() - start_time)/60,n, len(prediction_dict))