Exemplo n.º 1
0
def createBanditInstancesAndSimulate(params, n_mc_sim):
    """Simulate six agents on every parameter set and plot each comparison.

    For each entry of ``params`` the agents are constructed first, then each
    one is wrapped in its own ``Bandit`` and handed to ``mc_simulate``. The
    collected simulation results are passed to ``plot`` together with the
    time horizon and the parameter entry itself.

    Args:
        params: Sequence of dicts, each providing the keys ``'time_horizon'``,
            ``'number_of_arms'`` and ``'number_of_exploration_per_arm'``.
        n_mc_sim: Forwarded unchanged as the first argument of
            ``mc_simulate``.
    """
    for config in params:
        horizon = config['time_horizon']
        arm_count = config['number_of_arms']
        explore_per_arm = config['number_of_exploration_per_arm']

        # All agents are created up front, before any simulation starts.
        explore_exploit = ExploreThenExploit(horizon, arm_count, explore_per_arm)
        eps_half = EpsilonGreedy(horizon, arm_count, [1/2] * horizon)
        # Constant epsilon equal to (explorations per arm * arms) / horizon.
        explore_share = explore_per_arm * arm_count / horizon
        eps_share = EpsilonGreedy(horizon, arm_count, [explore_share] * horizon)
        ucb = UBC1Agent(horizon, arm_count)
        elimination = SuccessiveEliminationAgent(horizon, arm_count)
        rand_agent = Agent()

        # (agent, extra positional arguments for mc_simulate), in run order.
        # An empty tuple means mc_simulate is called with two arguments only.
        schedule = (
            (rand_agent, ()),
            (explore_exploit, ()),
            (eps_share, ("constant-epsilon=rate-of-explore-exploit",)),
            (eps_half, ("constant-epsilon=0.5",)),
            (ucb, ()),
            (elimination, ()),
        )

        results = [
            mc_simulate(n_mc_sim, Bandit(horizon, arm_count, agent), *extra)
            for agent, extra in schedule
        ]

        plot(results, horizon, config)
Exemplo n.º 2
0
def experiment2():
    """Compare five agent types on six (time horizon, arm count) setups.

    For every setup the agents "random", "epsilon-greedy",
    "explore-then-exploit", "ucb1" and "successive-elimination" are created
    through ``agentFactory``, each is simulated in its own ``Bandit`` via
    ``mc_simulate``, and the collected results are handed to ``plot``.

    ``n_sim`` is not defined inside this function; it is read from the
    enclosing (module) scope.
    """
    # Same grid and order as before: horizons 500/5000 for 5, 10 and 20 arms.
    params = [
        {"time_horizon": horizon, "number_of_arms": arms}
        for arms in (5, 10, 20)
        for horizon in (500, 5000)
    ]

    for config in params:
        time_horizon = config["time_horizon"]
        number_of_arms = config["number_of_arms"]

        agent1 = agentFactory("random", time_horizon, number_of_arms)

        # Per-round exploration probability eps_t = (K * log(t) / t) ** (1/3),
        # capped at 1. The previous expression, (t * K * log(t)) ** (1/3), is
        # greater than 1 for every t >= 2 and keeps growing, so it could never
        # act as a decaying exploration probability.
        # NOTE(review): assumes the agent treats each entry as a probability,
        # as the constant [1/2] schedules used elsewhere suggest - confirm.
        epsilons = [
            min(1.0, math.pow(number_of_arms * math.log(t) / t, 1 / 3))
            for t in range(1, time_horizon + 1)
        ]

        agent2 = agentFactory("epsilon-greedy", time_horizon, number_of_arms, epsilons)
        agent3 = agentFactory("explore-then-exploit", time_horizon, number_of_arms, time_horizon/100)
        agent4 = agentFactory("ucb1", time_horizon, number_of_arms)
        agent5 = agentFactory("successive-elimination", time_horizon, number_of_arms)

        # (agent, extra positional arguments for mc_simulate), in run order.
        schedule = (
            (agent1, ()),
            (agent2, ()),
            (agent3, ("N=T/100",)),
            (agent4, ()),
            (agent5, ()),
        )

        results = []
        for agent, extra in schedule:
            bandit = Bandit(time_horizon, number_of_arms, agent)
            results.append(mc_simulate(n_sim, bandit, *extra))

        plot(results, time_horizon, config)
Exemplo n.º 3
0
def experiment1():
    """Compare three exploration budgets of the explore-then-exploit agent.

    Six setups are evaluated (time horizons 1000 and 10000 for 5, 10 and 20
    arms). For each one, three "explore-then-exploit" agents are created via
    ``agentFactory`` with fourth argument 5, T/10 and T/100 (T being the time
    horizon), each is simulated in its own ``Bandit`` through ``mc_simulate``
    and the results are passed to ``plot``.

    ``n_sim`` is not defined inside this function; it is read from the
    enclosing (module) scope.
    """
    params = [
        {"time_horizon": horizon, "number_of_arms": arms}
        for arms in (5, 10, 20)
        for horizon in (1000, 10000)
    ]

    for setup in params:
        horizon = setup["time_horizon"]
        arms = setup["number_of_arms"]

        # (third mc_simulate argument, fourth agentFactory argument) per run.
        budgets = (
            ("N=5", 5),
            ("N=T/10", horizon/10),
            ("N=T/100", horizon/100),
        )

        # All agents are created before the first simulation starts.
        labelled_agents = [
            (label, agentFactory("explore-then-exploit", horizon, arms, budget))
            for label, budget in budgets
        ]

        results = [
            mc_simulate(n_sim, Bandit(horizon, arms, agent), label)
            for label, agent in labelled_agents
        ]

        plot(results, horizon, setup)
Exemplo n.º 4
0
def create_evaluation_report(
        results,
        k=5,
        setups=None,
        score='tra_score',
        show_plots=False,
        lower_is_better=True):
    """Render a table (and optionally plots) of the best configurations.

    Columns listed in ``nonrelevant_keys`` are dropped from ``results``
    first. ``find_best`` is always applied; ``find_best_average`` is applied
    in addition (and reported first) when ``results['sample']`` holds more
    than one distinct value. For every distinct sample of each selection, a
    table is rendered through ``report.render_table`` and, if requested,
    plots are drawn through ``report.plot``.

    Args:
        results: Frame of evaluation results; the code reads its 'sample',
            'setup' and 'threshold' columns and the columns selected for the
            table. Presumably a pandas DataFrame - confirm with callers.
        k: Forwarded to ``find_best`` / ``find_best_average`` and printed in
            the "best configurations" header.
        setups: Optional frame that is left-merged onto each table on the
            'setup' column when it is not None and not empty.
        score: Forwarded to ``find_best`` / ``find_best_average``.
        show_plots: When true, ``report.plot`` is called for every sample.
        lower_is_better: Forwarded to ``find_best`` / ``find_best_average``.
    """
    irrelevant = set(results.keys()) & set(nonrelevant_keys)
    results = results.drop(columns=list(irrelevant))

    selection_args = dict(k=k, score=score, lower_is_better=lower_is_better)

    selections = []
    # The averaged selection only makes sense with more than one sample.
    if len(np.unique(results['sample'])) > 1:
        selections.append(find_best_average(results, **selection_args))
    selections.append(find_best(results, **selection_args))

    print("Evaluated setups: " + str(np.unique(results['setup'])))

    # Columns shown first in every table; all remaining columns follow.
    leading_columns = [
        'sample',
        'setup',
        'iteration',
        'merge_function',
        'threshold',
        'seg_score',
        'tra_score']

    for selection in selections:

        sample_ids = np.unique(selection['sample'])
        configuration_keys = list(set(results.keys()) - set(metric_keys) - set(nonrelevant_keys))

        print("%d best configurations:"%k)
        for sample_id in sample_ids:

            top_rows = selection[selection['sample']==sample_id]

            columns = list(leading_columns)
            for key in results.keys():
                if key in leading_columns or key in nonrelevant_keys:
                    continue
                columns.append(key)

            table = top_rows.loc[:,columns]
            if setups is not None and len(setups) > 0:
                table = pandas.merge(table, setups, how='left', on='setup')
            report.render_table(table)

            if not show_plots:
                continue

            groups = [{'sample': sample_id}]
            figures = [
                {'x_axis': 'threshold',  'y_axis': 'seg_score', 'title': 'SEG'},
                {'x_axis': 'threshold',  'y_axis': 'tra_score', 'title': 'TRA'},
            ]

            # One entry per selected row without a fixed threshold, drawn
            # with style 'line' ...
            across_thresholds = [
                {
                    **{c: curate_value(row[c]) for c in configuration_keys},
                    'style': 'line',
                }
                for _, row in top_rows.iterrows()
            ]
            # ... and one entry per selected row pinned to its own threshold.
            keys_with_threshold = configuration_keys + ['threshold']
            at_selected_threshold = [
                {c: curate_value(row[c]) for c in keys_with_threshold}
                for _, row in top_rows.iterrows()
            ]
            configurations = across_thresholds + at_selected_threshold

            report.plot(groups, figures, configurations, results.sort_values(by='threshold'))