def run_different_parameter(thread_id, gamma):
    """Simulate a single EXP3 policy for the given ``gamma`` and plot its regret.

    The environment is a ``StochasticEnvironment`` built from the
    module-level ``k`` and ``sigma_noise``. The simulation is driven by
    ``Simulator.run`` with the module-level ``num_thread`` and ``num_mc``,
    over ``time_horizon`` steps.

    Args:
        thread_id: Not used inside this function.
        gamma: Forwarded to the ``EXP3`` constructor and embedded in the
            figure name.
    """
    # Spell out dots so the figure name carries no '.' characters.
    gamma_tag = str(gamma).replace('.', 'dot')
    plot_title = 'gamma' + gamma_tag

    environment = StochasticEnvironment(k, sigma_noise)
    exp3_policies = [EXP3('EXP3', k, gamma, 'black')]

    runner = Simulator(environment, exp3_policies, time_horizon)
    regrets = runner.run(num_thread, num_mc)

    plot_regret(plot_title, exp3_policies, regrets)
def parameter_multiprocessing_run(thread_id, sigma_noise):
    """Simulate a single LinUCB policy for one noise level and plot its regret.

    Builds an ``Environment`` and a ``LinUCB`` policy with the supplied
    ``sigma_noise`` (the remaining settings come from module-level names),
    runs them through ``Simulator.multiprocessing_run`` and passes the
    result to ``plot_regret``.

    Args:
        thread_id: Not used inside this function.
        sigma_noise: Noise level handed to both the environment and the
            policy, and embedded in the figure name.
    """
    # Spell out dots so the figure name carries no '.' characters.
    noise_tag = str(sigma_noise).replace('.', 'dot')
    plot_title = 'sigma_noise' + noise_tag

    environment = Environment(k, d, sigma_noise)
    linucb = LinUCB(
        'LinUCB',
        k,
        d,
        lambda_,
        arm_norm_bound,
        theta_norm_bound,
        delta,
        sigma_noise,
        'black',
    )
    linucb_policies = [linucb]

    runner = Simulator(environment, linucb_policies, time_horizon)
    regrets = runner.multiprocessing_run(num_thread, num_mc)

    plot_regret(plot_title, linucb_policies, regrets, jupyter_notebook)
def solo_processing():
    """Run the UCB convergence experiment in 'solo_processing' mode and plot it.

    Calls ``run_experiment(num_mc, 'solo_processing')`` and forwards the
    returned average regret to ``plot_regret``.

    NOTE(review): ``sigma_noise`` and ``policies`` are plain locals here
    (there is no ``global`` statement, unlike ``multi_processing``), so
    these assignments are not visible to ``run_experiment`` -- confirm
    whether that is intended. ``line_name`` is also not defined in this
    function and must come from module scope.
    """
    run_mode = 'solo_processing'
    sigma_noise = 0.1

    # Spell out dots so the figure name carries no '.' characters.
    noise_tag = str(sigma_noise).replace('.', 'dot')
    plot_title = 'ucb_convergence_verification_sigma' + noise_tag

    policies = [UCB(sigma_noise, k, delta)]

    mean_regret = run_experiment(num_mc, run_mode)
    plot_regret(plot_title, line_name, mean_regret, jupyter_notebook)
def multi_processing():
    """Run the UCB convergence experiment in 'multi_processing' mode and plot it.

    Rebinds the module-level ``num_thread``, ``sigma_noise`` and
    ``policies`` before calling ``run_experiment(num_mc,
    'multi_processing')``, then forwards the returned average regret to
    ``plot_regret``. ``line_name`` is read from module scope.
    """
    global num_thread, sigma_noise, policies

    run_mode = 'multi_processing'
    num_thread = 10
    sigma_noise = 0.1

    # Spell out dots so the figure name carries no '.' characters.
    noise_tag = str(sigma_noise).replace('.', 'dot')
    plot_title = 'ucb_convergence_verification_sigma' + noise_tag

    policies = [UCB(sigma_noise, k, delta)]

    mean_regret = run_experiment(num_mc, run_mode)
    plot_regret(plot_title, line_name, mean_regret, jupyter_notebook)
# Example #5
0
def run_model():
    """Train and evaluate a fresh ``Lin_UCB`` model on ``NUM_BATCHES`` shuffles.

    The linear data set from ``ldl.get_data_linear()`` is bucketed with
    ``util.bucket`` and tiled ``DATA_MULTIPLIER`` times. For every batch the
    (sample, bucket) pairs are reshuffled, a new ``Lin_UCB(ALPHA)`` model is
    trained and evaluated on that same data, the regret and error-rate
    curves are plotted, and both curves are pickled under ``batch/``.

    Returns:
        A list with one tuple per batch:
        ``(batch_id, last regret value, last training error-rate value,
        1 - accuracy, precision, recall)``.
    """
    # Local import: the file's top-level import block is not visible here.
    import os

    data, true_labels = ldl.get_data_linear()
    true_buckets = [util.bucket(t) for t in true_labels]

    data = np.tile(data, (DATA_MULTIPLIER, 1))
    print("DATA SHAPE:", data.shape)
    true_buckets = np.tile(true_buckets, DATA_MULTIPLIER)

    # The per-batch pickles are written into 'batch/'. Create it up front so
    # a missing directory cannot raise FileNotFoundError after a whole
    # train/evaluate cycle has already been paid for.
    os.makedirs('batch', exist_ok=True)

    # tuples of (batch_id, total regret, error while training, eval error,
    # precision, recall)
    batch_results = []

    for _ in range(NUM_BATCHES):
        # Alternative models that were tried here (LASSO_BANDIT, Fixed_Dose,
        # Warfarin_Clinical_Dose, Warfarin_Pharmacogenetic_Dose); the last
        # three take the columns_dict/values_dict returned by dl.get_data().
        model = Lin_UCB(ALPHA)

        batch_id = str(random.randint(100000, 999999))
        print()
        print("Start Batch: ", batch_id)

        # Shuffle samples and buckets together so they stay aligned.
        zipped_data = list(zip(data, true_buckets))
        random.shuffle(zipped_data)
        data, true_buckets = zip(*zipped_data)
        data = np.array(data)

        model.train(data, true_buckets)
        pred_buckets = model.evaluate(data)
        print(batch_id, "Performance on " + str(model))
        acc, precision, recall = util.evaluate_performance(
            pred_buckets, true_buckets)
        print("\tAccuracy:", acc)
        print("\tPrecision:", precision)
        print("\tRecall:", recall)

        plot_regret(model.regret, ALPHA, batch_id)
        plot_error_rate(model.error_rate, ALPHA, batch_id)

        batch_results.append(
            (batch_id, model.get_regret()[-1], model.get_error_rate()[-1],
             1 - acc, precision, recall))

        with open('batch/regret' + str(model) + batch_id, 'wb') as fp:
            pickle.dump(model.regret, fp)
        with open('batch/error' + str(model) + batch_id, 'wb') as fp:
            pickle.dump(model.error_rate, fp)

    return batch_results
# Example #6
0
            tmp = features.T.dot(self.A_inv[i]).dot(features)
            p[i] = self.theta[i].dot(features) + self.alpha * np.sqrt(tmp)

        choose_action = np.argmax(p)
        return choose_action


def test_lin_ucb_full(data, true_buckets, alpha=0.1):
    """Train a ``Lin_UCB`` model on ``data`` and score its own predictions.

    The model is trained on ``(data, true_buckets)``, evaluated on the same
    ``data``, and the predictions are passed to
    ``util.evaluate_performance``. The resulting accuracy, precision and
    recall are computed but neither printed nor returned.

    Args:
        data: Samples used for both training and evaluation.
        true_buckets: Target buckets matching ``data``.
        alpha: Forwarded to the ``Lin_UCB`` constructor.
    """
    model = Lin_UCB(alpha=alpha)
    model.train(data, true_buckets)
    predictions = model.evaluate(data)

    metrics = util.evaluate_performance(predictions, true_buckets)
    # Unpacked (and thereby checked to be a triple) but intentionally unused;
    # the accuracy print that used to follow is disabled.
    acc, precision, recall = metrics


if __name__ == '__main__':
    # Script entry point: train one Lin_UCB model on the linear data set,
    # score it on the same data, and plot its regret and error-rate curves.
    ALPHA = 0.1

    data, true_labels = ldl.get_data_linear()
    true_buckets = list(map(util.bucket, true_labels))

    lin_ucb = Lin_UCB(alpha=ALPHA)
    lin_ucb.train(data, true_buckets)
    pred_buckets = lin_ucb.evaluate(data)

    # The metrics are computed but not reported; the accuracy print is
    # disabled.
    acc, precision, recall = util.evaluate_performance(
        pred_buckets, true_buckets)

    plot_regret(lin_ucb.regret, ALPHA)
    plot_error_rate(lin_ucb.error_rate, ALPHA)