# obtain what used to be sample_vars
    # NOTE(review): this indented section is the tail of a loop over `i` whose
    # header lies above this excerpt.
    # Element [0] of noise_and_cost_func(i + 1, pt) is collected for every point
    # in init_pts[i] (presumably the noise variance -- confirm against
    # noise_and_cost_func) and stored with the points/values in kg_data.
    noise_vars = numpy.array(
        [noise_and_cost_func(i + 1, pt)[0] for pt in init_pts[i]])
    kg_data.append_historical_data(IS_pts, vals, noise_vars)

    # find the best initial value
    # Keeps a running minimum over init_vals[i] together with the matching row
    # of init_pts[i].
    if numpy.amin(init_vals[i]) < best_sampled_val:
        best_sampled_val = numpy.amin(init_vals[i])
        best_sampled_point = init_pts[i][numpy.argmin(init_vals[i]), :]
# Evaluate obj_func_min at information source truth_IS for the best initial point.
truth_at_best_sampled = obj_func_min.evaluate(truth_IS, best_sampled_point)

# Two covariance objects are built from the same hyperparameters; only the
# cpp one is handed to the GP below.  kg_cov is not used again within this
# excerpt.
kg_cov = MixedSquareExponential(hyperparameters=kg_hyper_param,
                                total_dim=obj_func_max._dim + 1,
                                num_is=obj_func_max._num_IS)
kg_cov_cpp = cppMixedSquareExponential(hyperparameters=kg_hyper_param)
kg_gp_cpp = GaussianProcessNew(kg_cov_cpp, kg_data, obj_func_max._num_IS)
for kg_n in range(num_iterations):
    print "itr {0}, {1}".format(kg_n, benchmark_result_table_name)
    ### Idea: first discretize the domain at random, then keep only the good points
    discretization_points = search_domain.generate_uniform_random_points_in_domain(
        num_discretization_before_ranking)
    # Prepend a column of zeros to every candidate (presumably the index of
    # information source 0 -- confirm against the GP's input layout).
    discretization_points = numpy.hstack((numpy.zeros(
        (num_discretization_before_ranking, 1)), discretization_points))
    all_mu = kg_gp_cpp.compute_mean_of_points(discretization_points)
    # argsort is ascending, so the last num_x_prime indices select the
    # candidates with the largest values in all_mu.
    sorted_idx = numpy.argsort(all_mu)
    all_zero_x_prime = discretization_points[sorted_idx[-num_x_prime:], :]

    ### idea ends
    # Alternative without ranking (disabled): num_x_prime purely random points.
    # all_zero_x_prime = numpy.hstack((numpy.zeros((num_x_prime,1)), search_domain.generate_uniform_random_points_in_domain(num_x_prime)))
Пример #2
0
# construct problem instance given CMD args
# format: run_mkg.py ${benchmark_name} ${repl_no} ${func_idx}
argv = sys.argv[1:]
if "kg" not in argv[0]:
    raise ValueError("benchmark is not mkg/kg!")
problem = identify_problem(argv, bucket)

# algorithm params
exploitation_threshold = 1e-5
num_x_prime = 3000
# twice as many random candidates are drawn as are kept after ranking
num_discretization_before_ranking = num_x_prime * 2
num_threads = 32
num_multistart = 32

# mkg begins
kg_cov_cpp = cppMixedSquareExponential(hyperparameters=problem.hyper_param)
kg_gp_cpp = GaussianProcessNew(kg_cov_cpp, problem.hist_data, num_IS_in=problem.num_is_in)
# data containers for pickle storage
list_best = []
list_cost = []
list_sampled_IS = []
list_sampled_points = []
list_sampled_vals = []
list_noise_var = []
list_mu_star_truth = []
list_raw_voi = []
# Row indices, within the full history, of the samples whose first column
# equals problem.truth_is (column 0 is compared against the IS index here and
# columns 1: are passed on as the point below).
truth_is_rows = numpy.flatnonzero(problem.hist_data._points_sampled[:, 0] == problem.truth_is)
# argmax over the filtered values is a position inside truth_is_rows; it has to
# be mapped back to a row of the full history before indexing the unfiltered
# arrays, otherwise the wrong sample is picked whenever the truth-IS rows are
# not the leading rows of the history.
init_best_idx = truth_is_rows[numpy.argmax(problem.hist_data._points_sampled_value[truth_is_rows])]
best_sampled_val = -1.0 * problem.hist_data._points_sampled_value[init_best_idx]    # minus sign is because vals in hist_data were
                                                                                    # obtained from obj_func_max, while all values
                                                                                    # to store are from obj_func_min, for consistency
truth_at_init_best_sampled = problem.obj_func_min.evaluate(problem.truth_is, problem.hist_data.points_sampled[init_best_idx, 1:])
Пример #3
0
        # update noise in historical data
        # NOTE(review): this deeper-indented section sits inside a conditional
        # and loop whose headers lie above this excerpt.
        # The noise-variance array is recomputed from the already sampled points
        # and hyperparameters_noise.
        updated_points_sampled_noise_variance = create_array_points_sampled_noise_variance(
            current_hist_data.points_sampled, hyperparameters_noise)

        # create new Historical data object with updated values
        new_historical_data = HistoricalData(
            dim=problem.obj_func_min.getDim() +
            1)  # increased by one for index of IS
        # Same points and values as before; only the noise variances are replaced.
        new_historical_data.append_historical_data(
            current_hist_data.points_sampled,
            current_hist_data.points_sampled_value,
            updated_points_sampled_noise_variance)

        # Use new hyperparameters -- this requires instantiating a new GP object
        kg_cov_cpp = cppMixedSquareExponential(hyperparameters=hypers_GP)
        kg_gp_cpp = GaussianProcessNew(kg_cov_cpp,
                                       new_historical_data,
                                       num_IS_in=problem.num_is_in)
        # kg_cov_cpp is not used afterwards

    ### Find IS and point that maximize KG/cost
    # Draw num_discretization_before_ranking uniform random candidates from the
    # objective's MOE domain.
    discretization_points = problem.obj_func_min.get_moe_domain(
    ).generate_uniform_random_points_in_domain(
        num_discretization_before_ranking)
    # Prepend a column of zeros to every candidate (presumably the index of
    # information source 0 -- confirm against the GP's input layout).
    discretization_points = np.hstack((np.zeros(
        (num_discretization_before_ranking, 1)), discretization_points))
    all_mu = kg_gp_cpp.compute_mean_of_points(discretization_points)
    # argsort is ascending, so the last num_x_prime indices select the
    # candidates with the largest values in all_mu.
    sorted_idx = np.argsort(all_mu)
    all_zero_x_prime = discretization_points[sorted_idx[-num_x_prime:], :]
Пример #4
0
def plot_ato_cor(num_points, num_discretization):
    """Collect correlation data for the "ato" benchmark, pickle it and plot it.

    For each of 4 function indices and 100 replications the historical data is
    rebuilt from the ``kg_atoC_*`` pickles in ``data_dir`` and wrapped in a
    ``GaussianProcessNew`` that uses the hyperparameters stored in
    ``mkg_ato.pickle`` under ``hyper_dir``.  Both correlation helpers are then
    evaluated for every sampled point along each of the 8 dimensions.  The
    stacked results are written to ``ato_plot_data.pickle`` in ``plot_dir``
    and passed to ``plot_cor``.

    :param num_points: number of uniform random points drawn from the
        8-dimensional box with bounds [0, 20] in every dimension
    :param num_discretization: number of evenly spaced coordinates in [0, 20]
        handed to the correlation helpers
    """
    n_dims = 8
    n_funcs = 4
    n_repls = 100
    info_1, info_2 = 1, 2

    # Variant names per information source (0, 1, 2), each indexed by func_idx.
    variant_names_by_is = (
        ["vanilla", "var2", "var3", "var4"],
        ["var3", "var4", "vanilla", "var2"],
        ["var4", "vanilla", "var2", "var3"],
    )

    box = TensorProductDomain(
        [ClosedInterval(0.0, 20.0) for _ in range(n_dims)])
    average_points = box.generate_uniform_random_points_in_domain(num_points)

    with open("{0}/mkg_ato.pickle".format(hyper_dir), 'rb') as f:
        hyper_param = pickle.load(f)['hyperparam']
    kg_cov_cpp = cppMixedSquareExponential(hyperparameters=hyper_param)

    x_coords = np.linspace(0.0, 20.0, num=num_discretization)
    # One row per (function, replication, point) combination.
    result_shape = (n_funcs * n_repls * num_points, num_discretization, n_dims)
    cor_IS = np.zeros(result_shape)
    cor_delta_gp = np.zeros(result_shape)

    row = 0
    for func_idx in range(n_funcs):
        for repl_no in range(n_repls):
            filenames = {}
            for is_idx, names in enumerate(variant_names_by_is):
                filenames[is_idx] = "kg_atoC_{0}_repl_{1}".format(
                    names[func_idx], repl_no)
            hist_data = construct_hist_data_from_pickle(
                dim=n_dims,
                directory=data_dir,
                IS_filename_dict=filenames,
                combine_IS=True,
                sign=-1.0)
            kg_gp_cpp = GaussianProcessNew(kg_cov_cpp, hist_data, num_IS_in=2)
            for the_point in average_points:
                for which_dim in range(n_dims):
                    # Both helpers receive exactly the same arguments.
                    corr_args = (the_point, info_1, info_2, which_dim,
                                 x_coords, kg_gp_cpp)
                    cor_IS[row, :, which_dim] = \
                        compute_correlation_info_source(*corr_args)
                    cor_delta_gp[row, :, which_dim] = \
                        compute_correlation_delta_gp(*corr_args)
                row += 1
                # Single-argument parenthesised form prints the same text as
                # the Python 2 print statement.
                print("ato, ct {0}".format(row))

    payload = {"cor_is": cor_IS, "cor_delta": cor_delta_gp, "x": x_coords}
    with open("{0}/ato_plot_data.pickle".format(plot_dir), "wb") as f:
        pickle.dump(payload, f)
    plot_cor(x_coords, cor_IS, cor_delta_gp, n_dims, plot_dir, "ato")