    ])
    list_best.append(truth_at_best_sampled)
    total_cost += noise_and_cost_func(sample_IS, point_to_sample)[1]
    list_cost.append(total_cost)
    list_sampled_IS.append(sample_IS)
    list_sampled_points.append(point_to_sample)
    list_sampled_vals.append(val)
    list_noise_var.append(noise_and_cost_func(sample_IS, point_to_sample)[0])
    list_raw_voi.append(-min_neg_ei)

    result_to_pickle = {
        "best": np.array(list_best),
        "cost": np.array(list_cost),
        "sampled_is": np.array(list_sampled_IS),
        "sampled_points": np.array(list_sampled_points),
        "sampled_vals": np.array(list_sampled_vals),
        "sampled_noise_var": np.array(list_noise_var),
        "raw_voi": np.array(list_raw_voi),
        "init_best_truth": truth_at_init_best_sampled,
    }
    # write the pickled results to S3.
    send_data_to_s3(bucket, problem.result_path, result_to_pickle)

    if problem.data_path is not None:
        data_to_pickle = {
            "points": np.array(list_sampled_points),
            "vals": np.array(list_sampled_vals),
            "noise": np.array(list_noise_var),
        }
        send_data_to_s3(bucket, problem.data_path, data_to_pickle)
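# The fragments in this section call send_data_to_s3 / get_data_from_s3 helpers that are
# not defined here. A minimal sketch of what such helpers could look like, assuming boto 2
# (matching the conn.get_bucket(...) call commented out further below) and cPickle for
# serialization; the exact names and behavior of the repo's real helpers are assumptions:
import cPickle as pickle


def send_data_to_s3(bucket, key_name, data):
    # pickle the object and upload it under key_name in the given boto bucket
    s3_key = bucket.new_key(key_name)
    s3_key.set_contents_from_string(pickle.dumps(data))


def get_data_from_s3(bucket, key_name):
    # download the pickle stored under key_name and return the unpickled object
    s3_key = bucket.get_key(key_name)
    if s3_key is None:
        raise ValueError("no such S3 key: " + key_name)
    return pickle.loads(s3_key.get_contents_as_string())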
func = func_dict[func_name]
num_replications = 100
search_domain = pythonTensorProductDomain(
    [ClosedInterval(bound[0], bound[1]) for bound in func._search_domain])

num_parallel_jobs = num_pts  # Jialei's original choice
if ('ato' in func_name) and (num_parallel_jobs > 16):
    # do not start too many MATLAB instances
    num_parallel_jobs = 16
if ('lrMU' in func_name) and (num_parallel_jobs > 8):
    # do not start too many theano instances
    num_parallel_jobs = 8
if not allows_parallelization:
    num_parallel_jobs = 1


def parallel_func(IS, pt):
    return func.evaluate(IS, pt)


for repl_no in range(num_replications):
    with Parallel(n_jobs=num_parallel_jobs) as parallel:
        for index_IS in func.getList_IS_to_query():
            points = search_domain.generate_uniform_random_points_in_domain(num_pts)
            vals = parallel(delayed(parallel_func)(index_IS, pt) for pt in points)
            noise = [func.noise_and_cost_func(index_IS, pt)[0] for pt in points]
            data = {"points": np.array(points), "vals": np.array(vals), "noise": np.array(noise)}

            # write to S3
            key = directory + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(index_IS, repl_no, func_name, num_pts)
            send_data_to_s3(bucket, key, data)
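# A minimal sketch of how one of the per-replication pickles written above could be loaded
# back into a MOE HistoricalData container (the kind of object used as hist_data in the
# hyperparameter-fitting fragment below). The key format mirrors the one used when the data
# was generated; load_init_data_for_IS is a hypothetical helper name, not part of the repo:
from moe.optimal_learning.python.data_containers import HistoricalData


def load_init_data_for_IS(bucket, directory, func_name, num_pts, index_IS, repl_no, dim):
    key = directory + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(index_IS, repl_no, func_name, num_pts)
    data = get_data_from_s3(bucket, key)
    hist = HistoricalData(dim)
    hist.append_historical_data(data["points"], data["vals"], data["noise"])
    return hist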
}
for i in range(len(hist_data)):
    result["data_{0}_points_sampled".format(i)] = hist_data[i].points_sampled
    result["data_{0}_points_sampled_value".format(i)] = hist_data[i].points_sampled_value

for key in range(len(hist_data)):
    # Set up the prior for MAP estimation of the GP hyperparameters:
    # the first component is an estimate of the signal variance, the rest are the domain widths.
    if key == 0:
        prior_mean = np.concatenate((
            [max(0.01, np.var(hist_data[key].points_sampled_value)
                 - np.mean(hist_data[key].points_sampled_noise_variance))],
            [(d[1] - d[0]) for d in problem.obj_func_min._search_domain]))
    else:
        prior_mean = np.concatenate((
            [max(0.01, np.var(hist_data[key].points_sampled_value)
                 - np.mean(hist_data[key].points_sampled_noise_variance)
                 - np.mean(hist_data[0].points_sampled_noise_variance))],
            [(d[1] - d[0]) for d in problem.obj_func_min._search_domain]))
    prior_sig = np.diag(np.power(prior_mean / 2., 2.0))
    prior = NormalPrior(prior_mean, prior_sig)
    hyper_bounds = [(0.001, prior_mean[i] + 2. * np.sqrt(prior_sig[i, i]))
                    for i in range(problem.obj_func_min.getDim() + 1)]
    hyperparam_search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds])
    multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(num_hyper_multistart)

    # multistart MAP optimization of the hyperparameters, keeping the best run
    best_f = np.inf
    cov = SquareExponential(prior_mean)
    for i in range(num_hyper_multistart):
        hyper, f, output = hyper_opt(cov, data=hist_data[key],
                                     init_hyper=multistart_pts[i, :],
                                     hyper_bounds=hyper_bounds,
                                     approx_grad=False,
                                     hyper_prior=prior)
        if f < best_f:
            best_hyper = hyper
            best_f = f

    result['prior_mean'].append(prior_mean)
    result['prior_sig'].append(np.diag(prior_sig))
    result['hyper_bounds'].append(hyper_bounds)
    result['hyperparam'] = np.concatenate((result['hyperparam'], best_hyper))
    result['loglikelihood'].append(-best_f)

send_data_to_s3(bucket, problem.hyper_path, result)
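# The fitted hyperparameters for all information sources end up as one flat array in
# result['hyperparam'], one (dim + 1)-vector (signal variance followed by length scales)
# per GP. A minimal sketch of how a benchmark run could read them back and rebuild the
# squared-exponential kernels; this consumption code is an assumption, not shown here:
hyper_result = get_data_from_s3(bucket, problem.hyper_path)
dim_plus_one = problem.obj_func_min.getDim() + 1
hyperparams = np.array(hyper_result['hyperparam']).reshape((-1, dim_plus_one))
# one SquareExponential covariance per information-source model
covariances = [SquareExponential(hyperparams[s, :]) for s in range(hyperparams.shape[0])]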
### copy back from /backup the files where IS 1 is centered
directory_backup = '/miso/data/backup'
new_func_name = 'lrMU2'

# # num_pts = 1000
# for index_IS in func.getList_IS_to_query():
#     key = directory_backup + '/hyper_{1}_IS_{0}_{2}_points'.format(index_IS, func_name, num_pts)
#     data = get_data_from_s3(bucket, key)
#
#     # write to lrMU2 file
#     key_new = directory + '/hyper_{1}_IS_{0}_{2}_points'.format(index_IS, new_func_name, num_pts)
#     send_data_to_s3(bucket, key_new, data)

# init data
num_replications = 100
num_pts = 10
for repl_no in xrange(num_replications):
    print '\nrepl ' + str(repl_no)
    for index_IS in func.getList_IS_to_query():
        key = directory_backup + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(
            index_IS, repl_no, func_name, num_pts)
        data = get_data_from_s3(bucket, key)

        # write to lrMU2 file
        key_new = directory + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(
            index_IS, repl_no, new_func_name, num_pts)
        send_data_to_s3(bucket, key_new, data)
# bucket = conn.get_bucket('frazier-research', validate=True)

# upload every pickle found in the cluster directory:
# cluster_dir = '/fs/europa/g_pf/pickles/miso/data/'
# s3_dir = 'miso/data/'
# for filename in os.listdir(cluster_dir):
#     if '.pickle' in filename:
#         print filename.split('.')[0]
#         with open(cluster_dir + filename, 'rb') as f:
#             d = pickle.load(f)
#         send_data_to_s3(bucket, s3_dir + filename.split('.')[0], d)

cluster_dir = '/fs/europa/g_pf/pickles/miso/data/'
s3_dir = 'miso/data/'
for i in range(100):
    for IS in range(3):
        name = "atoext_IS_{0}_20_points_repl_{1}".format(IS, i)
        print name
        with open(cluster_dir + name, 'rb') as f:
            data = pickle.load(f)
        send_data_to_s3(bucket, s3_dir + name, data)

# download the fitted hyperparameters from S3 back to the cluster:
# cluster_dir = '/fs/europa/g_pf/pickles/miso/hyper/'
# s3_dir = 'miso/hyper/'
# name = "mkg_atoext"
# print name
# data = get_data_from_s3(bucket, s3_dir + name)
# with open(cluster_dir + name, 'wb') as f:
#     pickle.dump(data, f)