def test_sample_point_from_gp(self):
    """Check that drawing samples from the GP is repeatable and exact at noise-free data.

    Two properties are verified:

    1. After resetting the underlying generator to its most recent seed, the
       same sequence of draws is reproduced exactly.
    2. Sampling (with zero noise) at a historical point that was recorded with
       zero noise variance returns that point's stored value.
    """
    noiseless_sample = SamplePoint([0.0, 1.0], -1.0, 0.0)
    noisy_sample = SamplePoint([2.0, 2.5], 1.0, 0.1)
    kernel = SquareExponential([1.0, 1.0, 1.0])
    history = HistoricalData(len(noiseless_sample.point), [noiseless_sample, noisy_sample])
    gaussian_process = GaussianProcess(kernel, history)

    num_draws = 3
    draw_noise = 0.001

    # First pass: record a short sequence of draws at a fixed location.
    reference_draws = numpy.zeros(num_draws)
    for draw in xrange(num_draws):
        reference_draws[draw] = gaussian_process.sample_point_from_gp(noisy_sample.point, draw_noise)

    # Second pass: rewind the generator and repeat the identical sequence.
    # The buffer starts at ones (not zeros) so a no-op loop cannot pass by accident.
    gaussian_process._gaussian_process.reset_to_most_recent_seed()
    repeated_draws = numpy.ones(num_draws)
    for draw in xrange(num_draws):
        repeated_draws[draw] = gaussian_process.sample_point_from_gp(noisy_sample.point, draw_noise)

    # Tolerance 0.0: both passes should have run exactly the same computations.
    self.assert_vector_within_relative(repeated_draws, reference_draws, 0.0)

    # A zero-noise draw at a zero-noise historical point should give back its value.
    recovered_value = gaussian_process.sample_point_from_gp(noiseless_sample.point, 0.0)
    self.assert_scalar_within_relative(
        recovered_value,
        noiseless_sample.value,
        numpy.finfo(numpy.float64).eps,
    )
def run_example(num_points_to_sample=20, verbose=True, **kwargs):
    """Run the example, asking MOE for ``num_points_to_sample`` optimal points to sample.

    :param num_points_to_sample: how many suggest/evaluate/record rounds to run
    :param verbose: when true, print every sampled point together with its value
    :param kwargs: forwarded unchanged to ``gp_next_points``
    """
    # 2D experiment over the tensor-product domain [0, 2] x [0, 4].
    experiment = Experiment([[0, 2], [0, 4]])

    # Bootstrap the history with one already-evaluated point.
    # (Iterables of the form [point, f_val, f_var] are also accepted.)
    bootstrap_sample = SamplePoint([0, 0], function_to_minimize([0, 0]), 0.05)
    experiment.historical_data.append_sample_points([bootstrap_sample])

    for _ in range(num_points_to_sample):
        # Ask MOE for the point with the highest Expected Improvement;
        # by default a single point is returned, so take the first entry.
        suggestions = gp_next_points(experiment, **kwargs)
        candidate = suggestions[0]

        # Evaluate the objective there; any function could be substituted here.
        observed_value = function_to_minimize(candidate)

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(candidate), observed_value))

        # Feed the observation (recorded with a small noise variance) back
        # into the history so it informs the next suggestion.
        new_sample = SamplePoint(candidate, observed_value, 0.01)
        experiment.historical_data.append_sample_points([new_sample])
def test_1d_analytic_ei_edge_cases(self):
    """Test cases where analytic EI would attempt to compute 0/0 without variance lower bounds."""
    tolerance = 1.0e-15

    def check_ei_and_gradient_vanish(evaluator):
        """Assert that both EI and its gradient are (relatively) zero for ``evaluator``."""
        ei = evaluator.compute_expected_improvement()
        grad_ei = evaluator.compute_grad_expected_improvement()
        self.assert_scalar_within_relative(ei, 0.0, tolerance)
        self.assert_vector_within_relative(grad_ei, numpy.zeros(grad_ei.shape), tolerance)

    anchor = numpy.array([0.5])
    first_sample = SamplePoint(anchor, -1.809342, 0)
    second_sample = SamplePoint(anchor * 2.0, -1.09342, 0)

    # Symmetric case first: the GP knows about a single historical point.
    history = HistoricalData(anchor.size, [first_sample])
    kernel = SquareExponential(numpy.array([0.2, 0.3]))
    gaussian_process = GaussianProcess(kernel, history)

    point_to_sample = anchor
    ei_eval = ExpectedImprovement(gaussian_process, point_to_sample)
    check_ei_and_gradient_vanish(ei_eval)

    shifts = (1.0e-15, 4.0e-11, 3.14e-6, 8.89e-1, 2.71)
    self._check_ei_symmetry(ei_eval, point_to_sample, shifts)

    # Now break the symmetry with a second point. The right side has the
    # larger objective value, so the EI minimum sits *slightly* to the left
    # of best_so_far.
    gaussian_process.add_sampled_points([second_sample])
    left_shift = 3.0e-12
    ei_eval = ExpectedImprovement(gaussian_process, point_to_sample - left_shift)
    check_ei_and_gradient_vanish(ei_eval)
def test_gp_construction_singular_covariance_matrix(self):
    """Test that the GaussianProcess ctor reports a singular covariance matrix for duplicated, noise-free points."""
    # argmax over a boolean array gives the first environment with >= 1 sampled point.
    has_samples = numpy.greater_equal(self.num_sampled_list, 1)
    env_index = numpy.argmax(has_samples)
    domain, gaussian_process = self.gp_test_environments[env_index]

    dim = domain.dim
    distinct_sample = SamplePoint([0.0] * dim, 1.0, 0.0)
    # The next two samples share coordinates and both carry noise_variance = 0.0.
    repeated_sample = SamplePoint([1.0] * dim, 1.0, 0.0)
    repeated_again = repeated_sample

    historical_data = HistoricalData(
        len(distinct_sample.point),
        [distinct_sample, repeated_sample, repeated_again],
    )

    covariance = gaussian_process.get_covariance_copy()
    T.assert_raises(
        C_GP.SingularMatrixException,
        GaussianProcess,
        covariance,
        historical_data,
    )
def run_example(num_points_to_sample=200, verbose=False, **kwargs):
    """Minimize the Branin benchmark with MOE, one suggested point per round.

    The search box is read from the benchmark's ``bounds`` meta information,
    which is indexed below as one ``(lower, upper)`` pair per dimension.

    :param num_points_to_sample: number of suggest/evaluate/record rounds
    :param verbose: when true, print every sampled point and its value
    :param kwargs: forwarded unchanged to ``gp_next_points``
    """
    noise_variance = 0.6

    benchmark = Branin()
    bounds = benchmark.get_meta_information()['bounds']
    lower = np.array([interval[0] for interval in bounds])
    upper = np.array([interval[1] for interval in bounds])

    # Start from the centre of the box. The previous expression,
    # (upper - lower) / 2, is the half-width: it is only inside the box by
    # coincidence and is integer division for int arrays under Python 2.
    start_point = (lower + upper) / 2.0

    # Experiment takes one [min, max] pair per dimension. Passing
    # [lower, upper] would instead pair up the lower bounds of all
    # dimensions as if they were the limits of a single dimension.
    exp = Experiment([[interval[0], interval[1]] for interval in bounds])
    exp.historical_data.append_sample_points([
        SamplePoint(start_point, wrapper(start_point, benchmark), noise_variance),
    ])

    for _ in range(num_points_to_sample):
        # A single point is requested per round, so take the first entry.
        next_point_to_sample = gp_next_points(exp, **kwargs)[0]
        value_of_next_point = wrapper(next_point_to_sample, benchmark)

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point))

        exp.historical_data.append_sample_points([
            SamplePoint(next_point_to_sample, value_of_next_point, noise_variance),
        ])
def do_rfc_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    """Tune four RandomForestClassifier hyperparameters with MOE.

    Each round asks the MOE REST server on localhost:6543 for the next point,
    decodes it into hyperparameters, scores the classifier with 10-fold
    cross-validated accuracy and records the *negated* accuracy in the
    experiment history.

    :param num_points_to_sample: number of suggest/evaluate/record rounds
    :param X_train: training features passed to ``cross_val_score``
    :param y_train: training labels passed to ``cross_val_score``
    :param verbose: when true, print every sampled point and its accuracy
    :param kwargs: forwarded unchanged to ``gp_next_points``
    """
    # Normalized search box, decoded below by multiplying with a scale factor:
    #   dim 0 -> n_estimators     = x * 1000  (5 .. 1000)
    #   dim 1 -> max_features     = x * 50    (2 .. 50)
    #   dim 2 -> max_depth        = x * 10    (1 .. 10)
    #   dim 3 -> min_samples_leaf = x * 10    (1 .. 10)
    # NOTE(review): the original comment gave max_features_range = [2, 24],
    # which does not match the * 50 scaling used below - confirm the intent.
    exp_rfc = Experiment([[0.005, 1], [0.04, 1], [0.1, 1], [0.1, 1]])

    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Ask MOE for the point with the highest Expected Improvement;
        # by default only one point is returned.
        next_point_to_sample = gp_next_points(exp_rfc, rest_host='localhost', rest_port=6543, **kwargs)[0]

        # Decode the normalized coordinates into integer hyperparameters.
        n_estimators = int(round(next_point_to_sample[0] * 1000.0))
        max_features = int(round(next_point_to_sample[1] * 50))
        max_depth = int(round(next_point_to_sample[2] * 10))
        min_samples_leaf = int(round(next_point_to_sample[3] * 10))

        rfc = RandomForestClassifier(
            n_estimators=n_estimators,
            criterion='gini',
            max_depth=max_depth,
            min_samples_split=2,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=0.0,
            max_features=max_features,
            max_leaf_nodes=None,
            bootstrap=True,
            oob_score=False,
            n_jobs=-1,
            random_state=None,
            verbose=0,
            warm_start=False,
            class_weight=None,
        )
        score_cv = cross_validation.cross_val_score(rfc, X_train, y_train, cv=10, scoring='accuracy')
        # Fixed: this name was written "value_of next_point" (a syntax error).
        value_of_next_point = np.mean(score_cv)

        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point))

        # Record the observation with a small noise variance. The accuracy is
        # negated before it is stored, while the best value above tracks the
        # largest accuracy seen.
        exp_rfc.historical_data.append_sample_points(
            [SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)])
    # NOTE(review): best_point / best_point_value are tracked but not returned
    # in the visible code - confirm whether a return statement is missing.
def test_gp_add_sampled_points_singular_covariance_matrix(self):
    """Test that add_sampled_points reports a singular covariance matrix when a noise-free point is duplicated."""
    # Work on a copy so the shared test-environment input is left untouched.
    env_input = copy.copy(self.gp_test_environment_input)
    env_input.num_sampled = 1
    env_input.gaussian_process_class = GaussianProcess
    _unused_domain, gaussian_process = self._build_gaussian_process_test_data(env_input)

    dim = gaussian_process.dim
    original_sample = SamplePoint([0.5] * dim, 1.0, 0.0)
    other_sample = SamplePoint([1.0] * dim, -1.0, 0.0)
    # Same coordinates as original_sample, and noise_variance = 0.0.
    duplicate_sample = original_sample

    # The first two samples differ from each other, so adding them is safe.
    gaussian_process.add_sampled_points([original_sample, other_sample])

    # Adding the duplicate afterwards must produce a singular covariance matrix.
    T.assert_raises(
        C_GP.SingularMatrixException,
        gaussian_process.add_sampled_points,
        [duplicate_sample],
    )
def build_random_gaussian_process(points_sampled, covariance, noise_variance=None, gaussian_process_type=GaussianProcess):
    r"""Build a GP whose historical data was itself drawn, point by point, from the GP.

    Starting from a GP with empty history, each row of ``points_sampled`` is
    visited in order: a function value is sampled from the current GP at that
    point and immediately added back as an observation, so later draws are
    conditioned on earlier ones. This is mainly useful for testing or when
    "random" data is needed that will produce reasonably well-behaved GPs.

    :param points_sampled: points at which to draw from the GP
    :type points_sampled: array of float64 with shape (num_sampled, dim)
    :param covariance: covariance function backing the GP
    :type covariance: interfaces.covariance_interface.CovarianceInterface subclass composable with gaussian_process_type
    :param noise_variance: the ``\sigma_n^2`` (noise variance) associated w/the new observations;
      defaults to all zeros when omitted
    :type noise_variance: array of float64 with shape (num_sampled)
    :param gaussian_process_type: gaussian process class to instantiate
    :type gaussian_process_type: interfaces.gaussian_process_interface.GaussianProcessInterface subclass
    :return: a gaussian process with the generated prior data
    :rtype: gaussian_process_type object

    """
    if noise_variance is None:
        noise_variance = numpy.zeros(points_sampled.shape[0])

    empty_history = HistoricalData(points_sampled.shape[1])
    gaussian_process = gaussian_process_type(covariance, empty_history)

    for index, location in enumerate(points_sampled):
        point_noise = noise_variance[index]
        # Draw a function value from the GP as it currently stands ...
        drawn_value = gaussian_process.sample_point_from_gp(location, noise_variance=point_noise)
        # ... and feed it straight back in as a new observation.
        gaussian_process.add_sampled_points([SamplePoint(location, drawn_value, point_noise)])

    return gaussian_process
def evaluator(alpha, beta):
    """Evaluate the plan parameters ``(alpha, beta)`` and wrap the outcome in a SamplePoint.

    Relies on names from the enclosing scope (``plan_eval``, ``students``,
    ``test_qs``, ``test_ans``, ``cut_off``, ``perf_thresh``, ``history``).

    :return: ``SamplePoint([alpha, beta], avg, var)`` where ``avg, var`` come
        from ``plan_eval.teach_until_perfect``
    """
    avg, var = plan_eval.teach_until_perfect(
        alpha, beta, students, test_qs, test_ans, cut_off, perf_thresh)
    print(" ".join(str(item) for item in (alpha, beta, avg, var)))

    # Wipe the students' memory so the next evaluation starts fresh.
    for student in students:
        student.wipe_memory()

    history.append((avg, alpha, beta))

    # NOTE(review): the original comment here said "since minimizes by default
    # say score is 1 - avg", yet avg is reported unchanged - confirm which
    # of the two is intended.
    score = avg
    return SamplePoint([alpha, beta], score, var)
def evaluator(alpha, beta):
    """Evaluate the plan parameters ``(alpha, beta)`` and wrap the outcome in a SamplePoint.

    Relies on names from the enclosing scope (``plan_eval``, ``students``,
    ``num_exs``, ``test_qs``, ``test_ans``, ``history``).

    :return: ``SamplePoint([alpha, beta], 1 - avg, var)`` where ``avg, var``
        come from ``plan_eval.evaluate_plan``
    """
    avg, var = plan_eval.evaluate_plan(
        alpha, beta, students, num_exs, test_qs, test_ans)
    print(" ".join(str(item) for item in (alpha, beta, avg, var)))

    # Wipe the students' memory so the next evaluation starts fresh.
    for student in students:
        student.wipe_memory()

    # The raw average is what gets logged ...
    history.append((avg, alpha, beta))

    # ... but the reported score is 1 - avg because (per the original
    # author's note) the consumer minimizes by default.
    return SamplePoint([alpha, beta], 1 - avg, var)
def add_sampled_point(self, x, vals, noise_vars, costs):
    """Record one observation of ``x`` per IS and accumulate the incurred cost.

    :param x: the point to add
    :param vals: sampled values of x, indexed by IS (entries ``0 .. self._num_IS - 1`` are read)
    :param noise_vars: noise variances at x, indexed the same way as ``vals``
    :param costs: costs at x for all IS; must provide ``.sum()``
    :return: None
    """
    observations = []
    for source in range(self._num_IS):
        observations.append(SamplePoint(x, vals[source], noise_vars[source]))
    self._gp.add_sampled_points(observations)

    total_cost = costs.sum()
    self.cumulated_cost += total_cost
def _make_gp_from_params(params):
    """Create and return a gaussian_process from the request params as a dict.

    ``params`` has the following form::

        params = {
            'gp_historical_info': <instance of :class:`moe.views.schemas.base_schemas.GpHistoricalInfo`>,
            'domain_info': <instance of :class:`moe.views.schemas.base_schemas.DomainInfo`>,
            'covariance_info': <instance of :class:`moe.views.schemas.base_schemas.CovarianceInfo`>,
        }

    An optional ``'optimizer_info'`` entry is also consulted: when its
    ``'optimizer_type'`` equals ``L_BFGS_B_OPTIMIZER`` the GP is built with
    ``pythonGaussianProcess`` (and the covariance is requested with the
    ``"python"`` flag); otherwise ``GaussianProcess`` is used.

    :param params: The request params dict
    :type params: dict
    :return: the constructed gaussian process

    """
    # Pull the pieces we need out of the request.
    domain_info = params.get("domain_info")
    gp_historical_info = params.get("gp_historical_info")

    sample_point_list = [
        SamplePoint(entry['point'], entry['value'], entry['value_var'])
        for entry in gp_historical_info.get('points_sampled')
    ]

    optimizer_type = params.get('optimizer_info', {}).get('optimizer_type', None)

    if optimizer_type != L_BFGS_B_OPTIMIZER:
        covariance_of_process = _make_covariance_of_process_from_params(params)
        historical_data = HistoricalData(domain_info.get('dim'), sample_point_list)
        return GaussianProcess(covariance_of_process, historical_data)

    covariance_of_process = _make_covariance_of_process_from_params(params, "python")
    historical_data = HistoricalData(domain_info.get('dim'), sample_point_list)
    return pythonGaussianProcess(covariance_of_process, historical_data)
def do_xgb_train_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    """Find good XGBoost ``eta`` / ``max_depth`` values using MOE.

    Each round asks the MOE REST server on localhost:6543 for the next point,
    trains with cross-validation, and records the *negated* accuracy in the
    experiment history.

    :param num_points_to_sample: number of suggest/evaluate/record rounds
    :param X_train: training features
    :param y_train: training labels (also used to build the stratified folds)
    :param verbose: when true, print every sampled point and its accuracy
    :param kwargs: forwarded unchanged to ``gp_next_points``
    :return: ``(best_point, best_point_value)`` where ``best_point`` is
        ``[eta, max_depth]`` with ``max_depth`` decoded to an int; ``([], 0.0)``
        when no evaluated point scored above zero (including zero rounds)
    """
    xgb_parameters = {
        'objective': 'multi:softmax',
        'silent': 1,
        'nthread': 4,
        'num_class': 6,
    }

    # Search box: dim 0 is eta in [0.1, 1]; dim 1 is max_depth / 100,
    # i.e. max_depth in [2, 100] after decoding.
    exp_xgb = Experiment([[0.1, 1], [0.02, 1]])

    num_round = 5
    n_folds = 10
    cv_folds = cross_validation.StratifiedKFold(y_train, n_folds=n_folds)

    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Ask MOE for the point with the highest Expected Improvement;
        # by default only one point is returned.
        next_point_to_sample = gp_next_points(
            exp_xgb, rest_host='localhost', rest_port=6543, **kwargs)[0]

        # Decode the point into XGBoost parameters and score it.
        xgb_parameters['eta'] = next_point_to_sample[0]
        xgb_parameters['max_depth'] = int(round(next_point_to_sample[1] * 100))
        acc_cv, prec_cv, rec_cv, cm_cv, cm_full_cv = xgboost_train_cross_validation(
            X_train, y_train, xgb_parameters, num_round, cv_folds)
        value_of_next_point = acc_cv

        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point))

        # Record the observation with a small noise variance. The accuracy is
        # negated before it is stored, while the best value above tracks the
        # largest accuracy seen.
        exp_xgb.historical_data.append_sample_points(
            [SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)])

    if best_point:
        # Decode on a copy: best_point is the same object that was handed to
        # SamplePoint above, so it must not be modified in place. The guard
        # also avoids an IndexError when no point ever beat the initial 0.
        best_point = list(best_point)
        best_point[1] = int(round(best_point[1] * 100))
    return best_point, best_point_value
def _iterate(self):
    """Run the benchmark once, report the result to MOE, and fetch the next parameters.

    The benchmark result is stored as ``100 - result`` with noise variance
    0.1, then the result and every entry of ``self.settings`` are printed
    before the next parameters are requested.
    """
    result = self.benchmark.execute()

    # The history receives the inverted score, not the raw benchmark result.
    observation = SamplePoint(self._getValues(), 100 - result, 0.1)
    self.moe.historical_data.append_sample_points([observation])

    print(" -- Results: " + str(result))
    for setting in self.settings:
        print(" -- " + str(setting))

    self._getNextParamFromMOE()
    # Flush so the progress lines appear immediately.
    sys.stdout.flush()
def do_svc_linear_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    """Tune the ``C`` parameter of a LinearSVC with MOE.

    Each round asks the MOE REST server on localhost:6543 for the next point,
    scores the classifier with 10-fold cross-validated accuracy, and records
    the negated accuracy in the experiment history.

    :param num_points_to_sample: number of suggest/evaluate/record rounds
    :param X_train: training features passed to ``cross_val_score``
    :param y_train: training labels passed to ``cross_val_score``
    :param verbose: when true, print every sampled point and its accuracy
    :param kwargs: forwarded unchanged to ``gp_next_points``
    """
    # One normalized dimension in [1e-5, 1]; multiplying by 1e4 below gives
    # C in [0.1, 10000].
    c_scale = 10000.0
    exp_svc_linear = Experiment([[1.0000e-05, 1.0]])

    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Ask MOE for the point with the highest Expected Improvement;
        # by default only one point is returned.
        suggestions = gp_next_points(exp_svc_linear, rest_host='localhost', rest_port=6543, **kwargs)
        next_point_to_sample = suggestions[0]

        # Evaluate the objective at the decoded C value.
        svc_linear = svm.LinearSVC(
            penalty='l2',
            loss='squared_hinge',
            dual=True,
            tol=0.0001,
            C=next_point_to_sample[0] * c_scale,
            multi_class='ovr',
            fit_intercept=True,
            intercept_scaling=1,
            class_weight=None,
            verbose=0,
            random_state=None,
            max_iter=1000,
        )
        score_cv = cross_validation.cross_val_score(svc_linear, X_train, y_train, cv=10, scoring='accuracy')
        value_of_next_point = np.mean(score_cv)

        if value_of_next_point > best_point_value:
            best_point, best_point_value = next_point_to_sample, value_of_next_point

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point))

        # Record the observation with a small noise variance. The value is
        # negated because MOE minimizes while maximum accuracy is what is
        # wanted for the HAR classification.
        observation = SamplePoint(next_point_to_sample, -value_of_next_point, 0.000001)
        exp_svc_linear.historical_data.append_sample_points([observation])
def do_svc_rbf_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    """Tune ``C`` and ``gamma`` of an RBF-kernel SVC with MOE.

    Each round asks the MOE REST server on localhost:6543 for the next point,
    scores the classifier with 10-fold cross-validated accuracy, and records
    the negated accuracy in the experiment history.

    :param num_points_to_sample: number of suggest/evaluate/record rounds
    :param X_train: training features passed to ``cross_val_score``
    :param y_train: training labels passed to ``cross_val_score``
    :param verbose: when true, print every sampled point and its accuracy
    :param kwargs: forwarded unchanged to ``gp_next_points``
    """
    # dim 0: normalized C in [1e-5, 1], multiplied by 1e4 below -> C in [0.1, 10000]
    # dim 1: gamma in [1e-8, 1], used as-is
    c_scale = 10000.0
    exp_svc_rbf = Experiment([[1.0000e-05, 1], [1.0000e-08, 1]])

    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Ask MOE for the point with the highest Expected Improvement;
        # by default only one point is returned.
        suggestions = gp_next_points(exp_svc_rbf, rest_host='localhost', rest_port=6543, **kwargs)
        next_point_to_sample = suggestions[0]

        # Evaluate the objective at the decoded hyperparameters.
        svc_rbf = svm.SVC(
            C=next_point_to_sample[0] * c_scale,
            kernel='rbf',
            degree=3,
            gamma=next_point_to_sample[1],
            coef0=0.0,
            shrinking=True,
            probability=False,
            tol=0.001,
            cache_size=200,
            class_weight=None,
            verbose=False,
            max_iter=-1,
            random_state=None,
        )
        score_cv = cross_validation.cross_val_score(svc_rbf, X_train, y_train, cv=10, scoring='accuracy')
        value_of_next_point = np.mean(score_cv)

        if value_of_next_point > best_point_value:
            best_point, best_point_value = next_point_to_sample, value_of_next_point

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point))

        # Record the negated accuracy, with a small noise variance, so the
        # observation informs the next suggestion.
        observation = SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)
        exp_svc_rbf.historical_data.append_sample_points([observation])
def do_abc_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    """Tune ``n_estimators`` and ``learning_rate`` of an AdaBoostClassifier with MOE.

    Each round asks the MOE REST server on localhost:6543 for the next point,
    scores the classifier with 10-fold cross-validated accuracy, and records
    the negated accuracy in the experiment history.

    :param num_points_to_sample: number of suggest/evaluate/record rounds
    :param X_train: training features passed to ``cross_val_score``
    :param y_train: training labels passed to ``cross_val_score``
    :param verbose: when true, print every sampled point and its accuracy
    :param kwargs: forwarded unchanged to ``gp_next_points``
    :return: ``(best_point, best_point_value)`` where ``best_point`` is
        ``[n_estimators, learning_rate]`` with ``n_estimators`` decoded to an
        int; ``([], 0.0)`` when no evaluated point scored above zero
        (including zero rounds)
    """
    # dim 0: n_estimators / 1000 in [0.005, 1] -> n_estimators in [5, 1000]
    # dim 1: learning_rate in [0.1, 1], used as-is
    exp_abc = Experiment([[0.005, 1], [0.1, 1]])

    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Ask MOE for the point with the highest Expected Improvement;
        # by default only one point is returned.
        next_point_to_sample = gp_next_points(exp_abc, rest_host='localhost', rest_port=6543, **kwargs)[0]

        # Decode the point into hyperparameters and score the classifier.
        n_estimators = int(round(next_point_to_sample[0] * 1000.0))
        learning_rate = next_point_to_sample[1]
        abc = AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=2),
            n_estimators=n_estimators,
            learning_rate=learning_rate,
        )
        score_cv = cross_validation.cross_val_score(abc, X_train, y_train, cv=10, scoring='accuracy')
        # Fixed: this name was written "value_of next_point" (a syntax error).
        value_of_next_point = np.mean(score_cv)

        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample

        if verbose:
            print("Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point))

        # Record the negated accuracy, with a small noise variance, so the
        # observation informs the next suggestion.
        exp_abc.historical_data.append_sample_points(
            [SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)])

    if best_point:
        # Decode on a copy: best_point is the same object that was handed to
        # SamplePoint above, so it must not be modified in place. The guard
        # also avoids an IndexError when no point ever beat the initial 0.
        best_point = list(best_point)
        best_point[0] = int(round(best_point[0] * 1000))
    return best_point, best_point_value
def itterate():
    """Run one MOE round: suggest a point, evaluate it, log it to CSV, and record it.

    Uses module-level state: ``exp``, ``argoffset``, ``argscale``, ``offset``,
    ``scale``, ``variance`` and ``function_to_minimize``.
    """
    print(str(datetime.datetime.now()))

    # Ask MOE for the point with the highest Expected Improvement;
    # by default only one point is returned.
    next_point_to_sample = gp_next_points(exp)[0]
    print(next_point_to_sample)

    # Coordinates after removing argoffset and dividing by argscale; this is
    # the form that is printed and written to the CSV.
    x = [(coordinate - argoffset) / argscale for coordinate in next_point_to_sample]

    # Evaluate the objective; any function could be substituted here.
    # NOTE(review): the objective receives the raw MOE point, not ``x`` -
    # confirm that this is intended.
    value_of_next_point = function_to_minimize(next_point_to_sample)
    print(" ".join([str(x), str(value_of_next_point)]))

    # Append the sample to the CSV log.
    csv_fields = [str(field) for field in x + [value_of_next_point]]
    with open('moveheuristic.csv', 'a') as csvfile:
        csvfile.write(", ".join(csv_fields) + "\n")

    # Record the (affinely rescaled) value so it informs the next suggestion.
    recorded_value = offset + value_of_next_point * scale
    exp.historical_data.append_sample_points([
        SamplePoint(next_point_to_sample, recorded_value, variance),
    ])
objective_func._num_fidelity) # observe derivatives = objective_func._observations observations = [0] + [i + 1 for i in derivatives] init_pts_value = np.array([ objective_func.evaluate(id % num_to_sample, pt) for id, pt in enumerate(init_pts) ]) #[:, observations] true_value_init = init_pts_value # true_value_init = np.array([objective_func.evaluate_true(id % num_to_sample, pt) for id, pt in enumerate(init_pts)])#[:, observations] init_data = HistoricalData(dim=objective_func._dim, num_derivatives=len(derivatives)) init_data.append_sample_points([ SamplePoint(pt, [init_pts_value[num, i] for i in observations], objective_func._sample_var) for num, pt in enumerate(init_pts) ]) # initialize the model prior = DefaultPrior(1 + dim + len(observations), len(observations)) # noisy = False means the underlying function being optimized is noise-free cpp_gp_loglikelihood = cppGaussianProcessLogLikelihoodMCMC( historical_data=init_data, derivatives=derivatives, prior=prior, chain_length=1000, burnin_steps=2000, n_hypers=10, noisy=False) cpp_gp_loglikelihood.train()
[objective_func.evaluate_true(pt) for pt in init_pts] ) # [:, observations] # Collecting Data s_suggest = np.array(init_pts) f_s_suggest = np.array(init_pts_value).reshape(initial_n, 1) s_recommend = np.array(init_pts) f_s_recommend = np.array(true_value_init).reshape(initial_n, 1) elapsed = np.zeros([1, num_iteration + initial_n]) init_data = HistoricalData(dim=objective_func._dim, num_derivatives=len(derivatives)) init_data.append_sample_points( [ SamplePoint( pt, [init_pts_value[num, i] for i in observations], objective_func._sample_var, ) for num, pt in enumerate(init_pts) ] ) # initialize the model prior = DefaultPrior(1 + dim + len(observations), len(observations)) # noisy = False means the underlying function being optimized is noise-free cpp_gp_loglikelihood = cppGaussianProcessLogLikelihoodMCMC( historical_data=init_data, derivatives=derivatives, prior=prior, chain_length=1000,
:func:`moe.easy_interface.simple_endpoint.gp_mean_var` The function requires some historical information to inform the Gaussian Process The optimal hyperparameters are returned. """ import numpy from moe.easy_interface.simple_endpoint import gp_hyper_opt from moe.optimal_learning.python.data_containers import SamplePoint # Randomly generate some historical data # points_sampled is an iterable of iterables of the form [point_as_a_list, objective_function_value, value_variance] points_sampled = [ SamplePoint(numpy.array([x]), numpy.random.uniform(-1, 1), 0.01) for x in numpy.arange(0, 1, 0.1) ] def run_example(verbose=True, **kwargs): """Run the example, aksing MOE for optimal hyperparameters given historical data.""" covariance_info = gp_hyper_opt(points_sampled, **kwargs) if verbose: print(covariance_info) if __name__ == '__main__': run_example()
mu_star_truth = obj_func_min.evaluate(truth_IS, mu_star_point) multi_kg_result.add_entry(point_to_sample, sample_IS, sample_val, best_sampled_val, truth_at_best_sampled, predict_mean, predict_var, cost, -min_negative_kg, mu_star=negative_mu_star, mu_star_var=mu_star_var, mu_star_truth=mu_star_truth, mu_star_point=mu_star_point) print "pt: {0} \n IS: {1} \n val: {2} \n voi: {3} \n best_sample_truth: {4} \n mu_star_point: {5} \n mu_star_truth: {6} \n total cost: {7}".format( point_to_sample, sample_IS, sample_val, -min_negative_kg, truth_at_best_sampled, mu_star_point, mu_star_truth, multi_kg_result._total_cost) if sample_val < best_sampled_val: best_sampled_val = sample_val best_sampled_point = point_to_sample truth_at_best_sampled = obj_func_min.evaluate(truth_IS, best_sampled_point) kg_gp_cpp.add_sampled_points([ SamplePoint(numpy.concatenate(([sample_IS], point_to_sample)), -sample_val, noise_and_cost_func(sample_IS, point_to_sample)[0]) ])
def main():
    """Command-line entry point: optimize the field objective with batched qEI.

    Steps: parse the docopt arguments, build the ``FieldFunc`` objective and
    its search domain, seed a GP with uniformly random evaluations, then
    repeatedly (1) pick a batch of points via ``gen_sample_from_qei``,
    (2) evaluate them, (3) retrain the GP, and (4) log/record the best value
    so far. The loop ends after ``--n-iters`` iterations or when the
    convergence check fires; the best coordinate is then written to
    ``<output_file>``.
    """

    def _numeric_option(raw, cast, default):
        """Cast a docopt value, using ``default`` when it is missing or casts to zero.

        The previous form, ``cast(raw) or default``, raised TypeError for a
        missing (None) option before the fallback could apply.
        """
        if raw is None:
            return default
        return cast(raw) or default

    args = docopt(__doc__)

    # Parse arguments
    mesh = args['<mesh>']
    weights = np.load(args['<weightfile>'])
    init_centroid = np.genfromtxt(args['<init_centroid>'])
    coil = args['<coil>']
    output_file = args['<output_file>']
    cpus = _numeric_option(args['--cpus'], int, 8)
    tmpdir = args['--tmp-dir'] or os.getenv('TMPDIR') or "/tmp/"
    num_iters = _numeric_option(args['--n-iters'], int, 50)
    min_samps = _numeric_option(args['--min-var-samps'], int, 10)
    tol = _numeric_option(args['--convergence'], float, 0.001)
    history = args['--history']
    skip_convergence = args['--skip-convergence']
    options = args['--options']

    if options:
        with open(options, 'r') as f:
            opts = json.load(f)
        logging.info("Using custom options file {}".format(options))
        # NOTE(review): entries are joined with an apostrophe; a newline or
        # comma separator may have been intended - left unchanged.
        logging.info("{}".format('\''.join(
            [f"{k}:{v}" for k, v in opts.items()])))
    else:
        opts = {}

    logging.info('Using {} cpus'.format(cpus))

    f = FieldFunc(mesh_file=mesh,
                  initial_centroid=init_centroid,
                  tet_weights=weights,
                  coil=coil,
                  field_dir=tmpdir,
                  cpus=cpus,
                  **opts)

    # Make search domain: two coordinates bounded by f.bounds plus a third
    # coordinate in [0, 180]. The same intervals are built for both domain
    # classes used below.
    search_domain = TensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180)
    ])

    c_search_domain = cTensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180)
    ])

    # Generate historical points
    prior = DefaultPrior(n_dims=3 + 2, num_noise=1)
    prior.tophat = TophatPrior(-2, 5)
    prior.ln_prior = NormalPrior(12.5, 1.6)
    hist_pts = cpus  # one initial evaluation per cpu
    init_pts = search_domain.generate_uniform_random_points_in_domain(hist_pts)
    # The objective is negated before being stored (and again in the loop).
    observations = -f.evaluate(init_pts)
    hist_data = HistoricalData(dim=3, num_derivatives=0)
    hist_data.append_sample_points(
        [SamplePoint(inp, o, 0.0) for o, inp in zip(observations, init_pts)])

    # Train GP model
    gp_ll = GaussianProcessLogLikelihoodMCMC(historical_data=hist_data,
                                             derivatives=[],
                                             prior=prior,
                                             chain_length=1000,
                                             burnin_steps=2000,
                                             n_hypers=2**4,
                                             noisy=False)
    gp_ll.train()

    # Initialize grad desc params
    sgd_params = cGDParams(num_multistarts=200,
                           max_num_steps=50,
                           max_num_restarts=5,
                           num_steps_averaged=4,
                           gamma=0.7,
                           pre_mult=1.0,
                           max_relative_change=0.5,
                           tolerance=1.0e-10)

    num_samples = int(cpus * 1.3)
    best_point_history = []

    # Sum of errors buffer: holds the last ``min_samps`` best values.
    var_buffer = deque(maxlen=min_samps)
    for i in np.arange(0, num_iters):

        # Optimize qEI and pick samples
        points_to_sample, ei = gen_sample_from_qei(gp_ll.models[0],
                                                   c_search_domain,
                                                   sgd_params=sgd_params,
                                                   num_samples=num_samples,
                                                   num_mc=2**10)

        # Collect observations
        sampled_points = -f.evaluate(points_to_sample)
        evidence = [
            SamplePoint(c, v, 0.0)
            for c, v in zip(points_to_sample, sampled_points)
        ]

        # Update model
        gp_ll.add_sampled_points(evidence)
        gp_ll.train()

        # Pull model and pull values
        gp = gp_ll.models[0]
        min_point = np.argmin(gp._points_sampled_value)
        min_val = np.min(gp._points_sampled_value)
        best_coord = gp.get_historical_data_copy().points_sampled[min_point]

        logging.info('Iteration {} of {}'.format(i, num_iters))
        logging.info('Recommended Points:')
        logging.info(points_to_sample)
        logging.info('Expected Improvement: {}'.format(ei))
        logging.info('Current Best:')
        logging.info(f'f(x*)= {min_val}')
        logging.info(f'Coord: {best_coord}')
        best_point_history.append(str(min_val))

        if history:
            # The whole history is rewritten every iteration, so the file is
            # complete even if the run is interrupted.
            with open(history, 'w') as buf:
                buf.write('\n'.join(best_point_history))

        # Convergence check: once the buffer is full, stop when the summed
        # absolute deviation of the buffered values from the current best
        # drops below the tolerance.
        if (len(var_buffer) == var_buffer.maxlen) and not skip_convergence:
            deviation = sum([abs(x - min_val) for x in var_buffer])
            if deviation < tol:
                logging.info('Convergence reached!')
                logging.info('Deviation: {}'.format(deviation))
                logging.info('History length: {}'.format(var_buffer.maxlen))
                logging.info('Tolerance: {}'.format(tol))
                break

        var_buffer.append(min_val)

    # Save position and orientation matrix
    np.savetxt(output_file, best_coord)
list_pending_mu_star_points = list_pending_mu_star_points[:-1] vals_pending_mu_star_points = vals_pending_mu_star_points[:-1] # add evaluations of mu_star to our list list_mu_star_truth.extend(vals_pending_mu_star_points) list_pending_mu_star_points = [] else: # just do the cheap observation and defer the expensive one sample_val = problem.obj_func_min.evaluate(sample_is, point_to_sample) # add point and observation to GP # NOTE: while we work everywhere with the values of the minimization problem in the computation, we used the maximization obj values for the GP. # That is why here sample_val is multiplied by -1.0 kg_gp_cpp.add_sampled_points([ SamplePoint( np.concatenate(([sample_is], point_to_sample)), -1.0 * sample_val, problem.obj_func_min.noise_and_cost_func(sample_is, point_to_sample)[0]) ]) ### Recommendation: Search for point of optimal posterior mean for truth IS def find_mu_star(start_pt): ''' Find the optimum of the posterior mean. This is the point that misoKG will recommend in this iteration. :param start_pt: starting point for BFGS :return: recommended point ''' return bfgs_optimization(start_pt, negative_mu_kg(kg_gp_cpp), problem.obj_func_min._search_domain) def search_mu_star_point(kg_gp_cpp, list_sampled_points, point_to_sample, num_multistart, num_threads, problem):
for i in xrange(objective_func._search_domain.shape[0]-num_fidelity)]) # get the initial data init_pts = np.zeros((objective_func._num_init_pts, objective_func._dim)) init_pts[:, :objective_func._dim-objective_func._num_fidelity] = inner_search_domain.generate_uniform_random_points_in_domain(objective_func._num_init_pts) for pt in init_pts: pt[objective_func._dim-objective_func._num_fidelity:] = np.ones(objective_func._num_fidelity) # observe derivatives = np.arange(objective_func._num_observations) observations = [0] + [i+1 for i in derivatives] init_pts_value = np.array([objective_func.evaluate(pt) for pt in init_pts])[:, observations] true_value_init = np.array([objective_func.evaluate_true(pt) for pt in init_pts])[:, observations] init_data = HistoricalData(dim = objective_func._dim, num_derivatives = len(derivatives)) init_data.append_sample_points([SamplePoint(pt, [init_pts_value[num, i] for i in observations], objective_func._sample_var) for num, pt in enumerate(init_pts)]) # initialize the model prior = DefaultPrior(1+dim+len(observations), len(observations)) # noisy = False means the underlying function being optimized is noise-free cpp_gp_loglikelihood = cppGaussianProcessLogLikelihoodMCMC(historical_data = init_data, derivatives = derivatives, prior = prior, chain_length = 2000, burnin_steps = 2000, n_hypers = 10, noisy = False) cpp_gp_loglikelihood.train() py_sgd_params_ps = pyGradientDescentParameters(max_num_steps=100, max_num_restarts=2, num_steps_averaged=15, gamma=0.7, pre_mult=0.01, max_relative_change=0.1, tolerance=1.0e-5) cpp_sgd_params_ps = cppGradientDescentParameters(num_multistarts=1, max_num_steps=20, max_num_restarts=1, num_steps_averaged=3, gamma=0.7, pre_mult=0.03, max_relative_change=0.06, tolerance=1.0e-5)
def record_result(self, hyperparam_dict, score):
    """Append one observation to the experiment's historical data.

    :param hyperparam_dict: mapping whose values, in the dict's iteration
        order, form the coordinates of the sampled point
    :param score: objective value observed at that point
    """
    # Materialize the values: under Python 3 ``dict.values()`` is a view
    # object rather than a list, so hand SamplePoint a real sequence.
    # NOTE(review): the coordinate order is the dict's iteration order -
    # confirm it matches the order of the experiment's domain dimensions.
    point = list(hyperparam_dict.values())
    self.experiment.historical_data.append_sample_points(
        [SamplePoint(point=point, value=score)])
def lower_confidence_bound_optimization(
        gaussian_process,
        candidate_pts,
        num_to_sample,
):
    """Greedily choose ``num_to_sample`` points from ``candidate_pts`` with a lower-confidence-bound rule.

    The first point is the candidate that minimizes ``mean - std``, where both terms are
    obtained from ``gaussian_process``. Every further point is the candidate with the largest
    standard deviation among the candidates whose lower bound ``mean - std`` does not exceed the
    smallest upper bound ``mean + std`` (both bounds are computed once, before any point is added).
    Before each of these further picks, the previously chosen point is added to
    ``gaussian_process`` as a sample with an all-zero value vector and noise variance 0.25, and the
    standard deviations of the retained candidates are recomputed.

    .. WARNING:: ``gaussian_process`` is modified by this call: ``num_to_sample - 1`` placeholder
        samples are added to it and this function does not remove them.

    :param gaussian_process: GP providing ``compute_mean_of_points``,
      ``compute_cholesky_variance_of_points``, ``add_sampled_points``, ``dim`` and ``num_derivatives``
    :param candidate_pts: candidate points, one per row
    :type candidate_pts: array of float64 with shape (num_candidates, gaussian_process.dim)
    :param num_to_sample: how many points to select
    :type num_to_sample: int >= 1
    :return: the selected points and a constant ``0.0`` placeholder
    :rtype: tuple of (array of float64 with shape (num_to_sample, gaussian_process.dim), float)

    """
    def pointwise_std(points):
        """Return the standard deviation of ``gaussian_process`` at each row of ``points``."""
        std = numpy.zeros(points.shape[0])
        for row in range(points.shape[0]):
            # Query one point at a time (``[[row], :]`` keeps the 2-D shape) and take the
            # [0, 0] entry of the returned Cholesky factor as that point's standard deviation.
            std[row] = gaussian_process.compute_cholesky_variance_of_points(
                points[[row], :])[0, 0]
        return std

    mean_surface = gaussian_process.compute_mean_of_points(candidate_pts)
    standard_deviation = pointwise_std(candidate_pts)

    lower_bound = mean_surface - standard_deviation
    # Smallest upper confidence bound over all candidates.
    upper_bound = numpy.min(mean_surface + standard_deviation)
    # Keep only the candidates whose lower bound is not above that value.
    satisfied_candidate_pts = candidate_pts[lower_bound <= upper_bound, :]

    results = numpy.zeros((num_to_sample, gaussian_process.dim))
    results[0] = candidate_pts[numpy.argmin(lower_bound)]
    for i in range(1, num_to_sample):
        # Add the previous pick with a zero value vector before re-evaluating the
        # standard deviations of the retained candidates.
        gaussian_process.add_sampled_points([
            SamplePoint(results[i - 1],
                        numpy.zeros(gaussian_process.num_derivatives + 1),
                        0.25)
        ])
        satisfied_standard_deviation = pointwise_std(satisfied_candidate_pts)
        results[i] = satisfied_candidate_pts[numpy.argmax(satisfied_standard_deviation)]
    return results, 0.0
# Ask the entropy-based acquisition for the next point and information source (IS).
pt_to_sample, sample_is, acq, raw_acq = optimize_entropy(
    pes,
    pes_model,
    problem.obj_func_min.getDim(),
    num_discretization=1000,
    cost_func=cost_func,
    list_sample_is=problem.list_sample_is,
)
# The objective is evaluated at the point mapped back through scale_back.
pt_to_sample_org_space = scale_back(pt_to_sample, lower_bounds, upper_bounds)
sample_noise, sample_cost = problem.obj_func_min.noise_and_cost_func(sample_is, None)
sample_value = problem.obj_func_min.evaluate(sample_is, pt_to_sample_org_space)

# The model is fed the IS index prepended to pt_to_sample (not the scaled-back point).
observed_point = numpy.concatenate(([sample_is], pt_to_sample))
pes_model.gp_model.add_sampled_points(
    [SamplePoint(observed_point, sample_value, noise_variance=sample_noise)])

# update best_sampled_val and truth_at_best_sampled
if sample_value < best_sampled_val:
    best_sampled_val = sample_value
    # Reuse the observation when it was already taken at the truth IS;
    # otherwise evaluate the truth IS at the new best point.
    truth_at_best_sampled = (
        sample_value
        if problem.truth_is == sample_is
        else problem.obj_func_min.evaluate(problem.truth_is, pt_to_sample_org_space)
    )

# Book-keeping for this iteration.
total_cost += sample_cost
list_best.append(truth_at_best_sampled)
list_cost.append(total_cost)
list_sampled_IS.append(sample_is)
list_sampled_points.append(pt_to_sample_org_space)
# start_points = start_points_prepare[sorted_idx_kg[:num_multistart], :] start_points = search_domain.generate_uniform_random_points_in_domain( num_multistart) parallel_results = parallel( delayed(min_negative_ei_func)(pt) for pt in start_points) min_neg_ei, point_to_sample = process_parallel_results(parallel_results) sample_IS = multifidelity_expected_improvement_evaluator.choose_IS( point_to_sample) val = problem.obj_func_min.evaluate(sample_IS, point_to_sample) if val < best_sampled_val: best_sampled_val = val best_sampled_point = point_to_sample truth_at_best_sampled = problem.obj_func_min.evaluate( problem.truth_is, point_to_sample) gp_dict[sample_IS].add_sampled_points([ SamplePoint(point_to_sample, val, noise_and_cost_func(sample_IS, point_to_sample)[0]) ]) list_best.append(truth_at_best_sampled) total_cost += noise_and_cost_func(sample_IS, point_to_sample)[1] list_cost.append(total_cost) list_sampled_IS.append(sample_IS) list_sampled_points.append(point_to_sample) list_sampled_vals.append(val) list_noise_var.append(noise_and_cost_func(sample_IS, point_to_sample)[0]) list_raw_voi.append(-min_neg_ei) result_to_pickle = { "best": np.array(list_best), "cost": np.array(list_cost), "sampled_is": np.array(list_sampled_IS), "sampled_points": np.array(list_sampled_points),
# The observation can stand in for the truth value at mu_star_point only when it was
# taken at exactly that point AND at the truth IS. The comparison must be an actual
# call: a bare ``.all`` attribute is a bound method and therefore always truthy.
# numpy.array_equal also returns False (instead of failing) when the shapes differ.
if numpy.array_equal(point_to_sample, mu_star_point) and truth_IS == sample_IS:
    mu_star_truth = sample_val
else:
    mu_star_truth = obj_func_min.evaluate(truth_IS, mu_star_point)

# Track the best sampled value and the truth-IS value at the corresponding point.
if sample_val < best_sampled_val:
    best_sampled_val = sample_val
    best_sampled_point = point_to_sample
    if truth_IS == sample_IS:
        truth_at_best_sampled = sample_val
    else:
        truth_at_best_sampled = obj_func_min.evaluate(truth_IS, best_sampled_point)

# NOTE: while Jialei worked everywhere with the values of the minimization problem in the computation, he used the maximization obj values for the GP.
# That is why here sample_val is multiplied by -1
kg_gp_cpp.add_sampled_points([
    SamplePoint(
        numpy.concatenate(([sample_IS], point_to_sample)),
        -sample_val,
        noise_and_cost_func(sample_IS, point_to_sample)[0])
])

best_so_far[kg_iteration] = min(mu_star_truth, truth_at_best_sampled)
# Running total: this iteration's cost plus the cumulative cost of the previous one.
cost_so_far[kg_iteration] = cost if kg_iteration == 0 else (cost + cost_so_far[kg_iteration - 1])

# save data from this iteration:
list_sampled_IS.append(sample_IS)
list_sampled_points.append(point_to_sample)
list_noise_variance_at_sample.append(noise_and_cost_func(sample_IS, point_to_sample)[0])
# NOTE: while Jialei worked everywhere with the values of the minimization problem in the computation, he used the maximization obj values for the GP.
# but here we store the value of the min problem
list_sampled_vals.append(sample_val)

# write results to MySQL table
best_so_far_table = pandas.DataFrame(best_so_far.reshape((1, -1)))
best_so_far_table.to_sql(best_so_far_table_name, sql_util_cs.sql_engine, if_exists='append', index=False)
# cost_so_far_table = pandas.DataFrame(cost_so_far.reshape((1,-1)))