def evaluate_log_likelihood_at_hyperparameter_list(
        log_likelihood_evaluator,
        hyperparameters_to_evaluate,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    """Compute the specified log likelihood measure at each input set of hyperparameters.

    Generally Newton or gradient descent is preferred, but when they fail to converge this may be the
    only "robust" option. This function is also useful for plotting or debugging purposes (just to get
    a bunch of log likelihood values).

    :param log_likelihood_evaluator: object specifying which log likelihood measure to evaluate
    :type log_likelihood_evaluator: interfaces.log_likelihood_interface.LogLikelihoodInterface subclass
    :param hyperparameters_to_evaluate: the hyperparameters at which to compute the specified log likelihood
    :type hyperparameters_to_evaluate: array of float64 with shape (num_to_eval, log_likelihood_evaluator.num_hyperparameters)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int
    :param status: (output) status messages (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: log likelihood value at each specified set of hyperparameters
    :rtype: array of float64 with shape (hyperparameters_to_evaluate.shape[0])

    """
    null_optimizer = NullOptimizer(None, log_likelihood_evaluator)
    _, values = multistart_optimize(null_optimizer, starting_points=hyperparameters_to_evaluate)

    # TODO(GH-59): Have null optimizer actually indicate whether updates were found,
    # e.g., in an IOContainer-like structure.
    found_flag = True
    if status is not None:
        status["evaluate_log_likelihood_at_hyperparameter_list"] = found_flag

    return values
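# A minimal, hedged usage sketch for evaluate_log_likelihood_at_hyperparameter_list, for the
# plotting/debugging use case the docstring mentions. It assumes a `log_likelihood_evaluator`
# (e.g., a GaussianProcessLogMarginalLikelihood over historical data) has been constructed
# elsewhere; that construction is not shown, and the 2-hyperparameter grid below (signal
# variance x length scale) is purely illustrative, not the library's API.
import numpy

grid_1d = numpy.linspace(0.1, 2.0, 25)
alpha, length_scale = numpy.meshgrid(grid_1d, grid_1d)
# Shape (625, 2): one row per (alpha, length_scale) combination on the grid.
hyperparameters_to_evaluate = numpy.column_stack((alpha.ravel(), length_scale.ravel()))
# values = evaluate_log_likelihood_at_hyperparameter_list(log_likelihood_evaluator, hyperparameters_to_evaluate)
# numpy.argmax(values) then indexes the best grid point, mirroring the "robust" fallback
# described in the docstring; `values.reshape(25, 25)` is ready for a contour plot.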
def get_optimum(self, gp, n_starts=4):
    approx_grad = False
    max_func_evals = 15000
    max_metric_correc = 10
    pgtol = 1.0e-8
    epsilon = 1.0e-8
    tolerance = 3.0e-5
    lbfgs_parameters = LBFGSBParameters(
        approx_grad,
        max_func_evals,
        max_metric_correc,
        tolerance,
        pgtol,
        epsilon,
    )

    optimizable_gp = OptimizableGaussianProcess(gp)
    expanded_domain = TensorProductDomain([ClosedInterval(-0.5, 0.7), ClosedInterval(-0.5, 0.7)])
    gp_optimizer = LBFGSBOptimizer(expanded_domain, optimizable_gp, lbfgs_parameters)

    # Always use the same starting positions here, assuming the optimum is known.
    x = numpy.linspace(-0.5, 0.6, n_starts)
    x_tmp = numpy.zeros((n_starts, 2))
    x_tmp[:, 0] = x
    best_point, random_starts_values, function_argument_list = multistart_optimize(
        gp_optimizer, starting_points=x_tmp, num_multistarts=n_starts)
    # Pin the second coordinate to zero before returning.
    best_point[1] = 0
    return best_point
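# For orientation: MOE's LBFGSBOptimizer wraps scipy.optimize.fmin_l_bfgs_b, so the
# LBFGSBParameters above correspond to scipy keyword arguments. A minimal standalone
# sketch under that assumption follows; the toy objective is illustrative (get_optimum
# passes approx_grad=False because its evaluator supplies analytic gradients, while the
# toy here has none, so approx_grad=True). The tolerance/factr mapping is not shown.
import numpy
import scipy.optimize

def toy_objective(x):
    """Toy quadratic with a known minimum at (0.1, 0.0)."""
    return numpy.sum((x - numpy.array([0.1, 0.0])) ** 2)

best_x, best_f, info = scipy.optimize.fmin_l_bfgs_b(
    toy_objective,
    x0=numpy.zeros(2),
    approx_grad=True,          # estimate gradients by finite differences
    bounds=[(-0.5, 0.7)] * 2,  # the expanded_domain box above
    m=10,                      # max_metric_correc: history size of the Hessian approximation
    pgtol=1.0e-8,              # projected-gradient convergence tolerance
    epsilon=1.0e-8,            # finite-difference step size
    maxfun=15000,              # max_func_evals: cap on objective evaluations
)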
def multistart_expected_improvement_optimization(self, ei_optimizer, num_multistarts):
    x_tmp = numpy.zeros((num_multistarts, 2))
    # x = numpy.linspace(-1.95, 1.95, num_multistarts)
    x = numpy.random.uniform(-0.5, 0.6, size=num_multistarts)
    x_tmp[:, 0] = x
    best_point, random_starts_values, function_argument_list = multistart_optimize(
        ei_optimizer, starting_points=x_tmp)
    return best_point, function_argument_list, x_tmp[:, 0]
def multistart_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    """Solve the q,p-EI problem, returning the optimal set of q points to sample CONCURRENTLY in future experiments.

    When ``points_being_sampled.shape[0] == 0 && num_to_sample == 1``, this function will use (fast) analytic EI computations.

    .. NOTE:: The following comments are copied from
      :func:`moe.optimal_learning.python.cpp_wrappers.expected_improvement.multistart_expected_improvement_optimization`.

    This is the primary entry-point for EI optimization in the optimal_learning library. It offers our best shot at
    improving robustness by combining higher accuracy methods like gradient descent with fail-safes like random/grid search.

    Returns the optimal set of q points to sample CONCURRENTLY by solving the q,p-EI problem. That is, we may want to
    run 4 experiments at the same time and maximize the EI across all 4 experiments at once while knowing of 2 ongoing
    experiments (4,2-EI). This function handles this use case. Evaluation of q,p-EI (and its gradient) for q > 1 or
    p > 1 is expensive (requires monte-carlo iteration), so this method is usually very expensive.

    Compared to ComputeHeuristicPointsToSample() (``gpp_heuristic_expected_improvement_optimization.hpp``), this function
    makes no external assumptions about the underlying objective function. Instead, it utilizes the Expected (Parallel)
    Improvement, allowing the GP to account for ongoing/incomplete experiments.

    If ``num_to_sample = 1``, this is the same as ComputeOptimalPointsToSampleWithRandomStarts().

    TODO(GH-56): Allow callers to pass in a source of randomness.

    :param ei_optimizer: object that optimizes (e.g., gradient descent, newton) EI over a domain
    :type ei_optimizer: interfaces.optimization_interfaces.OptimizerInterface subclass
    :param num_multistarts: number of times to multistart ``ei_optimizer``
    :type num_multistarts: int > 0
    :param num_to_sample: how many simultaneous experiments you would like to run (i.e., the q in q,p-EI)
      (UNUSED, specify through ei_optimizer)
    :type num_to_sample: int >= 1
    :param randomness: random source(s) used to generate multistart points and perform monte-carlo integration
      (when applicable) (UNUSED)
    :type randomness: (UNUSED)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: point(s) that maximize the expected improvement (solving the q,p-EI problem)
    :rtype: array of float64 with shape (num_to_sample, ei_evaluator.dim)

    """
    random_starts = ei_optimizer.domain.generate_uniform_random_points_in_domain(num_points=num_multistarts)
    best_point, _ = multistart_optimize(ei_optimizer, starting_points=random_starts)

    # TODO(GH-59): Have GD actually indicate whether updates were found.
    found_flag = True
    if status is not None:
        status["gradient_descent_found_update"] = found_flag

    return best_point
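# A self-contained sketch of the multistart pattern this function relies on: run a local
# optimizer from several uniformly random starting points and keep the best result.
# scipy.optimize.minimize stands in for the library's gradient descent/Newton optimizers,
# and the toy objective is illustrative, not EI itself; EI is maximized, so the library's
# equivalent is minimizing -EI.
import numpy
import scipy.optimize

def toy_objective(x):
    # Multimodal in 1D, so a single local start can get stuck.
    return numpy.sin(3.0 * x[0]) + (x[0] - 0.3) ** 2

# Analogous to ei_optimizer.domain.generate_uniform_random_points_in_domain(...).
starts = numpy.random.uniform(-2.0, 2.0, size=(8, 1))
results = [scipy.optimize.minimize(toy_objective, x0) for x0 in starts]
best = min(results, key=lambda r: r.fun)  # keep the best of the 8 local solutions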
def test_multistarted_null_optimizer(self):
    """Test that multistarting the null optimizer just evaluates the function and identifies the max."""
    num_points = 15
    points = self.domain.generate_uniform_random_points_in_domain(num_points)

    truth = numpy.empty(num_points)
    for i, point in enumerate(points):
        self.null_optimizer.objective_function.current_point = point
        truth[i] = self.null_optimizer.objective_function.compute_objective_function()

    best_index = numpy.argmax(truth)
    truth_best_point = points[best_index, ...]
    test_best_point, test_values = multistart_optimize(self.null_optimizer, starting_points=points)

    self.assert_vector_within_relative(test_best_point, truth_best_point, 0.0)
    self.assert_vector_within_relative(test_values, truth, 0.0)
def evaluate_at_point_list(
        self,
        points_to_evaluate,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    """Evaluate Expected Improvement (q,p-EI) over a specified list of ``points_to_evaluate``.

    .. Note:: We use ``points_to_evaluate`` instead of ``self._points_to_sample`` and compute the EI at those points only.
      ``self._points_to_sample`` will be changed.

    Generally gradient descent is preferred, but when it fails to converge this may be the only "robust" option.
    This function is also useful for plotting or debugging purposes (just to get a bunch of EI values).

    TODO(GH-56): Allow callers to pass in a source of randomness.

    :param points_to_evaluate: points at which to compute EI
    :type points_to_evaluate: array of float64 with shape (num_to_evaluate, num_to_sample, ei_evaluator.dim)
    :param randomness: random source(s) used for monte-carlo integration (when applicable) (UNUSED)
    :type randomness: (UNUSED)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int > 0
    :param status: (output) status messages from C++ (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: EI evaluated at each of points_to_evaluate
    :rtype: array of float64 with shape (points_to_evaluate.shape[0])

    """
    null_optimizer = NullOptimizer(None, self)
    _, values = multistart_optimize(null_optimizer, starting_points=points_to_evaluate)

    # TODO(GH-59): Have multistart actually indicate whether updates were found.
    found_flag = True
    if status is not None:
        status["evaluate_EI_at_point_list"] = found_flag

    return values
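# Hedged usage sketch for evaluate_at_point_list, mainly to make the shape contract
# concrete: points_to_evaluate has shape (num_to_evaluate, num_to_sample, dim), and the
# result has shape (num_to_evaluate,). `ei_evaluator` is assumed to be an already-built
# ExpectedImprovement-style object; its construction is not shown and the call is
# commented out for that reason.
import numpy

num_to_evaluate, num_to_sample, dim = 50, 1, 2
points_to_evaluate = numpy.random.uniform(-1.0, 1.0, size=(num_to_evaluate, num_to_sample, dim))
# ei_values = ei_evaluator.evaluate_at_point_list(points_to_evaluate)
# ei_values.shape == (num_to_evaluate,); with num_to_sample == 1 this gives a cheap
# 1,0-EI scan over 50 candidate points, e.g., for a heat-map plot.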
def evaluate_log_likelihood_at_hyperparameter_list(
        log_likelihood_evaluator,
        hyperparameters_to_evaluate,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
):
    """Compute the specified log likelihood measure at each input set of hyperparameters.

    Generally Newton or gradient descent is preferred, but when they fail to converge this may be the
    only "robust" option. This function is also useful for plotting or debugging purposes (just to get
    a bunch of log likelihood values).

    :param log_likelihood_evaluator: object specifying which log likelihood measure to evaluate
    :type log_likelihood_evaluator: interfaces.log_likelihood_interface.LogLikelihoodInterface subclass
    :param hyperparameters_to_evaluate: the hyperparameters at which to compute the specified log likelihood
    :type hyperparameters_to_evaluate: array of float64 with shape (num_to_eval, log_likelihood_evaluator.num_hyperparameters)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int
    :return: log likelihood value at each specified set of hyperparameters
    :rtype: array of float64 with shape (hyperparameters_to_evaluate.shape[0])

    """
    null_optimizer = NullOptimizer(None, log_likelihood_evaluator)
    _, values = multistart_optimize(null_optimizer, starting_points=hyperparameters_to_evaluate)
    return values
test = np.zeros(eval_pts.shape[0])
ps = PosteriorMeanMCMC(cpp_gp_loglikelihood.models, num_fidelity)
for i, pt in enumerate(eval_pts):
    ps.set_current_point(pt.reshape((1, cpp_gp_loglikelihood.dim - objective_func._num_fidelity)))
    test[i] = -ps.compute_objective_function()

report_point = eval_pts[np.argmin(test)].reshape((1, cpp_gp_loglikelihood.dim - objective_func._num_fidelity))

py_repeated_search_domain = RepeatedDomain(num_repeats=1, domain=inner_search_domain)
ps_mean_opt = pyGradientDescentOptimizer(py_repeated_search_domain, ps, py_sgd_params_ps)
report_point = multistart_optimize(ps_mean_opt, report_point, num_multistarts=1)[0]
report_point = report_point.ravel()
report_point = np.concatenate((report_point, np.ones(objective_func._num_fidelity)))

print("best so far in the initial data {0}".format(true_value_init[np.argmin(true_value_init[:, 0])][0]))

capital_so_far = 0.0
start = time.time()
for n in range(num_iteration):
    print(method + ", {0}th job, {1}th iteration, func={2}, q={3}".format(job_id, n, obj_func_name, num_to_sample
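# A self-contained sketch of the seeding pattern in the fragment above: a cheap random
# scan of the objective picks the best candidate, which then seeds a single local polish
# (num_multistarts=1). scipy.optimize.minimize stands in for pyGradientDescentOptimizer,
# and the toy objective is illustrative, not the GP posterior mean.
import numpy as np
import scipy.optimize

def toy_objective(x):
    return (x[0] - 0.4) ** 2 + np.sin(5.0 * x[0])

eval_pts = np.random.uniform(-1.0, 1.0, size=(1000, 1))        # cheap global scan
seed = eval_pts[np.argmin([toy_objective(p) for p in eval_pts])]  # best scanned point
polished = scipy.optimize.minimize(toy_objective, seed).x         # local refinement, one start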
def multistart_hyperparameter_optimization(
        hyperparameter_optimizer,
        num_multistarts,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Select the hyperparameters that maximize the specified log likelihood measure of model fit (over the historical data) within the specified domain.

    .. Note:: The following comments are copied from
      :func:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.multistart_hyperparameter_optimization`.

    See :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLogMarginalLikelihood` and
    :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLeaveOneOutLogLikelihood`
    for an overview of some example log likelihood-like measures.

    Optimizers are: null ('dumb' search), gradient descent, newton.
    Newton is the suggested optimizer, but it is not presently available in Python (use the C++ interface).
    In Python, gradient descent is suggested.

    TODO(GH-57): Implement hessians and Newton's method.

    'dumb' search means this will just evaluate the objective log likelihood measure at num_multistarts 'points'
    (hyperparameters) in the domain, uniformly sampled using latin hypercube sampling.

    See gpp_python_common.cpp for C++ enum declarations laying out the options for objective and optimizer types.

    Currently, during optimization, we recommend that the coordinates of the initial guesses not differ from the
    coordinates of the optima by more than about 1 order of magnitude. This is a very (VERY!) rough guideline for
    sizing the domain and gd_parameters.num_multistarts; i.e., be wary of sets of initial guesses that cover the
    space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; note that it may not be a true
    optimum (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if NO improvement can be found! In that case, the output will always be the
      first randomly chosen point. status will report failure.

    TODO(GH-56): Allow callers to pass in a source of randomness.

    :param hyperparameter_optimizer: object that optimizes (e.g., gradient descent, newton) the desired
      log_likelihood measure over a domain (wrt the hyperparameters of covariance)
    :type hyperparameter_optimizer: interfaces.optimization_interfaces.OptimizerInterface subclass
    :param num_multistarts: number of times to multistart ``hyperparameter_optimizer``
    :type num_multistarts: int > 0
    :param randomness: random source used to generate multistart points (UNUSED)
    :type randomness: (UNUSED)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int > 0
    :param status: (output) status messages (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: hyperparameters that maximize the specified log likelihood measure within the specified domain
    :rtype: array of float64 with shape (log_likelihood_evaluator.num_hyperparameters)

    """
    # Producing the random starts in log10 space improves robustness by clustering some extra points near 0.
    domain_bounds_log10 = numpy.log10(hyperparameter_optimizer.domain._domain_bounds)
    domain_log10 = TensorProductDomain(ClosedInterval.build_closed_intervals_from_list(domain_bounds_log10))
    random_starts = domain_log10.generate_uniform_random_points_in_domain(num_points=num_multistarts)
    random_starts = numpy.power(10.0, random_starts)

    best_hyperparameters, _ = multistart_optimize(hyperparameter_optimizer, starting_points=random_starts)

    # TODO(GH-59): Have GD actually indicate whether updates were found, e.g., in an IOContainer-like structure.
    found_flag = True
    if status is not None:
        status["gradient_descent_found_update"] = found_flag

    return best_hyperparameters
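# A self-contained sketch of the log10-space sampling trick used above: sampling
# uniformly in log10 space and then exponentiating clusters starting points toward the
# small end of each interval, where length-scale and variance hyperparameters often
# live. The bounds below are illustrative.
import numpy

bounds = numpy.array([[1.0e-3, 1.0e2]])  # one hyperparameter interval, [low, high]
log10_starts = numpy.random.uniform(
    numpy.log10(bounds[:, 0]), numpy.log10(bounds[:, 1]), size=(10, 1))
starts = numpy.power(10.0, log10_starts)  # dense near 1e-3, sparse near 1e2 on a linear scale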
    initial_points = np.zeros((20, cpp_gp_loglikelihood.dim - objective_func._num_fidelity))
    indices = np.argsort(test)
    for i in range(20):
        initial_points[i, :] = eval_pts[indices[i]]
    # initial_point = eval_pts[np.argmin(test)].reshape((1, cpp_gp_loglikelihood.dim - objective_func._num_fidelity))

    py_repeated_search_domain = RepeatedDomain(num_repeats=1, domain=inner_search_domain)
    pvar_mean_opt = pyGradientDescentOptimizer(py_repeated_search_domain, pvar, py_sgd_params_acquisition)
    report_point = multistart_optimize(pvar_mean_opt, initial_points, num_multistarts=20)[0]
    pvar.set_current_point(report_point.reshape((1, cpp_gp_loglikelihood.dim - objective_func._num_fidelity)))
    if -pvar.compute_objective_function() > np.min(test):
        report_point = initial_points[[0]]

    next_points = report_point
    voi = np.nan
elif method == 'PI':
    eval_pts = inner_search_domain.generate_uniform_random_points_in_domain(int(1e3))
    eval_pts = np.reshape(np.append(eval_pts, (cpp_gp_loglikelihood.get_historical_data_copy()
    np.append(eval_pts[i], (cpp_gp_loglikelihood[i].get_historical_data_copy()).points_sampled[:, :(cpp_gp_loglikelihood[i].dim - objective_func._num_fidelity)]),
    (eval_pts[i].shape[0] + cpp_gp_loglikelihood[i]._num_sampled, cpp_gp_loglikelihood[i].dim - objective_func._num_fidelity))

test[i] = np.zeros(eval_pts[i].shape[0])
ps[i] = PosteriorMeanMCMC(cpp_gp_loglikelihood[i].models, num_fidelity[i])
for j, pt in enumerate(eval_pts[i]):
    ps[i].set_current_point(pt.reshape((1, cpp_gp_loglikelihood[i].dim - objective_func._num_fidelity)))
    test[i][j] = -ps[i].compute_objective_function()

report_point[i] = eval_pts[i][np.argmin(test[i])].reshape((1, cpp_gp_loglikelihood[i].dim - objective_func._num_fidelity))

py_repeated_search_domain[i] = RepeatedDomain(num_repeats=1, domain=inner_search_domain[i])
ps_mean_opt[i] = pyGradientDescentOptimizer(py_repeated_search_domain[i], ps[i], py_sgd_params_ps)
report_point[i] = multistart_optimize(ps_mean_opt[i], report_point[i], num_multistarts=1)[0]
report_point[i] = report_point[i].ravel()
report_point[i] = np.concatenate((report_point[i], np.ones(objective_func._num_fidelity)))

current_best = [0, 0, 0, 0]
best_point = report_point
for i in range(4):
    current_best[i] = true_value_init[i][np.argmin(true_value_init[i][:, 0])][0]
    print("obj ", i, " best so far in the initial data {0}".format(current_best[i]))
    # print("obj ", i, "report point value", objective_func_list[i].evaluate_true(report_point[i])[0])

capital_so_far = 0
next_points = [0, 0, 0, 0]
voi = [0, 0, 0, 0]
for i in range(4):
    objective_func = objective_func_list[i]