class LogLikelihoodTest(GaussianProcessTestCase):

    """Test that the C++ and Python implementations of the Log Marginal Likelihood match (value and gradient)."""

    precompute_gaussian_process_data = False

    noise_variance_base = 0.0002
    dim = 3
    num_hyperparameters = dim + 1

    gp_test_environment_input = GaussianProcessTestEnvironmentInput(
        dim,
        num_hyperparameters,
        0,
        noise_variance_base=noise_variance_base,
        hyperparameter_interval=ClosedInterval(0.2, 1.5),
        lower_bound_interval=ClosedInterval(-2.0, 0.5),
        upper_bound_interval=ClosedInterval(2.0, 3.5),
        covariance_class=moe.optimal_learning.python.python_version.covariance.SquareExponential,
        spatial_domain_class=moe.optimal_learning.python.python_version.domain.TensorProductDomain,
        hyperparameter_domain_class=moe.optimal_learning.python.python_version.domain.TensorProductDomain,
    )

    num_sampled_list = (1, 2, 5, 10, 16, 20, 42)

    def test_python_and_cpp_return_same_log_likelihood_and_gradient(self):
        """Check that the C++ and Python log likelihood + gradients match over a series of randomly built data sets."""
        value_tolerance = 5.0e-11
        gradient_tolerance = 4.0e-12

        for sample_count in self.num_sampled_list:
            self.gp_test_environment_input.num_sampled = sample_count
            _, py_gp = self._build_gaussian_process_test_data(self.gp_test_environment_input)
            py_cov, historical_data = py_gp.get_core_data_copy()

            # Build the two implementations from identical covariance + data.
            py_lml = moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLogMarginalLikelihood(py_cov, historical_data)
            cpp_cov = moe.optimal_learning.python.cpp_wrappers.covariance.SquareExponential(py_cov.hyperparameters)
            cpp_lml = moe.optimal_learning.python.cpp_wrappers.log_likelihood.GaussianProcessLogMarginalLikelihood(cpp_cov, historical_data)

            # Log marginal likelihood values must agree to tight relative tolerance.
            py_value = py_lml.compute_log_likelihood()
            cpp_value = cpp_lml.compute_log_likelihood()
            self.assert_scalar_within_relative(py_value, cpp_value, value_tolerance)

            # Gradients wrt hyperparameters must agree as well.
            py_gradient = py_lml.compute_grad_log_likelihood()
            cpp_gradient = cpp_lml.compute_grad_log_likelihood()
            self.assert_vector_within_relative(py_gradient, cpp_gradient, gradient_tolerance)
def base_setup(cls):
    """Set up a test case for optimizing a simple quadratic polynomial."""
    cls.dim = 3
    cls.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * cls.dim)
    cls.large_domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * cls.dim)

    # Quadratic with its maximum at (0.5, 0.5, 0.5), starting the search at the origin.
    cls.polynomial = QuadraticFunction(numpy.full(cls.dim, 0.5), numpy.zeros(cls.dim))

    # Gradient descent configuration.
    # smaller gamma would lead to faster convergence, but we don't want to make the problem too easy
    cls.gd_parameters = GradientDescentParameters(
        250,      # max_num_steps
        10,       # max_num_restarts
        0,        # num_steps_averaged
        0.7,      # gamma
        1.0,      # pre_mult
        0.8,      # max_relative_change
        1.0e-12,  # tolerance
    )

    # L-BFGS-B configuration.
    cls.BFGS_parameters = LBFGSBParameters(
        False,    # approx_grad
        150000,   # max_func_evals
        10,       # max_metric_correc
        1000.0,   # factr
        1e-10,    # pgtol
        1e-8,     # epsilon
    )

    # COBYLA configuration.
    cls.COBYLA_parameters = COBYLAParameters(
        1.0,                             # rhobeg
        numpy.finfo(numpy.float64).eps,  # rhoend
        1000,                            # maxfun
        2.0e-13,                         # catol
    )
def __init__(
        self,
        dim,
        num_hyperparameters,
        num_sampled,
        noise_variance_base=0.0,
        hyperparameter_interval=ClosedInterval(0.2, 1.3),
        lower_bound_interval=ClosedInterval(-2.0, 0.5),
        upper_bound_interval=ClosedInterval(2.0, 3.5),
        covariance_class=SquareExponential,
        spatial_domain_class=TensorProductDomain,
        hyperparameter_domain_class=TensorProductDomain,
        gaussian_process_class=GaussianProcess,
):
    """Create a test environment: object with enough info to construct a Gaussian Process prior from repeated random draws.

    :param dim: number of (expected) spatial dimensions; None to skip check
    :type dim: int > 0
    :param num_hyperparameters: number of hyperparameters of the covariance function
    :type num_hyperparameters: int > 0
    :param num_sampled: number of ``points_sampled`` to generate from the GP prior
    :type num_sampled: int > 0
    :param noise_variance_base: noise variance to associate with each sampled point
    :type noise_variance_base: float64 >= 0.0
    :param hyperparameter_interval: interval from which to draw hyperparameters (uniform random)
    :type hyperparameter_interval: non-empty ClosedInterval
    :param lower_bound_interval: interval from which to draw domain lower bounds (uniform random)
    :type lower_bound_interval: non-empty ClosedInterval; cannot overlap with upper_bound_interval
    :param upper_bound_interval: interval from which to draw domain upper bounds (uniform random)
    :type upper_bound_interval: non-empty ClosedInterval; cannot overlap with lower_bound_interval
    :param covariance_class: the type of covariance to use when building the GP
    :type covariance_class: type object of covariance_interface.CovarianceInterface (or one of its subclasses)
    :param spatial_domain_class: the type of the domain that the GP lives in
    :type spatial_domain_class: type object of domain_interface.DomainInterface (or one of its subclasses)
    :param hyperparameter_domain_class: the type of the domain that the hyperparameters live in
    :type hyperparameter_domain_class: type object of domain_interface.DomainInterface (or one of its subclasses)
    :param gaussian_process_class: the type of the Gaussian Process to draw from
    :type gaussian_process_class: type object of gaussian_process_interface.GaussianProcessInterface (or one of its subclasses)

    """
    # Pure data holder: record every construction parameter verbatim for later
    # use by the test-data builders.
    # NOTE: the default ClosedInterval/class-object arguments are evaluated once
    # at def time and shared across calls; they are never mutated here, so the
    # usual mutable-default pitfall does not apply.
    self.dim = dim
    self.num_hyperparameters = num_hyperparameters
    self.noise_variance_base = noise_variance_base
    self.num_sampled = num_sampled
    self.hyperparameter_interval = hyperparameter_interval
    self.lower_bound_interval = lower_bound_interval
    self.upper_bound_interval = upper_bound_interval
    self.covariance_class = covariance_class
    self.spatial_domain_class = spatial_domain_class
    self.hyperparameter_domain_class = hyperparameter_domain_class
    self.gaussian_process_class = gaussian_process_class
def test_hyperparameter_gradient_pings(self):
    """Ping test (compare analytic result to finite difference) the gradient wrt hyperparameters."""
    step = 2.0e-3
    tolerance = 4.0e-5
    num_tests = 10

    dim = 3
    num_hyperparameters = dim + 1
    hyperparameter_interval = ClosedInterval(3.0, 5.0)

    domain = TensorProductDomain(ClosedInterval.build_closed_intervals_from_list(
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]]))

    left_points = domain.generate_uniform_random_points_in_domain(num_tests)
    right_points = domain.generate_uniform_random_points_in_domain(num_tests)

    for test_index in xrange(num_tests):
        point_one = left_points[test_index, ...]
        point_two = right_points[test_index, ...]

        # Fresh random hyperparameters each ping to exercise varied regimes.
        covariance = gp_utils.fill_random_covariance_hyperparameters(
            hyperparameter_interval,
            num_hyperparameters,
            covariance_type=self.CovarianceClass,
        )
        analytic_grad = covariance.hyperparameter_grad_covariance(point_one, point_two)

        for k in xrange(covariance.num_hyperparameters):
            saved_hyperparameters = covariance.hyperparameters

            # Evaluate covariance at hyperparameter[k] + step.
            bumped_up = numpy.copy(saved_hyperparameters)
            bumped_up[k] += step
            covariance.hyperparameters = bumped_up
            cov_plus = covariance.covariance(point_one, point_two)
            covariance.hyperparameters = saved_hyperparameters

            # Evaluate covariance at hyperparameter[k] - step.
            bumped_down = numpy.copy(saved_hyperparameters)
            bumped_down[k] -= step
            covariance.hyperparameters = bumped_down
            cov_minus = covariance.covariance(point_one, point_two)
            covariance.hyperparameters = saved_hyperparameters

            # Central finite-difference approximation of the k-th gradient component.
            fd_grad = (cov_plus - cov_minus) / (2.0 * step)
            self.assert_scalar_within_relative(fd_grad, analytic_grad[k], tolerance)
def base_setup(cls):
    """Set up test cases (described inline)."""
    cls.test_cases = [
        ClosedInterval(9.378, 9.378),    # min == max
        ClosedInterval(-2.71, 3.14),     # min < max
        ClosedInterval(-2.71, -3.14),    # min > max
        ClosedInterval(0.0, numpy.inf),  # infinite range
    ]

    # For each interval, probe the midpoint, both boundaries, and one point
    # just outside on either side.
    cls.points_to_check = numpy.empty((len(cls.test_cases), 5))
    for row, interval in enumerate(cls.test_cases):
        cls.points_to_check[row, 0] = (interval.min + interval.max) * 0.5  # midpoint
        cls.points_to_check[row, 1] = interval.min                         # left boundary
        cls.points_to_check[row, 2] = interval.max                         # right boundary
        cls.points_to_check[row, 3] = interval.min - 0.5                   # outside on the left
        cls.points_to_check[row, 4] = interval.max + 0.5                   # outside on the right
def _build_gaussian_process_test_data(self, test_environment):
    """Build up a Gaussian Process randomly by repeatedly drawing from and then adding to the prior.

    :param test_environment: parameters describing how to construct a GP prior
    :type test_environment: GaussianProcessTestEnvironmentInput
    :return: gaussian process environments that can be used to run tests
    :rtype: GaussianProcessTestEnvironment

    """
    env = test_environment

    # Random hyperparameters for the covariance function.
    covariance = gp_utils.fill_random_covariance_hyperparameters(
        env.hyperparameter_interval,
        env.num_hyperparameters,
        covariance_type=env.covariance_class,
    )

    # Random spatial bounds, the domain built from them, and points drawn inside it.
    bounds = gp_utils.fill_random_domain_bounds(
        env.lower_bound_interval,
        env.upper_bound_interval,
        env.dim,
    )
    domain = env.spatial_domain_class(ClosedInterval.build_closed_intervals_from_list(bounds))
    points_sampled = domain.generate_uniform_random_points_in_domain(env.num_sampled)

    # A GP whose observations were drawn from the prior at those points.
    gaussian_process = gp_utils.build_random_gaussian_process(
        points_sampled,
        covariance,
        noise_variance=env.noise_variance,
        gaussian_process_type=env.gaussian_process_class,
    )
    return GaussianProcessTestEnvironment(domain, gaussian_process)
def test_grid_generation(self):
    """Test that ``generate_grid_points`` generates a uniform grid.

    Test makes assumptions about the ordering of the output that may be invalidated by changes to numpy.meshgrid.

    """
    domain_bounds = ClosedInterval.build_closed_intervals_from_list([[0.0, 1.0], [-2.0, 3.0], [2.71, 3.14]])
    points_per_dimension = [7, 11, 8]

    # Test that all points are present
    grid = generate_grid_points(points_per_dimension, domain_bounds)

    # Expected coordinate values along each axis.
    axis_values = [
        numpy.linspace(interval.min, interval.max, points_per_dimension[axis])
        for axis, interval in enumerate(domain_bounds)
    ]

    # Loop ordering assumes the output is ordered a certain way.
    num_x = axis_values[0].size
    num_z = axis_values[2].size
    for i, y_coord in enumerate(axis_values[1]):
        for j, x_coord in enumerate(axis_values[0]):
            for k, z_coord in enumerate(axis_values[2]):
                expected = numpy.array([x_coord, y_coord, z_coord])
                # Same flattening as the original: i * num_z * num_x + j * num_z + k.
                flat_index = (i * num_x + j) * num_z + k
                self.assert_vector_within_relative(grid[flat_index, ...], expected, 0.0)

    # Also test that scalar points_per_dimension works
    grid_truth = generate_grid_points([5, 5, 5], domain_bounds)
    grid_test = generate_grid_points(5, domain_bounds)
    self.assert_vector_within_relative(grid_test, grid_truth, 0.0)
def _make_domain_from_params(params, domain_info_key="domain_info", python_version=False):
    """Create and return a C++ ingestable domain from the request params.

    ``params`` has the following form::

        params = {
            'domain_info': <instance of :class:`moe.views.schemas.base_schemas.BoundedDomainInfo`>
            ...
            }

    """
    domain_info = params.get(domain_info_key)

    bounds = [
        ClosedInterval(raw_bound['min'], raw_bound['max'])
        for raw_bound in domain_info.get('domain_bounds', [])
    ]

    # Pick the Python or C++ implementation registered for the requested domain type.
    links = DOMAIN_TYPES_TO_DOMAIN_LINKS[domain_info.get('domain_type')]
    if python_version:
        domain_class = links.python_domain_class
    else:
        domain_class = links.cpp_domain_class

    return domain_class(bounds)
def _build_gaussian_process_test_data(self, test_environment):
    """Build up a Gaussian Process randomly by repeatedly drawing from and then adding to the prior.

    :param test_environment: parameters describing how to construct a GP prior
    :type test_environment: GaussianProcessTestEnvironmentInput
    :return: gaussian process environments that can be used to run tests
    :rtype: GaussianProcessTestEnvironment

    """
    # Random covariance hyperparameters drawn uniformly from the configured interval.
    covariance = gp_utils.fill_random_covariance_hyperparameters(
        test_environment.hyperparameter_interval,
        test_environment.num_hyperparameters,
        covariance_type=test_environment.covariance_class,
    )

    # Random [min_i, max_i] spatial bounds, then the domain built from them and
    # uniform random sample points inside it.
    domain_bounds = gp_utils.fill_random_domain_bounds(
        test_environment.lower_bound_interval,
        test_environment.upper_bound_interval,
        test_environment.dim,
    )
    domain = test_environment.spatial_domain_class(ClosedInterval.build_closed_intervals_from_list(domain_bounds))
    points_sampled = domain.generate_uniform_random_points_in_domain(test_environment.num_sampled)

    # Draw observations from the GP prior at the sampled points.
    # NOTE(review): reads ``test_environment.noise_variance`` while the input class
    # stores ``noise_variance_base`` — presumably a derived attribute; confirm.
    gaussian_process = gp_utils.build_random_gaussian_process(
        points_sampled,
        covariance,
        noise_variance=test_environment.noise_variance,
        gaussian_process_type=test_environment.gaussian_process_class,
    )
    return GaussianProcessTestEnvironment(domain, gaussian_process)
def test_grid_generation(self):
    """Test that ``generate_grid_points`` generates a uniform grid.

    Test makes assumptions about the ordering of the output that may be invalidated by changes to numpy.meshgrid.

    """
    domain_bounds = ClosedInterval.build_closed_intervals_from_list([[0.0, 1.0], [-2.0, 3.0], [2.71, 3.14]])
    points_per_dimension = [7, 11, 8]

    # Test that all points are present
    grid = generate_grid_points(points_per_dimension, domain_bounds)

    # Expected coordinate values along each axis.
    per_axis_grid = [numpy.linspace(bounds.min, bounds.max, points_per_dimension[i])
                     for i, bounds in enumerate(domain_bounds)]

    # Loop ordering assumes the output is ordered a certain way.
    for i, y_coord in enumerate(per_axis_grid[1]):
        for j, x_coord in enumerate(per_axis_grid[0]):
            for k, z_coord in enumerate(per_axis_grid[2]):
                truth = numpy.array([x_coord, y_coord, z_coord])
                # Flattened index consistent with the assumed output ordering:
                # y varies slowest, then x, then z.
                index = i * per_axis_grid[2].size * per_axis_grid[0].size + j * per_axis_grid[2].size + k
                test = grid[index, ...]
                self.assert_vector_within_relative(test, truth, 0.0)

    # Also test that scalar points_per_dimension works
    points_per_dimension = [5, 5, 5]
    grid_truth = generate_grid_points(points_per_dimension, domain_bounds)

    points_per_dimension = 5
    grid_test = generate_grid_points(points_per_dimension, domain_bounds)

    self.assert_vector_within_relative(grid_test, grid_truth, 0.0)
def test_multistarted_bfgs_optimizer(self):
    """Check that multistarted L-BFGS-B can find the optimum in a 'very' large domain."""
    # Set a large domain: a single optimizer run is unlikely to reach the optimum
    big_domain = TensorProductDomain([ClosedInterval(-10.0, 10.0)] * self.dim)

    tolerance = 2.0e-10
    num_points = 10
    single_run_optimizer = LBFGSBOptimizer(big_domain, self.polynomial, self.BFGS_parameters)
    multistart_optimizer = MultistartOptimizer(single_run_optimizer, num_points)
    output, _ = multistart_optimizer.optimize()

    # Verify coordinates
    self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance)

    # Verify function value
    final_value = self.polynomial.compute_objective_function()
    self.assert_scalar_within_relative(final_value, self.polynomial.optimum_value, tolerance)

    # Verify derivative is (numerically) zero at the optimum
    gradient = self.polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
def fill_random_domain_bounds(lower_bound_interval, upper_bound_interval, dim):
    r"""Generate a random list of dim ``[min_i, max_i]`` pairs.

    The data is organized such that:
    ``min_i \in [lower_bound_interval.min, lower_bound_interval.max]``
    ``max_i \in [upper_bound_interval.min, upper_bound_interval.max]``

    This is mainly useful for testing or when "random" data is needed so that
    we get more varied cases than the unit hypercube.

    :param lower_bound_interval: an uniform range, ``[min, max]``, from which to draw the domain lower bounds, ``min_i``
    :type lower_bound_interval: ClosedInterval
    :param upper_bound_interval: an uniform range, ``[min, max]``, from which to draw the domain upper bounds, ``max_i``
    :type upper_bound_interval: ClosedInterval
    :param dim: the spatial dimension of a point (i.e., number of independent params in experiment)
    :type dim: int > 0
    :return: ClosedInterval objects with their min, max members initialized as described
    :rtype: list of ClosedInterval

    """
    domain_bounds = numpy.empty((dim, 2))
    # Draw each dimension's bound independently (size=dim). The previous scalar
    # draw broadcast a single value to every dimension, giving all dimensions
    # identical bounds — contradicting the per-dimension ``min_i``/``max_i``
    # contract in the docstring.
    domain_bounds[..., 0] = numpy.random.uniform(lower_bound_interval.min, lower_bound_interval.max, size=dim)
    domain_bounds[..., 1] = numpy.random.uniform(upper_bound_interval.min, upper_bound_interval.max, size=dim)
    return ClosedInterval.build_closed_intervals_from_list(domain_bounds)
def test_multistart_qei_expected_improvement_dfo(self):
    """Check that multistart optimization (BFGS) can find the optimum point to sample (using 2-EI)."""
    # Fixed seed: Monte-Carlo EI is noisy, so the test is only stable for a known draw.
    numpy.random.seed(7860)
    index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[index]

    tolerance = 6.0e-5
    num_multistarts = 3

    # Expand the domain so that we are definitely not doing constrained optimization
    expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 3.0)] * self.dim)
    num_to_sample = 2
    repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

    num_mc_iterations = 100000
    # Just any random point that won't be optimal
    points_to_sample = repeated_domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
    # Compute EI and its gradient for the sake of comparison
    ei_initial = ei_eval.compute_expected_improvement()

    ei_optimizer = LBFGSBOptimizer(repeated_domain, ei_eval, self.BFGS_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are "small" or on border. MC is very inaccurate near 0, so use finite difference
    # gradient instead.
    ei_eval.current_point = best_point
    ei_final = ei_eval.compute_expected_improvement()

    # Central finite differences of EI, perturbing one coordinate of best_point
    # at a time (restored after each probe).
    finite_diff_grad = numpy.zeros(best_point.shape)
    h_value = 0.00001
    for i in range(best_point.shape[0]):
        for j in range(best_point.shape[1]):
            best_point[i, j] += h_value
            ei_eval.current_point = best_point
            ei_upper = ei_eval.compute_expected_improvement()

            best_point[i, j] -= 2 * h_value
            ei_eval.current_point = best_point
            ei_lower = ei_eval.compute_expected_improvement()

            best_point[i, j] += h_value  # restore the original coordinate
            finite_diff_grad[i, j] = (ei_upper - ei_lower) / (2 * h_value)

    self.assert_vector_within_relative(finite_diff_grad, numpy.zeros(finite_diff_grad.shape), tolerance)

    # Check that output is in the domain
    assert repeated_domain.check_point_inside(best_point) is True

    # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
    # EI should have improved
    assert ei_final >= ei_initial
def test_multistart_monte_carlo_expected_improvement_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 2-EI)."""
    numpy.random.seed(7858)  # TODO(271): Monte Carlo only works for this seed
    index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[index]

    max_num_steps = 75  # this is *too few* steps; we configure it this way so the test will run quickly
    max_num_restarts = 5
    num_steps_averaged = 50
    gamma = 0.2
    pre_mult = 1.5
    max_relative_change = 1.0
    tolerance = 3.0e-2  # really large tolerance b/c converging with monte-carlo (esp in Python) is expensive
    gd_parameters = GradientDescentParameters(
        max_num_steps,
        max_num_restarts,
        num_steps_averaged,
        gamma,
        pre_mult,
        max_relative_change,
        tolerance,
    )
    num_multistarts = 2

    # Expand the domain so that we are definitely not doing constrained optimization
    expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)
    num_to_sample = 2
    repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

    num_mc_iterations = 10000
    # Just any random point that won't be optimal
    points_to_sample = repeated_domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
    # Compute EI and its gradient for the sake of comparison
    ei_initial = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
    grad_ei_initial = ei_eval.compute_grad_expected_improvement()

    ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are "small"
    ei_eval.current_point = best_point
    ei_final = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
    grad_ei_final = ei_eval.compute_grad_expected_improvement()
    self.assert_vector_within_relative(grad_ei_final, numpy.zeros(grad_ei_final.shape), tolerance)

    # Check that output is in the domain
    assert repeated_domain.check_point_inside(best_point) is True

    # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
    # EI should have improved
    assert ei_final >= ei_initial

    # grad EI should have improved (componentwise, in absolute value)
    for index in numpy.ndindex(grad_ei_final.shape):
        assert numpy.fabs(grad_ei_final[index]) <= numpy.fabs(grad_ei_initial[index])
def base_setup(cls):
    """Set up a test case for optimizing a simple quadratic polynomial."""
    cls.dim = 3
    cls.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * cls.dim)

    # Quadratic with its maximum at (0.5, 0.5, 0.5), evaluated starting at the origin.
    cls.polynomial = QuadraticFunction(numpy.full(cls.dim, 0.5), numpy.zeros(cls.dim))
    cls.null_optimizer = NullOptimizer(cls.domain, cls.polynomial)
def gen_data_to_s3(bucket, obj_func_min, num_pts, which_IS, key):
    """Sample ``num_pts`` uniform random points from the objective's search domain,
    evaluate information source ``which_IS`` at each, and upload the data set to S3.

    :param bucket: S3 bucket to write to (passed through to ``send_data_to_s3``)
    :param obj_func_min: objective providing ``_search_domain``, ``evaluate`` and ``noise_and_cost_func``
    :param num_pts: number of points to sample
    :param which_IS: index of the information source to evaluate
    :param key: S3 object key for the stored data
    """
    search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in obj_func_min._search_domain
    ])
    points = search_domain.generate_uniform_random_points_in_domain(num_pts)
    vals = [obj_func_min.evaluate(which_IS, pt) for pt in points]
    # noise_and_cost_func returns a (noise, cost) pair; only the noise component
    # belongs in the data set (cf. coldstart_gen_data, which indexes [0]).
    # Previously the whole pair was multiplied by the ones array, which
    # broadcasts incorrectly (or errors) for num_pts != 2.
    noise = obj_func_min.noise_and_cost_func(which_IS, None)[0] * np.ones(num_pts)
    data = {"points": points, "vals": vals, "noise": noise}
    send_data_to_s3(bucket, key, data)
def coldstart_gen_data(obj_func_min, num_init_pts, num_replications, directory):
    """ generate initial data for experiments and store in pickle """
    for repl in range(num_replications):
        out_path = "{0}/{1}_{2}_points_each_repl_{3}.pickle".format(
            directory, obj_func_min.getFuncName(), num_init_pts, repl)

        # this file is used below again and hence should be made available there, too
        domain = pythonTensorProductDomain(
            [ClosedInterval(low_high[0], low_high[1]) for low_high in obj_func_min._search_domain])
        sample_points = domain.generate_uniform_random_points_in_domain(num_init_pts)
        sample_vals = [obj_func_min.evaluate(0, pt) for pt in sample_points]

        payload = {
            "points": sample_points,
            "vals": sample_vals,
            "noise": obj_func_min.noise_and_cost_func(0, None)[0] * numpy.ones(num_init_pts),
        }
        with open(out_path, "wb") as handle:
            pickle.dump(payload, handle)
def gen_data_to_pickle(directory, obj_func_min, num_pts, which_IS, filename):
    """Sample ``num_pts`` uniform random points from the objective's search domain,
    evaluate information source ``which_IS`` at each, and pickle the data set.

    :param directory: unused here; kept for interface compatibility with callers
    :param obj_func_min: objective providing ``_search_domain``, ``evaluate`` and ``noise_and_cost_func``
    :param num_pts: number of points to sample
    :param which_IS: index of the information source to evaluate
    :param filename: path of the pickle file to write
    """
    search_domain = pythonTensorProductDomain([
        ClosedInterval(bound[0], bound[1])
        for bound in obj_func_min._search_domain
    ])
    points = search_domain.generate_uniform_random_points_in_domain(num_pts)
    vals = [obj_func_min.evaluate(which_IS, pt) for pt in points]
    # noise_and_cost_func returns a (noise, cost) pair; keep only the noise
    # component (cf. coldstart_gen_data) instead of multiplying the whole pair,
    # which broadcasts incorrectly (or errors) for num_pts != 2.
    noise = obj_func_min.noise_and_cost_func(which_IS, None)[0] * np.ones(num_pts)
    data = {"points": points, "vals": vals, "noise": noise}
    with open(filename, "wb") as file:
        pickle.dump(data, file)
def find_best_mu_ei(gp, domain_bounds, num_multistart):
    """Multistart BFGS minimization of the GP posterior mean over ``domain_bounds``.

    :param gp: Gaussian process whose posterior mean is minimized
    :param domain_bounds: iterable of (lower, upper) pairs, one per dimension
    :param num_multistart: number of uniform random starting points
    :return: (min_mu, point) — best objective value found and its location;
        ``point`` is None if ``num_multistart == 0``
    """
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds])
    start_points = search_domain.generate_uniform_random_points_in_domain(num_multistart)

    # Build the objective once: compute_mu(gp) is loop-invariant; the original
    # rebuilt it on every restart.
    mu_objective = compute_mu(gp)

    min_mu = numpy.inf
    point = None  # guards against UnboundLocalError when there are no start points
    for start_point in start_points:
        x, f = bfgs_optimization(start_point, mu_objective, domain_bounds)
        if min_mu > f:
            min_mu = f
            point = x
    return min_mu, point
def optimize_hyperparameters(problem_search_domain, points_sampled, points_sampled_value,
                             points_sampled_noise_variance, upper_bound_noise_variances=10.,
                             consider_small_variances=True, hyper_prior=None,
                             num_restarts=32, num_jobs=16):
    ''' Fit hyperparameters from data using MLE or MAP (described in Poloczek, Wang, and Frazier 2016)
    :param problem_search_domain: The search domain of the benchmark, as provided by the benchmark
    :param points_sampled: An array that gives the points sampled so far. Each points has the form [IS dim0 dim1 ... dimn]
    :param points_sampled_value: An array that gives the values observed at the points in same ordering
    :param points_sampled_noise_variance: An array of observation noise variances, in the same ordering
    :param upper_bound_noise_variances: An upper bound on the search interval for the noise variance parameters (before squaring)
    :param consider_small_variances: If true, half of the BFGS starting points have entries for the noise parameters set to a small value
    :param hyper_prior: use prior for MAP estimate if supplied, and do MLE otherwise
    :param num_restarts: number of starting points for BFGS to find MLE/MAP
    :param num_jobs: number of parallelized BFGS instances
    :return: An array with the best found values for the hyperparameters
    '''
    approx_grad = True
    upper_bound_signal_variances = numpy.maximum(10., numpy.var(points_sampled_value))  # pick huge upper bounds
    hyper_bounds = generate_hyperbounds(problem_search_domain, upper_bound_noise_variances, upper_bound_signal_variances)
    hyperparam_search_domain = pythonTensorProductDomain([ClosedInterval(bd[0], bd[1]) for bd in hyper_bounds])
    hyper_multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(num_restarts)

    # Edit every other starting point in place so that small noise variances are explored.
    for i in xrange(num_restarts):
        init_hyper = hyper_multistart_pts[i]
        # if optimization is enabled, make sure that small variances are checked despite multi-modality
        # this optimization seems softer than using a MAP estimate
        if consider_small_variances and (i % 2 == 0):
            init_hyper[-1] = 0.1  # use a small value as starting point for noise parameters in BFGS
        hyper_multistart_pts[i] = init_hyper

    # One BFGS run per starting point, executed in parallel via joblib.
    parallel_results = Parallel(n_jobs=num_jobs)(
        delayed(hyper_opt)(points_sampled, points_sampled_value, points_sampled_noise_variance,
                           init_hyper, hyper_bounds, approx_grad, hyper_prior)
        for init_hyper in hyper_multistart_pts)
    # print min(parallel_results,key=itemgetter(1))
    # Each result is (hyperparameters, objective); take the hyperparameters of the smallest objective.
    best_hyper = min(parallel_results, key=itemgetter(1))[0]  # recall that we negated the log marginal likelihood when passing it to BFGS
    return best_hyper
def test_gradient_descent_optimizer_constrained(self):
    """Check that gradient descent can find the global optimum (in a domain) when the true optimum is outside."""
    # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
    domain_bounds = [ClosedInterval(0.05, 0.32), ClosedInterval(0.05, 0.6), ClosedInterval(0.05, 0.32)]
    domain = TensorProductDomain(domain_bounds)
    gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)

    # Work out what the maxima point would be given the domain constraints (i.e., project to the nearest point on domain).
    # Copy first: the original clamped the returned array in place, which mutates
    # the polynomial's own optimum_point if the property hands back a reference —
    # and that same attribute is read again below for the derivative check.
    constrained_optimum_point = numpy.copy(self.polynomial.optimum_point)
    for i, bounds in enumerate(domain_bounds):
        if constrained_optimum_point[i] > bounds.max:
            constrained_optimum_point[i] = bounds.max
        elif constrained_optimum_point[i] < bounds.min:
            constrained_optimum_point[i] = bounds.min

    tolerance = 2.0e-13
    initial_guess = numpy.full(self.polynomial.dim, 0.2)
    gradient_descent_optimizer.objective_function.current_point = initial_guess
    initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point

    # Verify coordinates
    self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

    # Verify optimized value is better than initial guess
    final_value = self.polynomial.compute_objective_function()
    assert final_value >= initial_value

    # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries
    gradient = self.polynomial.compute_grad_objective_function()
    for i, bounds in enumerate(domain_bounds):
        if bounds.is_inside(self.polynomial.optimum_point[i]):
            self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
def check_ave_min(func_idx):
    """Monte-Carlo estimate, per sample size, of the expected minimum objective value
    over uniform random points; relies on module-level ``func_list`` and ``num_pts_list``.
    """
    num_repl = 500
    func = func_list[func_idx]
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(lo_hi[0], lo_hi[1]) for lo_hi in func._search_domain])

    min_vals = np.zeros((num_repl, len(num_pts_list)))
    for col, num_pts in enumerate(num_pts_list):
        for row in range(num_repl):
            sample = search_domain.generate_uniform_random_points_in_domain(num_pts)
            min_vals[row, col] = np.amin([func.evaluate(0, pt) for pt in sample])

    # Average across replications, one entry per sample size.
    return np.mean(min_vals, axis=0).tolist()
def coldstart_gen_hyperdata(primary_obj_func_min, list_other_obj_func_min, num_pts, directory):
    """ generate data for hyperparameter optimization and store in pickle """
    other_names = "_".join([func.getFuncName() for func in list_other_obj_func_min])
    filename = "{0}/hyper_{1}_points_{2}_{3}.pickle".format(
        directory, num_pts, primary_obj_func_min.getFuncName(), other_names)

    # this file is used below again and hence should be made available there, too
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in primary_obj_func_min._search_domain])
    points = search_domain.generate_uniform_random_points_in_domain(num_pts)

    # Primary function first, then each related function, all evaluated on the same points.
    vals = [[primary_obj_func_min.evaluate(0, pt) for pt in points]]
    noise = [primary_obj_func_min.noise_and_cost_func(0, None)]
    for other_func in list_other_obj_func_min:
        vals.append([other_func.evaluate(0, pt) for pt in points])
        noise.append(other_func.noise_and_cost_func(0, None))

    with open(filename, "wb") as out_file:
        pickle.dump({"points": points, "vals": vals, "noise": noise}, out_file)
def test_multistart_hyperparameter_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum hyperparameters."""
    random_state = numpy.random.get_state()
    numpy.random.seed(87612)
    try:
        max_num_steps = 200  # this is generally *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 0
        gamma = 0.2
        pre_mult = 1.0
        max_relative_change = 0.3
        tolerance = 1.0e-11
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 3  # again, too few multistarts; but we want the test to run reasonably quickly

        num_sampled = 10
        self.gp_test_environment_input.num_sampled = num_sampled
        _, gaussian_process = self._build_gaussian_process_test_data(self.gp_test_environment_input)
        python_cov, historical_data = gaussian_process.get_core_data_copy()

        lml = GaussianProcessLogMarginalLikelihood(python_cov, historical_data)

        domain = TensorProductDomain(
            [ClosedInterval(1.0, 4.0)] * self.gp_test_environment_input.num_hyperparameters)

        hyperparameter_optimizer = GradientDescentOptimizer(domain, lml, gd_parameters)
        best_hyperparameters = multistart_hyperparameter_optimization(hyperparameter_optimizer, num_multistarts)

        # Check that gradients are small
        lml.hyperparameters = best_hyperparameters
        gradient = lml.compute_grad_log_likelihood()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.num_hyperparameters), tolerance)

        # Check that output is in the domain
        assert domain.check_point_inside(best_hyperparameters) is True
    finally:
        # Restore even when an assertion fails: previously the state was only
        # restored on success, leaking the seeded RNG state into other tests.
        numpy.random.set_state(random_state)
def test_multistart_analytic_expected_improvement_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 1D analytic EI)."""
    numpy.random.seed(3148)
    env_index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[env_index]

    # Deliberately under-configured so the test runs quickly.
    tolerance = 1.0e-7
    gd_parameters = GradientDescentParameters(
        200,  # max_num_steps (generally *too few*)
        5,    # max_num_restarts
        0,    # num_steps_averaged
        0.2,  # gamma
        1.5,  # pre_mult
        1.0,  # max_relative_change
        tolerance,
    )
    num_multistarts = 3

    points_to_sample = domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, points_to_sample)

    # expand the domain so that we are definitely not doing constrained optimization
    expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)
    num_to_sample = 1
    repeated_domain = RepeatedDomain(ei_eval.num_to_sample, expanded_domain)

    ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are small at the returned point.
    ei_eval.current_point = best_point
    gradient = ei_eval.compute_grad_expected_improvement()
    self.assert_vector_within_relative(gradient, numpy.zeros(gradient.shape), tolerance)

    # Check that output is in the domain
    assert repeated_domain.check_point_inside(best_point) is True
def base_setup(cls):
    """Set up parameters for test cases.

    Builds a collection of tensor-product domains of varying dimension and
    scale, plus the point counts to exercise against each one.
    """
    raw_bounds = (
        [[-1.0, 1.0]],
        [[-10.0, 10.0]],
        [[-500.0, -490.0]],
        [[6000.0, 6000.001]],
        [[-1.0, 1.0], [-1.0, 1.0]],
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]],
        [[-7000.0, 10000.0], [-8000.0, -7999.0], [10000.06, 10000.0601]],
    )
    cls.domains_to_test = [
        TensorProductDomain(ClosedInterval.build_closed_intervals_from_list(bounds))
        for bounds in raw_bounds
    ]
    cls.num_points_to_test = (1, 2, 5, 10, 20)
def base_setup(cls):
    """Set up parameters for test cases.

    Builds a collection of tensor-product domains of varying dimension and
    scale, plus the point counts to exercise against each one.
    """
    raw_bounds = (
        [[-1.0, 1.0]],
        [[-10.0, 10.0]],
        [[-500.0, -490.0]],
        [[6000.0, 6000.001]],
        [[-1.0, 1.0], [-1.0, 1.0]],
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]],
        [[-7000.0, 10000.0], [-8000.0, -7999.0], [10000.06, 10000.0601]],
    )
    cls.domains_to_test = [
        TensorProductDomain(ClosedInterval.build_closed_intervals_from_list(bounds))
        for bounds in raw_bounds
    ]
    cls.num_points_to_test = (1, 2, 5, 10, 20)
def test_hyperparameter_gradient_pings(self):
    """Ping test (compare analytic result to finite difference) the gradient wrt hyperparameters.

    For several random point pairs and random hyperparameter settings, compare
    the analytic gradient of the covariance wrt each hyperparameter against a
    central finite-difference estimate.
    """
    h = 2.0e-3
    tolerance = 4.0e-5
    num_tests = 10

    dim = 3
    num_hyperparameters = dim + 1
    hyperparameter_interval = ClosedInterval(3.0, 5.0)

    domain = TensorProductDomain(ClosedInterval.build_closed_intervals_from_list(
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]]))

    points1 = domain.generate_uniform_random_points_in_domain(num_tests)
    points2 = domain.generate_uniform_random_points_in_domain(num_tests)

    # ``range`` (not ``xrange``) iterates identically here and keeps the test
    # runnable under both Python 2 and Python 3.
    for i in range(num_tests):
        point_one = points1[i, ...]
        point_two = points2[i, ...]

        covariance = gp_utils.fill_random_covariance_hyperparameters(
            hyperparameter_interval,
            num_hyperparameters,
            covariance_type=self.CovarianceClass,
        )

        analytic_grad = covariance.hyperparameter_grad_covariance(point_one, point_two)
        for k in range(covariance.num_hyperparameters):
            hyperparameters_old = covariance.hyperparameters

            # hyperparameter + h
            hyperparameters_p = numpy.copy(hyperparameters_old)
            hyperparameters_p[k] += h
            covariance.hyperparameters = hyperparameters_p
            cov_p = covariance.covariance(point_one, point_two)
            covariance.hyperparameters = hyperparameters_old

            # hyperparameter - h
            hyperparameters_m = numpy.copy(hyperparameters_old)
            hyperparameters_m[k] -= h
            covariance.hyperparameters = hyperparameters_m
            cov_m = covariance.covariance(point_one, point_two)
            covariance.hyperparameters = hyperparameters_old

            # central finite difference estimate of d cov / d hyperparameter_k
            fd_grad = (cov_p - cov_m) / (2.0 * h)

            self.assert_scalar_within_relative(fd_grad, analytic_grad[k], tolerance)
def test_normal_prior(self):
    """Smoke-test hyperparameter optimization under different normal priors.

    Fits hyperparameters three times — under a broad test prior, under a prior
    centered at the true hyperparameters, and under a prior centered away from
    them — then prints the results for manual inspection.
    """
    space_dim = 2
    num_IS = 2
    true_hyper, data = get_random_gp_data(space_dim, num_IS, 500)
    hyperparam_search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1])
         for bound in numpy.repeat([[0.01, 2.]], len(true_hyper), axis=0)])
    hyper_bounds = [(0.01, 100.) for i in range(len(true_hyper))]
    multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(1)
    cov = MixedSquareExponential(hyperparameters=multistart_pts[0, :],
                                 total_dim=space_dim + 1, num_is=num_IS)

    # Broad prior centered at 5 with large variance.
    test_prior = NormalPrior(5. * numpy.ones(len(true_hyper)),
                             25. * numpy.eye(len(true_hyper)))
    hyper_test, f, output = hyper_opt(cov, data=data, init_hyper=multistart_pts[0, :],
                                      hyper_bounds=hyper_bounds, approx_grad=False,
                                      hyper_prior=test_prior)

    # Tight prior centered at the true hyperparameters.
    good_prior = NormalPrior(true_hyper, 0.1 * numpy.eye(len(true_hyper)))
    hyper_good_prior, _, _ = hyper_opt(cov, data=data, init_hyper=multistart_pts[0, :],
                                       hyper_bounds=hyper_bounds, approx_grad=False,
                                       hyper_prior=good_prior)

    # Tight prior centered away from the true hyperparameters.
    bad_prior = NormalPrior(numpy.ones(len(true_hyper)),
                            0.1 * numpy.eye(len(true_hyper)))
    hyper_bad_prior, _, _ = hyper_opt(cov, data=data, init_hyper=multistart_pts[0, :],
                                      hyper_bounds=hyper_bounds, approx_grad=False,
                                      hyper_prior=bad_prior)

    # BUGFIX (compat): Python-2-only ``print`` statements converted to the
    # single-argument ``print(...)`` call, which behaves identically under
    # Python 2 and keeps the module importable under Python 3.
    print("true hyper: {0}\n hyper test: {1}\n good prior: {2}\n bad prior:\n should close to one {3}".format(
        true_hyper, hyper_test, hyper_good_prior, hyper_bad_prior))
    print("dim {0}, num_is {1}".format(space_dim, num_IS))
def global_optimization_of_GP(gp_model, bounds, num_multistart, minimization=True):
    """Optimize the posterior mean of ``gp_model`` over a box via multistart L-BFGS-B.

    :param gp_model: GP exposing ``compute_mean_of_points`` and ``compute_grad_mean_of_points``
    :param bounds: list of (min, max) tuples, one per (non-IS) coordinate
    :param num_multistart: number of random restarts
    :param minimization: minimize the posterior mean if True, otherwise maximize
    :return: shape(space_dim+1,), best x and first entry is always zero because we assume IS0 is truth IS
    """
    sgn = 1 if minimization else -1
    # Prepend the IS-0 index to each candidate point before querying the GP.
    fcn = lambda x: gp_model.compute_mean_of_points(
        np.concatenate([[0], x]).reshape((1, -1)))[0] * sgn
    # Drop the gradient component wrt the IS coordinate ([0, 1:]).
    grad = lambda x: gp_model.compute_grad_mean_of_points(
        np.concatenate([[0], x]).reshape(
            (1, -1)), num_derivatives=1)[0, 1:] * sgn
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in bounds])
    start_points = search_domain.generate_uniform_random_points_in_domain(
        num_multistart)
    min_fcn = np.inf
    for start_pt in start_points:
        result_x, result_f, output = scipy.optimize.fmin_l_bfgs_b(
            func=fcn,
            x0=start_pt,
            fprime=grad,
            args=(),
            approx_grad=False,
            bounds=bounds,
            m=10,
            factr=10.0,
            pgtol=1e-10,
            epsilon=1e-08,
            iprint=-1,
            maxfun=15000,
            maxiter=200,
            disp=0,
            callback=None)
        if result_f < min_fcn:
            min_fcn = result_f
            ret = result_x
    # BUGFIX (compat): ``print`` statement converted to a single-argument
    # ``print(...)`` call — identical output in Python 2, valid in Python 3.
    print("found GP min {0}".format(min_fcn))
    return np.concatenate([[0], ret]).reshape((1, -1))
def optimize_with_ego(gp, domain_bounds, num_multistart):
    """Maximize analytic expected improvement over a box via multistart BFGS.

    :param gp: Gaussian process model to compute EI against
    :param domain_bounds: list of (min, max) pairs, one per dimension
    :param num_multistart: number of random restarts
    :return: (best point found, its expected improvement)
    """
    ei_evaluator = ExpectedImprovement(gp)
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(pair[0], pair[1]) for pair in domain_bounds])
    starts = search_domain.generate_uniform_random_points_in_domain(num_multistart)

    def negative_ego_func(x):
        # BFGS minimizes, so negate EI to maximize it.
        ei_evaluator.set_current_point(x.reshape((1, -1)))
        return -1.0 * ei_evaluator.compute_expected_improvement()

    best_negative_ei = numpy.inf
    for start in starts:
        candidate, value = bfgs_optimization(start, negative_ego_func, domain_bounds)
        if value < best_negative_ei:
            best_negative_ei = value
            point_to_sample = candidate
    return point_to_sample, -best_negative_ei
def optimize_entropy(pes, pes_model, space_dim, num_discretization, cost_func, list_sample_is, bounds=None):
    """Maximize a cost-scaled predictive-entropy-search acquisition over a random discretization.

    Scores ``num_discretization`` uniform random points for every information
    source (IS) in ``list_sample_is``, dividing each IS's acquisition values by
    that IS's cost, and returns the overall best.

    :param pes: acquisition object; its ``acquisition`` method scores batches of (IS, point) rows
    :param pes_model: model object passed through to ``pes.acquisition``
    :param space_dim: dimension of the search space (excluding the IS coordinate)
    :param num_discretization: number of random candidate points to score
    :param cost_func: callable ``(is_index, point) -> cost`` used to scale the acquisition
    :param list_sample_is: iterable of IS indices to consider; must be non-empty
    :param bounds: list of (min, max) per dimension; defaults to the unit hypercube
    :return: (best point, best IS, best cost-scaled acquisition value,
              list of per-IS best acquisition values with cost scaling undone)
    """
    # NOTE(review): removed a large block of commented-out multistart L-BFGS-B
    # code that had been superseded by the discretization approach below.
    if not bounds:
        bounds = [(0., 1.)] * space_dim
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(bound[0], bound[1]) for bound in bounds])
    points = search_domain.generate_uniform_random_points_in_domain(
        num_discretization)
    raw_acq = []  # for tuning costs
    best_acq = -np.inf
    for which_is in list_sample_is:
        # Prepend the IS index to every candidate row, score the whole batch,
        # and scale by this IS's cost.
        acq_list = pes.acquisition(
            {'obj': pes_model}, {},
            np.hstack((np.ones((num_discretization, 1)) * which_is, points)),
            current_best=None,
            compute_grad=False) / cost_func(which_is, None)
        inner_best_idx = np.argmax(acq_list)
        # Undo the cost scaling so raw_acq records the unscaled acquisition.
        raw_acq.append(acq_list[inner_best_idx] * cost_func(which_is, None))
        if acq_list[inner_best_idx] > best_acq:
            best_acq = acq_list[inner_best_idx]
            best_is = which_is
            best_idx = inner_best_idx
    return points[best_idx, :], best_is, best_acq, raw_acq
def base_setup(self):
    """Set up a test case for optimizing a simple quadratic polynomial."""
    self.dim = 3
    self.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * self.dim)

    maxima_point = numpy.full(self.dim, 0.5)
    start_point = numpy.zeros(self.dim)
    self.polynomial = QuadraticFunction(maxima_point, start_point)

    # Gradient descent configuration. A smaller gamma would converge faster,
    # but we don't want to make the problem too easy.
    self.gd_parameters = GradientDescentParameters(
        250,      # max_num_steps
        10,       # max_num_restarts
        0,        # num_steps_averaged
        0.7,      # gamma
        1.0,      # pre_mult
        0.8,      # max_relative_change
        1.0e-12,  # tolerance
    )

    # L-BFGS-B configuration.
    self.BFGS_parameters = LBFGSBParameters(
        False,    # approx_grad
        150000,   # max_func_evals
        10,       # max_metric_correc
        1000.0,   # factr
        1e-10,    # pgtol
        1e-8,     # epsilon
    )
def optimize_with_multifidelity_ei(gp_list, domain_bounds, num_IS, num_multistart, noise_and_cost_func):
    """Maximize multifidelity expected improvement over a box via multistart BFGS.

    :param gp_list: list of GP models, one per information source
    :param domain_bounds: list of (min, max) pairs, one per dimension
    :param num_IS: number of information sources (unused here; kept for interface parity)
    :param num_multistart: number of random restarts
    :param noise_and_cost_func: per-IS noise/cost callable used by the evaluator
    :return: (best point, IS chosen for that point, its expected improvement)
    """
    ei_evaluator = MultifidelityExpectedImprovement(gp_list, noise_and_cost_func)
    search_domain = pythonTensorProductDomain(
        [ClosedInterval(pair[0], pair[1]) for pair in domain_bounds])
    starts = search_domain.generate_uniform_random_points_in_domain(num_multistart)

    def negative_ei_func(x):
        # BFGS minimizes, so negate EI to maximize it.
        return -1.0 * ei_evaluator.compute_expected_improvement(x)

    best_negative_ei = numpy.inf
    for start in starts:
        candidate, value = bfgs_optimization(start, negative_ei_func, domain_bounds)
        if value < best_negative_ei:
            best_negative_ei = value
            point_to_sample = candidate
    return (point_to_sample,
            ei_evaluator.choose_IS(point_to_sample),
            -best_negative_ei)
def fill_random_domain_bounds(lower_bound_interval, upper_bound_interval, dim):
    r"""Generate a random list of dim ``[min_i, max_i]`` pairs.

    The data is organized such that:
    ``min_i \in [lower_bound_interval.min, lower_bound_interval.max]``
    ``max_i \in [upper_bound_interval.min, upper_bound_interval.max]``

    This is mainly useful for testing or when "random" data is needed so that we get more
    varied cases than the unit hypercube.

    :param lower_bound_interval: an uniform range, ``[min, max]``, from which to draw the domain lower bounds, ``min_i``
    :type lower_bound_interval: ClosedInterval
    :param upper_bound_interval: an uniform range, ``[min, max]``, from which to draw the domain upper bounds, ``max_i``
    :type upper_bound_interval: ClosedInterval
    :param dim: the spatial dimension of a point (i.e., number of independent params in experiment)
    :type dim: int > 0
    :return: ClosedInterval objects with their min, max members initialized as described
    :rtype: list of ClosedInterval

    """
    domain_bounds = numpy.empty((dim, 2))
    # BUGFIX: draw ``dim`` independent samples via ``size=dim``. Previously
    # ``numpy.random.uniform(min, max)`` returned a single scalar that was
    # broadcast to every row, so all dimensions shared identical bounds,
    # defeating the "more varied cases" purpose stated above.
    domain_bounds[..., 0] = numpy.random.uniform(lower_bound_interval.min,
                                                 lower_bound_interval.max,
                                                 size=dim)
    domain_bounds[..., 1] = numpy.random.uniform(upper_bound_interval.min,
                                                 upper_bound_interval.max,
                                                 size=dim)
    return ClosedInterval.build_closed_intervals_from_list(domain_bounds)
def multistart_hyperparameter_optimization(
        hyperparameter_optimizer,
        num_multistarts,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Select the hyperparameters that maximize the specified log likelihood measure of model fit (over the historical data) within the specified domain.

    .. Note:: The following comments are copied from
        :func:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.multistart_hyperparameter_optimization`.

    See :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLogMarginalLikelihood`
    and :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLeaveOneOutLogLikelihood`
    for an overview of some example log likelihood-like measures.

    Optimizers are: null ('dumb' search), gradient descent, newton. Newton is the
    suggested optimizer but is not presently available in Python (use the C++
    interface); in Python, gradient descent is suggested.
    'dumb' search evaluates the objective at ``num_multistarts`` hyperparameter
    sets drawn uniformly via latin hypercube sampling. See gpp_python_common.cpp
    for C++ enum declarations laying out the options for objective and optimizer types.

    TODO(GH-57): Implement hessians and Newton's method.
    TODO(GH-56): Allow callers to pass in a source of randomness.

    Currently, during optimization, we recommend that the coordinates of the initial
    guesses not differ from the coordinates of the optima by more than about 1 order
    of magnitude — a very (VERY!) rough guideline for sizing the domain and
    gd_parameters.num_multistarts; i.e., be wary of sets of initial guesses that
    cover the space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; note
    that it may not be a true optimum (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if NO improvement can be found! In that case,
        the output will always be the first randomly chosen point. status will report failure.

    :param hyperparameter_optimizer: object that optimizes (e.g., gradient descent, newton) the desired log_likelihood
        measure over a domain (wrt the hyperparameters of covariance)
    :type hyperparameter_optimizer: interfaces.optimization_interfaces.OptimizerInterface subclass
    :param num_multistarts: number of times to multistart ``hyperparameter_optimizer``
    :type num_multistarts: int > 0
    :param randomness: random source used to generate multistart points (UNUSED)
    :type randomness: (UNUSED)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int > 0
    :param status: status messages (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: hyperparameters that maximize the specified log likelihood measure within the specified domain
    :rtype: array of float64 with shape (log_likelihood_evaluator.num_hyperparameters)

    """
    # Drawing the multistart points uniformly in log10 space improves robustness
    # by clustering some extra starting points near 0.
    bounds_log10 = numpy.log10(hyperparameter_optimizer.domain._domain_bounds)
    search_domain_log10 = TensorProductDomain(
        ClosedInterval.build_closed_intervals_from_list(bounds_log10))
    starting_points = numpy.power(
        10.0,
        search_domain_log10.generate_uniform_random_points_in_domain(num_points=num_multistarts),
    )

    best_hyperparameters, _ = multistart_optimize(
        hyperparameter_optimizer, starting_points=starting_points)

    # TODO(GH-59): Have GD actually indicate whether updates were found,
    # e.g., in an IOContainer-like structure.
    if status is not None:
        status["gradient_descent_found_update"] = True

    return best_hyperparameters