def test_evaluate_log_likelihood_at_points(self):
    """Verify ``evaluate_log_likelihood_at_hyperparameter_list`` against one-at-a-time evaluation.

    A log marginal likelihood evaluator is built from a small GP (5 sampled points) and
    evaluated at 10 random hyperparameter vectors in one batch call. The i-th batch
    result must equal (exactly) the value obtained by assigning the i-th hyperparameter
    vector to the evaluator and computing the log likelihood directly, which checks
    both the values and their ordering.

    """
    env_input = self.gp_test_environment_input
    env_input.num_sampled = 5
    _, gaussian_process = self._build_gaussian_process_test_data(env_input)

    python_cov, historical_data = gaussian_process.get_core_data_copy()
    log_likelihood_eval = GaussianProcessLogMarginalLikelihood(python_cov, historical_data)

    # One copy of the hyperparameter interval per hyperparameter
    interval_per_hyperparameter = [env_input.hyperparameter_interval] * env_input.num_hyperparameters
    hyperparameter_domain = TensorProductDomain(interval_per_hyperparameter)

    num_to_eval = 10
    candidates = hyperparameter_domain.generate_uniform_random_points_in_domain(num_to_eval)
    batch_values = evaluate_log_likelihood_at_hyperparameter_list(log_likelihood_eval, candidates)

    for row, batch_value in enumerate(batch_values):
        log_likelihood_eval.hyperparameters = candidates[row, ...]
        expected = log_likelihood_eval.compute_log_likelihood()
        assert batch_value == expected
def base_setup(self):
    """Create the quadratic test objective and the NullOptimizer under test.

    Sets on ``self``: ``dim`` (3), ``domain`` (a tensor product of ``dim`` copies of
    [-1, 1]), ``polynomial`` (a ``QuadraticFunction`` built from a maxima point of 0.5 in
    every coordinate and a current point at the origin), and ``null_optimizer``.

    """
    self.dim = 3
    self.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * self.dim)

    start_point = numpy.zeros(self.dim)
    peak_point = numpy.full(self.dim, 0.5)
    self.polynomial = QuadraticFunction(peak_point, start_point)

    self.null_optimizer = NullOptimizer(self.domain, self.polynomial)
def base_setup(cls):
    """Create the quadratic test objective and the parameter sets for each optimizer.

    Sets on ``cls``: ``dim``, ``domain``, ``large_domain``, ``polynomial``,
    ``gd_parameters``, ``BFGS_parameters`` and ``COBYLA_parameters``. Note that
    ``large_domain`` is currently built from the same [-1, 1] bounds as ``domain``.

    """
    cls.dim = 3
    cls.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * cls.dim)
    cls.large_domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * cls.dim)

    start_point = numpy.zeros(cls.dim)
    peak_point = numpy.full(cls.dim, 0.5)
    cls.polynomial = QuadraticFunction(peak_point, start_point)

    cls.gd_parameters = GradientDescentParameters(
        250,  # max_num_steps
        10,  # max_num_restarts
        0,  # num_steps_averaged
        0.7,  # gamma: smaller gamma would lead to faster convergence, but we don't want to make the problem too easy
        1.0,  # pre_mult
        0.8,  # max_relative_change
        1.0e-12,  # tolerance
    )

    cls.BFGS_parameters = LBFGSBParameters(
        False,  # approx_grad
        150000,  # max_func_evals
        10,  # max_metric_correc
        1000.0,  # factr
        1e-10,  # pgtol
        1e-8,  # epsilon
    )

    cls.COBYLA_parameters = COBYLAParameters(
        1.0,  # rhobeg
        numpy.finfo(numpy.float64).eps,  # rhoend
        1000,  # maxfun
        2.0e-13,  # catol
    )
def test_hyperparameter_gradient_pings(self):
    """Ping test: compare the analytic hyperparameter gradient to a central finite difference.

    For each of 10 random point pairs in [-1, 1]^3, a covariance object with random
    hyperparameters is created. Every hyperparameter is perturbed by +h and -h in turn
    and the resulting central difference of the covariance is compared against the
    matching entry of ``hyperparameter_grad_covariance``.

    """
    step = 2.0e-3
    tolerance = 4.0e-5
    num_tests = 10
    dim = 3
    num_hyperparameters = dim + 1
    hyperparameter_interval = ClosedInterval(3.0, 5.0)

    unit_cube_bounds = ClosedInterval.build_closed_intervals_from_list(
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]],
    )
    domain = TensorProductDomain(unit_cube_bounds)
    first_points = domain.generate_uniform_random_points_in_domain(num_tests)
    second_points = domain.generate_uniform_random_points_in_domain(num_tests)

    for test_index in xrange(num_tests):
        point_one = first_points[test_index, ...]
        point_two = second_points[test_index, ...]

        covariance = gp_utils.fill_random_covariance_hyperparameters(
            hyperparameter_interval,
            num_hyperparameters,
            covariance_type=self.CovarianceClass,
        )
        analytic_grad = covariance.hyperparameter_grad_covariance(point_one, point_two)

        for k in xrange(covariance.num_hyperparameters):
            baseline = covariance.hyperparameters

            # Evaluate the covariance at hyperparameter[k] + h, then at hyperparameter[k] - h,
            # restoring the baseline hyperparameters after each evaluation.
            perturbed_covariances = []
            for signed_step in (step, -step):
                shifted = numpy.copy(baseline)
                shifted[k] += signed_step
                covariance.hyperparameters = shifted
                perturbed_covariances.append(covariance.covariance(point_one, point_two))
                covariance.hyperparameters = baseline
            cov_plus, cov_minus = perturbed_covariances

            # Central finite difference
            fd_grad = (cov_plus - cov_minus) / (2.0 * step)
            self.assert_scalar_within_relative(fd_grad, analytic_grad[k], tolerance)
def test_multistart_hyperparameter_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum hyperparameters.

    The global NumPy RNG is seeded for reproducibility. Its previous state is captured
    up front and restored in a ``finally`` clause so that a failing assertion (or any
    other exception) cannot leak the fixed seed into tests that run afterwards.

    """
    random_state = numpy.random.get_state()
    try:
        numpy.random.seed(87612)

        max_num_steps = 200  # this is generally *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 0
        gamma = 0.2
        pre_mult = 1.0
        max_relative_change = 0.3
        tolerance = 1.0e-11
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 3  # again, too few multistarts; but we want the test to run reasonably quickly

        num_sampled = 10
        self.gp_test_environment_input.num_sampled = num_sampled
        _, gaussian_process = self._build_gaussian_process_test_data(self.gp_test_environment_input)
        python_cov, historical_data = gaussian_process.get_core_data_copy()

        lml = GaussianProcessLogMarginalLikelihood(python_cov, historical_data)

        domain = TensorProductDomain([ClosedInterval(1.0, 4.0)] * self.gp_test_environment_input.num_hyperparameters)

        hyperparameter_optimizer = GradientDescentOptimizer(domain, lml, gd_parameters)
        best_hyperparameters = multistart_hyperparameter_optimization(hyperparameter_optimizer, num_multistarts)

        # Check that gradients are small
        lml.hyperparameters = best_hyperparameters
        gradient = lml.compute_grad_log_likelihood()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.num_hyperparameters), tolerance)

        # Check that output is in the domain
        assert domain.check_point_inside(best_hyperparameters) is True
    finally:
        # Always put the global RNG back the way we found it, even when the test fails
        numpy.random.set_state(random_state)
class Experiment(object):

    """A class for MOE optimizable experiments."""

    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
            self.domain.dim,
            sample_points=points_sampled,
        )

    def build_json_payload(self):
        """Construct a json serializeable and MOE REST recognizeable dictionary of the experiment.

        :return: dict with keys ``'domain_info'`` and ``'gp_historical_info'``
        :rtype: dict

        """
        return {
            'domain_info': self.domain.get_json_serializable_info(),
            'gp_historical_info': self.historical_data.json_payload(),
        }

    def __str__(self):
        """Return a pprint formatted version of the experiment dict."""
        # The payload method must be *called*; formatting the bound method object
        # itself would only produce its repr, not the experiment data.
        return pprint.pformat(self.build_json_payload())
class Experiment(object):

    """A class for MOE optimizable experiments."""

    def __init__(self, domain_bounds, points_sampled=None):
        """Construct a MOE optimizable experiment.

        **Required arguments:**

            :param domain_bounds: The bounds for the optimization experiment
            :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

        **Optional arguments:**

            :param points_sampled: The historic points sampled and their objective function values
            :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

        """
        _domain_bounds = [ClosedInterval(bound[0], bound[1]) for bound in domain_bounds]
        self.domain = TensorProductDomain(_domain_bounds)
        self.historical_data = HistoricalData(
            self.domain.dim,
            sample_points=points_sampled,
        )

    def build_json_payload(self):
        """Construct a json serializeable and MOE REST recognizeable dictionary of the experiment.

        :return: dict with keys ``'domain_info'`` and ``'gp_historical_info'``
        :rtype: dict

        """
        return {
            'domain_info': self.domain.get_json_serializable_info(),
            'gp_historical_info': self.historical_data.json_payload(),
        }

    def __str__(self):
        """Return a pprint formatted version of the experiment dict."""
        # The payload method must be *called*; formatting the bound method object
        # itself would only produce its repr, not the experiment data.
        return pprint.pformat(self.build_json_payload())
def base_setup(self):
    """Create the quadratic test objective and the gradient descent parameters.

    Sets on ``self``: ``dim`` (3), ``domain`` (a tensor product of ``dim`` copies of
    [-1, 1]), ``polynomial`` (a ``QuadraticFunction`` built from a maxima point of 0.5 in
    every coordinate and a current point at the origin), and ``gd_parameters``.

    """
    self.dim = 3
    self.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * self.dim)

    start_point = numpy.zeros(self.dim)
    peak_point = numpy.full(self.dim, 0.5)
    self.polynomial = QuadraticFunction(peak_point, start_point)

    self.gd_parameters = GradientDescentParameters(
        250,  # max_num_steps
        10,  # max_num_restarts
        0,  # num_steps_averaged
        0.7,  # gamma: smaller gamma would lead to faster convergence, but we don't want to make the problem too easy
        1.0,  # pre_mult
        0.8,  # max_relative_change
        1.0e-12,  # tolerance
    )
def test_multistarted_bfgs_optimizer(self):
    """Check that multistarted L-BFGS-B can find the optimum of the quadratic in a large domain.

    The optimizer is multistarted from 10 points over [-10, 10]^dim; the returned point,
    the objective value and the gradient are then compared with the known optimum.

    """
    # Search over a domain much wider than the region around the optimum
    wide_domain = TensorProductDomain([ClosedInterval(-10.0, 10.0)] * self.dim)

    tolerance = 2.0e-10
    num_points = 10

    multistart_optimizer = MultistartOptimizer(
        LBFGSBOptimizer(wide_domain, self.polynomial, self.BFGS_parameters),
        num_points,
    )
    best_point, _ = multistart_optimizer.optimize()

    # Coordinates of the reported optimum
    self.assert_vector_within_relative(best_point, self.polynomial.optimum_point, tolerance)

    # Objective value at the polynomial's current point
    objective_value = self.polynomial.compute_objective_function()
    self.assert_scalar_within_relative(objective_value, self.polynomial.optimum_value, tolerance)

    # Gradient should vanish at the optimum
    gradient = self.polynomial.compute_grad_objective_function()
    zero_gradient = numpy.zeros(self.polynomial.dim)
    self.assert_vector_within_relative(gradient, zero_gradient, tolerance)
def test_hyperparameter_gradient_pings(self):
    """Ping test: compare the analytic hyperparameter gradient to a central finite difference.

    For each of 10 random point pairs in [-1, 1]^3, a covariance object with random
    hyperparameters is created. Every hyperparameter is perturbed by +h and -h in turn
    and the resulting central difference of the covariance is compared against the
    matching entry of ``hyperparameter_grad_covariance``.

    """
    step = 2.0e-3
    tolerance = 4.0e-5
    num_tests = 10
    dim = 3
    num_hyperparameters = dim + 1
    hyperparameter_interval = ClosedInterval(3.0, 5.0)

    unit_cube_bounds = ClosedInterval.build_closed_intervals_from_list(
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]],
    )
    domain = TensorProductDomain(unit_cube_bounds)
    first_points = domain.generate_uniform_random_points_in_domain(num_tests)
    second_points = domain.generate_uniform_random_points_in_domain(num_tests)

    for test_index in xrange(num_tests):
        point_one = first_points[test_index, ...]
        point_two = second_points[test_index, ...]

        covariance = gp_utils.fill_random_covariance_hyperparameters(
            hyperparameter_interval,
            num_hyperparameters,
            covariance_type=self.CovarianceClass,
        )
        analytic_grad = covariance.hyperparameter_grad_covariance(point_one, point_two)

        for k in xrange(covariance.num_hyperparameters):
            baseline = covariance.hyperparameters

            # Evaluate the covariance at hyperparameter[k] + h, then at hyperparameter[k] - h,
            # restoring the baseline hyperparameters after each evaluation.
            perturbed_covariances = []
            for signed_step in (step, -step):
                shifted = numpy.copy(baseline)
                shifted[k] += signed_step
                covariance.hyperparameters = shifted
                perturbed_covariances.append(covariance.covariance(point_one, point_two))
                covariance.hyperparameters = baseline
            cov_plus, cov_minus = perturbed_covariances

            # Central finite difference
            fd_grad = (cov_plus - cov_minus) / (2.0 * step)
            self.assert_scalar_within_relative(fd_grad, analytic_grad[k], tolerance)
def test_multistart_qei_expected_improvement_dfo(self):
    """Check that multistart optimization (L-BFGS-B) can find the optimum point to sample (using 2-EI).

    The global NumPy RNG is seeded, a random starting set of 2 points is drawn from an
    expanded domain, and EI is optimized with ``LBFGSBOptimizer`` through
    ``multistart_expected_improvement_optimization``. The test then checks that a
    central finite-difference gradient of EI at the result is within ``tolerance`` of
    zero, that the result lies in the domain, and that EI did not decrease.

    """
    # Fixed seed: everything below draws from the global NumPy RNG
    numpy.random.seed(7860)
    # argmax over a boolean array gives the index of the first True entry,
    # i.e. the first environment with num_sampled >= 20
    index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[index]

    tolerance = 6.0e-5
    num_multistarts = 3

    # Expand the domain so that we are definitely not doing constrained optimization
    expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 3.0)] * self.dim)

    num_to_sample = 2
    repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

    num_mc_iterations = 100000
    # Just any random point that won't be optimal
    points_to_sample = repeated_domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
    # Compute EI at the starting point for the sake of comparison
    ei_initial = ei_eval.compute_expected_improvement()

    ei_optimizer = LBFGSBOptimizer(repeated_domain, ei_eval, self.BFGS_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are "small" or on border. MC is very inaccurate near 0, so use finite difference
    # gradient instead.
    ei_eval.current_point = best_point
    ei_final = ei_eval.compute_expected_improvement()

    finite_diff_grad = numpy.zeros(best_point.shape)
    h_value = 0.00001
    for i in range(best_point.shape[0]):
        for j in range(best_point.shape[1]):
            # best_point is perturbed in place: +h, then -2h (net -h), then +h to return
            # to the starting value (up to floating point rounding).
            best_point[i, j] += h_value
            ei_eval.current_point = best_point
            ei_upper = ei_eval.compute_expected_improvement()
            best_point[i, j] -= 2 * h_value
            ei_eval.current_point = best_point
            ei_lower = ei_eval.compute_expected_improvement()
            best_point[i, j] += h_value
            # Central difference estimate of d(EI)/d(best_point[i, j])
            finite_diff_grad[i, j] = (ei_upper - ei_lower) / (2 * h_value)

    self.assert_vector_within_relative(finite_diff_grad, numpy.zeros(finite_diff_grad.shape), tolerance)

    # Check that output is in the domain
    assert repeated_domain.check_point_inside(best_point) is True

    # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
    # EI should have improved
    assert ei_final >= ei_initial
def test_multistart_hyperparameter_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum hyperparameters.

    The global NumPy RNG is seeded for reproducibility. Its previous state is captured
    up front and restored in a ``finally`` clause so that a failing assertion (or any
    other exception) cannot leak the fixed seed into tests that run afterwards.

    """
    random_state = numpy.random.get_state()
    try:
        numpy.random.seed(87612)

        max_num_steps = 200  # this is generally *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 0
        gamma = 0.2
        pre_mult = 1.0
        max_relative_change = 0.3
        tolerance = 1.0e-11
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 3  # again, too few multistarts; but we want the test to run reasonably quickly

        num_sampled = 10
        self.gp_test_environment_input.num_sampled = num_sampled
        _, gaussian_process = self._build_gaussian_process_test_data(self.gp_test_environment_input)
        python_cov, historical_data = gaussian_process.get_core_data_copy()

        lml = GaussianProcessLogMarginalLikelihood(python_cov, historical_data)

        domain = TensorProductDomain([ClosedInterval(1.0, 4.0)] * self.gp_test_environment_input.num_hyperparameters)

        hyperparameter_optimizer = GradientDescentOptimizer(domain, lml, gd_parameters)
        best_hyperparameters = multistart_hyperparameter_optimization(hyperparameter_optimizer, num_multistarts)

        # Check that gradients are small
        lml.hyperparameters = best_hyperparameters
        gradient = lml.compute_grad_log_likelihood()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.num_hyperparameters), tolerance)

        # Check that output is in the domain
        assert domain.check_point_inside(best_hyperparameters) is True
    finally:
        # Always put the global RNG back the way we found it, even when the test fails
        numpy.random.set_state(random_state)
def test_multistart_monte_carlo_expected_improvement_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 2-EI).

    EI and its gradient are computed at a random starting set of 2 points, then EI is
    optimized with multistarted gradient descent. The test checks that the final
    gradient is within a (deliberately loose) tolerance of zero, that the result lies in
    the domain, that EI did not decrease, and that no gradient component grew in
    magnitude relative to the starting point.

    """
    # Fixed seed: everything below draws from the global NumPy RNG
    numpy.random.seed(7858)  # TODO(271): Monte Carlo only works for this seed
    # argmax over a boolean array gives the index of the first True entry,
    # i.e. the first environment with num_sampled >= 20
    index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[index]

    max_num_steps = 75  # this is *too few* steps; we configure it this way so the test will run quickly
    max_num_restarts = 5
    num_steps_averaged = 50
    gamma = 0.2
    pre_mult = 1.5
    max_relative_change = 1.0
    tolerance = 3.0e-2  # really large tolerance b/c converging with monte-carlo (esp in Python) is expensive
    gd_parameters = GradientDescentParameters(
        max_num_steps,
        max_num_restarts,
        num_steps_averaged,
        gamma,
        pre_mult,
        max_relative_change,
        tolerance,
    )
    num_multistarts = 2

    # Expand the domain so that we are definitely not doing constrained optimization
    expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)
    num_to_sample = 2
    repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

    num_mc_iterations = 10000
    # Just any random point that won't be optimal
    points_to_sample = repeated_domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
    # Compute EI and its gradient for the sake of comparison
    ei_initial = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
    grad_ei_initial = ei_eval.compute_grad_expected_improvement()

    ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are "small"
    ei_eval.current_point = best_point
    ei_final = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
    grad_ei_final = ei_eval.compute_grad_expected_improvement()
    self.assert_vector_within_relative(grad_ei_final, numpy.zeros(grad_ei_final.shape), tolerance)

    # Check that output is in the domain
    assert repeated_domain.check_point_inside(best_point) is True

    # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
    # EI should have improved
    assert ei_final >= ei_initial

    # grad EI should have improved
    # NOTE: this loop variable reuses (and overwrites) the name ``index`` bound above
    for index in numpy.ndindex(grad_ei_final.shape):
        assert numpy.fabs(grad_ei_final[index]) <= numpy.fabs(grad_ei_initial[index])
def base_setup(self):
    """Create the quadratic test objective plus gradient descent and L-BFGS-B parameters.

    Sets on ``self``: ``dim`` (3), ``domain`` (a tensor product of ``dim`` copies of
    [-1, 1]), ``polynomial`` (a ``QuadraticFunction`` built from a maxima point of 0.5 in
    every coordinate and a current point at the origin), ``gd_parameters`` and
    ``BFGS_parameters``.

    """
    self.dim = 3
    self.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * self.dim)

    start_point = numpy.zeros(self.dim)
    peak_point = numpy.full(self.dim, 0.5)
    self.polynomial = QuadraticFunction(peak_point, start_point)

    self.gd_parameters = GradientDescentParameters(
        250,  # max_num_steps
        10,  # max_num_restarts
        0,  # num_steps_averaged
        0.7,  # gamma: smaller gamma would lead to faster convergence, but we don't want to make the problem too easy
        1.0,  # pre_mult
        0.8,  # max_relative_change
        1.0e-12,  # tolerance
    )

    self.BFGS_parameters = LBFGSBParameters(
        False,  # approx_grad
        150000,  # max_func_evals
        10,  # max_metric_correc
        1000.0,  # factr
        1e-10,  # pgtol
        1e-8,  # epsilon
    )
def test_evaluate_log_likelihood_at_points(self):
    """Verify ``evaluate_log_likelihood_at_hyperparameter_list`` against one-at-a-time evaluation.

    A log marginal likelihood evaluator is built from a small GP (5 sampled points) and
    evaluated at 10 random hyperparameter vectors in one batch call. The i-th batch
    result must equal (exactly) the value obtained by assigning the i-th hyperparameter
    vector to the evaluator and computing the log likelihood directly, which checks
    both the values and their ordering.

    """
    env_input = self.gp_test_environment_input
    env_input.num_sampled = 5
    _, gaussian_process = self._build_gaussian_process_test_data(env_input)

    python_cov, historical_data = gaussian_process.get_core_data_copy()
    log_likelihood_eval = GaussianProcessLogMarginalLikelihood(python_cov, historical_data)

    # One copy of the hyperparameter interval per hyperparameter
    interval_per_hyperparameter = [env_input.hyperparameter_interval] * env_input.num_hyperparameters
    hyperparameter_domain = TensorProductDomain(interval_per_hyperparameter)

    num_to_eval = 10
    candidates = hyperparameter_domain.generate_uniform_random_points_in_domain(num_to_eval)
    batch_values = evaluate_log_likelihood_at_hyperparameter_list(log_likelihood_eval, candidates)

    for row, batch_value in enumerate(batch_values):
        log_likelihood_eval.hyperparameters = candidates[row, ...]
        expected = log_likelihood_eval.compute_log_likelihood()
        assert batch_value == expected
def __init__(self, domain_bounds, points_sampled=None):
    """Build the experiment's domain and its (possibly empty) historical data.

    **Required arguments:**

        :param domain_bounds: The bounds for the optimization experiment
        :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

    **Optional arguments:**

        :param points_sampled: The historic points sampled and their objective function values
        :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

    """
    # One ClosedInterval per dimension, taken from the first two entries of each bound
    intervals = []
    for bound in domain_bounds:
        intervals.append(ClosedInterval(bound[0], bound[1]))

    self.domain = TensorProductDomain(intervals)
    self.historical_data = HistoricalData(self.domain.dim, sample_points=points_sampled)
def base_setup(cls):
    """Set up the domains and point counts used by the test cases.

    Sets ``cls.domains_to_test`` to one ``TensorProductDomain`` per entry of the raw
    bounds table below (1D, 2D and 3D boxes of varying size and location) and
    ``cls.num_points_to_test`` to the tuple of point counts to try.

    """
    raw_bounds_to_test = [
        [[-1.0, 1.0]],
        [[-10.0, 10.0]],
        [[-500.0, -490.0]],
        [[6000.0, 6000.001]],
        [[-1.0, 1.0], [-1.0, 1.0]],
        [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]],
        [[-7000.0, 10000.0], [-8000.0, -7999.0], [10000.06, 10000.0601]],
    ]
    # Build every interval list first, then wrap each one in a domain
    interval_lists = [
        ClosedInterval.build_closed_intervals_from_list(raw_bounds)
        for raw_bounds in raw_bounds_to_test
    ]

    cls.domains_to_test = [TensorProductDomain(intervals) for intervals in interval_lists]
    cls.num_points_to_test = (1, 2, 5, 10, 20)
class NullOptimizerTest(OptimalLearningTestCase):

    """Exercise the NullOptimizer on a simple quadratic objective.

    The NullOptimizer is expected to do nothing, so multistarting it should be the same
    as a 'dumb' search: evaluate the objective at every starting point and report the best.

    """

    @T.class_setup
    def base_setup(self):
        """Create the quadratic test objective and the NullOptimizer under test."""
        self.dim = 3
        self.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * self.dim)

        start_point = numpy.zeros(self.dim)
        peak_point = numpy.full(self.dim, 0.5)
        self.polynomial = QuadraticFunction(peak_point, start_point)

        self.null_optimizer = NullOptimizer(self.domain, self.polynomial)

    def test_null_optimizer(self):
        """Check that ``optimize()`` leaves the objective's ``current_point`` unchanged."""
        point_before = self.null_optimizer.objective_function.current_point
        self.null_optimizer.optimize()
        point_after = self.null_optimizer.objective_function.current_point

        self.assert_vector_within_relative(point_before, point_after, 0.0)

    def test_multistarted_null_optimizer(self):
        """Check that multistarting the null optimizer just evaluates the function and identifies the max."""
        num_points = 15
        starting_points = self.domain.generate_uniform_random_points_in_domain(num_points)

        # Reference result: evaluate the objective at each starting point by hand
        expected_values = numpy.empty(num_points)
        for row, candidate in enumerate(starting_points):
            objective = self.null_optimizer.objective_function
            objective.current_point = candidate
            expected_values[row] = objective.compute_objective_function()
        expected_best_point = starting_points[numpy.argmax(expected_values), ...]

        best_point, values = multistart_optimize(self.null_optimizer, starting_points=starting_points)

        self.assert_vector_within_relative(best_point, expected_best_point, 0.0)
        self.assert_vector_within_relative(values, expected_values, 0.0)
def test_multistart_analytic_expected_improvement_optimization(self):
    """Check that multistarted gradient descent finds the optimum point to sample (1D analytic EI).

    After optimizing EI for a single point to sample, the gradient of EI at the result
    must be within ``tolerance`` of zero and the result must lie inside the (expanded)
    search domain.

    """
    numpy.random.seed(3148)
    # argmax over a boolean array gives the index of the first True entry,
    # i.e. the first environment with num_sampled >= 20
    env_index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[env_index]

    tolerance = 1.0e-7
    gd_parameters = GradientDescentParameters(
        200,  # max_num_steps: this is generally *too few* steps; we configure it this way so the test will run quickly
        5,  # max_num_restarts
        0,  # num_steps_averaged
        0.2,  # gamma
        1.5,  # pre_mult
        1.0,  # max_relative_change
        tolerance,
    )
    num_multistarts = 3
    num_to_sample = 1

    starting_point = domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, starting_point)

    # Expand the domain so that we are definitely not doing constrained optimization
    expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)
    repeated_domain = RepeatedDomain(ei_eval.num_to_sample, expanded_domain)

    ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
    best_point = multistart_expected_improvement_optimization(
        ei_optimizer,
        num_multistarts,
        num_to_sample,
    )

    # The gradient at the optimizer's answer should be (nearly) zero
    ei_eval.current_point = best_point
    gradient = ei_eval.compute_grad_expected_improvement()
    zero_gradient = numpy.zeros(gradient.shape)
    self.assert_vector_within_relative(gradient, zero_gradient, tolerance)

    # The answer must lie inside the search domain
    assert repeated_domain.check_point_inside(best_point) is True
class NullOptimizerTest(OptimalLearningTestCase):

    """Exercise the NullOptimizer on a simple quadratic objective.

    The NullOptimizer is expected to do nothing, so multistarting it should be the same
    as a 'dumb' search: evaluate the objective at every starting point and report the best.

    """

    @T.class_setup
    def base_setup(self):
        """Create the quadratic test objective and the NullOptimizer under test."""
        self.dim = 3
        self.domain = TensorProductDomain([ClosedInterval(-1.0, 1.0)] * self.dim)

        start_point = numpy.zeros(self.dim)
        peak_point = numpy.full(self.dim, 0.5)
        self.polynomial = QuadraticFunction(peak_point, start_point)

        self.null_optimizer = NullOptimizer(self.domain, self.polynomial)

    def test_null_optimizer(self):
        """Check that ``optimize()`` leaves the objective's ``current_point`` unchanged."""
        point_before = self.null_optimizer.objective_function.current_point
        self.null_optimizer.optimize()
        point_after = self.null_optimizer.objective_function.current_point

        self.assert_vector_within_relative(point_before, point_after, 0.0)

    def test_multistarted_null_optimizer(self):
        """Check that multistarting the null optimizer just evaluates the function and identifies the max."""
        num_points = 15
        starting_points = self.domain.generate_uniform_random_points_in_domain(num_points)

        # Reference result: evaluate the objective at each starting point by hand
        expected_values = numpy.empty(num_points)
        for row, candidate in enumerate(starting_points):
            objective = self.null_optimizer.objective_function
            objective.current_point = candidate
            expected_values[row] = objective.compute_objective_function()
        expected_best_point = starting_points[numpy.argmax(expected_values), ...]

        best_point, values = multistart_optimize(self.null_optimizer, starting_points=starting_points)

        self.assert_vector_within_relative(best_point, expected_best_point, 0.0)
        self.assert_vector_within_relative(values, expected_values, 0.0)
def __init__(self, domain_bounds, points_sampled=None):
    """Build the experiment's domain and its (possibly empty) historical data.

    **Required arguments:**

        :param domain_bounds: The bounds for the optimization experiment
        :type domain_bounds: An iterable of iterables describing the [min, max] of the domain for each dimension

    **Optional arguments:**

        :param points_sampled: The historic points sampled and their objective function values
        :type points_sampled: An iterable of iterables describing the [point, value, noise] of each objective function evaluation

    """
    # One ClosedInterval per dimension, taken from the first two entries of each bound
    intervals = []
    for bound in domain_bounds:
        intervals.append(ClosedInterval(bound[0], bound[1]))

    self.domain = TensorProductDomain(intervals)
    self.historical_data = HistoricalData(self.domain.dim, sample_points=points_sampled)
def test_gradient_descent_optimizer_constrained(self):
    """Check that gradient descent finds the best point inside a domain that excludes the true optimum.

    The expected answer is the true optimum clamped, coordinate by coordinate, to the
    domain bounds. The test also checks that the objective did not get worse than at the
    initial guess and that the gradient vanishes in every coordinate whose unconstrained
    optimum lies inside the bounds.

    """
    # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
    domain_bounds = [ClosedInterval(0.05, upper) for upper in (0.32, 0.6, 0.32)]
    domain = TensorProductDomain(domain_bounds)
    gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)

    # Project the unconstrained optimum onto the domain: clamp each coordinate to its bounds
    constrained_optimum_point = self.polynomial.optimum_point
    for i, bounds in enumerate(domain_bounds):
        coordinate = constrained_optimum_point[i]
        if coordinate > bounds.max:
            constrained_optimum_point[i] = bounds.max
        elif coordinate < bounds.min:
            constrained_optimum_point[i] = bounds.min

    tolerance = 2.0e-13
    objective = gradient_descent_optimizer.objective_function
    objective.current_point = numpy.full(self.polynomial.dim, 0.2)
    initial_value = objective.compute_objective_function()

    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point

    # Coordinates must match the projected optimum
    self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

    # The optimized value must be at least as good as the initial guess
    final_value = self.polynomial.compute_objective_function()
    assert final_value >= initial_value

    # Derivative is only expected to be 0 where the coordinate lies inside domain boundaries
    gradient = self.polynomial.compute_grad_objective_function()
    for i, bounds in enumerate(domain_bounds):
        if not bounds.is_inside(self.polynomial.optimum_point[i]):
            continue
        self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
def plot_ato_cor(num_points, num_discretization):
    """Compute correlation curves for the "ato" data sets, pickle them, and plot them.

    For each of 4 function orderings and 100 replications, historical data is loaded
    from pickles, a ``GaussianProcessNew`` is built on it, and two correlation
    quantities are evaluated along every one of the 8 dimensions at each of
    ``num_points`` random points. Results are written to
    ``<plot_dir>/ato_plot_data.pickle`` and passed to ``plot_cor``.

    Relies on the module-level names ``hyper_dir``, ``data_dir`` and ``plot_dir``.

    NOTE(review): the loop nesting below was reconstructed from a collapsed source line.
    The placement of ``count += 1`` (once per point) follows from the first dimension
    of the result arrays; the placement of the ``print`` is an assumption -- confirm.

    :param num_points: number of uniformly random points drawn from the search domain
    :param num_discretization: number of evenly spaced x-coordinates in [0, 20]

    """
    dim = 8
    num_func = 4
    num_repl = 100
    search_domain = TensorProductDomain(
        [ClosedInterval(0.0, 20.0) for i in range(dim)])
    average_points = search_domain.generate_uniform_random_points_in_domain(
        num_points)
    # Data set name used for information source 0, 1 and 2 respectively, per function index
    func_name_0_list = ["vanilla", "var2", "var3", "var4"]
    func_name_1_list = ["var3", "var4", "vanilla", "var2"]
    func_name_2_list = ["var4", "vanilla", "var2", "var3"]
    # NOTE(review): pickle.load can execute arbitrary code; only use with trusted files
    with open("{0}/mkg_ato.pickle".format(hyper_dir), 'rb') as f:
        data = pickle.load(f)
    hyper_param = data['hyperparam']
    kg_cov_cpp = cppMixedSquareExponential(hyperparameters=hyper_param)
    info_1 = 1
    info_2 = 2
    x_coords = np.linspace(0.0, 20.0, num=num_discretization)
    # One row per (function, replication, point) triple
    cor_IS = np.zeros(
        (num_func * num_repl * num_points, num_discretization, dim))
    cor_delta_gp = np.zeros(
        (num_func * num_repl * num_points, num_discretization, dim))
    count = 0  # running row index into cor_IS / cor_delta_gp
    for func_idx in range(num_func):
        for repl_no in range(num_repl):
            hist_data = construct_hist_data_from_pickle(
                dim=dim,
                directory=data_dir,
                IS_filename_dict={
                    0: "kg_atoC_{0}_repl_{1}".format(func_name_0_list[func_idx], repl_no),
                    1: "kg_atoC_{0}_repl_{1}".format(func_name_1_list[func_idx], repl_no),
                    2: "kg_atoC_{0}_repl_{1}".format(func_name_2_list[func_idx], repl_no)
                },
                combine_IS=True,
                sign=-1.0)
            kg_gp_cpp = GaussianProcessNew(kg_cov_cpp, hist_data, num_IS_in=2)
            for the_point in average_points:
                for which_dim in range(dim):
                    cor_IS[count, :, which_dim] = compute_correlation_info_source(
                        the_point, info_1, info_2, which_dim, x_coords, kg_gp_cpp)
                    cor_delta_gp[count, :, which_dim] = compute_correlation_delta_gp(
                        the_point, info_1, info_2, which_dim, x_coords, kg_gp_cpp)
                count += 1
                # Progress output (Python 2 print statement)
                print "ato, ct {0}".format(count)
    with open("{0}/ato_plot_data.pickle".format(plot_dir), "wb") as f:
        pickle.dump(
            {
                "cor_is": cor_IS,
                "cor_delta": cor_delta_gp,
                "x": x_coords
            }, f)
    plot_cor(x_coords, cor_IS, cor_delta_gp, dim, plot_dir, "ato")
class GradientDescentOptimizerTest(OptimalLearningTestCase):

    r"""Test Gradient Descent (and L-BFGS-B) on a simple quadratic objective.

    We check GD in an unconstrained setting, a constrained setting, and we test multistarting it.
    The same unconstrained and multistarted checks are repeated with the L-BFGS-B optimizer.

    We don't test the stochastic averaging option meaningfully. We check that the optimizer will average
    the number of steps specified by input. We also check that the simple unconstrained case can also be
    solved with averaging on\*.

    \* This is not much of a test. The problem is convex and isotropic so GD will take a more or less
    straight path to the maxima. Averaging can only reduce the accuracy of the solve.

    TODO(GH-179): Build a simple stochastic objective and test the stochastic component fully.

    """

    @T.class_setup
    def base_setup(self):
        """Set up a test case for optimizing a simple quadratic polynomial."""
        self.dim = 3
        domain_bounds = [ClosedInterval(-1.0, 1.0)] * self.dim
        self.domain = TensorProductDomain(domain_bounds)

        maxima_point = numpy.full(self.dim, 0.5)
        current_point = numpy.zeros(self.dim)
        self.polynomial = QuadraticFunction(maxima_point, current_point)

        max_num_steps = 250
        max_num_restarts = 10
        num_steps_averaged = 0
        gamma = 0.7  # smaller gamma would lead to faster convergence, but we don't want to make the problem too easy
        pre_mult = 1.0
        max_relative_change = 0.8
        tolerance = 1.0e-12
        self.gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )

        approx_grad = False
        max_func_evals = 150000
        max_metric_correc = 10
        factr = 1000.0
        pgtol = 1e-10
        epsilon = 1e-8
        self.BFGS_parameters = LBFGSBParameters(
            approx_grad,
            max_func_evals,
            max_metric_correc,
            factr,
            pgtol,
            epsilon,
        )

    def test_gradient_descent_optimizer(self):
        """Check that gradient descent can find the optimum of the quadratic test objective."""
        # Check the claimed optima is an optima
        optimum_point = self.polynomial.optimum_point
        self.polynomial.current_point = optimum_point
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0)

        # Verify that gradient descent does not move from the optima if we start it there.
        gradient_descent_optimizer = GradientDescentOptimizer(self.domain, self.polynomial, self.gd_parameters)
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        self.assert_vector_within_relative(output, optimum_point, 0.0)

        # Start at a wrong point and check optimization
        tolerance = 2.0e-13
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        gradient_descent_optimizer.objective_function.current_point = initial_guess
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        # Verify coordinates
        self.assert_vector_within_relative(output, optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)

    def test_get_averaging_range(self):
        """Test the method used to produce what interval to average over in Polyak-Ruppert averaging."""
        num_steps_total = 250
        end = num_steps_total + 1
        num_steps_averaged_input_list = [-1, 0, 1, 20, 100, 249, 250, 251, 10000]
        truth_list = [
            (1, end), (250, end), (250, end), (231, end), (151, end),
            (2, end), (1, end), (1, end), (1, end),
        ]

        # Distinct names for the computed range so the expected ``end`` above is never shadowed.
        for num_steps_averaged, (truth_start, truth_end) in zip(num_steps_averaged_input_list, truth_list):
            test_start, test_end = GradientDescentOptimizer._get_averaging_range(num_steps_averaged, num_steps_total)
            T.assert_equal(test_start, truth_start)
            T.assert_equal(test_end, truth_end)

    def test_gradient_descent_optimizer_with_averaging(self):
        """Check that gradient descent can find the optimum of the quadratic test objective with averaging on.

        This test doesn't exercise the purpose of averaging (i.e., this objective isn't stochastic), but it does
        check that it at least runs.

        """
        # Floor division keeps the step count an integer under both Python 2 and Python 3.
        num_steps_averaged = self.gd_parameters.max_num_steps * 3 // 4
        gd_parameters_averaging = GradientDescentParameters(
            self.gd_parameters.max_num_steps,
            self.gd_parameters.max_num_restarts,
            num_steps_averaged,
            self.gd_parameters.gamma,
            self.gd_parameters.pre_mult,
            self.gd_parameters.max_relative_change,
            self.gd_parameters.tolerance,
        )

        # Check the claimed optima is an optima
        optimum_point = self.polynomial.optimum_point
        self.polynomial.current_point = optimum_point
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0)

        # Verify that gradient descent does not move from the optima if we start it there.
        gradient_descent_optimizer = GradientDescentOptimizer(self.domain, self.polynomial, gd_parameters_averaging)
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        self.assert_vector_within_relative(output, optimum_point, 0.0)

        # Start at a wrong point and check optimization
        tolerance = 2.0e-10
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        gradient_descent_optimizer.objective_function.current_point = initial_guess
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        # Verify coordinates
        self.assert_vector_within_relative(output, optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)

    def test_gradient_descent_optimizer_constrained(self):
        """Check that gradient descent can find the global optimum (in a domain) when the true optimum is outside."""
        # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
        domain_bounds = [ClosedInterval(0.05, 0.32), ClosedInterval(0.05, 0.6), ClosedInterval(0.05, 0.32)]
        domain = TensorProductDomain(domain_bounds)
        gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)

        # Work out what the maxima point would be given the domain constraints (i.e., project to the nearest
        # point on the domain). numpy.clip returns a new array, so whatever ``optimum_point`` hands back is
        # never written to and the class-level fixture stays intact for the other tests.
        constrained_optimum_point = numpy.clip(
            self.polynomial.optimum_point,
            [bounds.min for bounds in domain_bounds],
            [bounds.max for bounds in domain_bounds],
        )

        tolerance = 2.0e-13
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        gradient_descent_optimizer.objective_function.current_point = initial_guess
        initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        # Verify coordinates
        self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

        # Verify optimized value is better than initial guess
        final_value = self.polynomial.compute_objective_function()
        T.assert_gt(final_value, initial_value)

        # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries
        gradient = self.polynomial.compute_grad_objective_function()
        for i, bounds in enumerate(domain_bounds):
            if bounds.is_inside(self.polynomial.optimum_point[i]):
                self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)

    def test_multistarted_gradient_descent_optimizer_crippled_start(self):
        """Check that multistarted GD is finding the best result from GD."""
        # Only allow 1 GD iteration.
        gd_parameters_crippled = GradientDescentParameters(
            1,
            1,
            self.gd_parameters.num_steps_averaged,
            self.gd_parameters.gamma,
            self.gd_parameters.pre_mult,
            self.gd_parameters.max_relative_change,
            self.gd_parameters.tolerance,
        )
        gradient_descent_optimizer_crippled = GradientDescentOptimizer(self.domain, self.polynomial, gd_parameters_crippled)

        num_points = 15
        points = self.domain.generate_uniform_random_points_in_domain(num_points)

        multistart_optimizer = MultistartOptimizer(gradient_descent_optimizer_crippled, num_points)
        test_best_point, _ = multistart_optimizer.optimize(random_starts=points)
        # This point set won't include the optimum so multistart GD won't find it.
        for value in (test_best_point - self.polynomial.optimum_point):
            T.assert_not_equal(value, 0.0)

        points_with_opt = numpy.append(points, self.polynomial.optimum_point.reshape((1, self.polynomial.dim)), axis=0)
        test_best_point, _ = multistart_optimizer.optimize(random_starts=points_with_opt)
        # This point set will include the optimum so multistart GD will find it.
        for value in (test_best_point - self.polynomial.optimum_point):
            T.assert_equal(value, 0.0)

    def test_multistarted_gradient_descent_optimizer(self):
        """Check that multistarted GD can find the optimum in a 'very' large domain."""
        # Set a large domain: a single GD run is unlikely to reach the optimum
        domain_bounds = [ClosedInterval(-10.0, 10.0)] * self.dim
        domain = TensorProductDomain(domain_bounds)

        tolerance = 2.0e-10
        num_points = 10
        gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)
        multistart_optimizer = MultistartOptimizer(gradient_descent_optimizer, num_points)

        output, _ = multistart_optimizer.optimize()
        # Verify coordinates
        self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)

    def test_bfgs_optimizer(self):
        """Check that BFGS can find the optimum of the quadratic test objective."""
        # Check the claimed optima is an optima
        optimum_point = self.polynomial.optimum_point
        self.polynomial.current_point = optimum_point
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0)

        # Verify that BFGS does not move from the optima if we start it there.
        bfgs_optimizer = LBFGSBOptimizer(self.domain, self.polynomial, self.BFGS_parameters)
        bfgs_optimizer.optimize()
        output = bfgs_optimizer.objective_function.current_point
        self.assert_vector_within_relative(output, optimum_point, 0.0)

        # Start at a wrong point and check optimization
        tolerance = 2.0e-13
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        bfgs_optimizer.objective_function.current_point = initial_guess
        bfgs_optimizer.optimize()
        output = bfgs_optimizer.objective_function.current_point
        # Verify coordinates
        self.assert_vector_within_relative(output, optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)

    def test_multistarted_bfgs_optimizer(self):
        """Check that multistarted BFGS can find the optimum in a 'very' large domain."""
        # Set a large domain: a single BFGS run is unlikely to reach the optimum
        domain_bounds = [ClosedInterval(-10.0, 10.0)] * self.dim
        domain = TensorProductDomain(domain_bounds)

        tolerance = 2.0e-10
        num_points = 10
        bfgs_optimizer = LBFGSBOptimizer(domain, self.polynomial, self.BFGS_parameters)
        multistart_optimizer = MultistartOptimizer(bfgs_optimizer, num_points)

        output, _ = multistart_optimizer.optimize()
        # Verify coordinates
        self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
def main():
    """Run a Bayesian optimization of the field objective and save the best location and rotation.

    Command-line arguments are parsed with docopt from the module docstring. A ``FieldFunc`` objective is
    built over a 3-dimensional search domain (two surface coordinates plus a rotation angle in [0, 180]).
    A Gaussian process is trained on random initial evaluations. For ``--n-iters`` rounds a batch of points
    chosen by ``gen_sample_from_qei`` is evaluated and fed back into the model. The best sampled coordinate
    is finally mapped through ``geolib`` and the affine matrix, and written to ``<loc_out>`` and ``<rot_out>``.

    Objective values are negated before being stored, so the smallest stored value marks the best point.
    """
    args = docopt(__doc__)

    # Parse arguments
    mesh = args['<mesh>']
    weights = np.load(args['<weightfile>'])
    C = np.load(args['<quad_const>'])
    b = np.load(args['<bounds>'])
    R = np.load(args['<affine>'])
    coil = args['<coil>']
    loc_out = args['<loc_out>']
    rot_out = args['<rot_out>']

    # docopt returns option values as strings, or None when the option is absent. The inner
    # ``or 0`` keeps int() from choking on None; the outer ``or`` applies the default
    # (and, as before, also replaces an explicit 0).
    cpus = int(args['--cpus'] or 0) or 8
    tmpdir = args['--tmp-dir'] or os.getenv('TMPDIR') or "/tmp/"
    num_iters = int(args['--n-iters'] or 0) or 50

    # Make search domain
    search_domain = TensorProductDomain([
        ClosedInterval(b[0, 0], b[0, 1]),  # X coord on quadratic surface
        ClosedInterval(b[1, 0], b[1, 1]),  # Y coord on quadratic surface
        ClosedInterval(0, 180),  # Rotational angle
    ])

    c_search_domain = cTensorProductDomain([
        ClosedInterval(b[0, 0], b[0, 1]),
        ClosedInterval(b[1, 0], b[1, 1]),
        ClosedInterval(0, 180),
    ])

    # Make objective function
    f = FieldFunc(mesh_file=mesh,
                  quad_surf_consts=C,
                  surf_to_mesh_matrix=R,
                  tet_weights=weights,
                  field_dir=tmpdir,
                  coil=coil,
                  cpus=cpus)

    # Generate historical points
    hist_pts = int(cpus * 1.5)
    init_pts = search_domain.generate_uniform_random_points_in_domain(hist_pts)
    observations = -f.evaluate(init_pts)
    hist_data = HistoricalData(dim=3, num_derivatives=0)
    hist_data.append_sample_points(
        [SamplePoint(inp, o, 0.0) for o, inp in zip(observations, init_pts)])

    # Set up model specifications
    prior = DefaultPrior(n_dims=3 + 2, num_noise=1)
    gp_ll = GaussianProcessLogLikelihoodMCMC(historical_data=hist_data,
                                             derivatives=[],
                                             prior=prior,
                                             chain_length=1000,
                                             burnin_steps=2000,
                                             n_hypers=2**4,
                                             noisy=False)
    gp_ll.train()

    # Initialize grad desc params
    sgd_params = cGDParams(num_multistarts=200,
                           max_num_steps=50,
                           max_num_restarts=2,
                           num_steps_averaged=4,
                           gamma=0.7,
                           pre_mult=1.0,
                           max_relative_change=0.5,
                           tolerance=1.0e-10)

    num_samples = int(cpus * 1.3)
    best_point_history = []
    for _ in range(num_iters):

        # Optimize qEI and pick samples
        points_to_sample, ei = gen_sample_from_qei(gp_ll.models[0],
                                                   c_search_domain,
                                                   sgd_params=sgd_params,
                                                   num_samples=num_samples,
                                                   num_mc=2**10)

        # Collect observations
        sampled_points = -f.evaluate(points_to_sample)
        evidence = [SamplePoint(c, v, 0.0) for c, v in zip(points_to_sample, sampled_points)]

        # Update model
        gp_ll.add_sampled_points(evidence)
        gp_ll.train()

        # Pull model and pull values
        gp = gp_ll.models[0]
        min_point = np.argmin(gp._points_sampled_value)
        min_val = np.min(gp._points_sampled_value)
        best_coord = gp.get_historical_data_copy().points_sampled[min_point]

        print('Recommended Points:')
        print(points_to_sample)
        print('Expected Improvement: {}'.format(ei))
        print('Current Best:')
        print('f(x*)=', min_val)
        print('Coord:', best_coord)
        best_point_history.append(min_val)

    # Once sampling is done take the best point and transform it back into native space
    preaff_loc = geolib.map_param_2_surf(best_coord[0], best_coord[1], C)
    preaff_rot, _ = geolib.map_rot_2_surf(best_coord[0], best_coord[1], best_coord[2], C)
    loc = np.matmul(R, preaff_loc)
    rot = np.matmul(R, preaff_rot)
    np.savetxt(loc_out, loc)
    np.savetxt(rot_out, rot)
def main():
    """Run a Bayesian optimization of the field objective and save the best coordinate.

    Command-line arguments are parsed with docopt from the module docstring. A ``FieldFunc`` objective is
    built (optionally with extra keyword options read from a JSON file). A Gaussian process is trained on
    random initial evaluations, and for at most ``--n-iters`` rounds a batch of points chosen by
    ``gen_sample_from_qei`` is evaluated and fed back into the model. Unless ``--skip-convergence`` is set,
    the loop stops early once the summed absolute difference between the current best value and the
    previous ``--min-var-samps`` best values drops below ``--convergence``. The best sampled coordinate is
    written to ``<output_file>``.

    Objective values are negated before being stored, so the smallest stored value marks the best point.
    """
    args = docopt(__doc__)

    # Parse arguments
    mesh = args['<mesh>']
    weights = np.load(args['<weightfile>'])
    init_centroid = np.genfromtxt(args['<init_centroid>'])
    coil = args['<coil>']
    output_file = args['<output_file>']

    # docopt returns option values as strings, or None when the option is absent. The inner
    # ``or 0`` keeps int()/float() from choking on None; the outer ``or`` applies the default
    # (and, as before, also replaces an explicit 0).
    cpus = int(args['--cpus'] or 0) or 8
    tmpdir = args['--tmp-dir'] or os.getenv('TMPDIR') or "/tmp/"
    num_iters = int(args['--n-iters'] or 0) or 50
    min_samps = int(args['--min-var-samps'] or 0) or 10
    tol = float(args['--convergence'] or 0.0) or 0.001
    history = args['--history']
    skip_convergence = args['--skip-convergence']
    options = args['--options']

    if options:
        with open(options, 'r') as f:
            opts = json.load(f)
        logging.info("Using custom options file {}".format(options))
        logging.info("{}".format('\''.join(
            [f"{k}:{v}" for k, v in opts.items()])))
    else:
        opts = {}

    logging.info('Using {} cpus'.format(cpus))

    f = FieldFunc(mesh_file=mesh,
                  initial_centroid=init_centroid,
                  tet_weights=weights,
                  coil=coil,
                  field_dir=tmpdir,
                  cpus=cpus,
                  **opts)

    # Make search domain
    search_domain = TensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180),
    ])

    c_search_domain = cTensorProductDomain([
        ClosedInterval(f.bounds[0, 0], f.bounds[0, 1]),
        ClosedInterval(f.bounds[1, 0], f.bounds[1, 1]),
        ClosedInterval(0, 180),
    ])

    # Generate historical points
    prior = DefaultPrior(n_dims=3 + 2, num_noise=1)
    prior.tophat = TophatPrior(-2, 5)
    prior.ln_prior = NormalPrior(12.5, 1.6)
    hist_pts = cpus
    init_pts = search_domain.generate_uniform_random_points_in_domain(hist_pts)
    observations = -f.evaluate(init_pts)
    hist_data = HistoricalData(dim=3, num_derivatives=0)
    hist_data.append_sample_points(
        [SamplePoint(inp, o, 0.0) for o, inp in zip(observations, init_pts)])

    # Train GP model
    gp_ll = GaussianProcessLogLikelihoodMCMC(historical_data=hist_data,
                                             derivatives=[],
                                             prior=prior,
                                             chain_length=1000,
                                             burnin_steps=2000,
                                             n_hypers=2**4,
                                             noisy=False)
    gp_ll.train()

    # Initialize grad desc params
    sgd_params = cGDParams(num_multistarts=200,
                           max_num_steps=50,
                           max_num_restarts=5,
                           num_steps_averaged=4,
                           gamma=0.7,
                           pre_mult=1.0,
                           max_relative_change=0.5,
                           tolerance=1.0e-10)

    num_samples = int(cpus * 1.3)
    best_point_history = []

    # Sum of errors buffer: holds the best value of the last ``min_samps`` iterations
    var_buffer = deque(maxlen=min_samps)
    for i in np.arange(0, num_iters):

        # Optimize qEI and pick samples
        points_to_sample, ei = gen_sample_from_qei(gp_ll.models[0],
                                                   c_search_domain,
                                                   sgd_params=sgd_params,
                                                   num_samples=num_samples,
                                                   num_mc=2**10)

        # Collect observations
        sampled_points = -f.evaluate(points_to_sample)
        evidence = [
            SamplePoint(c, v, 0.0)
            for c, v in zip(points_to_sample, sampled_points)
        ]

        # Update model
        gp_ll.add_sampled_points(evidence)
        gp_ll.train()

        # Pull model and pull values
        gp = gp_ll.models[0]
        min_point = np.argmin(gp._points_sampled_value)
        min_val = np.min(gp._points_sampled_value)
        best_coord = gp.get_historical_data_copy().points_sampled[min_point]

        logging.info('Iteration {} of {}'.format(i, num_iters))
        logging.info('Recommended Points:')
        logging.info(points_to_sample)
        logging.info('Expected Improvement: {}'.format(ei))
        logging.info('Current Best:')
        logging.info(f'f(x*)= {min_val}')
        logging.info(f'Coord: {best_coord}')
        best_point_history.append(str(min_val))

        # The history file is rewritten in full every iteration so it is current if the run is interrupted
        if history:
            with open(history, 'w') as buf:
                buf.write('\n'.join(best_point_history))

        # Convergence check: only once the buffer holds ``min_samps`` previous best values
        if (len(var_buffer) == var_buffer.maxlen) and not skip_convergence:
            deviation = sum([abs(x - min_val) for x in var_buffer])
            if deviation < tol:
                logging.info('Convergence reached!')
                logging.info('Deviation: {}'.format(deviation))
                logging.info('History length: {}'.format(var_buffer.maxlen))
                logging.info('Tolerance: {}'.format(tol))
                break

        var_buffer.append(min_val)

    # Save position and orientation matrix
    np.savetxt(output_file, best_coord)
def multistart_hyperparameter_optimization(
        hyperparameter_optimizer,
        num_multistarts,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Return the hyperparameters that maximize a log likelihood measure of model fit within a domain.

    The optimizer is multistarted from ``num_multistarts`` points. The points are drawn uniformly at random
    in log10 space over the optimizer's domain and mapped back with ``10 ** x``, and the best result found is returned.

    .. Note:: The remarks below follow the documentation of
      :func:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.multistart_hyperparameter_optimization`.

    Example log likelihood-like measures are described in
    :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLogMarginalLikelihood` and
    :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLeaveOneOutLogLikelihood`.

    Available optimizers are: null ('dumb' search), gradient descent, newton. Newton is the suggested optimizer
    but is not presently available in Python (use the C++ interface); in Python, gradient descent is suggested.

    TODO(GH-57): Implement hessians and Newton's method.

    A 'dumb' search only evaluates the objective at num_multistarts 'points' (hyperparameters) in the domain.
    See gpp_python_common.cpp for the C++ enum declarations laying out the objective and optimizer types.

    As a very (VERY!) rough guideline for sizing the domain and the number of multistarts: the coordinates of
    the initial guesses should not differ from the coordinates of the optima by more than about 1 order of
    magnitude; be wary of sets of initial guesses that cover the space too sparsely.

    The solution is guaranteed to lie within the region specified by "domain"; note that this may not be a
    true optima (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if NO improvement can be found! In that case, the output will always be
      the first randomly chosen point. Note that ``status`` currently always reports success (see GH-59 below).

    TODO(GH-56): Allow callers to pass in a source of randomness.

    :param hyperparameter_optimizer: object that optimizes (e.g., gradient descent, newton) the desired
      log_likelihood measure over a domain (wrt the hyperparameters of covariance)
    :type hyperparameter_optimizer: interfaces.optimization_interfaces.OptimizerInterface subclass
    :param num_multistarts: number of times to multistart ``hyperparameter_optimizer``
    :type num_multistarts: int > 0
    :param randomness: random source used to generate multistart points (UNUSED)
    :type randomness: (UNUSED)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int > 0
    :param status: (output) status messages (e.g., reporting on optimizer success, etc.)
    :type status: dict
    :return: hyperparameters that maximize the specified log likelihood measure within the specified domain
    :rtype: array of float64 with shape (log_likelihood_evaluator.num_hyperparameters)

    """
    # Sampling the starts in log10 space clusters extra points near 0, which improves robustness.
    log10_bounds = numpy.log10(hyperparameter_optimizer.domain._domain_bounds)
    log10_intervals = ClosedInterval.build_closed_intervals_from_list(log10_bounds)
    log10_domain = TensorProductDomain(log10_intervals)
    log10_starts = log10_domain.generate_uniform_random_points_in_domain(num_points=num_multistarts)

    # Map the starts back from log10 space to the original hyperparameter scale.
    starting_points = numpy.power(10.0, log10_starts)

    optimal_hyperparameters, _ = multistart_optimize(hyperparameter_optimizer, starting_points=starting_points)

    # TODO(GH-59): Have GD actually indicate whether updates were found, e.g., in an IOContainer-like structure.
    # Until then success is reported unconditionally.
    if status is not None:
        status["gradient_descent_found_update"] = True

    return optimal_hyperparameters
__author__ = 'jialeiwang'

# Build the problem instance from the command-line arguments.
# Usage: run_pes.py ${benchmark_name} ${func_idx} ${repl_no}
argv = sys.argv[1:]
if "pes" not in argv[0]:
    raise ValueError("benchmark is not pes!")
problem = identify_problem(argv, bucket)

num_threads = 6
num_multistart = 6

# Transform data to (0,1)^d space
lower_bounds = problem.obj_func_min._search_domain[:, 0]
upper_bounds = problem.obj_func_min._search_domain[:, 1]
unit_domain = TensorProductDomain(
    [ClosedInterval(0.0, 1.0) for _ in problem.obj_func_min._search_domain])

# The history gets one more dimension than the objective: the leading coordinate of every
# point is copied through unscaled and only the remaining coordinates are rescaled.
# NOTE(review): the leading coordinate is presumably the information-source index -- confirm.
transformed_data = HistoricalData(problem.obj_func_min.getDim() + 1)
for pt, val, var in zip(problem.hist_data.points_sampled,
                        problem.hist_data.points_sampled_value,
                        problem.hist_data.points_sampled_noise_variance):
    scaled_pt = numpy.concatenate(
        ([pt[0]], scale_forward(pt[1:], lower_bounds, upper_bounds)))
    transformed_data.append_sample_points([[scaled_pt, val, var]])

# entropy search begins
if IS != 0 and IS != 1: raise ValueError("IS has to be 0 or 1") elif IS == 0: val += numpy.random.normal() elif IS == 1: val += 2. * numpy.sin(10.0 * x[0] + 5.0 * x[1]) return self._mult * val def noise_and_cost_func(self, IS, x): if IS != 0 and IS != 1: raise ValueError("IS has to be 0 or 1") return (1., 50) if IS == 0 else (1e-6, 1.) def getFuncName(self): return 'rbNew' if __name__ == "__main__": rb_remi = RosenbrockRemi() python_search_domain = TensorProductDomain([ ClosedInterval(bound[0], bound[1]) for bound in rb_remi._search_domain ]) pts = python_search_domain.generate_uniform_random_points_in_domain(1000) print "rb remi" print "IS0: {0}".format(numpy.mean([rb_remi.evaluate(0, x) for x in pts])) print "IS1: {0}".format(numpy.mean([rb_remi.evaluate(1, x) for x in pts])) print "rb new" rb_new = RosenbrockNew() print "IS0: {0}".format(numpy.mean([rb_new.evaluate(0, x) for x in pts])) print "IS1: {0}".format(numpy.mean([rb_new.evaluate(1, x) for x in pts]))
class GradientDescentOptimizerTest(OptimalLearningTestCase): r"""Test Gradient Descent on a simple quadratic objective. We check GD in an unconstrained setting, a constrained setting, and we test multistarting it. We don't test the stochastic averaging option meaningfully. We check that the optimizer will average the number of steps specified by input. We also check that the simple unconstrained case can also be solved with averaging on\*. \* This is not much of a test. The problem is convex and isotropic so GD will take a more or less straight path to the maxima. Averaging can only reduce the accuracy of the solve. TODO(GH-179): Build a simple stochastic objective and test the stochastic component fully. """ @T.class_setup def base_setup(self): """Set up a test case for optimizing a simple quadratic polynomial.""" self.dim = 3 domain_bounds = [ClosedInterval(-1.0, 1.0)] * self.dim self.domain = TensorProductDomain(domain_bounds) maxima_point = numpy.full(self.dim, 0.5) current_point = numpy.zeros(self.dim) self.polynomial = QuadraticFunction(maxima_point, current_point) max_num_steps = 250 max_num_restarts = 10 num_steps_averaged = 0 gamma = 0.7 # smaller gamma would lead to faster convergence, but we don't want to make the problem too easy pre_mult = 1.0 max_relative_change = 0.8 tolerance = 1.0e-12 self.gd_parameters = GradientDescentParameters( max_num_steps, max_num_restarts, num_steps_averaged, gamma, pre_mult, max_relative_change, tolerance, ) approx_grad = False max_func_evals = 150000 max_metric_correc = 10 factr = 1000.0 pgtol = 1e-10 epsilon = 1e-8 self.BFGS_parameters = LBFGSBParameters( approx_grad, max_func_evals, max_metric_correc, factr, pgtol, epsilon, ) def test_gradient_descent_optimizer(self): """Check that gradient descent can find the optimum of the quadratic test objective.""" # Check the claimed optima is an optima optimum_point = self.polynomial.optimum_point self.polynomial.current_point = optimum_point gradient = 
self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0) # Verify that gradient descent does not move from the optima if we start it there. gradient_descent_optimizer = GradientDescentOptimizer( self.domain, self.polynomial, self.gd_parameters) gradient_descent_optimizer.optimize() output = gradient_descent_optimizer.objective_function.current_point self.assert_vector_within_relative(output, optimum_point, 0.0) # Start at a wrong point and check optimization tolerance = 2.0e-13 initial_guess = numpy.full(self.polynomial.dim, 0.2) gradient_descent_optimizer.objective_function.current_point = initial_guess gradient_descent_optimizer.optimize() output = gradient_descent_optimizer.objective_function.current_point # Verify coordinates self.assert_vector_within_relative(output, optimum_point, tolerance) # Verify function value value = self.polynomial.compute_objective_function() self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance) # Verify derivative gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance) def test_get_averaging_range(self): """Test the method used to produce what interval to average over in Polyak-Ruppert averaging.""" num_steps_total = 250 end = num_steps_total + 1 num_steps_averaged_input_list = [ -1, 0, 1, 20, 100, 249, 250, 251, 10000 ] truth_list = [(1, end), (250, end), (250, end), (231, end), (151, end), (2, end), (1, end), (1, end), (1, end)] for i, truth in enumerate(truth_list): start, end = GradientDescentOptimizer._get_averaging_range( num_steps_averaged_input_list[i], num_steps_total) T.assert_equal(start, truth[0]) T.assert_equal(end, truth[1]) def test_gradient_descent_optimizer_with_averaging(self): """Check that gradient descent can find the optimum of the quadratic test objective with averaging on. 
This test doesn't exercise the purpose of averaging (i.e., this objective isn't stochastic), but it does check that it at least runs. """ num_steps_averaged = self.gd_parameters.max_num_steps * 3 / 4 gd_parameters_averaging = GradientDescentParameters( self.gd_parameters.max_num_steps, self.gd_parameters.max_num_restarts, num_steps_averaged, self.gd_parameters.gamma, self.gd_parameters.pre_mult, self.gd_parameters.max_relative_change, self.gd_parameters.tolerance, ) # Check the claimed optima is an optima optimum_point = self.polynomial.optimum_point self.polynomial.current_point = optimum_point gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0) # Verify that gradient descent does not move from the optima if we start it there. gradient_descent_optimizer = GradientDescentOptimizer( self.domain, self.polynomial, gd_parameters_averaging) gradient_descent_optimizer.optimize() output = gradient_descent_optimizer.objective_function.current_point self.assert_vector_within_relative(output, optimum_point, 0.0) # Start at a wrong point and check optimization tolerance = 2.0e-10 initial_guess = numpy.full(self.polynomial.dim, 0.2) gradient_descent_optimizer.objective_function.current_point = initial_guess gradient_descent_optimizer.optimize() output = gradient_descent_optimizer.objective_function.current_point # Verify coordinates self.assert_vector_within_relative(output, optimum_point, tolerance) # Verify function value value = self.polynomial.compute_objective_function() self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance) # Verify derivative gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance) def test_gradient_descent_optimizer_constrained(self): """Check that gradient descent can find the global optimum (in a domain) when the true optimum is 
outside.""" # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain domain_bounds = [ ClosedInterval(0.05, 0.32), ClosedInterval(0.05, 0.6), ClosedInterval(0.05, 0.32) ] domain = TensorProductDomain(domain_bounds) gradient_descent_optimizer = GradientDescentOptimizer( domain, self.polynomial, self.gd_parameters) # Work out what the maxima point woudl be given the domain constraints (i.e., project to the nearest point on domain) constrained_optimum_point = self.polynomial.optimum_point for i, bounds in enumerate(domain_bounds): if constrained_optimum_point[i] > bounds.max: constrained_optimum_point[i] = bounds.max elif constrained_optimum_point[i] < bounds.min: constrained_optimum_point[i] = bounds.min tolerance = 2.0e-13 initial_guess = numpy.full(self.polynomial.dim, 0.2) gradient_descent_optimizer.objective_function.current_point = initial_guess initial_value = gradient_descent_optimizer.objective_function.compute_objective_function( ) gradient_descent_optimizer.optimize() output = gradient_descent_optimizer.objective_function.current_point # Verify coordinates self.assert_vector_within_relative(output, constrained_optimum_point, tolerance) # Verify optimized value is better than initial guess final_value = self.polynomial.compute_objective_function() T.assert_gt(final_value, initial_value) # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries gradient = self.polynomial.compute_grad_objective_function() for i, bounds in enumerate(domain_bounds): if bounds.is_inside(self.polynomial.optimum_point[i]): self.assert_scalar_within_relative(gradient[i], 0.0, tolerance) def test_multistarted_gradient_descent_optimizer_crippled_start(self): """Check that multistarted GD is finding the best result from GD.""" # Only allow 1 GD iteration. 
gd_parameters_crippled = GradientDescentParameters( 1, 1, self.gd_parameters.num_steps_averaged, self.gd_parameters.gamma, self.gd_parameters.pre_mult, self.gd_parameters.max_relative_change, self.gd_parameters.tolerance, ) gradient_descent_optimizer_crippled = GradientDescentOptimizer( self.domain, self.polynomial, gd_parameters_crippled) num_points = 15 points = self.domain.generate_uniform_random_points_in_domain( num_points) multistart_optimizer = MultistartOptimizer( gradient_descent_optimizer_crippled, num_points) test_best_point, _ = multistart_optimizer.optimize( random_starts=points) # This point set won't include the optimum so multistart GD won't find it. for value in (test_best_point - self.polynomial.optimum_point): T.assert_not_equal(value, 0.0) points_with_opt = numpy.append(points, self.polynomial.optimum_point.reshape( (1, self.polynomial.dim)), axis=0) test_best_point, _ = multistart_optimizer.optimize( random_starts=points_with_opt) # This point set will include the optimum so multistart GD will find it. 
for value in (test_best_point - self.polynomial.optimum_point): T.assert_equal(value, 0.0) def test_multistarted_gradient_descent_optimizer(self): """Check that multistarted GD can find the optimum in a 'very' large domain.""" # Set a large domain: a single GD run is unlikely to reach the optimum domain_bounds = [ClosedInterval(-10.0, 10.0)] * self.dim domain = TensorProductDomain(domain_bounds) tolerance = 2.0e-10 num_points = 10 gradient_descent_optimizer = GradientDescentOptimizer( domain, self.polynomial, self.gd_parameters) multistart_optimizer = MultistartOptimizer(gradient_descent_optimizer, num_points) output, _ = multistart_optimizer.optimize() # Verify coordinates self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance) # Verify function value value = self.polynomial.compute_objective_function() self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance) # Verify derivative gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance) def test_bfgs_optimizer(self): """Check that BFGS can find the optimum of the quadratic test objective.""" # Check the claimed optima is an optima optimum_point = self.polynomial.optimum_point self.polynomial.current_point = optimum_point gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0) # Verify that gradient descent does not move from the optima if we start it there. 
bfgs_optimizer = LBFGSBOptimizer(self.domain, self.polynomial, self.BFGS_parameters) bfgs_optimizer.optimize() output = bfgs_optimizer.objective_function.current_point self.assert_vector_within_relative(output, optimum_point, 0.0) # Start at a wrong point and check optimization tolerance = 2.0e-13 initial_guess = numpy.full(self.polynomial.dim, 0.2) bfgs_optimizer.objective_function.current_point = initial_guess bfgs_optimizer.optimize() output = bfgs_optimizer.objective_function.current_point # Verify coordinates self.assert_vector_within_relative(output, optimum_point, tolerance) # Verify function value value = self.polynomial.compute_objective_function() self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance) # Verify derivative gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance) def test_multistarted_bfgs_optimizer(self): """Check that multistarted GD can find the optimum in a 'very' large domain.""" # Set a large domain: a single GD run is unlikely to reach the optimum domain_bounds = [ClosedInterval(-10.0, 10.0)] * self.dim domain = TensorProductDomain(domain_bounds) tolerance = 2.0e-10 num_points = 10 bfgs_optimizer = LBFGSBOptimizer(domain, self.polynomial, self.BFGS_parameters) multistart_optimizer = MultistartOptimizer(bfgs_optimizer, num_points) output, _ = multistart_optimizer.optimize() # Verify coordinates self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance) # Verify function value value = self.polynomial.compute_objective_function() self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance) # Verify derivative gradient = self.polynomial.compute_grad_objective_function() self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
def get_moe_domain(self):
    """Build a ``TensorProductDomain`` from this object's search domain.

    One ``ClosedInterval`` is created per entry of ``self._search_domain``, using
    the entry's element 0 and element 1 as the two interval arguments.

    :return: tensor product of the per-entry closed intervals
    :rtype: TensorProductDomain

    """
    intervals = []
    for bound in self._search_domain:
        intervals.append(ClosedInterval(bound[0], bound[1]))
    return TensorProductDomain(intervals)
def multistart_hyperparameter_optimization(
        hyperparameter_optimizer,
        num_multistarts,
        randomness=None,
        max_num_threads=DEFAULT_MAX_NUM_THREADS,
        status=None,
):
    r"""Multistart an optimizer to find the hyperparameters that maximize a log likelihood measure of model fit.

    .. Note:: These comments are adapted from
      :func:`moe.optimal_learning.python.cpp_wrappers.log_likelihood.multistart_hyperparameter_optimization`.

    See :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLogMarginalLikelihood` and
    :class:`moe.optimal_learning.python.python_version.log_likelihood.GaussianProcessLeaveOneOutLogLikelihood`
    for examples of log likelihood-like measures.

    Available optimizers are: null ('dumb' search), gradient descent, newton. Newton is the suggested optimizer
    but is not presently available in Python (use the C++ interface); in Python, gradient descent is suggested.

    TODO(GH-57): Implement hessians and Newton's method.

    Starting points are drawn uniformly in log10 space over the optimizer's domain and then mapped back with
    ``10 ** x``; the optimizer is then multistarted from those points and the best result is returned.

    As a (VERY!) rough guideline for sizing the domain and ``num_multistarts``: the coordinates of the initial
    guesses should not differ from the coordinates of the optima by more than about 1 order of magnitude, so be
    wary of sets of initial guesses that cover the space too sparsely.

    The solution is guaranteed to lie within the optimizer's domain; note that it may not be a true optima
    (i.e., the gradient may be substantially nonzero).

    .. WARNING:: this function fails if NO improvement can be found! In that case,
       the output will always be the first randomly chosen point. Note that ``status`` currently
       always reports success (see TODO(GH-59) in the body).

    TODO(GH-56): Allow callers to pass in a source of randomness.

    :param hyperparameter_optimizer: object that optimizes (e.g., gradient descent, newton) the desired log_likelihood
        measure over a domain (wrt the hyperparameters of covariance)
    :type hyperparameter_optimizer: interfaces.optimization_interfaces.OptimizerInterface subclass
    :param num_multistarts: number of times to multistart ``hyperparameter_optimizer``
    :type num_multistarts: int > 0
    :param randomness: random source used to generate multistart points (UNUSED)
    :type randomness: (UNUSED)
    :param max_num_threads: maximum number of threads to use, >= 1 (UNUSED)
    :type max_num_threads: int > 0
    :param status: status messages (e.g., reporting on optimizer success, etc.); if not None, the key
        ``"gradient_descent_found_update"`` is written
    :type status: dict
    :return: hyperparameters that maximize the specified log likelihood measure within the specified domain
    :rtype: array of float64 with shape (log_likelihood_evaluator.num_hyperparameters)

    """
    # Sampling the starts in log10 space improves robustness by clustering some extra points near 0.
    optimizer_domain = hyperparameter_optimizer.domain
    log10_bounds = numpy.log10(optimizer_domain._domain_bounds)
    log10_intervals = ClosedInterval.build_closed_intervals_from_list(log10_bounds)
    log10_domain = TensorProductDomain(log10_intervals)
    log10_starts = log10_domain.generate_uniform_random_points_in_domain(num_points=num_multistarts)
    # Map the log10-space samples back to hyperparameter space.
    starting_points = numpy.power(10.0, log10_starts)

    best_hyperparameters, _ = multistart_optimize(hyperparameter_optimizer, starting_points=starting_points)

    if status is not None:
        # TODO(GH-59): Have GD actually indicate whether updates were found, e.g., in an IOContainer-like structure.
        # Until then success is reported unconditionally.
        status["gradient_descent_found_update"] = True

    return best_hyperparameters