Exemplo n.º 1
0
    def test_multistart_qei_expected_improvement_dfo(self):
        """Check that multistart optimization (BFGS) can find the optimum point to sample (using 2-EI)."""
        numpy.random.seed(7860)
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        tolerance = 6.0e-5
        num_multistarts = 3

        # Expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 3.0)] *
                                              self.dim)
        num_to_sample = 2
        repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

        num_mc_iterations = 100000
        # Just any random point that won't be optimal
        points_to_sample = repeated_domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process,
                                      points_to_sample,
                                      num_mc_iterations=num_mc_iterations)
        # Compute EI and its gradient for the sake of comparison
        ei_initial = ei_eval.compute_expected_improvement()

        ei_optimizer = LBFGSBOptimizer(repeated_domain, ei_eval,
                                       self.BFGS_parameters)
        best_point = multistart_expected_improvement_optimization(
            ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are "small" or on border. MC is very inaccurate near 0, so use finite difference
        # gradient instead.
        ei_eval.current_point = best_point
        ei_final = ei_eval.compute_expected_improvement()

        finite_diff_grad = numpy.zeros(best_point.shape)
        h_value = 0.00001
        for i in range(best_point.shape[0]):
            for j in range(best_point.shape[1]):
                best_point[i, j] += h_value
                ei_eval.current_point = best_point
                ei_upper = ei_eval.compute_expected_improvement()
                best_point[i, j] -= 2 * h_value
                ei_eval.current_point = best_point
                ei_lower = ei_eval.compute_expected_improvement()
                best_point[i, j] += h_value
                finite_diff_grad[i, j] = (ei_upper - ei_lower) / (2 * h_value)

        self.assert_vector_within_relative(finite_diff_grad,
                                           numpy.zeros(finite_diff_grad.shape),
                                           tolerance)

        # Check that output is in the domain
        assert repeated_domain.check_point_inside(best_point) is True

        # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
        # EI should have improved
        assert ei_final >= ei_initial
Exemplo n.º 2
0
    def test_multistart_qei_expected_improvement_dfo(self):
        """Check that multistart optimization (BFGS) can find the optimum point to sample (using 2-EI)."""
        numpy.random.seed(7860)
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        tolerance = 6.0e-5
        num_multistarts = 3

        # Expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 3.0)] * self.dim)
        num_to_sample = 2
        repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

        num_mc_iterations = 100000
        # Just any random point that won't be optimal
        points_to_sample = repeated_domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
        # Compute EI and its gradient for the sake of comparison
        ei_initial = ei_eval.compute_expected_improvement()

        ei_optimizer = LBFGSBOptimizer(repeated_domain, ei_eval, self.BFGS_parameters)
        best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are "small" or on border. MC is very inaccurate near 0, so use finite difference
        # gradient instead.
        ei_eval.current_point = best_point
        ei_final = ei_eval.compute_expected_improvement()

        finite_diff_grad = numpy.zeros(best_point.shape)
        h_value = 0.00001
        for i in range(best_point.shape[0]):
            for j in range(best_point.shape[1]):
                best_point[i, j] += h_value
                ei_eval.current_point = best_point
                ei_upper = ei_eval.compute_expected_improvement()
                best_point[i, j] -= 2 * h_value
                ei_eval.current_point = best_point
                ei_lower = ei_eval.compute_expected_improvement()
                best_point[i, j] += h_value
                finite_diff_grad[i, j] = (ei_upper - ei_lower) / (2 * h_value)

        self.assert_vector_within_relative(finite_diff_grad, numpy.zeros(finite_diff_grad.shape), tolerance)

        # Check that output is in the domain
        assert repeated_domain.check_point_inside(best_point) is True

        # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
        # EI should have improved
        assert ei_final >= ei_initial
Exemplo n.º 3
0
    def test_multistart_monte_carlo_expected_improvement_optimization(self):
        """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 2-EI)."""
        numpy.random.seed(7858)  # TODO(271): Monte Carlo only works for this seed
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        max_num_steps = 75  # this is *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 50
        gamma = 0.2
        pre_mult = 1.5
        max_relative_change = 1.0
        tolerance = 3.0e-2  # really large tolerance b/c converging with monte-carlo (esp in Python) is expensive
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 2

        # Expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)
        num_to_sample = 2
        repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

        num_mc_iterations = 10000
        # Just any random point that won't be optimal
        points_to_sample = repeated_domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
        # Compute EI and its gradient for the sake of comparison
        ei_initial = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
        grad_ei_initial = ei_eval.compute_grad_expected_improvement()

        ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
        best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are "small"
        ei_eval.current_point = best_point
        ei_final = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
        grad_ei_final = ei_eval.compute_grad_expected_improvement()
        self.assert_vector_within_relative(grad_ei_final, numpy.zeros(grad_ei_final.shape), tolerance)

        # Check that output is in the domain
        assert repeated_domain.check_point_inside(best_point) is True

        # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
        # EI should have improved
        assert ei_final >= ei_initial

        # grad EI should have improved
        for index in numpy.ndindex(grad_ei_final.shape):
            assert numpy.fabs(grad_ei_final[index]) <= numpy.fabs(grad_ei_initial[index])
Exemplo n.º 4
0
    def test_multistart_analytic_expected_improvement_optimization(self):
        """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 1D analytic EI)."""
        numpy.random.seed(3148)
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        max_num_steps = 200  # this is generally *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 0
        gamma = 0.2
        pre_mult = 1.5
        max_relative_change = 1.0
        tolerance = 1.0e-7
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 3

        points_to_sample = domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample)

        # expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] *
                                              self.dim)

        num_to_sample = 1
        repeated_domain = RepeatedDomain(ei_eval.num_to_sample,
                                         expanded_domain)
        ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval,
                                                gd_parameters)
        best_point = multistart_expected_improvement_optimization(
            ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are small
        ei_eval.current_point = best_point
        gradient = ei_eval.compute_grad_expected_improvement()
        self.assert_vector_within_relative(gradient,
                                           numpy.zeros(gradient.shape),
                                           tolerance)

        # Check that output is in the domain
        assert repeated_domain.check_point_inside(best_point) is True
Exemplo n.º 5
0
    def test_evaluate_ei_at_points(self):
        """Check that ``evaluate_expected_improvement_at_point_list`` computes and orders results correctly (using 1D analytic EI)."""
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 5))
        domain, gaussian_process = self.gp_test_environments[index]

        points_to_sample = domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample)

        num_to_eval = 10
        # Add in a newaxis to make num_to_sample explicitly 1
        points_to_evaluate = domain.generate_uniform_random_points_in_domain(num_to_eval)[:, numpy.newaxis, :]

        test_values = ei_eval.evaluate_at_point_list(points_to_evaluate)

        for i, value in enumerate(test_values):
            ei_eval.current_point = points_to_evaluate[i, ...]
            truth = ei_eval.compute_expected_improvement()
            assert value == truth
Exemplo n.º 6
0
    def test_multistart_analytic_expected_improvement_optimization(self):
        """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 1D analytic EI)."""
        numpy.random.seed(3148)
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        max_num_steps = 200  # this is generally *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 0
        gamma = 0.2
        pre_mult = 1.5
        max_relative_change = 1.0
        tolerance = 1.0e-7
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 3

        points_to_sample = domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample)

        # expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)

        num_to_sample = 1
        repeated_domain = RepeatedDomain(ei_eval.num_to_sample, expanded_domain)
        ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
        best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are small
        ei_eval.current_point = best_point
        gradient = ei_eval.compute_grad_expected_improvement()
        self.assert_vector_within_relative(gradient, numpy.zeros(gradient.shape), tolerance)

        # Check that output is in the domain
        assert repeated_domain.check_point_inside(best_point) is True
Exemplo n.º 7
0
    def compute_next_points_to_sample_response(self, params, optimizer_method_name, route_name, *args, **kwargs):
        """Compute the next points to sample (and their expected improvement) using optimizer_method_name from params in the request.

        .. Warning:: Attempting to find ``num_to_sample`` optimal points with
          ``num_sampled < num_to_sample`` historical points sampled can cause matrix issues under
          some conditions. Try requesting ``num_to_sample < num_sampled`` points for better
          performance. To bootstrap more points try sampling at random, or from a grid.

        :param request_params: the deserialized REST request, containing ei_optimizer_parameters and gp_historical_info
        :type request_params: a deserialized self.request_schema object as a dict
        :param optimizer_method_name: the optimization method to use
        :type optimizer_method_name: string in :const:`moe.views.constant.NEXT_POINTS_OPTIMIZER_METHOD_NAMES`
        :param route_name: name of the route being called
        :type route_name: string in :const:`moe.views.constant.ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT`
        :param ``*args``: extra args to be passed to optimization method
        :param ``**kwargs``: extra kwargs to be passed to optimization method

        """
        points_being_sampled = numpy.array(params.get('points_being_sampled'))
        num_to_sample = params.get('num_to_sample')
        num_mc_iterations = params.get('mc_iterations')
        max_num_threads = params.get('max_num_threads')

        gaussian_process = _make_gp_from_params(params)

        ei_opt_status = {}
        # TODO(GH-89): Make the optimal_learning library handle this case 'organically' with
        # reasonable default behavior and remove hacks like this one.
        if gaussian_process.num_sampled == 0:
            # If there is no initial data we bootstrap with random points
            py_domain = _make_domain_from_params(params, python_version=True)
            next_points = py_domain.generate_uniform_random_points_in_domain(num_to_sample)
            ei_opt_status['found_update'] = True
            expected_improvement_evaluator = PythonExpectedImprovement(
                    gaussian_process,
                    points_being_sampled=points_being_sampled,
                    num_mc_iterations=num_mc_iterations,
                    )
        else:
            # Calculate the next best points to sample given the historical data

            optimizer_class, optimizer_parameters, num_random_samples = _make_optimizer_parameters_from_params(params)

            if optimizer_class == python_optimization.LBFGSBOptimizer:
                domain = RepeatedDomain(num_to_sample, _make_domain_from_params(params, python_version=True))
                expected_improvement_evaluator = PythonExpectedImprovement(
                        gaussian_process,
                        points_being_sampled=points_being_sampled,
                        num_mc_iterations=num_mc_iterations,
                        mvndst_parameters=_make_mvndst_parameters_from_params(params)
                        )

                opt_method = getattr(moe.optimal_learning.python.python_version.expected_improvement, optimizer_method_name)
            else:
                domain = _make_domain_from_params(params, python_version=False)
                expected_improvement_evaluator = ExpectedImprovement(
                        gaussian_process,
                        points_being_sampled=points_being_sampled,
                        num_mc_iterations=num_mc_iterations,
                        )

                opt_method = getattr(moe.optimal_learning.python.cpp_wrappers.expected_improvement, optimizer_method_name)

            expected_improvement_optimizer = optimizer_class(
                    domain,
                    expected_improvement_evaluator,
                    optimizer_parameters,
                    num_random_samples=num_random_samples,
                    )

            with timing_context(EPI_OPTIMIZATION_TIMING_LABEL):
                next_points = opt_method(
                    expected_improvement_optimizer,
                    params.get('optimizer_info')['num_multistarts'],  # optimizer_parameters.num_multistarts,
                    num_to_sample,
                    max_num_threads=max_num_threads,
                    status=ei_opt_status,
                    *args,
                    **kwargs
                )

        # TODO(GH-285): Use analytic q-EI here
        # TODO(GH-314): Need to resolve poential issue with NaNs before using q-EI here
        # It may be sufficient to check found_update == False in ei_opt_status
        # and then use q-EI, else set EI = 0.
        expected_improvement_evaluator.current_point = next_points
        # The C++ may fail to compute EI with some ``next_points`` inputs (e.g.,
        # ``points_to_sample`` and ``points_begin_sampled`` are too close
        # together or too close to ``points_sampled``). We catch the exception when this happens
        # and attempt a more numerically robust option.
        try:
            expected_improvement = expected_improvement_evaluator.compute_expected_improvement()
        except Exception as exception:
            self.log.info('EI computation failed, probably b/c GP-variance matrix is singular. Error: {0:s}'.format(exception))

            # ``_compute_expected_improvement_monte_carlo`` in
            # :class:`moe.optimal_learning.python.python_version.expected_improvement.ExpectedImprovement`
            # has a more reliable (but very expensive) way to deal with singular variance matrices.
            python_ei_eval = PythonExpectedImprovement(
                expected_improvement_evaluator._gaussian_process,
                points_to_sample=next_points,
                points_being_sampled=points_being_sampled,
                num_mc_iterations=num_mc_iterations,
            )
            expected_improvement = python_ei_eval.compute_expected_improvement(force_monte_carlo=True)

        return self.form_response({
                'endpoint': route_name,
                'points_to_sample': next_points.tolist(),
                'status': {
                    'expected_improvement': expected_improvement,
                    'optimizer_success': ei_opt_status,
                    },
                })
Exemplo n.º 8
0
    def compute_next_points_to_sample_response(self, params,
                                               optimizer_method_name,
                                               route_name, *args, **kwargs):
        """Compute the next points to sample (and their expected improvement) using optimizer_method_name from params in the request.

        .. Warning:: Attempting to find ``num_to_sample`` optimal points with
          ``num_sampled < num_to_sample`` historical points sampled can cause matrix issues under
          some conditions. Try requesting ``num_to_sample < num_sampled`` points for better
          performance. To bootstrap more points try sampling at random, or from a grid.

        :param request_params: the deserialized REST request, containing ei_optimizer_parameters and gp_historical_info
        :type request_params: a deserialized self.request_schema object as a dict
        :param optimizer_method_name: the optimization method to use
        :type optimizer_method_name: string in :const:`moe.views.constant.NEXT_POINTS_OPTIMIZER_METHOD_NAMES`
        :param route_name: name of the route being called
        :type route_name: string in :const:`moe.views.constant.ALL_REST_ROUTES_ROUTE_NAME_TO_ENDPOINT`
        :param ``*args``: extra args to be passed to optimization method
        :param ``**kwargs``: extra kwargs to be passed to optimization method

        """
        points_being_sampled = numpy.array(params.get('points_being_sampled'))
        num_to_sample = params.get('num_to_sample')
        num_mc_iterations = params.get('mc_iterations')
        max_num_threads = params.get('max_num_threads')

        gaussian_process = _make_gp_from_params(params)

        ei_opt_status = {}
        # TODO(GH-89): Make the optimal_learning library handle this case 'organically' with
        # reasonable default behavior and remove hacks like this one.
        if gaussian_process.num_sampled == 0:
            # If there is no initial data we bootstrap with random points
            py_domain = _make_domain_from_params(params, python_version=True)
            next_points = py_domain.generate_uniform_random_points_in_domain(
                num_to_sample)
            ei_opt_status['found_update'] = True
            expected_improvement_evaluator = PythonExpectedImprovement(
                gaussian_process,
                points_being_sampled=points_being_sampled,
                num_mc_iterations=num_mc_iterations,
            )
        else:
            # Calculate the next best points to sample given the historical data

            optimizer_class, optimizer_parameters, num_random_samples = _make_optimizer_parameters_from_params(
                params)

            if optimizer_class == python_optimization.LBFGSBOptimizer:
                domain = RepeatedDomain(
                    num_to_sample,
                    _make_domain_from_params(params, python_version=True))
                expected_improvement_evaluator = PythonExpectedImprovement(
                    gaussian_process,
                    points_being_sampled=points_being_sampled,
                    num_mc_iterations=num_mc_iterations,
                    mvndst_parameters=_make_mvndst_parameters_from_params(
                        params))

                opt_method = getattr(
                    moe.optimal_learning.python.python_version.
                    expected_improvement, optimizer_method_name)
            else:
                domain = _make_domain_from_params(params, python_version=False)
                expected_improvement_evaluator = ExpectedImprovement(
                    gaussian_process,
                    points_being_sampled=points_being_sampled,
                    num_mc_iterations=num_mc_iterations,
                )

                opt_method = getattr(
                    moe.optimal_learning.python.cpp_wrappers.
                    expected_improvement, optimizer_method_name)

            expected_improvement_optimizer = optimizer_class(
                domain,
                expected_improvement_evaluator,
                optimizer_parameters,
                num_random_samples=num_random_samples,
            )

            with timing_context(EPI_OPTIMIZATION_TIMING_LABEL):
                next_points = opt_method(
                    expected_improvement_optimizer,
                    params.get('optimizer_info')
                    ['num_multistarts'],  # optimizer_parameters.num_multistarts,
                    num_to_sample,
                    max_num_threads=max_num_threads,
                    status=ei_opt_status,
                    *args,
                    **kwargs)

        # TODO(GH-285): Use analytic q-EI here
        # TODO(GH-314): Need to resolve poential issue with NaNs before using q-EI here
        # It may be sufficient to check found_update == False in ei_opt_status
        # and then use q-EI, else set EI = 0.
        expected_improvement_evaluator.current_point = next_points
        # The C++ may fail to compute EI with some ``next_points`` inputs (e.g.,
        # ``points_to_sample`` and ``points_begin_sampled`` are too close
        # together or too close to ``points_sampled``). We catch the exception when this happens
        # and attempt a more numerically robust option.
        try:
            expected_improvement = expected_improvement_evaluator.compute_expected_improvement(
            )
        except Exception as exception:
            self.log.info(
                'EI computation failed, probably b/c GP-variance matrix is singular. Error: {0:s}'
                .format(exception))

            # ``_compute_expected_improvement_monte_carlo`` in
            # :class:`moe.optimal_learning.python.python_version.expected_improvement.ExpectedImprovement`
            # has a more reliable (but very expensive) way to deal with singular variance matrices.
            python_ei_eval = PythonExpectedImprovement(
                expected_improvement_evaluator._gaussian_process,
                points_to_sample=next_points,
                points_being_sampled=points_being_sampled,
                num_mc_iterations=num_mc_iterations,
            )
            expected_improvement = python_ei_eval.compute_expected_improvement(
                force_monte_carlo=True)

        return self.form_response({
            'endpoint': route_name,
            'points_to_sample': next_points.tolist(),
            'status': {
                'expected_improvement': expected_improvement,
                'optimizer_success': ei_opt_status,
            },
        })