Пример #1
0
    def test_python_and_cpp_return_same_cholesky_variance_and_gradient(self):
        """Check that Python and C++ agree on cholesky variance and its gradient.

        For each GP test environment, a C++ GP is constructed from the Python
        GP's covariance hyperparameters and historical data. Both GPs are then
        evaluated at several batches of uniformly random points from the domain
        and the results are compared within a relative tolerance.
        """
        repeats_per_case = 2
        variance_rel_tol = 3.0e-12
        # TODO(GH-240): set RNG seed for this case and restore tolerance to 3.0e-12 or better
        grad_variance_rel_tol = 3.0e-10

        # (method to call on both GPs, relative tolerance for the comparison)
        comparisons = (
            ('compute_cholesky_variance_of_points', variance_rel_tol),
            ('compute_grad_cholesky_variance_of_points', grad_variance_rel_tol),
        )

        for domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()

            cpp_cov = SquareExponential(python_cov.hyperparameters)
            cpp_gp = GaussianProcess(cpp_cov, historical_data)

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(repeats_per_case):
                    random_points = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    for method_name, rel_tol in comparisons:
                        # C++ is evaluated first, then Python, for each quantity.
                        cpp_result = getattr(cpp_gp, method_name)(random_points)
                        python_result = getattr(python_gp, method_name)(random_points)
                        self.assert_vector_within_relative(python_result, cpp_result, rel_tol)
Пример #2
0
    def test_mean_var_interface_returns_same_as_cpp(self):
        """Verify that the /gp/mean_var endpoint agrees with the C++ interface.

        For each GP test environment, the mean and variance at 10 uniformly
        random domain points are computed directly through a C++ GP and through
        a POST to ``self.endpoint``; the two answers must match within a
        relative tolerance.
        """
        rel_tol = 1.0e-11
        for python_domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()
            cpp_gp = GaussianProcess(SquareExponential(python_cov.hyperparameters), historical_data)

            eval_points = python_domain.generate_uniform_random_points_in_domain(10)

            # Reference values computed directly by the C++ GP.
            expected_mean = cpp_gp.compute_mean_of_points(eval_points)
            expected_var = cpp_gp.compute_variance_of_points(eval_points)

            # The same quantities obtained through the REST endpoint.
            payload = self._build_json_payload(
                python_domain,
                python_cov,
                historical_data,
                eval_points.tolist(),
            )
            response = self.testapp.post(self.endpoint, payload)
            decoded = GpMeanVarResponse().deserialize(json.loads(response.body))
            actual_mean = numpy.asarray(decoded.get('mean'))
            actual_var = numpy.asarray(decoded.get('var'))

            for actual, expected in ((actual_mean, expected_mean), (actual_var, expected_var)):
                self.assert_vector_within_relative(actual, expected, rel_tol)
Пример #3
0
    def test_python_and_cpp_return_same_cholesky_variance_and_gradient(self):
        """Compare cholesky variance and its gradient between the Python and C++ GPs.

        Several batches of uniformly random points are checked per test
        environment and per entry of ``self.num_to_sample_list``.
        """
        trials = 2
        chol_var_tol = 3.0e-12
        # TODO(GH-240): set RNG seed for this case and restore tolerance to 3.0e-12 or better
        grad_chol_var_tol = 3.0e-10

        for domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()

            # Mirror the Python GP with a C++ GP using the same hyperparameters and data.
            cpp_gp = GaussianProcess(
                SquareExponential(python_cov.hyperparameters),
                historical_data,
            )

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(trials):
                    pts = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    # Cholesky variance
                    from_cpp = cpp_gp.compute_cholesky_variance_of_points(pts)
                    from_python = python_gp.compute_cholesky_variance_of_points(pts)
                    self.assert_vector_within_relative(from_python, from_cpp, chol_var_tol)

                    # Gradient of the cholesky variance
                    grad_from_cpp = cpp_gp.compute_grad_cholesky_variance_of_points(pts)
                    grad_from_python = python_gp.compute_grad_cholesky_variance_of_points(pts)
                    self.assert_vector_within_relative(grad_from_python, grad_from_cpp, grad_chol_var_tol)
Пример #4
0
    def test_mean_var_interface_returns_same_as_cpp(self):
        """Check the /gp/mean_var endpoint against the C++ interface.

        The mean and variance returned by a POST to ``self.endpoint`` must match
        the values computed by a C++ GP built from the same covariance
        hyperparameters and historical data, within a relative tolerance.
        """
        tolerance = 1.0e-11
        num_points = 10
        for python_domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()

            cpp_cov = SquareExponential(python_cov.hyperparameters)
            cpp_gp = GaussianProcess(cpp_cov, historical_data)

            query_points = python_domain.generate_uniform_random_points_in_domain(num_points)

            # C++ side
            mean_from_cpp = cpp_gp.compute_mean_of_points(query_points)
            var_from_cpp = cpp_gp.compute_variance_of_points(query_points)

            # REST side
            request_body = self._build_json_payload(
                python_domain, python_cov, historical_data, query_points.tolist()
            )
            raw_response = self.testapp.post(self.endpoint, request_body)
            parsed = GpMeanVarResponse().deserialize(json.loads(raw_response.body))
            mean_from_rest = numpy.asarray(parsed.get('mean'))
            var_from_rest = numpy.asarray(parsed.get('var'))

            self.assert_vector_within_relative(mean_from_rest, mean_from_cpp, tolerance)
            self.assert_vector_within_relative(var_from_rest, var_from_cpp, tolerance)
Пример #5
0
    def test_python_and_cpp_return_same_mu_and_gradient(self):
        """Check that Python and C++ agree on the GP mean and its gradient.

        For each GP test environment, a C++ GP is constructed from the Python
        GP's covariance hyperparameters and historical data. Both GPs are then
        evaluated at several batches of uniformly random points from the domain
        and the results are compared within a relative tolerance.
        """
        repeats_per_case = 4
        mean_rel_tol = 3.0e-13
        grad_mean_rel_tol = 3.0e-12

        # (method to call on both GPs, relative tolerance for the comparison)
        comparisons = (
            ('compute_mean_of_points', mean_rel_tol),
            ('compute_grad_mean_of_points', grad_mean_rel_tol),
        )

        for domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()

            cpp_cov = SquareExponential(python_cov.hyperparameters)
            cpp_gp = GaussianProcess(cpp_cov, historical_data)

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(repeats_per_case):
                    random_points = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    for method_name, rel_tol in comparisons:
                        # C++ is evaluated first, then Python, for each quantity.
                        cpp_result = getattr(cpp_gp, method_name)(random_points)
                        python_result = getattr(python_gp, method_name)(random_points)
                        self.assert_vector_within_relative(python_result, cpp_result, rel_tol)
Пример #6
0
    def test_sample_point_from_gp(self):
        """Test that sampling points from the GP works.

        Two things are checked: (1) drawing the same number of samples again
        after ``reset_to_most_recent_seed()`` reproduces the first draws
        exactly, and (2) sampling with zero noise at a historical point that
        was recorded with zero noise variance returns that point's value.
        """
        noiseless_point = SamplePoint([0.0, 1.0], -1.0, 0.0)
        noisy_point = SamplePoint([2.0, 2.5], 1.0, 0.1)
        covariance = SquareExponential([1.0, 1.0, 1.0])
        historical_data = HistoricalData(
            len(noiseless_point.point),
            [noiseless_point, noisy_point],
        )
        gaussian_process = GaussianProcess(covariance, historical_data)

        def fill_with_draws(buffer):
            """Overwrite every slot of ``buffer`` with a fresh GP sample at ``noisy_point``."""
            for slot in xrange(buffer.size):
                buffer[slot] = gaussian_process.sample_point_from_gp(noisy_point.point, 0.001)
            return buffer

        first_draws = fill_with_draws(numpy.zeros(3))

        gaussian_process._gaussian_process.reset_to_most_recent_seed()
        # Start from ones (not zeros) so untouched slots could not match by accident.
        repeated_draws = fill_with_draws(numpy.ones(3))

        # Exact match b/c we should've run over the exact same computations
        self.assert_vector_within_relative(repeated_draws, first_draws, 0.0)

        # Sampling from a historical point (that had 0 noise) should produce the same value associated w/that point
        sampled_value = gaussian_process.sample_point_from_gp(noiseless_point.point, 0.0)
        machine_eps = numpy.finfo(numpy.float64).eps
        self.assert_scalar_within_relative(sampled_value, noiseless_point.value, machine_eps)
Пример #7
0
    def optimize(self, do_optimize=True, **kwargs):
        """Fit one hyperparameter vector by minimizing ``self.nll`` and rebuild the GP models.

        The starting point ``self.p0`` is a uniform random vector of length
        ``1 + self.dim + self._num_derivatives + 1`` when ``self.prior`` is None,
        otherwise a single draw from ``self.prior``. The minimizer's solution is
        stored as the only entry of ``self.hypers``.

        Each entry of ``self.hypers`` is skipped if any component lies outside
        ``[-20, 20]``; otherwise it is exponentiated, its first ``self.dim + 1``
        components are used as covariance hyperparameters, and the remaining
        components are used as noise when ``self.noisy`` is set (a fixed
        ``1e-8`` per output otherwise). The resulting models are stored in
        ``self._models`` and a ``GaussianProcessMCMC`` built from the accepted
        hyperparameters is stored in ``self._gaussian_process_mcmc``.

        Parameters
        ----------
        do_optimize: boolean
            Accepted for interface compatibility; not read by this method.
        **kwargs
            Accepted for interface compatibility; not read by this method.
        """

        if self.prior is None:
            self.p0 = numpy.random.rand(1 + self.dim + self._num_derivatives +
                                        1)
        else:
            self.p0 = self.prior.sample_from_prior(1)

        # NOTE(review): presumably `nll` is the negative log-likelihood -- confirm.
        self.hypers = [optimize.minimize(self.nll, self.p0.ravel()).x]

        self.is_trained = True
        self._models = []
        hypers_list = []
        noises_list = []
        for sample in self.hypers:
            # Call form parses on both Python 2 and Python 3; the Python 2
            # output is identical to the former `print sample` statement.
            print(sample)
            # `+` on boolean arrays acts as an elementwise OR: reject the
            # sample if any component falls outside [-20, 20].
            if numpy.any((-20 > sample) + (sample > 20)):
                continue
            sample = numpy.exp(sample)
            # Instantiate a GP for each hyperparameter configuration
            cov_hyps = sample[:(self.dim + 1)]
            hypers_list.append(cov_hyps)
            se = SquareExponential(cov_hyps)
            if self.noisy:
                noise = sample[(self.dim + 1):]
            else:
                # One fixed, tiny noise value per output (function value plus derivatives).
                noise = numpy.array((1 + self._num_derivatives) * [1.e-8])
            noises_list.append(noise)
            model = GaussianProcess(se, noise, self._historical_data,
                                    self.derivatives)
            self._models.append(model)

        self._gaussian_process_mcmc = GaussianProcessMCMC(
            numpy.array(hypers_list), numpy.array(noises_list),
            self._historical_data, self.derivatives)
Пример #8
0
    def test_sample_point_from_gp(self):
        """Exercise ``sample_point_from_gp`` for reproducibility and exactness.

        After ``reset_to_most_recent_seed()`` the same three draws must be
        reproduced exactly, and a zero-noise draw at a zero-noise historical
        point must return that point's recorded value (to machine epsilon).
        """
        num_draws = 3
        draw_noise_variance = 0.001

        exact_point = SamplePoint([0.0, 1.0], -1.0, 0.0)
        fuzzy_point = SamplePoint([2.0, 2.5], 1.0, 0.1)
        covariance = SquareExponential([1.0, 1.0, 1.0])
        dim = len(exact_point.point)
        historical_data = HistoricalData(dim, [exact_point, fuzzy_point])
        gaussian_process = GaussianProcess(covariance, historical_data)

        original_run = numpy.zeros(num_draws)
        for k in xrange(num_draws):
            original_run[k] = gaussian_process.sample_point_from_gp(fuzzy_point.point, draw_noise_variance)

        gaussian_process._gaussian_process.reset_to_most_recent_seed()

        replayed_run = numpy.ones(num_draws)
        for k in xrange(num_draws):
            replayed_run[k] = gaussian_process.sample_point_from_gp(fuzzy_point.point, draw_noise_variance)

        # Exact match b/c we should've run over the exact same computations
        self.assert_vector_within_relative(replayed_run, original_run, 0.0)

        # Sampling from a historical point (that had 0 noise) should produce the same value associated w/that point
        drawn = gaussian_process.sample_point_from_gp(exact_point.point, 0.0)
        self.assert_scalar_within_relative(
            drawn,
            exact_point.value,
            numpy.finfo(numpy.float64).eps,
        )
Пример #9
0
    def test_gp_construction_singular_covariance_matrix(self):
        """Test that the GaussianProcess ctor raises on duplicate, noise-free sampled points.

        The historical data contains the same point twice with zero noise
        variance; constructing a GP from it is expected to raise
        ``C_GP.SingularMatrixException``.
        """
        # argmax over a boolean array yields the first index whose entry is True.
        has_samples = numpy.greater_equal(self.num_sampled_list, 1)
        env_index = numpy.argmax(has_samples)
        domain, gaussian_process = self.gp_test_environments[env_index]

        origin_point = SamplePoint([0.0] * domain.dim, 1.0, 0.0)
        # The repeated point appears twice below, with noise_variance = 0.0.
        repeated_point = SamplePoint([1.0] * domain.dim, 1.0, 0.0)
        sampled_points = [origin_point, repeated_point, repeated_point]

        historical_data = HistoricalData(len(origin_point.point), sampled_points)
        with pytest.raises(C_GP.SingularMatrixException):
            GaussianProcess(gaussian_process.get_covariance_copy(), historical_data)
def moe_compute_best_pt_info(moe_exp, covariance_info, confidence=None,
                             mean_fxn_info=None, sample_pts=None, debug=False):
    """Select the candidate point with the smallest GP mean (or upper confidence bound).

    A GP is built from ``moe_exp.historical_data`` with a ``SquareExponential``
    covariance using ``covariance_info['hyperparameters']``. The GP mean and
    the diagonal of the GP variance are evaluated at ``sample_pts``, and the
    point minimizing the selection criterion is returned.

    :param moe_exp: experiment object; ``historical_data.num_sampled`` and
        ``historical_data.points_sampled`` are read from it
    :param covariance_info: mapping with a ``'hyperparameters'`` entry
    :type covariance_info: dict
    :param confidence: if None, minimize the GP mean; otherwise minimize the
        upper end of the ``scipy.stats.norm.interval(confidence, ...)`` interval
        around the mean
    :param mean_fxn_info: forwarded to the ``GaussianProcess`` constructor
    :param sample_pts: candidate points; defaults to the points already sampled
    :param debug: forwarded to ``compute_mean_of_points``
    :return: ``[best_point, mean_at_best, variance_at_best]``, or the pair
        ``(None, None)`` when no points have been sampled yet

    """
    if moe_exp.historical_data.num_sampled <= 0:
        # NOTE(review): this is a 2-tuple while the normal result has three
        # entries; kept as-is because callers may depend on it -- confirm.
        return None, None
    covar = SquareExponential(covariance_info['hyperparameters'])
    cpp_gp = GaussianProcess(covar, moe_exp.historical_data,
                             mean_fxn_info=mean_fxn_info)
    # Identity check on purpose: `== None` on an ndarray compares elementwise
    # on current NumPy, which makes the `if` raise "truth value is ambiguous".
    if sample_pts is None:
        sample_pts = np.array(moe_exp.historical_data.points_sampled)
    cpp_mu = cpp_gp.compute_mean_of_points(sample_pts, debug)
    # Keep only the per-point variances (diagonal of the returned matrix).
    cpp_var = np.diag(cpp_gp.compute_variance_of_points(sample_pts))
    if confidence is None:
        minidx = np.argmin(cpp_mu)
    else:
        # Index [1] selects the upper end of the confidence interval.
        upper_conf = scipy.stats.norm.interval(confidence, loc=cpp_mu,
                                               scale=np.sqrt(cpp_var))[1]
        minidx = np.argmin(upper_conf)
    return [sample_pts[minidx], cpp_mu[minidx], cpp_var[minidx]]
Пример #11
0
    def test_python_and_cpp_return_same_mu_and_gradient(self):
        """Compare the GP mean and its gradient between the Python and C++ GPs.

        Several batches of uniformly random points are checked per test
        environment and per entry of ``self.num_to_sample_list``.
        """
        trials = 4
        mean_tol = 3.0e-13
        grad_mean_tol = 3.0e-12

        for domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()

            # Mirror the Python GP with a C++ GP using the same hyperparameters and data.
            cpp_gp = GaussianProcess(
                SquareExponential(python_cov.hyperparameters),
                historical_data,
            )

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(trials):
                    pts = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    # Mean
                    mean_from_cpp = cpp_gp.compute_mean_of_points(pts)
                    mean_from_python = python_gp.compute_mean_of_points(pts)
                    self.assert_vector_within_relative(mean_from_python, mean_from_cpp, mean_tol)

                    # Gradient of the mean
                    grad_from_cpp = cpp_gp.compute_grad_mean_of_points(pts)
                    grad_from_python = python_gp.compute_grad_mean_of_points(pts)
                    self.assert_vector_within_relative(grad_from_python, grad_from_cpp, grad_mean_tol)
Пример #12
0
def _make_gp_from_params(params):
    """Build and return a gaussian process from the request params dict.

    When ``optimizer_info.optimizer_type`` equals ``L_BFGS_B_OPTIMIZER`` a
    ``pythonGaussianProcess`` is returned; otherwise a ``GaussianProcess`` is
    returned.

    ``params`` has the following form::

        params = {
            'gp_historical_info': <instance of :class:`moe.views.schemas.base_schemas.GpHistoricalInfo`>,
            'domain_info': <instance of :class:`moe.views.schemas.base_schemas.DomainInfo`>,
            'covariance_info': <instance of :class:`moe.views.schemas.base_schemas.CovarianceInfo`>,
            }

    An optional ``'optimizer_info'`` entry is also consulted for its
    ``'optimizer_type'``.

    :param params: The request params dict
    :type params: dict
    :return: gaussian process built from the sampled points and covariance described by ``params``

    """
    gp_historical_info = params.get("gp_historical_info")
    domain_info = params.get("domain_info")

    # One SamplePoint per historical observation.
    sample_point_list = [
        SamplePoint(observation['point'], observation['value'], observation['value_var'])
        for observation in gp_historical_info.get('points_sampled')
    ]

    optimizer_type = params.get('optimizer_info', {}).get('optimizer_type', None)

    if optimizer_type == L_BFGS_B_OPTIMIZER:
        # NOTE(review): "python" presumably selects the Python covariance implementation -- confirm.
        covariance_of_process = _make_covariance_of_process_from_params(params, "python")
        gp_class = pythonGaussianProcess
    else:
        covariance_of_process = _make_covariance_of_process_from_params(params)
        gp_class = GaussianProcess

    return gp_class(
        covariance_of_process,
        HistoricalData(domain_info.get('dim'), sample_point_list),
    )
Пример #13
0
    def test_interface_returns_same_as_cpp(self):
        """Verify that the /gp/ei endpoint agrees with the C++ interface.

        For each GP test environment, expected improvement at 10 uniformly
        random domain points is computed through an ``ExpectedImprovement``
        evaluator on a C++ GP and through a POST to ``self.endpoint``; the two
        answers must match within a relative tolerance.
        """
        rel_tol = 1.0e-11
        for python_domain, python_gp in self.gp_test_environments:
            python_cov, historical_data = python_gp.get_core_data_copy()
            cpp_gp = GaussianProcess(SquareExponential(python_cov.hyperparameters), historical_data)

            eval_points = python_domain.generate_uniform_random_points_in_domain(10)

            # EI from C++
            ei_evaluator = ExpectedImprovement(cpp_gp, None)
            # TODO(GH-99): Change test case to have the right shape:
            # (num_to_evaluate, num_to_sample, dim)
            # Here we assume the shape is (num_to_evaluate, dim) so we insert an axis, making num_to_sample = 1.
            # Also might be worth testing more num_to_sample values (will require manipulating C++ RNG state).
            batched_points = eval_points[:, numpy.newaxis, :]
            expected_ei = ei_evaluator.evaluate_at_point_list(batched_points)

            # EI from REST
            payload = self._build_json_payload(
                python_domain,
                python_cov,
                historical_data,
                eval_points.tolist(),
            )
            response = self.testapp.post(self.endpoint, payload)
            decoded = GpEiResponse().deserialize(json.loads(response.body))
            actual_ei = numpy.asarray(decoded.get('expected_improvement'))

            self.assert_vector_within_relative(actual_ei, expected_ei, rel_tol)
Пример #14
0
    def train(self, do_optimize=True, **kwargs):
        """
        Sample hyperparameter configurations with MCMC and build one GP per
        accepted sample.

        When ``do_optimize`` is true, an ``emcee.EnsembleSampler`` with
        ``self.n_chains`` walkers is run on ``self.compute_log_likelihood``.
        On the first call (``self.burned`` false) the walkers are initialised
        uniformly at random, or from ``self.prior`` when one is set, and
        ``self.burnin_steps`` burn-in steps are run. The sampler is then run
        for ``self.chain_length`` steps; the final walker positions are kept in
        ``self.p0`` as the start of the next call, and the last chain entries
        of ``self.n_hypers`` walkers picked with ``numpy.random.choice`` become
        ``self.hypers``.

        Every entry of ``self.hypers`` with all components inside
        ``[-20, 20]`` is exponentiated and turned into a ``GaussianProcess``
        stored in ``self._models``; a ``GaussianProcessMCMC`` over all accepted
        samples is stored in ``self._gaussian_process_mcmc``.

        Parameters
        ----------
        do_optimize: boolean
            If true, perform MCMC sampling; otherwise the hyperparameters
            already stored in ``self.hypers`` are used.
        **kwargs
            Not read by this method.
        """

        if do_optimize:
            n_params = 1 + self.dim + self._num_derivatives + 1

            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_chains, n_params, self.compute_log_likelihood)

            # Burn-in happens only on the first call
            if not self.burned:
                # Initial walker positions: uniform random, or drawn from the prior
                if self.prior is None:
                    self.p0 = numpy.random.rand(self.n_chains, n_params)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_chains)

                self.p0, _, _ = sampler.run_mcmc(self.p0, self.burnin_steps, rstate0=self.rng)
                self.burned = True

            # Main sampling run; the end position seeds the next call
            self.p0, _, _ = sampler.run_mcmc(self.p0, self.chain_length, rstate0=self.rng)

            # Take the last sample from each selected walker
            chain = sampler.chain
            chosen_walkers = numpy.random.choice(self.n_chains, self.n_hypers)
            self.hypers = chain[chosen_walkers, -1]

        self.is_trained = True
        self._models = []
        accepted_cov_hypers = []
        accepted_noises = []
        n_cov_hypers = self.dim + 1
        for log_sample in self.hypers:
            # Skip samples with any component outside [-20, 20]
            if numpy.any((-20 > log_sample) | (log_sample > 20)):
                continue
            values = numpy.exp(log_sample)

            # Leading entries parameterize the covariance
            cov_hyps = values[:n_cov_hypers]
            accepted_cov_hypers.append(cov_hyps)
            kernel = SquareExponential(cov_hyps)

            # Remaining entries are the noise terms, unless noise is disabled
            if self.noisy:
                noise = values[n_cov_hypers:]
            else:
                noise = numpy.array([1.e-8] * (1 + self._num_derivatives))
            accepted_noises.append(noise)

            self._models.append(
                GaussianProcess(kernel, noise, self._historical_data, self.derivatives)
            )

        self._gaussian_process_mcmc = GaussianProcessMCMC(
            numpy.array(accepted_cov_hypers),
            numpy.array(accepted_noises),
            self._historical_data,
            self.derivatives,
        )
Пример #15
0
        ### update noise in historical data
        updated_points_sampled_noise_variance = create_array_points_sampled_noise_variance(
            current_hist_data.points_sampled, hyperparameters_noise)
        ### create new Historical data object with updated values
        new_historical_data = HistoricalData(dim=problem.obj_func_min.getDim())
        # new_historical_data.append_historical_data(current_hist_data.points_sampled,
        #                                            current_hist_data.points_sampled_value,
        #                                            updated_points_sampled_noise_variance)
        new_historical_data.append_historical_data(
            current_hist_data.points_sampled,
            current_hist_data.points_sampled_value,
            current_hist_data.points_sampled_noise_variance)

        ### Use new hyperparameters -- this requires instantiating a new GP object
        kg_cov_cpp = cppCovariance(hyperparameters=hypers_GP)
        kg_gp_cpp = GaussianProcess(kg_cov_cpp, new_historical_data)

    # ================================================================================================================ #
    #                                    Find s and point that maximize KG/cost                                        #
    # ================================================================================================================ #
    discrete_pts_x = problem.obj_func_min.get_moe_domain(
    ).generate_uniform_x_points_in_domain(num_discretization)
    discrete_pts = np.hstack(
        (problem.obj_func_min.getSearchDomain()[0, -1] * np.ones(
            (num_discretization, 1)), discrete_pts_x))
    all_mu = kg_gp_cpp.compute_mean_of_points(discrete_pts)
    sorted_idx = np.argsort(all_mu)
    all_S_xprime = discrete_pts[
        sorted_idx[-num_x_prime:], :]  # select the last num_x_prime samples

    # ================================================================================================================ #