Пример #1
0
    def quadrature(self):
        """Estimate a ratio of two integrals with vanilla Bayesian quadrature.

        For each iteration ``i`` in ``1 .. budget - 1`` a
        ``VanillaBayesianQuadratureLoop`` is run for a single step on the
        surrogate of ``r``; the newest point and its value are read from the
        loop state, the value is multiplied by ``self.q.sample`` at that point,
        and both GPs (``r`` and ``rq``) are refitted on all points collected so
        far. ``self.results[i]`` is then set to ``integral(rq) / integral(r)``.

        Reads ``budget``, ``display_step`` and ``plot_iterations`` from
        ``self.options``.

        :return: ``self.results[-1]``.
        """
        # Local import, as in the original code.
        from emukit.core.loop.user_function import UserFunctionWrapper

        def _rp_emukit(x: np.ndarray) -> np.ndarray:
            # Integrand handed to Emukit: first element returned by
            # ``self.r.sample`` times ``self.p``, as an (n, 1) column.
            n, _ = x.shape
            res = self.r.sample(x)[0] * self.p(x)
            return np.array(res).reshape(n, 1)

        # The wrapper does not depend on the iteration, so build it once.
        user_function = UserFunctionWrapper(_rp_emukit)

        budget = self.options['budget']
        # Row 0 of these buffers is never written or read below: collected
        # points live in rows 1..i.
        phis = np.empty((budget, 1))
        rs = np.empty((budget, ))
        rqs = np.empty((budget, ))

        phi_init = self.p.mean.reshape(1, 1)
        r_init = np.array(self.r.sample(phi_init))

        # NOTE(review): the same kernel object is handed to both GPs below -
        # confirm that sharing it is intended.
        kern = GPy.kern.RBF(input_dim=1, variance=0.1, lengthscale=0.5)

        rq_init = r_init * self.q.sample(phi_init)
        r_gp = GPy.models.GPRegression(phi_init, r_init.reshape(1, 1), kern)
        rq_gp = GPy.models.GPRegression(phi_init, rq_init.reshape(1, 1), kern)
        r_model = _wrap_emukit(r_gp)

        for i in range(1, budget):
            # One quadrature step: the loop appends exactly one new point.
            r_loop = VanillaBayesianQuadratureLoop(model=r_model)
            r_loop.run_loop(user_function, 1)
            phi = r_loop.loop_state.X[-1, :]
            r = r_loop.loop_state.Y[-1]
            rq = r * self.q.sample(phi)

            phis[i, :] = phi
            rs[i] = r
            rqs[i] = rq

            # Refit both surrogates on every point collected so far.
            r_gp.set_XY(phis[1:i + 1, :], rs[1:i + 1].reshape(-1, 1))
            rq_gp.set_XY(phis[1:i + 1, :], rqs[1:i + 1].reshape(-1, 1))
            r_model = _wrap_emukit(r_gp)
            rq_model = _wrap_emukit(rq_gp)
            r_int = r_model.integrate()[0]
            q_int = rq_model.integrate()[0]
            self.results[i] = q_int / r_int
            # NOTE(review): with display_step == 1 the test ``i % 1 == 1`` is
            # never true - confirm the intended reporting cadence.
            if i % self.options['display_step'] == 1:
                # The estimate is q_int / r_int, so q_int is the numerator and
                # r_int the denominator.
                print('Samples', phi, "Numerator: ", q_int, "Denominator",
                      r_int)
                if self.options['plot_iterations']:
                    self.draw_samples(i, phis, rs, rqs, r_gp, rq_gp)
                    print('Iteration', i)
                    plt.show()
        return self.results[-1]
Пример #2
0
def create_vanilla_bq_loop_with_rbf_kernel(X: np.ndarray, Y: np.ndarray, integral_bounds: List,
                                           rbf_lengthscale: float=1.0, rbf_variance: float=1.0) -> \
        VanillaBayesianQuadratureLoop:
    """
    Build a vanilla Bayesian quadrature loop on top of a GPy regression model with an RBF kernel.

    :param X: initial training point locations, shape (n_points, input_dim)
    :param Y: initial training point function values, shape (n_points, 1)
    :param integral_bounds: list with one (lower, upper) tuple per input dimension, i.e.,
    [(lb_1, ub_1), (lb_2, ub_2), ..., (lb_D, ub_D)].
    :param rbf_lengthscale: lengthscale of the rbf kernel, must be positive, defaults to 1.
    :param rbf_variance: variance of the rbf kernel, must be positive, defaults to 1.
    :return: The vanilla BQ loop
    :raises ValueError: if the number of bounds differs from ``X.shape[1]`` or if the lengthscale or the
    variance is not positive.
    """
    n_bounds = len(integral_bounds)
    n_dims = X.shape[1]
    if n_bounds != n_dims:
        raise ValueError(
            f"number of integral bounds {n_bounds} provided does not match the input dimension {n_dims}.")
    if rbf_lengthscale <= 0:
        raise ValueError(f"rbf lengthscale must be positive. The current value is {rbf_lengthscale}.")
    if rbf_variance <= 0:
        raise ValueError(f"rbf variance must be positive. The current value is {rbf_variance}.")

    # GPy regression model carrying the RBF kernel.
    rbf_kernel = GPy.kern.RBF(input_dim=X.shape[1], lengthscale=rbf_lengthscale, variance=rbf_variance)
    gpy_model = GPy.models.GPRegression(X=X, Y=Y, kernel=rbf_kernel)

    # Emukit wrappers: kernel -> quadrature kernel -> base GP -> BQ method -> loop.
    quadrature_kernel = QuadratureRBF(RBFGPy(gpy_model.kern), integral_bounds=integral_bounds)
    base_gp = BaseGaussianProcessGPy(kern=quadrature_kernel, gpy_model=gpy_model)
    bq_method = VanillaBayesianQuadrature(base_gp=base_gp)
    return VanillaBayesianQuadratureLoop(model=bq_method)
Пример #3
0
def create_vanilla_bq_loop_with_rbf_kernel(
    X: np.ndarray,
    Y: np.ndarray,
    integral_bounds: Optional[BoundsType] = None,
    measure: Optional[IntegrationMeasure] = None,
    rbf_lengthscale: float = 1.0,
    rbf_variance: float = 1.0,
) -> VanillaBayesianQuadratureLoop:
    """Build a vanilla Bayesian quadrature loop around a GPy RBF regression model.

    :param X: Initial training point locations, shape (n_points, input_dim).
    :param Y: Initial training point function values, shape (n_points, 1).
    :param integral_bounds: List of d (lower, upper) tuples, one per input dimension,
                            i.e., [(lb_1, ub_1), (lb_2, ub_2), ..., (lb_d, ub_d)].
                            Passed on together with ``measure`` to
                            ``create_emukit_model_from_gpy_model``.
    :param measure: An integration measure. Its ``input_dim`` must equal ``X.shape[1]``.
    :param rbf_lengthscale: The lengthscale of the rbf kernel, must be positive, defaults to 1.
    :param rbf_variance: The variance of the rbf kernel, must be positive, defaults to 1.
    :return: The vanilla BQ loop.
    :raises ValueError: If the measure or the bounds do not match the dimensionality of ``X``, or if the
                        lengthscale or the variance is not positive.

    """
    if measure is not None:
        measure_dim, data_dim = measure.input_dim, X.shape[1]
        if measure_dim != data_dim:
            raise ValueError(
                "Dimensionality of measure ({}) does not match the dimensionality of the data ({}).".format(
                    measure_dim, data_dim))

    if integral_bounds is not None:
        n_bounds, data_dim = len(integral_bounds), X.shape[1]
        if n_bounds != data_dim:
            raise ValueError(
                "Dimension of integral bounds ({}) does not match the input dimension of X ({}).".format(
                    n_bounds, data_dim))

    # Both kernel hyperparameters share the same positivity rule and message template.
    for label, value in (("lengthscale", rbf_lengthscale), ("variance", rbf_variance)):
        if value <= 0:
            raise ValueError("rbf {} must be positive. The current value is {}.".format(label, value))

    rbf_kernel = GPy.kern.RBF(input_dim=X.shape[1], lengthscale=rbf_lengthscale, variance=rbf_variance)
    gpy_model = GPy.models.GPRegression(X=X, Y=Y, kernel=rbf_kernel)

    # Bounds and measure are both forwarded; resolving which one applies is left to this helper.
    base_gp = create_emukit_model_from_gpy_model(
        gpy_model=gpy_model, integral_bounds=integral_bounds, measure=measure)
    bq_method = VanillaBayesianQuadrature(base_gp=base_gp, X=X, Y=Y)
    return VanillaBayesianQuadratureLoop(model=bq_method)
Пример #4
0
def create_vanilla_bq_loop_with_rbf_kernel(
    X: np.ndarray,
    Y: np.ndarray,
    integral_bounds: Optional[List[Tuple[float, float]]],
    measure: Optional[IntegrationMeasure],
    rbf_lengthscale: float = 1.0,
    rbf_variance: float = 1.0,
) -> VanillaBayesianQuadratureLoop:
    """
    Build a vanilla Bayesian quadrature loop around a GPy regression model with an RBF kernel.

    :param X: initial training point locations, shape (n_points, input_dim)
    :param Y: initial training point function values, shape (n_points, 1)
    :param integral_bounds: list with one (lower, upper) tuple per input dimension, i.e.,
    [(lb_1, ub_1), (lb_2, ub_2), ..., (lb_D, ub_D)]. None means infinite bounds.
    :param measure: the integration measure. None means the standard Lebesgue measure is used.
    :param rbf_lengthscale: lengthscale of the rbf kernel, must be positive, defaults to 1.
    :param rbf_variance: variance of the rbf kernel, must be positive, defaults to 1.
    :return: The vanilla BQ loop
    :raises ValueError: if the bounds do not match ``X.shape[1]`` or if the lengthscale or the variance
    is not positive.
    """
    if integral_bounds is not None:
        n_bounds = len(integral_bounds)
        n_dims = X.shape[1]
        if n_bounds != n_dims:
            raise ValueError(
                f"dimension of integral bounds provided ({n_bounds}) does not match the input dimension "
                f"of X ({n_dims})."
            )
    if rbf_lengthscale <= 0:
        raise ValueError(f"rbf lengthscale must be positive. The current value is {rbf_lengthscale}.")
    if rbf_variance <= 0:
        raise ValueError(f"rbf variance must be positive. The current value is {rbf_variance}.")

    rbf_kernel = GPy.kern.RBF(input_dim=X.shape[1], lengthscale=rbf_lengthscale, variance=rbf_variance)
    gpy_model = GPy.models.GPRegression(X=X, Y=Y, kernel=rbf_kernel)

    # Bounds and measure are forwarded unchanged to the Emukit model factory.
    base_gp = create_emukit_model_from_gpy_model(
        gpy_model=gpy_model, integral_bounds=integral_bounds, measure=measure
    )
    bq_method = VanillaBayesianQuadrature(base_gp=base_gp, X=X, Y=Y)
    return VanillaBayesianQuadratureLoop(model=bq_method)
Пример #5
0
def loop():
    """Build a vanilla Bayesian quadrature loop on random two-dimensional data.

    Five random input points and five random target values are drawn with ``np.random.rand``; a GPy
    regression model with a unit RBF kernel is fitted to them and wrapped for integration against an
    unnormalized Lebesgue measure on ``[-1, 1] x [0, 1]``.

    :return: tuple ``(emukit_loop, init_size, x_init, y_init)``.
    """
    init_size = 5
    # Inputs are drawn before targets so the random stream is consumed in a fixed order.
    x_init = np.random.rand(init_size, 2)
    y_init = np.random.rand(init_size, 1)
    bounds = [(-1, 1), (0, 1)]

    rbf_kernel = GPy.kern.RBF(input_dim=x_init.shape[1], lengthscale=1.0, variance=1.0)
    gpy_model = GPy.models.GPRegression(X=x_init, Y=y_init, kernel=rbf_kernel)

    measure = LebesgueMeasure.from_bounds(bounds, normalized=False)
    quadrature_kernel = QuadratureRBFLebesgueMeasure(RBFGPy(gpy_model.kern), measure=measure)
    base_gp = BaseGaussianProcessGPy(kern=quadrature_kernel, gpy_model=gpy_model)
    bq_method = VanillaBayesianQuadrature(base_gp=base_gp, X=x_init, Y=y_init)
    bq_loop = VanillaBayesianQuadratureLoop(model=bq_method)
    return bq_loop, init_size, x_init, y_init
Пример #6
0
    def bq(self, verbose=True):
        """
        Marginalisation using vanilla Bayesian Quadrature - we use Amazon Emukit interface for this purpose.

        A GP surrogate of ``r`` (the exponential of the first value returned by
        ``self.model.log_sample``) is built by running a ``VanillaBayesianQuadratureLoop`` for
        ``self.options['naive_bq_budget']`` iterations; its integral is printed as the model evidence.
        For every row of ``self.model.X_test`` the second value returned by ``log_sample`` is evaluated
        at the same sample locations, a second surrogate is fitted to the products ``r * q`` and the
        prediction is the ratio of the two integrals. Predictions are thresholded at 0.5 and scored
        with ``self.model.score``. Sample locations live in log space: ``np.exp`` is applied before
        they are passed to ``log_sample``.

        :param verbose: if True, also print the ground-truth labels, the predicted labels and the
            predictive probabilities.
        :return: tuple ``(accuracy, precision, recall, f1)`` as returned by ``self.model.score``.
        """
        # Local import, as in the original code.
        from emukit.core.loop.user_function import UserFunctionWrapper

        def _rp_emukit(x: np.ndarray) -> np.ndarray:
            # Integrand handed to Emukit, returned as an (n, 1) column.
            n, _ = x.shape
            res = np.exp(self.model.log_sample(
                phi=np.exp(x))[0])  # + np.log(self.prior(x)))
            logging.info("Query point" + str(x) + " .Log Likelihood: " +
                         str(-np.log(res)))
            return np.array(res).reshape(n, 1)

        start = time.time()

        budget = self.options['naive_bq_budget']
        test_x = self.model.X_test
        test_y = self.model.Y_test

        # One row per test point, one column per sample location
        # (initial point + ``budget`` points collected by the loop).
        q = np.zeros((test_x.shape[0], budget + 1))

        log_phi_initial = np.zeros(self.dimensions).reshape(1, -1)
        r_initial = np.exp(
            self.model.log_sample(phi=np.exp(log_phi_initial))
            [0])  # + np.log(self.prior(log_phi_initial)))
        pred = np.zeros((test_x.shape[0], ))

        # Setting up kernel - Note we only marginalise over the lengthscale terms, other hyperparameters are set to the
        # MAP values.
        kern = GPy.kern.RBF(self.dimensions, variance=1., lengthscale=1.)

        r_gp = GPy.models.GPRegression(log_phi_initial,
                                       r_initial.reshape(1, -1), kern)
        r_model = self._wrap_emukit(r_gp)
        r_loop = VanillaBayesianQuadratureLoop(model=r_model)

        # Firstly, within the given allowance, compute an estimate of the model evidence. Model evidence is the common
        # denominator for all predictive distributions.
        r_loop.run_loop(user_function=UserFunctionWrapper(_rp_emukit),
                        stopping_condition=budget)
        log_phi = r_loop.loop_state.X
        r = r_loop.loop_state.Y.reshape(-1)

        quad_time = time.time()

        r_int = r_model.integrate()[0]  # Model evidence
        print(
            "Estimate of model evidence: ",
            r_int,
        )
        print("Model log-evidence ", np.log(r_int))

        for i_x in range(test_x.shape[0]):

            # Note that we do not active sample again for q, we just use the same samples sampled when we compute
            # the log-evidence.
            # q is the SECOND value returned by log_sample (the first one is the log-likelihood used for r
            # above), exactly as for the remaining sample locations in the loop below.
            _, q_initial = self.model.log_sample(phi=np.exp(log_phi_initial),
                                                 x=test_x[i_x, :])

            # Sample q at every location visited while estimating the evidence
            q[i_x, 0] = q_initial
            for i_b in range(1, budget + 1):
                log_phi_i = log_phi[i_b, :]
                _, q_i = self.model.log_sample(phi=np.exp(log_phi_i),
                                               x=test_x[i_x, :])
                q[i_x, i_b] = q_i
            # Construct rq vector
            q_x = q[i_x, :]

            # Fit a GP surrogate to q(\phi)r(\phi) and integrate it.
            # NOTE(review): the kernel object is shared with r_gp - confirm that this is intended.
            rq = r * q_x
            rq_gp = GPy.models.GPRegression(log_phi, rq.reshape(-1, 1), kern)
            rq_model = self._wrap_emukit(rq_gp)
            rq_int = rq_model.integrate()[0]

            # Now estimate the posterior

            pred[i_x] = rq_int / r_int

            logging.info('Progress: ' + str(i_x + 1) + '/' +
                         str(test_x.shape[0]))

        # Threshold the predictive probabilities at 0.5 to obtain hard labels.
        labels = pred.copy()
        labels[labels < 0.5] = 0
        labels[labels >= 0.5] = 1
        labels = np.squeeze(labels)
        non_zero = np.count_nonzero(np.squeeze(test_y) - np.squeeze(labels))
        accuracy, precision, recall, f1 = self.model.score(
            np.squeeze(test_y), labels)
        test_y = np.squeeze(test_y)
        print("------- Vanilla BQ Summary -------")
        print("Number of mismatch: " + str(non_zero))
        print('Accuracy:', accuracy)
        print('Precision:', precision)
        print('Recall:', recall)
        print('F1: ', f1)
        if verbose:
            print("Ground truth labels: " + str(test_y))
            print("Predictions: " + str(labels))
            print('Predictive Probabilities: ' + str(pred))
        end = time.time()
        print("Active Sampling Time: ", quad_time - start)
        print("Total Time elapsed: ", end - start)
        return accuracy, precision, recall, f1