Example #1
File: gp.py  Project: stken2050/GPy
    def set_XY(self, X=None, Y=None):
        """
        Set the input / output data of the model
        This is useful if we wish to change our existing data but maintain the same model

        :param X: input observations
        :type X: np.ndarray
        :param Y: output observations
        :type Y: np.ndarray
        """
        self.update_model(False)
        if Y is not None:
            if self.normalizer is not None:
                self.normalizer.scale_by(Y)
                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
                self.Y = Y
            else:
                self.Y = ObsAr(Y)
                self.Y_normalized = self.Y
        if X is not None:
            if self.X in self.parameters:
                # LVM models
                if isinstance(self.X, VariationalPosterior):
                    assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
                    self.unlink_parameter(self.X)
                    self.X = X
                    self.link_parameter(self.X)
                else:
                    self.unlink_parameter(self.X)
                    from ..core import Param
                    self.X = Param('latent mean',X)
                    self.link_parameter(self.X)
            else:
                self.X = ObsAr(X)
        self.update_model(True)
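
A minimal usage sketch (not part of the snippet above) of how set_XY is typically called on a fitted GPy model: it swaps in new observations while keeping the optimised kernel and likelihood parameters. The data arrays here are purely illustrative.

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)

m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
m.optimize()

# Replace the training data but keep the optimised hyperparameters.
X_new = np.random.rand(30, 1)
Y_new = np.sin(6 * X_new) + 0.05 * np.random.randn(30, 1)
m.set_XY(X_new, Y_new)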
Example #2
    def set_XY(self, X=None, Y=None):
        # print("set_XY: ")
        # print("X.shape: ",X.shape)
        # print("Y.shape: ",Y.shape)
        """
        Set the input / output data of the model
        This is useful if we wish to change our existing data but maintain the same model

        :param X: input observations
        :type X: np.ndarray
        :param Y: output observations
        :type Y: np.ndarray
        """
        X_list = []
        Y_list = []
        for i in np.arange(Y.shape[1]):
            X_list.append(X.copy())
            Y_list.append(np.atleast_2d(Y[:, i]).T)

        # print("len(X_list): ",len(X_list))
        # print("len(Y_list): ",len(Y_list))

        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)
        self.Y_metadata = {'output_index': self.output_index}

        # print("after build_XY: ")
        # print("X.shape: ",X.shape)
        # print("Y.shape: ",Y.shape)
        # print("self.output_index: ",self.output_index)

        self.update_model(False)
        if Y is not None:
            if self.normalizer is not None:
                self.normalizer.scale_by(Y)
                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
                self.Y = Y
            else:
                self.Y = ObsAr(Y)
                self.Y_normalized = self.Y
        if X is not None:
            if self.X in self.parameters:
                # LVM models
                if isinstance(self.X, VariationalPosterior):
                    assert isinstance(
                        X, type(self.X)
                    ), "The given X must have the same type as the X in the model!"
                    index = self.X._parent_index_
                    self.unlink_parameter(self.X)
                    self.X = X
                    self.link_parameter(self.X, index=index)
                else:
                    index = self.X._parent_index_
                    self.unlink_parameter(self.X)
                    from ..core import Param
                    self.X = Param('latent mean', X)
                    self.link_parameter(self.X, index=index)
            else:
                self.X = ObsAr(X)
        self.update_model(True)
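
This variant repacks a single multi-column Y into GPy's stacked multi-output format before storing it. Below is a small illustrative sketch (not from the snippet) of what util.multioutput.build_XY produces for such a repacking; the shapes in the comments are what the stacking yields for this toy case.

import numpy as np
import GPy

X = np.random.rand(5, 1)   # 5 inputs
Y = np.random.rand(5, 2)   # 2 outputs observed at the same inputs

X_list = [X.copy() for _ in range(Y.shape[1])]
Y_list = [Y[:, i:i + 1] for i in range(Y.shape[1])]

X_stacked, Y_stacked, output_index = GPy.util.multioutput.build_XY(X_list, Y_list)
print(X_stacked.shape)       # (10, 2): inputs stacked, extra column holds the output index
print(Y_stacked.shape)       # (10, 1)
print(output_index.ravel())  # [0 0 0 0 0 1 1 1 1 1]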
Example #3
    def set_XY(self, X=None, Y=None):
        self.update_model(False)
        if Y is not None:
            if self.normalizer is not None:
                self.normalizer.scale_by(Y)
                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
                self.Y = Y
            else:
                self.Y = ObsAr(Y)
                self.Y_normalized = self.Y
        if X is not None:
            self.X_untransformed = ObsAr(X)
        self.update_model(True)
Example #4
    def set_XY_group(self, X=None, Y=None, A=None):
        """
            Set the input / output data of the model
            This is useful if we wish to change our existing data but maintain the same model
            # NOTE: this only updates X, Y and A; it does not provide sequential updates. The input should be ALL previous data points, not only the current round's data points.

            :param X: input observations
            :type X: np.ndarray
            :param Y: output observations
            :type Y: np.ndarray
        """
        self.update_model(False)
        if Y is not None:
            if self.normalizer is not None:
                self.normalizer.scale_by(Y)
                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
                self.Y = Y
            else:
                self.Y = ObsAr(Y)
                self.Y_normalized = self.Y
        if X is not None:
            if self.X in self.parameters:
                # LVM models
                if isinstance(self.X, VariationalPosterior):
                    assert isinstance(
                        X, type(self.X)
                    ), "The given X must have the same type as the X in the model!"
                    index = self.X._parent_index_
                    self.unlink_parameter(self.X)
                    self.X = X
                    self.link_parameter(self.X, index=index)
                else:
                    index = self.X._parent_index_
                    self.unlink_parameter(self.X)
                    from ..core import Param
                    self.X = Param('latent mean', X)
                    self.link_parameter(self.X, index=index)
            else:
                self.X = ObsAr(X)

        # add update to A
        if A is not None:
            self.A = A

        self.update_model(True)
Example #5
    def set_Y(self, Y):
        """
        Set the output data of the model

        :param Y: output observations
        :type Y: np.ndarray or ObsAr
        """
        assert isinstance(Y, (np.ndarray, ObsAr))
        state = self.update_model()
        self.update_model(False)
        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y) if isinstance(Y, np.ndarray) else Y
            self.Y_normalized = self.Y
        self.update_model(state)
Example #6
    def comp_K(self, Z, qX):
        if self.Xs is None or self.Xs.shape != qX.mean.shape:
            from paramz import ObsAr
            self.Xs = ObsAr(np.empty((self.degree, ) + qX.mean.shape))
        mu, S = qX.mean.values, qX.variance.values
        S_sq = np.sqrt(S)
        for i in range(self.degree):
            self.Xs[i] = self.locs[i] * S_sq + mu
        return self.Xs
Example #7
    def set_XY(self, X=None, Y=None):
        if isinstance(X, list):
            X, _, self.output_index = util.multioutput.build_XY(X, None)
        if isinstance(Y, list):
            _, Y, self.output_index = util.multioutput.build_XY(Y, Y)

        self.update_model(False)
        if Y is not None:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        if X is not None:
            self.X = ObsAr(X)

        self.Y_metadata = {
            'output_index': self.output_index,
            'trials': np.ones(self.output_index.shape)
        }
        if isinstance(self.inference_method, expectation_propagation.EP):
            self.inference_method.reset()
        self.update_model(True)
Example #8
    def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
        Model.__init__(self, name=name)
        # accept the construction arguments
        self.X1 = ObsAr(X1)
        self.X2 = ObsAr(X2)
        self.Y = Y
        self.kern1, self.kern2 = kern1, kern2
        self.link_parameter(self.kern1)
        self.link_parameter(self.kern2)

        self.likelihood = likelihoods.Gaussian()
        self.likelihood.variance = noise_var
        self.link_parameter(self.likelihood)

        self.num_data1, self.input_dim1 = self.X1.shape
        self.num_data2, self.input_dim2 = self.X2.shape

        assert kern1.input_dim == self.input_dim1
        assert kern2.input_dim == self.input_dim2
        assert Y.shape == (self.num_data1, self.num_data2)
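
A usage sketch for a constructor like the one above, assuming it is the Kronecker-structured GP regression model exposed as GPy.models.GPKroneckerGaussianRegression; the grid data is illustrative and Y[i, j] is the observation at (X1[i], X2[j]).

import numpy as np
import GPy

X1 = np.linspace(0, 1, 10)[:, None]   # first input grid (10 x 1)
X2 = np.linspace(0, 1, 8)[:, None]    # second input grid (8 x 1)
Y = np.random.randn(10, 8)            # one observation per grid point

k1 = GPy.kern.RBF(1)
k2 = GPy.kern.Matern32(1)
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2, noise_var=0.1)
m.optimize()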
Example #9
    def set_X(self, X):
        """
        Set the input data of the model

        :param X: input observations
        :type X: np.ndarray
        """
        assert isinstance(X, np.ndarray)
        state = self.update_model()
        self.update_model(False)
        self.X = ObsAr(X)
        self.update_model(state)
Example #10
    def test_inference_EP_non_classification(self):
        from paramz import ObsAr
        X, Y, Y_extra_noisy = self.genNoisyData()
        deg_freedom = 5.
        init_noise_var = 0.08
        lik_studentT = GPy.likelihoods.StudentT(deg_free=deg_freedom,
                                                sigma2=init_noise_var)
        # like_gaussian_noise = GPy.likelihoods.MixedNoise()
        k = GPy.kern.RBF(1, variance=2., lengthscale=1.1)
        ep_inf_alt = GPy.inference.latent_function_inference.expectation_propagation.EP(
            max_iters=4, delta=0.5)
        # ep_inf_nested = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode='nested', max_iters=100, delta=0.5)
        m = GPy.core.GP(X=X,
                        Y=Y_extra_noisy,
                        kernel=k,
                        likelihood=lik_studentT,
                        inference_method=ep_inf_alt)
        K = m.kern.K(X)
        post_params, ga_approx, cav_params, log_Z_tilde = m.inference_method.expectation_propagation(
            K, ObsAr(Y_extra_noisy), lik_studentT, None)

        mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
        p, m, d = m.inference_method._inference(Y_extra_noisy,
                                                K,
                                                ga_approx,
                                                cav_params,
                                                lik_studentT,
                                                Y_metadata=None,
                                                Z_tilde=log_Z_tilde)
        p0, m0, d0 = super(
            GPy.inference.latent_function_inference.expectation_propagation.EP,
            ep_inf_alt).inference(
                k,
                X,
                lik_studentT,
                mu_tilde[:, None],
                mean_function=None,
                variance=1. / ga_approx.tau,
                K=K,
                Z_tilde=log_Z_tilde +
                np.sum(-0.5 * np.log(ga_approx.tau) + 0.5 *
                       (ga_approx.v * ga_approx.v * 1. / ga_approx.tau)))

        assert (np.sum(
            np.array([
                m - m0,
                np.sum(d['dL_dK'] - d0['dL_dK']),
                np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
                np.sum(d['dL_dm'] - d0['dL_dm']),
                np.sum(p._woodbury_vector - p0._woodbury_vector),
                np.sum(p.woodbury_inv - p0.woodbury_inv)
            ])) < 1e6)
Example #11
    def set_XY(self, X=None, Y=None):
        if isinstance(X, list):
            X, _, self.output_index = util.multioutput.build_XY(X, None)
        if isinstance(Y, list):
            _, Y, self.output_index = util.multioutput.build_XY(Y, Y)

        self.update_model(False)
        if Y is not None:
            if self.normalizer is not None:
                self.normalizer.scale_by(Y)
                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
                self.Y = Y
            else:
                self.Y = ObsAr(Y)
                self.Y_normalized = self.Y
        if X is not None:
            self.X = ObsAr(X)

        self.Y_metadata = {
            "output_index": self.output_index,
            "trials": np.ones(self.output_index.shape),
        }

        self.update_model(True)
Example #12
    def test_inference_EP(self):
        from paramz import ObsAr
        X, Y = self.genData()
        lik = GPy.likelihoods.Bernoulli()
        k = GPy.kern.RBF(1, variance=7., lengthscale=0.2)
        inf = GPy.inference.latent_function_inference.expectation_propagation.EP(
            max_iters=30, delta=0.5)
        self.model = GPy.core.GP(X=X,
                                 Y=Y,
                                 kernel=k,
                                 inference_method=inf,
                                 likelihood=lik)
        K = self.model.kern.K(X)

        post_params, ga_approx, cav_params, log_Z_tilde = self.model.inference_method.expectation_propagation(
            K, ObsAr(Y), lik, None)

        mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
        p, m, d = self.model.inference_method._inference(Y,
                                                         K,
                                                         ga_approx,
                                                         cav_params,
                                                         lik,
                                                         Y_metadata=None,
                                                         Z_tilde=log_Z_tilde)
        p0, m0, d0 = super(
            GPy.inference.latent_function_inference.expectation_propagation.EP,
            inf).inference(
                k,
                X,
                lik,
                mu_tilde[:, None],
                mean_function=None,
                variance=1. / ga_approx.tau,
                K=K,
                Z_tilde=log_Z_tilde +
                np.sum(-0.5 * np.log(ga_approx.tau) + 0.5 *
                       (ga_approx.v * ga_approx.v * 1. / ga_approx.tau)))

        assert (np.sum(
            np.array([
                m - m0,
                np.sum(d['dL_dK'] - d0['dL_dK']),
                np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
                np.sum(d['dL_dm'] - d0['dL_dm']),
                np.sum(p._woodbury_vector - p0._woodbury_vector),
                np.sum(p.woodbury_inv - p0.woodbury_inv)
            ])) < 1e6)
Example #13
    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
        if self.always_reset:
            self.reset()

        num_data, output_dim = Y.shape
        assert output_dim == 1, "ep in 1D only (for now!)"

        if Lm is None:
            Kmm = kern.K(Z)
            Lm = jitchol(Kmm)

        if psi1 is None:
            try:
                Kmn = kern.K(Z, X)
            except TypeError:
                Kmn = kern.psi1(Z, X).T
        else:
            Kmn = psi1.T

        if self.ep_mode=="nested":
            #Force EP at each step of the optimization
            self._ep_approximation = None
            post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
        elif self.ep_mode=="alternated":
            if getattr(self, '_ep_approximation', None) is None:
                #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
                post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
            else:
                #if we've already run EP, just use the existing approximation stored in self._ep_approximation
                post_params, ga_approx, log_Z_tilde = self._ep_approximation
        else:
            raise ValueError("ep_mode value not valid")

        mu_tilde = ga_approx.v / ga_approx.tau.astype(float)

        return super(EPDTC, self).inference(kern, X, Z, likelihood, ObsAr(mu_tilde[:,None]),
                                            mean_function=mean_function,
                                            Y_metadata=Y_metadata,
                                            precision=ga_approx.tau,
                                            Lm=Lm, dL_dKmm=dL_dKmm,
                                            psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde)
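
The ep_mode branch above distinguishes re-running EP at every optimiser step ("nested") from reusing a stored approximation between parameter updates ("alternated"). A small configuration sketch, assuming EPDTC accepts the same constructor arguments as the EP objects used in the tests above (max_iters, delta, ep_mode):

import GPy

# 'nested': EP is re-run at every step of the hyperparameter optimisation.
ep_nested = GPy.inference.latent_function_inference.expectation_propagation.EPDTC(
    ep_mode='nested', max_iters=50, delta=0.5)

# 'alternated' (the default): the stored EP factors are reused between updates.
ep_alternated = GPy.inference.latent_function_inference.expectation_propagation.EPDTC(
    ep_mode='alternated', max_iters=50, delta=0.5)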
Example #14
File: gp.py  Project: uberstig/GPy
class GP(Model):
    """
    General purpose Gaussian process model

    :param X: input observations
    :param Y: output observations
    :param kernel: a GPy kernel, defaults to rbf+white
    :param likelihood: a GPy likelihood
    :param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
    :rtype: model object
    :param Norm normalizer:
        normalize the outputs Y.
        Prediction will be un-normalized using this normalizer.
        If normalizer is True, we will normalize using Standardize.
        If normalizer is False, no normalization will be done.

    .. Note:: Multiple independent outputs are allowed using columns of Y


    """
    def __init__(self,
                 X,
                 Y,
                 kernel,
                 likelihood,
                 mean_function=None,
                 inference_method=None,
                 name='gp',
                 Y_metadata=None,
                 normalizer=False):
        super(GP, self).__init__(name)

        assert X.ndim == 2
        if isinstance(X, (ObsAr, VariationalPosterior)):
            self.X = X.copy()
        else:
            self.X = ObsAr(X)

        self.num_data, self.input_dim = self.X.shape

        assert Y.ndim == 2
        logger.info("initializing Y")

        if normalizer is True:
            self.normalizer = Standardize()
        elif normalizer is False:
            self.normalizer = None
        else:
            self.normalizer = normalizer

        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        elif isinstance(Y, np.ndarray):
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        else:
            self.Y = Y
            self.Y_normalized = self.Y

        if Y.shape[0] != self.num_data:
            #There can be cases where we want more inputs than outputs, for example if we have multiple latent
            #function values
            warnings.warn("There are more rows in your input data X, \
                         than in your output data Y, be VERY sure this is what you want"
                          )
        _, self.output_dim = self.Y.shape

        assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
        self.Y_metadata = Y_metadata

        assert isinstance(kernel, kern.Kern)
        #assert self.input_dim == kernel.input_dim
        self.kern = kernel

        assert isinstance(likelihood, likelihoods.Likelihood)
        self.likelihood = likelihood

        if self.kern._effective_input_dim != self.X.shape[1]:
            warnings.warn(
                "Your kernel has a different input dimension {} then the given X dimension {}. Be very sure this is what you want and you have not forgotten to set the right input dimenion in your kernel"
                .format(self.kern._effective_input_dim, self.X.shape[1]))

        #handle the mean function
        self.mean_function = mean_function
        if mean_function is not None:
            assert isinstance(self.mean_function, Mapping)
            assert mean_function.input_dim == self.input_dim
            assert mean_function.output_dim == self.output_dim
            self.link_parameter(mean_function)

        #find a sensible inference method
        logger.info("initializing inference method")
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(
                    likelihood, likelihoods.MixedNoise):
                inference_method = exact_gaussian_inference.ExactGaussianInference(
                )
            else:
                inference_method = expectation_propagation.EP()
                print("defaulting to " + str(inference_method) +
                      " for latent function inference")
        self.inference_method = inference_method

        logger.info("adding kernel and likelihood as parameters")
        self.link_parameter(self.kern)
        self.link_parameter(self.likelihood)
        self.posterior = None

    def to_dict(self, save_data=True):
        """
        Convert the object into a json serializable dictionary.
        Note: It uses the private method _save_to_input_dict of the parent.

        :param boolean save_data: if true, it adds the training data self.X and self.Y to the dictionary
        :return dict: json serializable dictionary containing the needed information to instantiate the object
        """
        input_dict = super(GP, self)._save_to_input_dict()
        input_dict["class"] = "GPy.core.GP"
        if not save_data:
            input_dict["X"] = None
            input_dict["Y"] = None
        else:
            try:
                input_dict["X"] = self.X.values.tolist()
            except:
                input_dict["X"] = self.X.tolist()
            try:
                input_dict["Y"] = self.Y.values.tolist()
            except:
                input_dict["Y"] = self.Y.tolist()
        input_dict["kernel"] = self.kern.to_dict()
        input_dict["likelihood"] = self.likelihood.to_dict()
        if self.mean_function is not None:
            input_dict["mean_function"] = self.mean_function.to_dict()
        input_dict["inference_method"] = self.inference_method.to_dict()
        #FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
        if self.Y_metadata is not None:
            input_dict["Y_metadata"] = self.Y_metadata
        if self.normalizer is not None:
            input_dict["normalizer"] = self.normalizer.to_dict()
        return input_dict

    @staticmethod
    def _format_input_dict(input_dict, data=None):
        import GPy
        import numpy as np
        if (input_dict['X'] is None) or (input_dict['Y'] is None):
            assert (data is not None)
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(
                data[1])
        elif data is not None:
            warnings.warn(
                "WARNING: The model has been saved with X,Y! The original values are being overridden!"
            )
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(
                data[1])
        else:
            input_dict["X"], input_dict["Y"] = np.array(
                input_dict['X']), np.array(input_dict['Y'])
        input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])
        input_dict[
            "likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(
                input_dict["likelihood"])
        mean_function = input_dict.get("mean_function")
        if mean_function is not None:
            input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(
                mean_function)
        else:
            input_dict["mean_function"] = mean_function
        input_dict[
            "inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(
                input_dict["inference_method"])

        #FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
        Y_metadata = input_dict.get("Y_metadata")
        input_dict["Y_metadata"] = Y_metadata

        normalizer = input_dict.get("normalizer")
        if normalizer is not None:
            input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(
                normalizer)
        else:
            input_dict["normalizer"] = normalizer
        return input_dict

    @staticmethod
    def _build_from_input_dict(input_dict, data=None):
        input_dict = GP._format_input_dict(input_dict, data)
        return GP(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=True, save_data=True)

    # The predictive variable to be used to predict using the posterior object's
    # woodbury_vector and woodbury_inv is defined as predictive_variable
    # as long as the posterior has the right woodbury entries.
    # It is the input variable used for the covariance between
    # X_star and the posterior of the GP.
    # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
    # Make sure to name this variable and the predict functions will "just work"
    # In maths the predictive variable is:
    #         K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
    #         W_{pp} := \texttt{Woodbury inv}
    #         p := _predictive_variable

    @property
    def _predictive_variable(self):
        return self.X

    def set_XY(self, X=None, Y=None):
        """
        Set the input / output data of the model
        This is useful if we wish to change our existing data but maintain the same model

        :param X: input observations
        :type X: np.ndarray
        :param Y: output observations
        :type Y: np.ndarray
        """
        self.update_model(False)
        if Y is not None:
            if self.normalizer is not None:
                self.normalizer.scale_by(Y)
                self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
                self.Y = Y
            else:
                self.Y = ObsAr(Y)
                self.Y_normalized = self.Y
        if X is not None:
            if self.X in self.parameters:
                # LVM models
                if isinstance(self.X, VariationalPosterior):
                    assert isinstance(
                        X, type(self.X)
                    ), "The given X must have the same type as the X in the model!"
                    index = self.X._parent_index_
                    self.unlink_parameter(self.X)
                    self.X = X
                    self.link_parameter(self.X, index=index)
                else:
                    index = self.X._parent_index_
                    self.unlink_parameter(self.X)
                    from ..core import Param
                    self.X = Param('latent mean', X)
                    self.link_parameter(self.X, index=index)
            else:
                self.X = ObsAr(X)
        self.update_model(True)

    def set_X(self, X):
        """
        Set the input data of the model

        :param X: input observations
        :type X: np.ndarray
        """
        self.set_XY(X=X)

    def set_Y(self, Y):
        """
        Set the output data of the model

        :param Y: output observations
        :type Y: np.ndarray
        """
        self.set_XY(Y=Y)

    def parameters_changed(self):
        """
        Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
        In particular in the GP class this method re-performs inference, recalculating the posterior and log marginal likelihood and gradients of the model

        .. warning::
            This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
            this method yourself, there may be unexpected consequences.
        """
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(
            self.kern, self.X, self.likelihood, self.Y_normalized,
            self.mean_function, self.Y_metadata)
        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
        if self.mean_function is not None:
            self.mean_function.update_gradients(self.grad_dict['dL_dm'],
                                                self.X)

    def log_likelihood(self):
        """
        The log marginal likelihood of the model, :math:`p(\mathbf{y})`; this is the objective function of the model being optimised
        """
        return self._log_marginal_likelihood

    def _raw_predict(self, Xnew, full_cov=False, kern=None):
        """
        For making predictions, does not account for normalization or likelihood

        full_cov is a boolean which defines whether the full covariance matrix
        of the prediction is computed. If full_cov is False (default), only the
        diagonal of the covariance is returned.

        .. math::
            p(f^*|X^*, X, Y) = \int p(f^*|f, X^*)\, p(f|X, Y)\, df
                             = \mathcal{N}\big(f^* \mid K_{x^*x}(K_{xx} + \Sigma)^{-1}Y,\; K_{x^*x^*} - K_{x^*x}(K_{xx} + \Sigma)^{-1}K_{xx^*}\big)
            \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
        """
        mu, var = self.posterior._raw_predict(
            kern=self.kern if kern is None else kern,
            Xnew=Xnew,
            pred_var=self._predictive_variable,
            full_cov=full_cov)
        if self.mean_function is not None:
            mu += self.mean_function.f(Xnew)
        return mu, var

    def predict(self,
                Xnew,
                full_cov=False,
                Y_metadata=None,
                kern=None,
                likelihood=None,
                include_likelihood=True):
        """
        Predict the function(s) at the new point(s) Xnew. This includes the
        likelihood variance added to the predicted underlying function
        (usually referred to as f).

        In order to predict without adding in the likelihood give
        `include_likelihood=False`, or refer to self.predict_noiseless().

        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray (Nnew x self.input_dim)
        :param full_cov: whether to return the full covariance matrix, or just
                         the diagonal
        :type full_cov: bool
        :param Y_metadata: metadata about the predicting point to pass to the
                           likelihood
        :param kern: The kernel to use for prediction (defaults to the model
                     kern). this is useful for examining e.g. subprocesses.
        :param include_likelihood: Whether or not to add likelihood noise to
                                   the predicted underlying latent function f.
        :type include_likelihood: bool

        :returns: (mean, var):
            mean: posterior mean, a Numpy array, Nnew x self.output_dim
            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False,
                 Nnew x Nnew otherwise

            If full_cov and self.output_dim > 1, the return shape of var is
            Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return
            shape is Nnew x Nnew. This is to allow for different normalizations
            of the output dimensions.

        Note: If you want the predictive quantiles (e.g. 95% confidence
        interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
        """

        # Predict the latent function values
        mean, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)

        if include_likelihood:
            # now push through likelihood
            if likelihood is None:
                likelihood = self.likelihood
            mean, var = likelihood.predictive_values(mean,
                                                     var,
                                                     full_cov,
                                                     Y_metadata=Y_metadata)

        if self.normalizer is not None:
            mean = self.normalizer.inverse_mean(mean)

            # We need to create 3d array for the full covariance matrix with
            # multiple outputs.
            if full_cov & (mean.shape[1] > 1):
                var = self.normalizer.inverse_covariance(var)
            else:
                var = self.normalizer.inverse_variance(var)

        return mean, var

    def predict_noiseless(self,
                          Xnew,
                          full_cov=False,
                          Y_metadata=None,
                          kern=None):
        """
        Convenience function to predict the underlying function of the GP (often
        referred to as f) without adding the likelihood variance to the
        predicted function.

        This is most likely what you want to use for your predictions.

        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray (Nnew x self.input_dim)
        :param full_cov: whether to return the full covariance matrix, or just
                         the diagonal
        :type full_cov: bool
        :param Y_metadata: metadata about the predicting point to pass to the likelihood
        :param kern: The kernel to use for prediction (defaults to the model
                     kern). this is useful for examining e.g. subprocesses.

        :returns: (mean, var):
            mean: posterior mean, a Numpy array, Nnew x self.output_dim
            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise

           If full_cov and self.output_dim > 1, the return shape of var is Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return shape is Nnew x Nnew.
           This is to allow for different normalizations of the output dimensions.

        Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
        """
        return self.predict(Xnew, full_cov, Y_metadata, kern, None, False)

    def predict_quantiles(self,
                          X,
                          quantiles=(2.5, 97.5),
                          Y_metadata=None,
                          kern=None,
                          likelihood=None):
        """
        Get the predictive quantiles around the prediction at X

        :param X: The points at which to make a prediction
        :type X: np.ndarray (Xnew x self.input_dim)
        :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
        :type quantiles: tuple
        :param kern: optional kernel to use for prediction
        :type kern: :py:class:`~GPy.kern.Kern`
        :returns: list of quantiles for each X and predictive quantiles for interval combination
        :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
        """
        m, v = self._raw_predict(X, full_cov=False, kern=kern)
        if likelihood is None:
            likelihood = self.likelihood

        quantiles = likelihood.predictive_quantiles(m,
                                                    v,
                                                    quantiles,
                                                    Y_metadata=Y_metadata)

        if self.normalizer is not None:
            quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
        return quantiles

    def predictive_gradients(self, Xnew, kern=None):
        """
        Compute the derivatives of the predicted latent function with respect
        to X*

        Given a set of points at which to predict X* (size [N*,Q]), compute the
        derivatives of the mean and variance. Resulting arrays are sized:

            dmu_dX* -- [N*, Q, D], where D is the number of outputs in this GP
            (usually one).
            dv_dX*  -- [N*, Q], (since all outputs have the same variance)

        Note that this is not the same as computing the mean and variance of
        the derivative of the function!

        :param X: The points at which to get the predictive gradients
        :type X: np.ndarray (Xnew x self.input_dim)
        :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]

        """
        if kern is None:
            kern = self.kern
        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))

        for i in range(self.output_dim):
            mean_jac[:, :, i] = kern.gradients_X(
                self.posterior.woodbury_vector[:, i:i + 1].T, Xnew,
                self._predictive_variable)

        # Gradients wrt the diagonal part k_{xx}
        dv_dX = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)

        # Grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
        if self.posterior.woodbury_inv.ndim == 3:
            var_jac = np.empty(dv_dX.shape +
                               (self.posterior.woodbury_inv.shape[2], ))
            var_jac[:] = dv_dX[:, :, None]
            for i in range(self.posterior.woodbury_inv.shape[2]):
                alpha = -2. * np.dot(kern.K(Xnew, self._predictive_variable),
                                     self.posterior.woodbury_inv[:, :, i])
                var_jac[:, :, i] += kern.gradients_X(alpha, Xnew,
                                                     self._predictive_variable)
        else:
            var_jac = dv_dX
            alpha = -2. * np.dot(kern.K(Xnew, self._predictive_variable),
                                 self.posterior.woodbury_inv)
            var_jac += kern.gradients_X(alpha, Xnew, self._predictive_variable)
        return mean_jac, var_jac

    def predict_jacobian(self, Xnew, kern=None, full_cov=False):
        """
        Compute the derivatives of the posterior of the GP.

        Given a set of points at which to predict X* (size [N*,Q]), compute the
        mean and variance of the derivative. Resulting arrays are sized:

         dL_dX* -- [N*, Q, D], where D is the number of outputs in this GP (usually one).
          Note that this is the mean and variance of the derivative,
          not the derivative of the mean and variance! (See predictive_gradients for that)

         dv_dX*  -- [N*, Q],    (since all outputs have the same variance)
          If there is missing data, it is not implemented for now, but
          there will be one output variance per output dimension.

        :param X: The points at which to get the predictive gradients.
        :type X: np.ndarray (Xnew x self.input_dim)
        :param kern: The kernel to compute the jacobian for.
        :param boolean full_cov: whether to return the cross-covariance terms between
        the N* Jacobian vectors

        :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
        """
        if kern is None:
            kern = self.kern

        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))

        for i in range(self.output_dim):
            mean_jac[:, :, i] = kern.gradients_X(
                self.posterior.woodbury_vector[:, i:i + 1].T, Xnew,
                self._predictive_variable)

        dK_dXnew_full = np.empty(
            (self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
        one = np.ones((1, 1))
        for i in range(self._predictive_variable.shape[0]):
            dK_dXnew_full[i] = kern.gradients_X(one, Xnew,
                                                self._predictive_variable[[i]])

        if full_cov:
            dK2_dXdX = kern.gradients_XX(one, Xnew)
        else:
            dK2_dXdX = kern.gradients_XX_diag(one, Xnew)
            #dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
            #for i in range(Xnew.shape[0]):
            #    dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])

        def compute_cov_inner(wi):
            if full_cov:
                var_jac = dK2_dXdX - np.einsum(
                    'qnm,msr->nsqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full
                )  # n,s = Xnew.shape[0], m = pred_var.shape[0]
            else:
                var_jac = dK2_dXdX - np.einsum(
                    'qnm,mnr->nqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
            return var_jac

        if self.posterior.woodbury_inv.ndim == 3:  # Missing data:
            if full_cov:
                var_jac = np.empty(
                    (Xnew.shape[0], Xnew.shape[0], Xnew.shape[1],
                     Xnew.shape[1], self.output_dim))
                for d in range(self.posterior.woodbury_inv.shape[2]):
                    var_jac[:, :, :, :, d] = compute_cov_inner(
                        self.posterior.woodbury_inv[:, :, d])
            else:
                var_jac = np.empty((Xnew.shape[0], Xnew.shape[1],
                                    Xnew.shape[1], self.output_dim))
                for d in range(self.posterior.woodbury_inv.shape[2]):
                    var_jac[:, :, :, d] = compute_cov_inner(
                        self.posterior.woodbury_inv[:, :, d])
        else:
            var_jac = compute_cov_inner(self.posterior.woodbury_inv)
        return mean_jac, var_jac

    def predict_wishart_embedding(self,
                                  Xnew,
                                  kern=None,
                                  mean=True,
                                  covariance=True):
        """
        Predict the wishart embedding G of the GP. This is the density of the
        input of the GP defined by the probabilistic function mapping f.
        G = J_mean.T*J_mean + output_dim*J_cov.

        :param array-like Xnew: The points at which to evaluate the magnification.
        :param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.

        Supplying only a part of the learning kernel gives insights into the density
        of the specific kernel part of the input function. E.g. one can see how dense the
        linear part of a kernel is compared to the non-linear part etc.
        """
        if kern is None:
            kern = self.kern

        mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
        mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
        Sigma = np.zeros(mumuT.shape)
        if var_jac.ndim == 4:  # Missing data
            Sigma = var_jac.sum(-1)
        else:
            Sigma = self.output_dim * var_jac

        G = 0.
        if mean:
            G += mumuT
        if covariance:
            G += Sigma
        return G

    def predict_wishard_embedding(self,
                                  Xnew,
                                  kern=None,
                                  mean=True,
                                  covariance=True):
        warnings.warn(
            "Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!",
            DeprecationWarning)
        return self.predict_wishart_embedding(Xnew, kern, mean, covariance)

    def predict_magnification(self,
                              Xnew,
                              kern=None,
                              mean=True,
                              covariance=True,
                              dimensions=None):
        """
        Predict the magnification factor as

        sqrt(det(G))

        for each point N in Xnew.

        :param bool mean: whether to include the mean of the wishart embedding.
        :param bool covariance: whether to include the covariance of the wishart embedding.
        :param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
        """
        G = self.predict_wishart_embedding(Xnew, kern, mean, covariance)
        if dimensions is None:
            dimensions = self.get_most_significant_input_dimensions()[:2]
        G = G[:, dimensions][:, :, dimensions]
        from ..util.linalg import jitchol
        mag = np.empty(Xnew.shape[0])
        for n in range(Xnew.shape[0]):
            try:
                mag[n] = np.sqrt(
                    np.exp(2 * np.sum(np.log(np.diag(jitchol(G[n, :, :]))))))
            except:
                mag[n] = np.sqrt(np.linalg.det(G[n, :, :]))
        return mag

    def posterior_samples_f(self, X, size=10, full_cov=True, **predict_kwargs):
        """
        Samples the posterior GP at the points X.

        :param X: The points at which to take the samples.
        :type X: np.ndarray (Nnew x self.input_dim)
        :param size: the number of a posteriori samples.
        :type size: int.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :returns: fsim: set of simulations
        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
        """
        m, v = self._raw_predict(X, full_cov=full_cov, **predict_kwargs)
        if self.normalizer is not None:
            m, v = self.normalizer.inverse_mean(
                m), self.normalizer.inverse_variance(v)

        def sim_one_dim(m, v):
            if not full_cov:
                return np.random.multivariate_normal(m.flatten(),
                                                     np.diag(v.flatten()),
                                                     size).T
            else:
                return np.random.multivariate_normal(m.flatten(), v, size).T

        if self.output_dim == 1:
            return sim_one_dim(m, v)
        else:
            fsim = np.empty((self.output_dim, X.shape[0], size))
            for d in range(self.output_dim):
                if full_cov and v.ndim == 3:
                    fsim[d] = sim_one_dim(m[:, d], v[:, :, d])
                elif (not full_cov) and v.ndim == 2:
                    fsim[d] = sim_one_dim(m[:, d], v[:, d])
                else:
                    fsim[d] = sim_one_dim(m[:, d], v)
        return fsim

    def posterior_samples(self,
                          X,
                          size=10,
                          full_cov=False,
                          Y_metadata=None,
                          likelihood=None,
                          **predict_kwargs):
        """
        Samples the posterior GP at the points X.

        :param X: the points at which to take the samples.
        :type X: np.ndarray (Nnew x self.input_dim.)
        :param size: the number of a posteriori samples.
        :type size: int.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
        :type noise_model: integer.
        :returns: Ysim: set of simulations,
        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
        """
        fsim = self.posterior_samples_f(X,
                                        size,
                                        full_cov=full_cov,
                                        **predict_kwargs)
        if likelihood is None:
            likelihood = self.likelihood
        if fsim.ndim == 3:
            for d in range(fsim.shape[0]):
                fsim[d] = likelihood.samples(fsim[d], Y_metadata=Y_metadata)
        else:
            fsim = likelihood.samples(fsim, Y_metadata=Y_metadata)
        return fsim

    def input_sensitivity(self, summarize=True):
        """
        Returns the sensitivity for each dimension of this model
        """
        return self.kern.input_sensitivity(summarize=summarize)

    def get_most_significant_input_dimensions(self, which_indices=None):
        return self.kern.get_most_significant_input_dimensions(which_indices)

    def optimize(self,
                 optimizer=None,
                 start=None,
                 messages=False,
                 max_iters=1000,
                 ipython_notebook=True,
                 clear_after_finish=False,
                 **kwargs):
        """
        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
        kwargs are passed to the optimizer. They can be:

        :param max_iters: maximum number of function evaluations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
        :type optimizer: string
        :param bool ipython_notebook: whether to use ipython notebook widgets or not.
        :param bool clear_after_finish: if in ipython notebook, we can clear the widgets after optimization.
        """
        self.inference_method.on_optimization_start()
        try:
            ret = super(GP, self).optimize(optimizer, start, messages,
                                           max_iters, ipython_notebook,
                                           clear_after_finish, **kwargs)
        except KeyboardInterrupt:
            print(
                "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
            )
            self.inference_method.on_optimization_end()
            raise
        return ret

    def infer_newX(self, Y_new, optimize=True):
        """
        Infer X for the new observed data *Y_new*.

        :param Y_new: the new observed data for inference
        :type Y_new: numpy.ndarray
        :param optimize: whether to optimize the location of new X (True by default)
        :type optimize: boolean
        :return: a tuple containing the posterior estimation of X and the model that optimize X
        :rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` and numpy.ndarray, :class:`~GPy.core.model.Model`)
        """
        from ..inference.latent_function_inference.inferenceX import infer_newX
        return infer_newX(self, Y_new, optimize=optimize)

    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
        """
        Calculation of the log predictive density

        .. math::
            p(y_{*}|D) = \int p(y_{*}|f_{*})\, p(f_{*}|\mu_{*}, \sigma^{2}_{*})\, df_{*}

        :param x_test: test locations (x_{*})
        :type x_test: (Nx1) array
        :param y_test: test observations (y_{*})
        :type y_test: (Nx1) array
        :param Y_metadata: metadata associated with the test points
        """
        mu_star, var_star = self._raw_predict(x_test)
        return self.likelihood.log_predictive_density(y_test,
                                                      mu_star,
                                                      var_star,
                                                      Y_metadata=Y_metadata)

    def log_predictive_density_sampling(self,
                                        x_test,
                                        y_test,
                                        Y_metadata=None,
                                        num_samples=1000):
        """
        Calculation of the log predictive density by sampling

        .. math::
            p(y_{*}|D) = \int p(y_{*}|f_{*})\, p(f_{*}|\mu_{*}, \sigma^{2}_{*})\, df_{*}

        :param x_test: test locations (x_{*})
        :type x_test: (Nx1) array
        :param y_test: test observations (y_{*})
        :type y_test: (Nx1) array
        :param Y_metadata: metadata associated with the test points
        :param num_samples: number of samples to use in monte carlo integration
        :type num_samples: int
        """
        mu_star, var_star = self._raw_predict(x_test)
        return self.likelihood.log_predictive_density_sampling(
            y_test,
            mu_star,
            var_star,
            Y_metadata=Y_metadata,
            num_samples=num_samples)

    def posterior_covariance_between_points(self, X1, X2):
        """
        Computes the posterior covariance between points.

        :param X1: some input observations
        :param X2: other input observations
        """
        return self.posterior.covariance_between_points(
            self.kern, self.X, X1, X2)
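
To complement the class listing above, a minimal end-to-end sketch of constructing GPy.core.GP directly with an explicit likelihood (as shown in __init__, the inference method defaults to exact Gaussian inference for a Gaussian likelihood); the data is illustrative.

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(50, 1)

m = GPy.core.GP(X, Y,
                kernel=GPy.kern.RBF(1),
                likelihood=GPy.likelihoods.Gaussian(variance=0.1))
m.optimize()

Xnew = np.linspace(0, 1, 100)[:, None]
mean, var = m.predict(Xnew)                # includes likelihood noise
f_mean, f_var = m.predict_noiseless(Xnew)  # latent function only
lower, upper = m.predict_quantiles(Xnew)   # default (2.5, 97.5) quantiles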
Example #15
File: mrd.py  Project: zcmail/GPy
    def __init__(self, Ylist, input_dim, X=None, X_variance=None,
                 initx = 'PCA', initz = 'permute',
                 num_inducing=10, Z=None, kernel=None,
                 inference_method=None, likelihoods=None, name='mrd',
                 Ynames=None, normalizer=False, stochastic=False, batchsize=10):

        self.logger = logging.getLogger(self.__class__.__name__)
        self.num_inducing = num_inducing

        if isinstance(Ylist, dict):
            Ynames, Ylist = zip(*Ylist.items())

        self.logger.debug("creating observable arrays")
        self.Ylist = [ObsAr(Y) for Y in Ylist]
        #The next line is a fix for Python 3. It replicates the python 2 behaviour from the above comprehension
        Y = Ylist[-1]

        if Ynames is None:
            self.logger.debug("creating Ynames")
            Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
        self.names = Ynames
        assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"

        if inference_method is None:
            self.inference_method = InferenceMethodList([VarDTC() for _ in range(len(self.Ylist))])
        else:
            assert isinstance(inference_method, InferenceMethodList), "please provide one inference method per Y in the list and provide it as InferenceMethodList, inference_method given: {}".format(inference_method)
            self.inference_method = inference_method

        if X is None:
            X, fracs = self._init_X(input_dim, initx, Ylist)
        else:
            fracs = [X.var(0)]*len(Ylist)

        Z = self._init_Z(initz, X, input_dim)
        self.Z = Param('inducing inputs', Z)
        self.num_inducing = self.Z.shape[0] # ensure M==N if M>N

        # sort out the kernels
        self.logger.info("building kernels")
        if kernel is None:
            from ..kern import RBF
            kernels = [RBF(input_dim, ARD=1, lengthscale=1./fracs[i]) for i in range(len(Ylist))]
        elif isinstance(kernel, Kern):
            kernels = []
            for i in range(len(Ylist)):
                k = kernel.copy()
                kernels.append(k)
        else:
            assert len(kernel) == len(Ylist), "need one kernel per output"
            assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
            kernels = kernel

        self.variational_prior = NormalPrior()
        #self.X = NormalPosterior(X, X_variance)

        if likelihoods is None:
            likelihoods = [Gaussian(name='Gaussian_noise_{}'.format(i)) for i in range(len(Ylist))]

        self.logger.info("adding X and Z")
        super(MRD, self).__init__(Y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,
                 Z=self.Z, kernel=None, inference_method=self.inference_method, likelihood=Gaussian(),
                 name='manifold relevance determination', normalizer=None,
                 missing_data=False, stochastic=False, batchsize=1)

        self._log_marginal_likelihood = 0

        self.unlink_parameter(self.likelihood)
        self.unlink_parameter(self.kern)

        if isinstance(batchsize, int):
            batchsize = itertools.repeat(batchsize)

        self.bgplvms = []

        for i, n, k, l, Y, im, bs in zip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
            assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
            md = np.isnan(Y).any()
            spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
                                          Z=Z, kernel=k, likelihood=l,
                                          inference_method=im, name=n,
                                          normalizer=normalizer,
                                          missing_data=md,
                                          stochastic=stochastic,
                                          batchsize=bs)
            spgp.kl_factr = 1./len(Ynames)
            spgp.unlink_parameter(spgp.Z)
            spgp.unlink_parameter(spgp.X)
            del spgp.Z
            del spgp.X
            spgp.Z = self.Z
            spgp.X = self.X
            self.link_parameter(spgp, i+2)
            self.bgplvms.append(spgp)

        b = self.bgplvms[0]
        self.posterior = b.posterior
        self.kern = b.kern
        self.likelihood = b.likelihood

        self.logger.info("init done")
Example #17
    def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):

        num_data, output_dim = Y.shape
        assert output_dim == 1, "This EP method only works for 1D outputs"

        # Makes computing the sign quicker if we work with numpy arrays rather
        # than ObsArrays
        Y = Y.values.copy()

        #Initial values - Marginal moments
        Z_hat = np.zeros(num_data,dtype=np.float64)
        mu_hat = np.zeros(num_data,dtype=np.float64)
        sigma2_hat = np.zeros(num_data,dtype=np.float64)

        tau_cav = np.empty(num_data,dtype=np.float64)
        v_cav = np.empty(num_data,dtype=np.float64)

        #initial values - Gaussian factors
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        LLT0 = Kmm.copy()
        Lm = jitchol(LLT0) #Kmm = Lm Lm^\top
        Vm,info = dtrtrs(Lm,Kmn,lower=1)
        # Lmi = dtrtri(Lm)
        # Kmmi = np.dot(Lmi.T,Lmi)
        # KmmiKmn = np.dot(Kmmi,Kmn)
        # Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        Qnn_diag = np.sum(Vm*Vm,-2) #diag(Knm Kmm^(-1) Kmn)
        #diag.add(LLT0, 1e-8)
        if self.old_mutilde is None:
            #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
            LLT = LLT0.copy() #Sigma = K.copy()
            mu = np.zeros(num_data)
            Sigma_diag = Qnn_diag.copy() + 1e-8
            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
        else:
            assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
            mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
            tau_tilde = v_tilde/mu_tilde
            mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
            Sigma_diag += 1e-8
            # TODO: Check the log-marginal under both conditions and choose the best one

        #Approximation
        tau_diff = self.epsilon + 1.
        v_diff = self.epsilon + 1.
        tau_tilde_old = np.nan
        v_tilde_old = np.nan
        iterations = 0
        while  ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
            update_order = np.random.permutation(num_data)
            for i in update_order:
                #Cavity distribution parameters
                tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
                v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
                if Y_metadata is not None:
                    # Pick out the relevant metadata for Y[i]
                    Y_metadata_i = {}
                    for key in list(Y_metadata.keys()):
                        Y_metadata_i[key] = Y_metadata[key][i, :]
                else:
                    Y_metadata_i = None

                #Marginal moments
                Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
                #Site parameters update
                delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                tau_tilde_prev = tau_tilde[i]
                tau_tilde[i] += delta_tau

                # Enforce positivity of tau_tilde. Even though this is guaranteed for log-concave sites, negative
                # values can still arise from numerical errors. Moreover, tau_tilde must be positive in order to
                # update the marginal likelihood without instability issues.
                if tau_tilde[i] < np.finfo(float).eps:
                    tau_tilde[i] = np.finfo(float).eps
                    delta_tau = tau_tilde[i] - tau_tilde_prev
                v_tilde[i] += delta_v

                #Posterior distribution parameters update
                if not self.parallel_updates:
                    #DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
                    DSYR(LLT,Kmn[:,i].copy(),delta_tau)
                    L = jitchol(LLT)
                    V,info = dtrtrs(L,Kmn,lower=1)
                    Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps)  #diag(K_nm (L L^\top)^(-1) K_mn)
                    si = np.sum(V.T*V[:,i],-1) #(V^\top V)[:,i]
                    mu += (delta_v-delta_tau*mu[i])*si
                    #mu = np.dot(Sigma, v_tilde)

            #(Re)compute Sigma_diag and mu using a full Cholesky decomposition
            mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
            Sigma_diag = np.maximum(Sigma_diag, np.finfo(float).eps)

            #monitor convergence
            if iterations>0:
                tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
                v_diff = np.mean(np.square(v_tilde-v_tilde_old))
            tau_tilde_old = tau_tilde.copy()
            v_tilde_old = v_tilde.copy()
            iterations += 1

        mu_tilde = v_tilde/tau_tilde
        mu_cav = v_cav/tau_cav
        sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde

        log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
                         + 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))

        self.old_mutilde = mu_tilde
        self.old_vtilde = v_tilde

        return mu, Sigma_diag, ObsAr(mu_tilde[:,None]), tau_tilde, log_Z_tilde
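
# A standalone sketch (not from the original file) of the per-site update performed
# inside the loop above, specialised to a Gaussian likelihood so the moment matching has
# a closed form. eta and delta mirror self.eta and self.delta (power-EP fraction and
# damping); all names are illustrative.
import numpy as np

def ep_site_update(y_i, mu_i, Sigma_ii, tau_tilde_i, v_tilde_i,
                   noise_var=0.1, eta=1.0, delta=1.0):
    # cavity distribution parameters in natural form
    tau_cav = 1. / Sigma_ii - eta * tau_tilde_i
    v_cav = mu_i / Sigma_ii - eta * v_tilde_i
    mu_cav, sigma2_cav = v_cav / tau_cav, 1. / tau_cav

    # moment matching against the Gaussian site N(y_i | f_i, noise_var)
    s = sigma2_cav + noise_var
    mu_hat = mu_cav + sigma2_cav * (y_i - mu_cav) / s
    sigma2_hat = sigma2_cav - sigma2_cav**2 / s

    # damped site-parameter update, matching delta_tau / delta_v above
    delta_tau = delta / eta * (1. / sigma2_hat - 1. / Sigma_ii)
    delta_v = delta / eta * (mu_hat / sigma2_hat - mu_i / Sigma_ii)

    # enforce positivity of the site precision, as in the listing
    tau_new = max(tau_tilde_i + delta_tau, np.finfo(float).eps)
    return tau_new, v_tilde_i + delta_v
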
Example #18
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kernels,
                 name='gp_msgp',
                 interpolation_method=None,
                 grid_dims=None,
                 normalize=False):
        super(GPMSGP, self).__init__(name)

        self.X = ObsAr(X)  # Not sure what ObsAr is needed for here

        if grid_dims is None:
            dims = [None] * len(Z)
            max_dim_ii = 0
            for ii in range(len(Z)):
                dims[ii] = np.arange(max_dim_ii,
                                     max_dim_ii + np.shape(Z[ii])[1])
                max_dim_ii = dims[ii][-1] + 1  # advance past the block just assigned
            grid_dims = dims
        else:
            grid_dims_to_create_id = []
            grid_dims_create = []
            grid_create_args = []
            n_grid_dims = len(grid_dims)
            for ii in range(n_grid_dims):
                if isinstance(Z[ii], dict):
                    grid_dims_to_create_id.append(ii)
                    grid_dims_create.append(grid_dims[ii])
                    grid_create_args.append(Z[ii])

            if len(grid_dims_to_create_id) > 0:
                Z_create = self.create_grid(grid_create_args,
                                            grid_dims=grid_dims_create)

                for ii in range(len(grid_dims_to_create_id)):
                    Z[grid_dims_to_create_id[ii]] = Z_create[ii]

        self.Z = Z
        self.input_grid_dims = grid_dims
        """
        if isinstance(Z,dict): #automatically create the grid
            Z,self.input_grid_dims = self.create_grid(Z,grid_dims = grid_dims)
            self.Z = Z
        else:
            
            self.input_grid_dims = grid_dims
            self.Z = Z
        """

        if normalize:
            with_mean = True
            with_std = True
        else:
            with_mean = False
            with_std = False

        self.normalizer = StandardScaler(with_mean=with_mean,
                                         with_std=with_std)
        self.X = self.normalizer.fit_transform(self.X)

        self.Z_normalizers = [
            StandardScaler(with_mean=with_mean, with_std=with_std).fit(X_z)
            for X_z in self.Z
        ]
        self.Z = [
            self.Z_normalizers[ii].transform(self.Z[ii])
            for ii in range(len(self.Z))
        ]

        self.num_data, self.input_dim = self.X.shape

        assert Y.ndim == 2

        self.Y = ObsAr(Y)

        self.Y_metadata = None  #TO-DO: do we even need this?

        assert np.shape(Y)[0] == self.num_data

        _, self.output_dim = self.Y.shape

        #check if kernels is a list or just a single kernel
        #and then check if every object in list is a kernel

        try:
            for kernel in kernels:
                assert isinstance(kernel, Kern)

        except TypeError:
            assert isinstance(kernels, Kern)
            kernels = list([kernels])

        self.inference_method = GridGaussianInference()

        self.likelihood = likelihoods.Gaussian()  #TO-DO: do we even need this?

        self.kern = KernGrid(kernels,
                             self.likelihood,
                             self.input_grid_dims,
                             interpolation_method=interpolation_method)

        self.mean_function = Constant(self.input_dim, self.output_dim)
        self.kern.update_Z(Z)
        ##for test set n_neighbors = 4
        self.kern.init_interpolation_method(n_neighbors=8)
        self.kern.update_X_Y(X, Y)

        ## register the parameters for optimization (paramz)
        self.link_parameter(self.kern)
        self.link_parameter(self.likelihood)

        ## need to do this in the case that someone wants to do prediction without/before
        ## hyperparameter optimization
        self.parameters_changed()
        self.posterior_prediction = self.inference_method.update_prediction_vectors(
            self.kern, self.posterior, self.grad_dict, self.likelihood)
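
# A small illustration (not from the original file) of how the automatic grid_dims
# assignment at the top of this constructor partitions the input dimensions: each
# element of Z claims a consecutive block of columns of X whose width is Z[ii].shape[1].
# The arrays below are placeholders.
import numpy as np

Z = [np.linspace(0., 1., 5)[:, None],     # 1-D axis       -> dims [0]
     np.random.rand(4, 2),                # 2-D grid block -> dims [1, 2]
     np.linspace(-1., 1., 3)[:, None]]    # 1-D axis       -> dims [3]

dims, start = [], 0
for Z_ii in Z:
    width = np.shape(Z_ii)[1]
    dims.append(np.arange(start, start + width))
    start += width

print(dims)   # [array([0]), array([1, 2]), array([3])]
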
Example #19
    def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
        num_data, output_dim = Y.shape
        assert output_dim == 1, "This EP method only works for 1D outputs"

        LLT0 = Kmm.copy()
        #diag.add(LLT0, 1e-8)

        Lm = jitchol(LLT0)
        Lmi = dtrtri(Lm)
        Kmmi = np.dot(Lmi.T,Lmi)
        KmmiKmn = np.dot(Kmmi,Kmn)
        Qnn_diag = np.sum(Kmn*KmmiKmn,-2)

        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        mu = np.zeros(num_data)
        LLT = Kmm.copy() #Sigma = K.copy()
        Sigma_diag = Qnn_diag.copy() + 1e-8

        #Initial values - Marginal moments
        Z_hat = np.zeros(num_data,dtype=np.float64)
        mu_hat = np.zeros(num_data,dtype=np.float64)
        sigma2_hat = np.zeros(num_data,dtype=np.float64)

        tau_cav = np.empty(num_data,dtype=np.float64)
        v_cav = np.empty(num_data,dtype=np.float64)

        #initial values - Gaussian factors
        if self.old_mutilde is None:
            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
        else:
            assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
            mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
            tau_tilde = v_tilde/mu_tilde

        #Approximation
        tau_diff = self.epsilon + 1.
        v_diff = self.epsilon + 1.
        iterations = 0
        tau_tilde_old = 0.
        v_tilde_old = 0.
        update_order = np.random.permutation(num_data)

        while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
            for i in update_order:
                #Cavity distribution parameters
                tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
                v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
                #Marginal moments
                Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i])
                #Site parameters update
                delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                tau_tilde[i] += delta_tau
                v_tilde[i] += delta_v
                #Posterior distribution parameters update

                #DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
                DSYR(LLT,Kmn[:,i].copy(),delta_tau)
                L = jitchol(LLT+np.eye(LLT.shape[0])*1e-7)

                V,info = dtrtrs(L,Kmn,lower=1)
                Sigma_diag = np.sum(V*V,-2)
                si = np.sum(V.T*V[:,i],-1)
                mu += (delta_v-delta_tau*mu[i])*si
                #mu = np.dot(Sigma, v_tilde)

            #(Re)compute Sigma and mu using a full Cholesky decomposition
            LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
            #diag.add(LLT, 1e-8)
            L = jitchol(LLT)
            V, _ = dtrtrs(L,Kmn,lower=1)
            V2, _ = dtrtrs(L.T,V,lower=0)
            #Sigma_diag = np.sum(V*V,-2)
            #Knmv_tilde = np.dot(Kmn,v_tilde)
            #mu = np.dot(V2.T,Knmv_tilde)
            Sigma = np.dot(V2.T,V2)
            mu = np.dot(Sigma,v_tilde)

            #monitor convergence
            #if iterations>0:
            tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
            v_diff = np.mean(np.square(v_tilde-v_tilde_old))

            tau_tilde_old = tau_tilde.copy()
            v_tilde_old = v_tilde.copy()

            # Only run the while loop once?
            tau_diff = 0
            v_diff = 0
            iterations += 1

        mu_tilde = v_tilde/tau_tilde
        mu_cav = v_cav/tau_cav
        sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
        Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
                         + 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
        return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_tilde
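
# A sketch (not from the original file) of the rank-one refresh that DSYR performs in
# both EP variants above: after changing the site precision at point i by delta_tau, the
# matrix LLT = Kmm + Kmn diag(tau_tilde) Knm is updated in place with the outer product
# of the i-th column of Kmn. The helper name below is illustrative.
import numpy as np

def dsyr_update(LLT, k_i, delta_tau):
    # unblocked equivalent of the BLAS symmetric rank-one update used as
    # DSYR(LLT, Kmn[:, i].copy(), delta_tau) in the listings above
    LLT += delta_tau * np.outer(k_i, k_i)
    return LLT

# This incremental update tracks the same quantity that the full recomputation
# LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:], Kmn.T) rebuilds after each sweep.
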