def set_XY(self, X=None, Y=None): """ Set the input / output data of the model This is useful if we wish to change our existing data but maintain the same model :param X: input observations :type X: np.ndarray :param Y: output observations :type Y: np.ndarray """ self.update_model(False) if Y is not None: if self.normalizer is not None: self.normalizer.scale_by(Y) self.Y_normalized = ObsAr(self.normalizer.normalize(Y)) self.Y = Y else: self.Y = ObsAr(Y) self.Y_normalized = self.Y if X is not None: if self.X in self.parameters: # LVM models if isinstance(self.X, VariationalPosterior): assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!" self.unlink_parameter(self.X) self.X = X self.link_parameter(self.X) else: self.unlink_parameter(self.X) from ..core import Param self.X = Param('latent mean',X) self.link_parameter(self.X) else: self.X = ObsAr(X) self.update_model(True)
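# Usage sketch (illustrative, not part of the library): assuming this set_XY lives
# on a GPy-style GP model such as GPy.models.GPRegression, the model is built once
# and its data are later swapped without rebuilding the kernel or likelihood.
import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(3 * X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
m.optimize()

# new observations arrive; keep the fitted kernel/likelihood, replace the data
X_new = np.random.rand(40, 1)
Y_new = np.sin(3 * X_new) + 0.05 * np.random.randn(40, 1)
m.set_XY(X_new, Y_new)
m.optimize()  # warm-started from the previous hyperparameters
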
def set_XY(self, X=None, Y=None):
    """
    Set the input / output data of the model.
    This is useful if we wish to change our existing data but maintain the same model.

    :param X: input observations
    :type X: np.ndarray
    :param Y: output observations
    :type Y: np.ndarray
    """
    # Replicate X once per output column and stack everything into the
    # multioutput format (inputs stacked, plus an output-index column).
    X_list = []
    Y_list = []
    for i in np.arange(Y.shape[1]):
        X_list.append(X.copy())
        Y_list.append(np.atleast_2d(Y[:, i]).T)
    X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)
    self.Y_metadata = {'output_index': self.output_index}

    self.update_model(False)
    if Y is not None:
        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
    if X is not None:
        if self.X in self.parameters:
            # LVM models
            if isinstance(self.X, VariationalPosterior):
                assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
                index = self.X._parent_index_
                self.unlink_parameter(self.X)
                self.X = X
                self.link_parameter(self.X, index=index)
            else:
                index = self.X._parent_index_
                self.unlink_parameter(self.X)
                from ..core import Param
                self.X = Param('latent mean', X)
                self.link_parameter(self.X, index=index)
        else:
            self.X = ObsAr(X)
    self.update_model(True)

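# Shape sketch for the replication step above (illustrative; it assumes GPy's
# GPy.util.multioutput.build_XY, which stacks the per-output blocks and appends
# an integer index column identifying which output each row belongs to).
import numpy as np
from GPy.util import multioutput

X = np.random.rand(5, 2)          # 5 points, 2 input dims
Y = np.random.rand(5, 3)          # 3 outputs per point
X_list = [X.copy() for _ in range(Y.shape[1])]
Y_list = [Y[:, i:i + 1] for i in range(Y.shape[1])]

X_stacked, Y_stacked, output_index = multioutput.build_XY(X_list, Y_list)
print(X_stacked.shape)          # (15, 3): inputs stacked, plus one output-index column
print(Y_stacked.shape)          # (15, 1)
print(output_index.ravel())     # [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2]
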
def set_XY(self, X=None, Y=None):
    self.update_model(False)
    if Y is not None:
        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
    if X is not None:
        self.X_untransformed = ObsAr(X)
    self.update_model(True)

def set_XY_group(self, X=None, Y=None, A=None):
    """
    Set the input / output data of the model.
    This is useful if we wish to change our existing data but maintain the same model.

    NOTE: this only updates X, Y and A in one go; it does not provide sequential
    updates. The input should be ALL previous data points, not only the current
    round's data point.

    :param X: input observations
    :type X: np.ndarray
    :param Y: output observations
    :type Y: np.ndarray
    """
    self.update_model(False)
    if Y is not None:
        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
    if X is not None:
        if self.X in self.parameters:
            # LVM models
            if isinstance(self.X, VariationalPosterior):
                assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
                index = self.X._parent_index_
                self.unlink_parameter(self.X)
                self.X = X
                self.link_parameter(self.X, index=index)
            else:
                index = self.X._parent_index_
                self.unlink_parameter(self.X)
                from ..core import Param
                self.X = Param('latent mean', X)
                self.link_parameter(self.X, index=index)
        else:
            self.X = ObsAr(X)
    # also update A
    if A is not None:
        self.A = A
    self.update_model(True)

def set_Y(self, Y):
    """
    Set the output data of the model.

    :param Y: output observations
    :type Y: np.ndarray or ObsAr
    """
    assert isinstance(Y, (np.ndarray, ObsAr))
    state = self.update_model()
    self.update_model(False)
    if self.normalizer is not None:
        self.normalizer.scale_by(Y)
        self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
        self.Y = Y
    else:
        self.Y = ObsAr(Y) if isinstance(Y, np.ndarray) else Y
        self.Y_normalized = self.Y
    self.update_model(state)

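# Minimal usage sketch (illustrative, assuming a GPy-style model exposing set_Y):
# only the outputs are replaced, e.g. after re-measuring the same inputs.
import numpy as np
import GPy

X = np.linspace(0, 1, 25)[:, None]
Y = np.sin(6 * X) + 0.1 * np.random.randn(25, 1)
m = GPy.models.GPRegression(X, Y, GPy.kern.Matern32(1))

Y_remeasured = np.sin(6 * X) + 0.1 * np.random.randn(25, 1)
m.set_Y(Y_remeasured)   # X stays fixed; set_X(X_new) is the analogous input-only update
m.optimize()
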
def comp_K(self, Z, qX):
    # Cache the quadrature/sigma points; reallocate only when the shape of the
    # variational posterior changes.
    if self.Xs is None or self.Xs.shape != qX.mean.shape:
        from paramz import ObsAr
        self.Xs = ObsAr(np.empty((self.degree,) + qX.mean.shape))
    mu, S = qX.mean.values, qX.variance.values
    S_sq = np.sqrt(S)
    for i in range(self.degree):
        self.Xs[i] = self.locs[i] * S_sq + mu
    return self.Xs

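# Numerical sketch of the transformation above (pure NumPy, no library code):
# each location (e.g. a Gauss-Hermite node) is mapped through the variational
# posterior q(X) = N(mu, S) as X_i = loc_i * sqrt(S) + mu, so the i-th slice is
# the i-th quadrature/sigma point for every latent input. The names degree and
# locs mirror the attributes used in comp_K and are assumptions here.
import numpy as np

degree = 3
locs = np.array([-1.2247, 0.0, 1.2247])   # e.g. 3-point Gauss-Hermite nodes (scaled)
mu = np.random.randn(10, 2)               # posterior means, 10 points x 2 dims
S = np.full((10, 2), 0.25)                # posterior variances

Xs = np.empty((degree,) + mu.shape)
for i in range(degree):
    Xs[i] = locs[i] * np.sqrt(S) + mu

print(Xs.shape)  # (3, 10, 2): one shifted copy of the latent means per node
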
def set_XY(self, X=None, Y=None):
    if isinstance(X, list):
        X, _, self.output_index = util.multioutput.build_XY(X, None)
    if isinstance(Y, list):
        _, Y, self.output_index = util.multioutput.build_XY(Y, Y)
    self.update_model(False)
    if Y is not None:
        self.Y = ObsAr(Y)
        self.Y_normalized = self.Y
    if X is not None:
        self.X = ObsAr(X)
    self.Y_metadata = {'output_index': self.output_index,
                       'trials': np.ones(self.output_index.shape)}
    if isinstance(self.inference_method, expectation_propagation.EP):
        self.inference_method.reset()
    self.update_model(True)

def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
    Model.__init__(self, name=name)
    # accept the construction arguments
    self.X1 = ObsAr(X1)
    self.X2 = ObsAr(X2)
    self.Y = Y
    self.kern1, self.kern2 = kern1, kern2
    self.link_parameter(self.kern1)
    self.link_parameter(self.kern2)
    self.likelihood = likelihoods.Gaussian()
    self.likelihood.variance = noise_var
    self.link_parameter(self.likelihood)
    self.num_data1, self.input_dim1 = self.X1.shape
    self.num_data2, self.input_dim2 = self.X2.shape
    assert kern1.input_dim == self.input_dim1
    assert kern2.input_dim == self.input_dim2
    assert Y.shape == (self.num_data1, self.num_data2)

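# Usage sketch: this constructor matches GPy's Kronecker-structured GP regression
# (exposed as GPy.models.GPKroneckerGaussianRegression, which is assumed here to
# be the class this __init__ belongs to). Y is observed on the product grid
# X1 x X2, so Y.shape == (len(X1), len(X2)).
import numpy as np
import GPy

X1 = np.linspace(0, 1, 30)[:, None]    # 30 points along the first axis
X2 = np.linspace(0, 2, 20)[:, None]    # 20 points along the second axis
Y = np.sin(X1) * np.cos(X2).T + 0.05 * np.random.randn(30, 20)

k1 = GPy.kern.RBF(1)
k2 = GPy.kern.Matern32(1)
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2, noise_var=0.05)
m.optimize()
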
def set_X(self, X):
    """
    Set the input data of the model.

    :param X: input observations
    :type X: np.ndarray
    """
    assert isinstance(X, np.ndarray)
    state = self.update_model()
    self.update_model(False)
    self.X = ObsAr(X)
    self.update_model(state)

def test_inference_EP_non_classification(self):
    from paramz import ObsAr
    X, Y, Y_extra_noisy = self.genNoisyData()
    deg_freedom = 5.
    init_noise_var = 0.08
    lik_studentT = GPy.likelihoods.StudentT(deg_free=deg_freedom, sigma2=init_noise_var)
    k = GPy.kern.RBF(1, variance=2., lengthscale=1.1)
    ep_inf_alt = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=4, delta=0.5)
    m = GPy.core.GP(X=X, Y=Y_extra_noisy, kernel=k, likelihood=lik_studentT, inference_method=ep_inf_alt)

    K = m.kern.K(X)
    post_params, ga_approx, cav_params, log_Z_tilde = m.inference_method.expectation_propagation(
        K, ObsAr(Y_extra_noisy), lik_studentT, None)
    mu_tilde = ga_approx.v / ga_approx.tau.astype(float)

    p, m, d = m.inference_method._inference(Y_extra_noisy, K, ga_approx, cav_params,
                                            lik_studentT, Y_metadata=None, Z_tilde=log_Z_tilde)
    p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, ep_inf_alt).inference(
        k, X, lik_studentT, mu_tilde[:, None],
        mean_function=None,
        variance=1. / ga_approx.tau,
        K=K,
        Z_tilde=log_Z_tilde + np.sum(-0.5 * np.log(ga_approx.tau)
                                     + 0.5 * (ga_approx.v * ga_approx.v * 1. / ga_approx.tau)))

    # The EP-specific inference and the generic inference on the EP pseudo-observations
    # should agree to numerical precision (tolerance assumed to be 1e-6).
    assert (np.sum(np.array([m - m0,
                             np.sum(d['dL_dK'] - d0['dL_dK']),
                             np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
                             np.sum(d['dL_dm'] - d0['dL_dm']),
                             np.sum(p._woodbury_vector - p0._woodbury_vector),
                             np.sum(p.woodbury_inv - p0.woodbury_inv)])) < 1e-6)

def set_XY(self, X=None, Y=None):
    if isinstance(X, list):
        X, _, self.output_index = util.multioutput.build_XY(X, None)
    if isinstance(Y, list):
        _, Y, self.output_index = util.multioutput.build_XY(Y, Y)
    self.update_model(False)
    if Y is not None:
        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
    if X is not None:
        self.X = ObsAr(X)
    self.Y_metadata = {
        "output_index": self.output_index,
        "trials": np.ones(self.output_index.shape),
    }
    self.update_model(True)

def test_inference_EP(self):
    from paramz import ObsAr
    X, Y = self.genData()
    lik = GPy.likelihoods.Bernoulli()
    k = GPy.kern.RBF(1, variance=7., lengthscale=0.2)
    inf = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=30, delta=0.5)
    self.model = GPy.core.GP(X=X, Y=Y, kernel=k, inference_method=inf, likelihood=lik)

    K = self.model.kern.K(X)
    post_params, ga_approx, cav_params, log_Z_tilde = self.model.inference_method.expectation_propagation(
        K, ObsAr(Y), lik, None)
    mu_tilde = ga_approx.v / ga_approx.tau.astype(float)

    p, m, d = self.model.inference_method._inference(Y, K, ga_approx, cav_params, lik,
                                                     Y_metadata=None, Z_tilde=log_Z_tilde)
    p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(
        k, X, lik, mu_tilde[:, None],
        mean_function=None,
        variance=1. / ga_approx.tau,
        K=K,
        Z_tilde=log_Z_tilde + np.sum(-0.5 * np.log(ga_approx.tau)
                                     + 0.5 * (ga_approx.v * ga_approx.v * 1. / ga_approx.tau)))

    # both inference paths should agree to numerical precision (tolerance assumed 1e-6)
    assert (np.sum(np.array([m - m0,
                             np.sum(d['dL_dK'] - d0['dL_dK']),
                             np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
                             np.sum(d['dL_dm'] - d0['dL_dm']),
                             np.sum(p._woodbury_vector - p0._woodbury_vector),
                             np.sum(p.woodbury_inv - p0.woodbury_inv)])) < 1e-6)

def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None,
              Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
    if self.always_reset:
        self.reset()

    num_data, output_dim = Y.shape
    assert output_dim == 1, "ep in 1D only (for now!)"

    if Lm is None:
        Kmm = kern.K(Z)
        Lm = jitchol(Kmm)

    if psi1 is None:
        try:
            Kmn = kern.K(Z, X)
        except TypeError:
            Kmn = kern.psi1(Z, X).T
    else:
        Kmn = psi1.T

    if self.ep_mode == "nested":
        # Force EP at each step of the optimization
        self._ep_approximation = None
        post_params, ga_approx, log_Z_tilde = self._ep_approximation = \
            self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
    elif self.ep_mode == "alternated":
        if getattr(self, '_ep_approximation', None) is None:
            # if we don't yet have the results of running EP, run EP and store
            # the computed factors in self._ep_approximation
            post_params, ga_approx, log_Z_tilde = self._ep_approximation = \
                self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
        else:
            # if we've already run EP, just use the existing approximation
            # stored in self._ep_approximation
            post_params, ga_approx, log_Z_tilde = self._ep_approximation
    else:
        raise ValueError("ep_mode value not valid")

    mu_tilde = ga_approx.v / ga_approx.tau.astype(float)

    return super(EPDTC, self).inference(kern, X, Z, likelihood, ObsAr(mu_tilde[:, None]),
                                        mean_function=mean_function,
                                        Y_metadata=Y_metadata,
                                        precision=ga_approx.tau,
                                        Lm=Lm, dL_dKmm=dL_dKmm,
                                        psi0=psi0, psi1=psi1, psi2=psi2,
                                        Z_tilde=log_Z_tilde)

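# Contextual sketch (illustrative): EPDTC combines EP site approximations with
# DTC-style sparse inference on the resulting pseudo-observations mu_tilde. The
# snippet assumes GPy.models.SparseGPClassification wires up EPDTC (or a
# compatible EP-based method) for the Bernoulli likelihood by default.
import numpy as np
import GPy

X = np.random.rand(200, 1)
Y = (np.sin(10 * X) > 0).astype(float)   # binary labels in {0, 1}

m = GPy.models.SparseGPClassification(X, Y, kernel=GPy.kern.RBF(1), num_inducing=15)
m.optimize()
probs, _ = m.predict(np.linspace(0, 1, 50)[:, None])
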
def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None,
             name='gp', Y_metadata=None, normalizer=False):
    super(GP, self).__init__(name)

    assert X.ndim == 2
    if isinstance(X, (ObsAr, VariationalPosterior)):
        self.X = X.copy()
    else:
        self.X = ObsAr(X)
    self.num_data, self.input_dim = self.X.shape

    assert Y.ndim == 2
    logger.info("initializing Y")

    if normalizer is True:
        self.normalizer = Standardize()
    elif normalizer is False:
        self.normalizer = None
    else:
        self.normalizer = normalizer

    if self.normalizer is not None:
        self.normalizer.scale_by(Y)
        self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
        self.Y = Y
    elif isinstance(Y, np.ndarray):
        self.Y = ObsAr(Y)
        self.Y_normalized = self.Y
    else:
        self.Y = Y
        self.Y_normalized = self.Y

    if Y.shape[0] != self.num_data:
        # There can be cases where we want more inputs than outputs, for example
        # if we have multiple latent function values
        warnings.warn("There are more rows in your input data X "
                      "than in your output data Y; be VERY sure this is what you want")
    _, self.output_dim = self.Y.shape

    assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
    self.Y_metadata = Y_metadata

    assert isinstance(kernel, kern.Kern)
    #assert self.input_dim == kernel.input_dim
    self.kern = kernel

    assert isinstance(likelihood, likelihoods.Likelihood)
    self.likelihood = likelihood

    if self.kern._effective_input_dim != self.X.shape[1]:
        warnings.warn("Your kernel has a different input dimension {} than the given X dimension {}. "
                      "Be very sure this is what you want and that you have not forgotten to set the "
                      "right input dimension in your kernel".format(self.kern._effective_input_dim,
                                                                    self.X.shape[1]))

    # handle the mean function
    self.mean_function = mean_function
    if mean_function is not None:
        assert isinstance(self.mean_function, Mapping)
        assert mean_function.input_dim == self.input_dim
        assert mean_function.output_dim == self.output_dim
        self.link_parameter(mean_function)

    # find a sensible inference method
    logger.info("initializing inference method")
    if inference_method is None:
        if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
            inference_method = exact_gaussian_inference.ExactGaussianInference()
        else:
            inference_method = expectation_propagation.EP()
            print("defaulting to " + str(inference_method) + " for latent function inference")
    self.inference_method = inference_method

    logger.info("adding kernel and likelihood as parameters")
    self.link_parameter(self.kern)
    self.link_parameter(self.likelihood)
    self.posterior = None

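# Construction sketch for the GP base class above (illustrative): kernel,
# likelihood and inference method are passed explicitly instead of relying on a
# convenience wrapper like GPy.models.GPRegression. The module paths follow the
# ones already used in this code and are assumed to resolve in an installed GPy.
import numpy as np
import GPy
from GPy.inference.latent_function_inference import exact_gaussian_inference

X = np.random.rand(50, 2)
Y = (X[:, :1] - X[:, 1:2]) ** 2 + 0.01 * np.random.randn(50, 1)

m = GPy.core.GP(X=X, Y=Y,
                kernel=GPy.kern.RBF(2, ARD=True),
                likelihood=GPy.likelihoods.Gaussian(),
                inference_method=exact_gaussian_inference.ExactGaussianInference())
m.optimize()
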
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
             initx='PCA', initz='permute',
             num_inducing=10, Z=None, kernel=None,
             inference_method=None, likelihoods=None, name='mrd',
             Ynames=None, normalizer=False, stochastic=False, batchsize=10):
    self.logger = logging.getLogger(self.__class__.__name__)
    self.num_inducing = num_inducing

    if isinstance(Ylist, dict):
        Ynames, Ylist = zip(*Ylist.items())

    self.logger.debug("creating observable arrays")
    self.Ylist = [ObsAr(Y) for Y in Ylist]
    # The next line is a fix for Python 3. It replicates the Python 2 behaviour
    # of the loop variable leaking out of the comprehension above.
    Y = Ylist[-1]

    if Ynames is None:
        self.logger.debug("creating Ynames")
        Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
    self.names = Ynames
    assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"

    if inference_method is None:
        self.inference_method = InferenceMethodList([VarDTC() for _ in range(len(self.Ylist))])
    else:
        assert isinstance(inference_method, InferenceMethodList), \
            "please provide one inference method per Y in the list and provide it as " \
            "InferenceMethodList, inference_method given: {}".format(inference_method)
        self.inference_method = inference_method

    if X is None:
        X, fracs = self._init_X(input_dim, initx, Ylist)
    else:
        fracs = [X.var(0)] * len(Ylist)

    Z = self._init_Z(initz, X, input_dim)
    self.Z = Param('inducing inputs', Z)
    self.num_inducing = self.Z.shape[0]  # ensure M==N if M>N

    # sort out the kernels
    self.logger.info("building kernels")
    if kernel is None:
        from ..kern import RBF
        kernels = [RBF(input_dim, ARD=1, lengthscale=1. / fracs[i]) for i in range(len(Ylist))]
    elif isinstance(kernel, Kern):
        kernels = []
        for i in range(len(Ylist)):
            k = kernel.copy()
            kernels.append(k)
    else:
        assert len(kernel) == len(Ylist), "need one kernel per output"
        assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
        kernels = kernel

    self.variational_prior = NormalPrior()
    #self.X = NormalPosterior(X, X_variance)

    if likelihoods is None:
        likelihoods = [Gaussian(name='Gaussian_noise') for _ in range(len(Ylist))]

    self.logger.info("adding X and Z")
    super(MRD, self).__init__(Y, input_dim, X=X, X_variance=X_variance,
                              num_inducing=num_inducing, Z=self.Z, kernel=None,
                              inference_method=self.inference_method, likelihood=Gaussian(),
                              name='manifold relevance determination', normalizer=None,
                              missing_data=False, stochastic=False, batchsize=1)

    self._log_marginal_likelihood = 0

    self.unlink_parameter(self.likelihood)
    self.unlink_parameter(self.kern)

    if isinstance(batchsize, int):
        batchsize = itertools.repeat(batchsize)

    self.bgplvms = []
    for i, n, k, l, Y, im, bs in zip(itertools.count(), Ynames, kernels, likelihoods,
                                     Ylist, self.inference_method, batchsize):
        assert Y.shape[0] == self.num_data, \
            "All datasets need to share the number of datapoints, and those have to correspond to one another"
        md = np.isnan(Y).any()
        spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
                                      Z=Z, kernel=k, likelihood=l,
                                      inference_method=im, name=n,
                                      normalizer=normalizer,
                                      missing_data=md,
                                      stochastic=stochastic,
                                      batchsize=bs)
        spgp.kl_factr = 1. / len(Ynames)
        spgp.unlink_parameter(spgp.Z)
        spgp.unlink_parameter(spgp.X)
        del spgp.Z
        del spgp.X
        spgp.Z = self.Z
        spgp.X = self.X
        self.link_parameter(spgp, i + 2)
        self.bgplvms.append(spgp)

    b = self.bgplvms[0]
    self.posterior = b.posterior
    self.kern = b.kern
    self.likelihood = b.likelihood

    self.logger.info("init done")

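# Usage sketch for the MRD constructor above (illustrative): two views of the
# same 40 datapoints share a single latent space; Ylist can also be a dict of
# named views, in which case Ynames is taken from the keys.
import numpy as np
import GPy

latent = np.random.randn(40, 2)
Y1 = latent.dot(np.random.randn(2, 6)) + 0.05 * np.random.randn(40, 6)
Y2 = latent.dot(np.random.randn(2, 4)) + 0.05 * np.random.randn(40, 4)

m = GPy.models.MRD([Y1, Y2], input_dim=2, num_inducing=10)
m.optimize(messages=False, max_iters=50)
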
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):

    num_data, output_dim = Y.shape
    assert output_dim == 1, "This EP method only works for 1D outputs"

    # Makes computing the sign quicker if we work with numpy arrays rather
    # than ObsArrays
    Y = Y.values.copy()

    # Initial values - Marginal moments
    Z_hat = np.zeros(num_data, dtype=np.float64)
    mu_hat = np.zeros(num_data, dtype=np.float64)
    sigma2_hat = np.zeros(num_data, dtype=np.float64)
    tau_cav = np.empty(num_data, dtype=np.float64)
    v_cav = np.empty(num_data, dtype=np.float64)

    # Initial values - Gaussian factors
    # Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
    LLT0 = Kmm.copy()
    Lm = jitchol(LLT0)  # K_m = L_m L_m^T
    Vm, info = dtrtrs(Lm, Kmn, lower=1)
    # Lmi = dtrtri(Lm)
    # Kmmi = np.dot(Lmi.T, Lmi)
    # KmmiKmn = np.dot(Kmmi, Kmn)
    # Qnn_diag = np.sum(Kmn*KmmiKmn, -2)
    Qnn_diag = np.sum(Vm * Vm, -2)  # diag(Knm Kmm^(-1) Kmn)
    # diag.add(LLT0, 1e-8)

    if self.old_mutilde is None:
        # Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        LLT = LLT0.copy()  # Sigma = K.copy()
        mu = np.zeros(num_data)
        Sigma_diag = Qnn_diag.copy() + 1e-8
        tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
    else:
        assert self.old_mutilde.size == num_data, \
            "data size mis-match: did you change the data? try resetting!"
        mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
        tau_tilde = v_tilde / mu_tilde
        mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
        Sigma_diag += 1e-8
        # TODO: Check the log-marginal under both conditions and choose the best one

    # Approximation
    tau_diff = self.epsilon + 1.
    v_diff = self.epsilon + 1.
    tau_tilde_old = np.nan
    v_tilde_old = np.nan
    iterations = 0
    while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
        update_order = np.random.permutation(num_data)
        for i in update_order:
            # Cavity distribution parameters
            tau_cav[i] = 1. / Sigma_diag[i] - self.eta * tau_tilde[i]
            v_cav[i] = mu[i] / Sigma_diag[i] - self.eta * v_tilde[i]

            if Y_metadata is not None:
                # Pick out the relevant metadata for Yi
                Y_metadata_i = {}
                for key in list(Y_metadata.keys()):
                    Y_metadata_i[key] = Y_metadata[key][i, :]
            else:
                Y_metadata_i = None

            # Marginal moments
            Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(
                Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)

            # Site parameters update
            delta_tau = self.delta / self.eta * (1. / sigma2_hat[i] - 1. / Sigma_diag[i])
            delta_v = self.delta / self.eta * (mu_hat[i] / sigma2_hat[i] - mu[i] / Sigma_diag[i])
            tau_tilde_prev = tau_tilde[i]
            tau_tilde[i] += delta_tau

            # Enforce positivity of tau_tilde. Even though this is guaranteed for
            # log-concave sites, it is still possible to get negative values due to
            # numerical errors. Moreover, the value of tau_tilde should be positive
            # in order to update the marginal likelihood without instability issues.
            if tau_tilde[i] < np.finfo(float).eps:
                tau_tilde[i] = np.finfo(float).eps
                delta_tau = tau_tilde[i] - tau_tilde_prev
            v_tilde[i] += delta_v

            # Posterior distribution parameters update
            if self.parallel_updates == False:
                # DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
                DSYR(LLT, Kmn[:, i].copy(), delta_tau)
                L = jitchol(LLT)
                V, info = dtrtrs(L, Kmn, lower=1)
                Sigma_diag = np.maximum(np.sum(V * V, -2), np.finfo(float).eps)  # diag(K_nm (L L^T)^(-1) K_mn)
                si = np.sum(V.T * V[:, i], -1)  # (V V^T)[:, i]
                mu += (delta_v - delta_tau * mu[i]) * si
                # mu = np.dot(Sigma, v_tilde)

        # (re)compute Sigma, Sigma_diag and mu using a full Cholesky decomposition
        mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
        Sigma_diag = np.maximum(Sigma_diag, np.finfo(float).eps)

        # monitor convergence
        if iterations > 0:
            tau_diff = np.mean(np.square(tau_tilde - tau_tilde_old))
            v_diff = np.mean(np.square(v_tilde - v_tilde_old))
        tau_tilde_old = tau_tilde.copy()
        v_tilde_old = v_tilde.copy()
        iterations += 1

    mu_tilde = v_tilde / tau_tilde
    mu_cav = v_cav / tau_cav
    sigma2_sigma2tilde = 1. / tau_cav + 1. / tau_tilde

    log_Z_tilde = (np.log(Z_hat) + 0.5 * np.log(2 * np.pi) + 0.5 * np.log(sigma2_sigma2tilde)
                   + 0.5 * ((mu_cav - mu_tilde) ** 2) / (sigma2_sigma2tilde))

    self.old_mutilde = mu_tilde
    self.old_vtilde = v_tilde

    return mu, Sigma_diag, ObsAr(mu_tilde[:, None]), tau_tilde, log_Z_tilde

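# Stand-alone numerical sketch of the bookkeeping used above (pure NumPy): the
# site factors are kept in natural-parameter form (tau_tilde, v_tilde); the
# cavity for point i removes (a fraction eta of) its own site from the current
# marginal, and the moment/natural conversions are mu = v / tau, sigma2 = 1 / tau.
import numpy as np

eta = 1.0                            # power-EP fraction (1.0 = standard EP)
Sigma_diag = np.array([0.8, 1.1])    # current marginal variances of q(f_i)
mu = np.array([0.3, -0.2])           # current marginal means
tau_tilde = np.array([0.5, 0.7])     # site precisions
v_tilde = np.array([0.1, -0.4])      # site precision-adjusted means

# cavity parameters, as in the loop above
tau_cav = 1. / Sigma_diag - eta * tau_tilde
v_cav = mu / Sigma_diag - eta * v_tilde

# convert back to moment form when needed
mu_cav = v_cav / tau_cav
sigma2_cav = 1. / tau_cav
mu_tilde = v_tilde / tau_tilde       # the pseudo-observations passed on to the DTC step
print(mu_cav, sigma2_cav, mu_tilde)
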
def __init__(self, X, Y, Z, kernels, name='gp_msgp', interpolation_method=None,
             grid_dims=None, normalize=False):
    super(GPMSGP, self).__init__(name)

    self.X = ObsAr(X)  # ObsAr: paramz observable-array wrapper around the raw inputs

    if grid_dims is None:
        # assign consecutive input dimensions to each grid factor
        dims = [None] * len(Z)
        max_dim_ii = 0
        for ii in range(len(Z)):
            dims[ii] = np.arange(max_dim_ii, max_dim_ii + np.shape(Z[ii])[1])
            max_dim_ii = dims[ii][-1] + 1
        grid_dims = dims
    else:
        grid_dims_to_create_id = []
        grid_dims_create = []
        grid_create_args = []
        n_grid_dims = len(grid_dims)
        for ii in range(n_grid_dims):
            if isinstance(Z[ii], dict):
                grid_dims_to_create_id.append(ii)
                grid_dims_create.append(grid_dims[ii])
                grid_create_args.append(Z[ii])
        if len(grid_dims_to_create_id) > 1:
            Z_create = self.create_grid(grid_create_args, grid_dims=grid_dims_create)
            for ii in range(len(grid_dims_to_create_id)):
                Z[grid_dims_to_create_id[ii]] = Z_create[ii]

    self.Z = Z
    self.input_grid_dims = grid_dims

    """
    if isinstance(Z, dict):
        # automatically create the grid
        Z, self.input_grid_dims = self.create_grid(Z, grid_dims=grid_dims)
        self.Z = Z
    else:
        self.input_grid_dims = grid_dims
        self.Z = Z
    """

    if normalize:
        with_mean = True
        with_std = True
    else:
        with_mean = False
        with_std = False

    self.normalizer = StandardScaler(with_mean=with_mean, with_std=with_std)
    self.X = self.normalizer.fit_transform(self.X)

    self.Z_normalizers = [StandardScaler(with_mean=with_mean, with_std=with_std).fit(X_z)
                          for X_z in self.Z]
    self.Z = [self.Z_normalizers[ii].transform(self.Z[ii]) for ii in range(len(self.Z))]

    self.num_data, self.input_dim = self.X.shape

    assert Y.ndim == 2
    self.Y = ObsAr(Y)
    self.Y_metadata = None  # TO-DO: do we even need this?

    assert np.shape(Y)[0] == self.num_data
    _, self.output_dim = self.Y.shape

    # check whether kernels is a list or just a single kernel,
    # and then check that every object in the list is a kernel
    try:
        for kernel in kernels:
            assert isinstance(kernel, Kern)
    except TypeError:
        assert isinstance(kernels, Kern)
        kernels = list([kernels])

    self.inference_method = GridGaussianInference()

    self.likelihood = likelihoods.Gaussian()  # TO-DO: do we even need this?

    self.kern = KernGrid(kernels, self.likelihood, self.input_grid_dims,
                         interpolation_method=interpolation_method)
    self.mean_function = Constant(self.input_dim, self.output_dim)

    self.kern.update_Z(Z)
    ## for test set n_neighbors = 4
    self.kern.init_interpolation_method(n_neighbors=8)
    self.kern.update_X_Y(X, Y)

    ## register the parameters for optimization (paramz)
    self.link_parameter(self.kern)
    self.link_parameter(self.likelihood)

    ## needed in case someone wants to do prediction without/before
    ## hyperparameter optimization
    self.parameters_changed()

    self.posterior_prediction = self.inference_method.update_prediction_vectors(
        self.kern, self.posterior, self.grad_dict, self.likelihood)

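# Construction sketch (hypothetical; GPMSGP, KernGrid and GridGaussianInference
# are project-specific classes, so the call below only illustrates the argument
# shapes implied by this __init__): Z is a list of per-factor grid arrays and
# kernels is one 1-D kernel per grid factor.
import numpy as np
import GPy

X = np.random.rand(500, 2)                        # training inputs
Y = np.sin(X[:, :1]) * np.cos(X[:, 1:2])          # training targets, shape (500, 1)
Z = [np.linspace(0, 1, 30)[:, None],              # grid for input dim 0
     np.linspace(0, 1, 30)[:, None]]              # grid for input dim 1
kernels = [GPy.kern.RBF(1), GPy.kern.RBF(1)]      # one kernel per factor

m = GPMSGP(X, Y, Z, kernels, normalize=True)      # class defined by the __init__ above
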
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):

    num_data, output_dim = Y.shape
    assert output_dim == 1, "This EP method only works for 1D outputs"

    LLT0 = Kmm.copy()
    # diag.add(LLT0, 1e-8)
    Lm = jitchol(LLT0)
    Lmi = dtrtri(Lm)
    Kmmi = np.dot(Lmi.T, Lmi)
    KmmiKmn = np.dot(Kmmi, Kmn)
    Qnn_diag = np.sum(Kmn * KmmiKmn, -2)

    # Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
    mu = np.zeros(num_data)
    LLT = Kmm.copy()  # Sigma = K.copy()
    Sigma_diag = Qnn_diag.copy() + 1e-8

    # Initial values - Marginal moments
    Z_hat = np.zeros(num_data, dtype=np.float64)
    mu_hat = np.zeros(num_data, dtype=np.float64)
    sigma2_hat = np.zeros(num_data, dtype=np.float64)
    tau_cav = np.empty(num_data, dtype=np.float64)
    v_cav = np.empty(num_data, dtype=np.float64)

    # Initial values - Gaussian factors
    if self.old_mutilde is None:
        tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
    else:
        assert self.old_mutilde.size == num_data, \
            "data size mis-match: did you change the data? try resetting!"
        mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
        tau_tilde = v_tilde / mu_tilde

    # Approximation
    tau_diff = self.epsilon + 1.
    v_diff = self.epsilon + 1.
    iterations = 0
    tau_tilde_old = 0.
    v_tilde_old = 0.
    update_order = np.random.permutation(num_data)
    while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
        for i in update_order:
            # Cavity distribution parameters
            tau_cav[i] = 1. / Sigma_diag[i] - self.eta * tau_tilde[i]
            v_cav[i] = mu[i] / Sigma_diag[i] - self.eta * v_tilde[i]
            # Marginal moments
            Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(
                Y[i], tau_cav[i], v_cav[i])  # , Y_metadata=None)  # =(None if Y_metadata is None else Y_metadata[i])
            # Site parameters update
            delta_tau = self.delta / self.eta * (1. / sigma2_hat[i] - 1. / Sigma_diag[i])
            delta_v = self.delta / self.eta * (mu_hat[i] / sigma2_hat[i] - mu[i] / Sigma_diag[i])
            tau_tilde[i] += delta_tau
            v_tilde[i] += delta_v
            # Posterior distribution parameters update
            # DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
            DSYR(LLT, Kmn[:, i].copy(), delta_tau)
            L = jitchol(LLT + np.eye(LLT.shape[0]) * 1e-7)
            V, info = dtrtrs(L, Kmn, lower=1)
            Sigma_diag = np.sum(V * V, -2)
            si = np.sum(V.T * V[:, i], -1)
            mu += (delta_v - delta_tau * mu[i]) * si
            # mu = np.dot(Sigma, v_tilde)

        # (re)compute Sigma and mu using a full Cholesky decomposition
        LLT = LLT0 + np.dot(Kmn * tau_tilde[None, :], Kmn.T)
        # diag.add(LLT, 1e-8)
        L = jitchol(LLT)
        V, _ = dtrtrs(L, Kmn, lower=1)
        V2, _ = dtrtrs(L.T, V, lower=0)
        # Sigma_diag = np.sum(V*V, -2)
        # Knmv_tilde = np.dot(Kmn, v_tilde)
        # mu = np.dot(V2.T, Knmv_tilde)
        Sigma = np.dot(V2.T, V2)
        mu = np.dot(Sigma, v_tilde)

        # monitor convergence
        # if iterations > 0:
        tau_diff = np.mean(np.square(tau_tilde - tau_tilde_old))
        v_diff = np.mean(np.square(v_tilde - v_tilde_old))
        tau_tilde_old = tau_tilde.copy()
        v_tilde_old = v_tilde.copy()

        # only go through the while loop once (?)
        tau_diff = 0
        v_diff = 0

        iterations += 1

    mu_tilde = v_tilde / tau_tilde
    mu_cav = v_cav / tau_cav
    sigma2_sigma2tilde = 1. / tau_cav + 1. / tau_tilde

    Z_tilde = np.exp(np.log(Z_hat) + 0.5 * np.log(2 * np.pi) + 0.5 * np.log(sigma2_sigma2tilde)
                     + 0.5 * ((mu_cav - mu_tilde) ** 2) / (sigma2_sigma2tilde))

    return mu, Sigma, ObsAr(mu_tilde[:, None]), tau_tilde, Z_tilde