# Assumed module-level imports for this method:
#   import numpy as np
#   from copy import deepcopy
#   jitchol -- Cholesky factorization with jitter added for numerical
#   stability (provided by the library's numerical tools).
def predict_with_posterior(self, post, xs, ys=None):
    '''
    Predict test targets (given by xs) based on the training data of the
    current model, with the posterior already provided (i.e. you already
    have the posterior and therefore do not need the fitting phase).

    This method outputs the following values:

    | predictive output means (ym),
    | predictive output variances (ys2),
    | predictive latent means (fm),
    | predictive latent variances (fs2),
    | log predictive probabilities (lp).

    These values can also be accessed as model attributes (e.g. model.ym).

    :param post: struct representation of the posterior
    :param xs: test input
    :param ys: test target (optional)
    :return: ym, ys2, fm, fs2, lp
    '''
    # Check the shape of the inputs; transform to column vectors if necessary.
    if xs.ndim == 1:
        xs = np.reshape(xs, (xs.shape[0], 1))
    self.xs = xs
    if ys is not None:
        if ys.ndim == 1:
            ys = np.reshape(ys, (ys.shape[0], 1))
        self.ys = ys

    meanfunc = self.meanfunc
    covfunc = self.covfunc
    likfunc = self.likfunc
    inffunc = self.inffunc
    x = self.x
    y = self.y

    self.posterior = deepcopy(post)
    alpha = post.alpha
    L = post.L
    sW = post.sW

    nz = list(range(len(alpha[:, 0])))      # non-sparse representation
    if not isinstance(L, np.ndarray):       # in case L is not provided, we compute it
        K = covfunc.getCovMatrix(x=x[nz, :], mode='train')
        L = jitchol((np.eye(len(nz)) + np.dot(sW, sW.T) * K).T)
    Ltril = np.all(np.tril(L, -1) == 0)     # is L an upper triangular matrix?
    ns = xs.shape[0]                        # number of test points
    nperbatch = 1000                        # number of test points per minibatch
    nact = 0                                # number of already processed test points
    ymu = np.zeros((ns, 1))
    ys2 = np.zeros((ns, 1))
    fmu = np.zeros((ns, 1))
    fs2 = np.zeros((ns, 1))
    lp = np.zeros((ns, 1))
    while nact < ns:                        # process minibatches of test cases to save memory
        ids = list(range(nact, min(nact + nperbatch, ns)))                  # test points to process
        kss = covfunc.getCovMatrix(z=xs[ids, :], mode='self_test')          # self-variances
        Ks = covfunc.getCovMatrix(x=x[nz, :], z=xs[ids, :], mode='cross')   # cross-covariances
        ms = meanfunc.getMean(xs[ids, :])
        N = alpha.shape[1]                  # number of alphas (usually 1; more in case of sampling)
        Fmu = np.tile(ms, (1, N)) + np.dot(Ks.T, alpha[nz])                 # conditional mean fs|f
        fmu[ids] = np.reshape(Fmu.sum(axis=1) / N, (len(ids), 1))           # predictive means
        if Ltril:                           # L is triangular => use Cholesky parameters (alpha, sW, L)
            V = np.linalg.solve(L.T, np.tile(sW, (1, len(ids))) * Ks)
            fs2[ids] = kss - np.array([(V * V).sum(axis=0)]).T              # predictive variances
        else:                               # L is not triangular => use alternative parametrization
            fs2[ids] = kss + np.array([(Ks * np.dot(L, Ks)).sum(axis=0)]).T # predictive variances
        fs2[ids] = np.maximum(fs2[ids], 0)  # remove numerical noise, i.e. negative variances
        Fs2 = np.tile(fs2[ids], (1, N))     # we have multiple values in case of sampling
        if ys is None:
            Lp, Ymu, Ys2 = likfunc.evaluate(None, Fmu[:], Fs2[:], None, None, 3)
        else:
            Lp, Ymu, Ys2 = likfunc.evaluate(np.tile(ys[ids], (1, N)), Fmu[:], Fs2[:], None, None, 3)
        lp[ids] = np.reshape(np.reshape(Lp, (np.prod(Lp.shape), N)).sum(axis=1) / N, (len(ids), 1))     # log probability; sample averaging
        ymu[ids] = np.reshape(np.reshape(Ymu, (np.prod(Ymu.shape), N)).sum(axis=1) / N, (len(ids), 1))  # predictive mean ys|y and ...
        ys2[ids] = np.reshape(np.reshape(Ys2, (np.prod(Ys2.shape), N)).sum(axis=1) / N, (len(ids), 1))  # ... variance
        nact = ids[-1] + 1                  # set counter to index of next test point

    self.ym = ymu
    self.ys2 = ys2
    self.lp = lp
    self.fm = fmu
    self.fs2 = fs2
    if ys is None:
        return ymu, ys2, fmu, fs2, None
    else:
        return ymu, ys2, fmu, fs2, lp
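
# Minimal usage sketch (kept as a comment so the module still imports
# cleanly). It assumes this method lives on a pyGPs-style GP model; the
# names `model`, `x`, `y`, and `xs` below are hypothetical placeholders,
# and `getPosterior` is the library's fitting call that populates
# `model.posterior`:
#
#   import numpy as np
#   import pyGPs
#
#   model = pyGPs.GPR()                          # GP regression model
#   x = np.random.randn(20, 1)                   # training inputs
#   y = np.sin(x) + 0.1 * np.random.randn(20, 1) # noisy training targets
#   model.getPosterior(x, y)                     # fit once; sets model.posterior
#
#   xs = np.linspace(-3., 3., 101).reshape(-1, 1)  # test inputs
#   ym, ys2, fm, fs2, lp = model.predict_with_posterior(model.posterior, xs)
#
# Reusing the stored posterior this way skips refitting when predicting
# repeatedly at new test locations with unchanged training data.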