def gp_pred(logtheta, covfunc, X, y, Xstar, R=None, w=None, Rstar=None):
    # compute training set covariance matrix (K) and
    # (marginal) test predictions (Kss = self-cov; Kstar = cross-cov)
    if R is None:
        K = feval(covfunc, logtheta, X)                                # training covariances
        [Kss, Kstar] = feval(covfunc, logtheta, X, Xstar)              # test covariances (Kss = self covariances, Kstar = cov between train and test cases)
    else:
        K = feval(covfunc, logtheta, X, R, w)                          # training covariances
        [Kss, Kstar] = feval(covfunc, logtheta, X, R, w, Xstar, Rstar) # test covariances
    n = X.shape[0]
    K = K + identity(n) * sn2                       # add observation noise for numerical stability
    try:
        L = linalg.cholesky(K, lower=True)          # lower triangular matrix
    except linalg.LinAlgError:
        L = linalg.cholesky(nearPD(K), lower=True)  # fall back to the nearest positive-definite matrix
    alpha = solve_chol(L.transpose(), y)            # compute inv(K)*y
    out1 = dot(Kstar.transpose(), alpha)            # predicted means
    v = linalg.solve(L, Kstar)
    tmp = v * v
    out2 = Kss - array([tmp.sum(axis=0)]).transpose()  # predicted variances
    return [out1, out2]
def gp_pred(logtheta, covfunc, X, y, Xstar, R=None, w=None, Rstar=None):
    if R is None:
        print ' gp_pred()'
    #else:
    #    print ' xgp_pred()'
    # compute training set covariance matrix (K) and
    # (marginal) test predictions (Kss = self-cov; Kstar = cross-cov)
    if R is None:
        K = feval(covfunc, logtheta, X)                                # training covariances
        [Kss, Kstar] = feval(covfunc, logtheta, X, Xstar)              # test covariances (Kss = self covariances, Kstar = cov between train and test cases)
    else:
        K = feval(covfunc, logtheta, X, R, w)                          # training covariances
        [Kss, Kstar] = feval(covfunc, logtheta, X, R, w, Xstar, Rstar) # test covariances
    L = linalg.cholesky(K)                     # cholesky factorization of cov (lower triangular matrix)
    alpha = solve_chol(L.transpose(), y)       # compute inv(K)*y
    out1 = dot(Kstar.transpose(), alpha)       # predicted means
    v = linalg.solve(L, Kstar)
    tmp = v * v
    out2 = Kss - array([tmp.sum(axis=0)]).transpose()  # predicted variances
    return [out1, out2]
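# Minimal usage sketch for gp_pred() above. It assumes the kernel setup and the
# X, y, Xstar arrays from the demo script further below; the hyperparameter
# values are illustrative, not taken from the original code.
#covfunc  = ['kernels.covSum', ['kernels.covSEiso', 'kernels.covNoise']]
#logtheta = array([log(1), log(1), log(sqrt(0.01))])
#[mu, s2] = gp_pred(logtheta, covfunc, X, y, Xstar)   # predictive mean and variance per test input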
def dnlml(loghyper, covfunc, X, y, R=None, w=None):
    '''Gradient of the negative log marginal likelihood w.r.t. the hyperparameters.'''
    out = zeros(loghyper.shape)
    W = get_W(loghyper, covfunc, X, y, R, w)
    if R is None:
        for i in range(size(out)):
            out[i] = (W * feval(covfunc, loghyper, X, i)).sum() / 2
    else:
        for i in range(size(out)):
            out[i] = (W * feval(covfunc, loghyper, X, R, w, i)).sum() / 2
    return out
def get_W(loghyper, covfunc, X, y, R=None, w=None):
    '''Precompute W for convenience.'''
    n = X.shape[0]
    # compute training set covariance matrix
    if R is None:
        K = feval(covfunc, loghyper, X)
    else:
        K = feval(covfunc, loghyper, X, R, w)
    # cholesky factorization of the covariance
    L = linalg.cholesky(K)                  # lower triangular matrix
    alpha = solve_chol(L.transpose(), y)
    W = linalg.solve(L.transpose(), linalg.solve(L, eye(n))) - dot(alpha, alpha.transpose())
    return W
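# Note on get_W(): dnlml() above uses W to assemble the gradient of the negative
# log marginal likelihood via the standard identity
#   d(-log Z)/dtheta_i = 0.5 * trace((inv(K) - alpha*alpha') * dK/dtheta_i),
# with alpha = inv(K)*y. The elementwise product with the kernel derivative
# followed by .sum() in dnlml() is exactly that trace.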
def infExact(gp, X, y, Xstar, R=None, w=None, Rstar=None):
    '''Exact inference for a GP with Gaussian likelihood.

    Computes a parametrization of the posterior and returns the predictive
    means and variances at the test inputs Xstar.'''
    if R is None:
        K = feval(gp['covfunc'], gp['covtheta'], X)                             # training covariances
        Kss = feval(gp['covfunc'], gp['covtheta'], Xstar, 'diag')               # test self-covariances
        Kstar = feval(gp['covfunc'], gp['covtheta'], X, Xstar)                  # cov between training and test cases
    else:
        K = feval(gp['covfunc'], gp['covtheta'], X, R, w)                       # training covariances
        Kss = feval(gp['covfunc'], gp['covtheta'], Xstar, R, w, 'diag', Rstar)  # test self-covariances
        Kstar = feval(gp['covfunc'], gp['covtheta'], X, R, w, Xstar, Rstar)     # cov between training and test cases
    ms = feval(gp['meanfunc'], gp['meantheta'], Xstar)
    mean_y = feval(gp['meanfunc'], gp['meantheta'], X)
    K += sn2 * np.eye(X.shape[0])                   # hard-coded observation noise
    try:
        L = np.linalg.cholesky(K)                   # cholesky factorization of cov (lower triangular matrix)
    except np.linalg.linalg.LinAlgError:
        L = np.linalg.cholesky(nearPD(K))           # find the nearest positive-definite matrix to K and factor that
    alpha = solve_chol(L.T, y - mean_y)             # compute inv(K)*(y - mean(y))
    fmu = ms + np.dot(Kstar.T, alpha)               # predicted means
    v = np.linalg.solve(L, Kstar)
    tmp = v * v
    fs2 = Kss - np.array([tmp.sum(axis=0)]).T       # predicted variances
    fs2[fs2 < 0.] = 0.                              # remove numerical noise, i.e. negative variances
    return [fmu, fs2]
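# Hedged usage sketch for infExact(). The gp dict layout mirrors the key
# accesses above; the concrete mean/covariance entries and their hyperparameters
# are illustrative placeholders, since the mean functions are not shown in this
# listing.
#gp = {'meanfunc' : ...,                      # a mean function evaluated via feval()
#      'meantheta': [],                       # its hyperparameters
#      'covfunc'  : ['kernels.covSEiso'],     # illustrative covariance choice
#      'covtheta' : [log(1.), log(1.)]}       # log length-scale, log signal std (illustrative)
#[fmu, fs2] = infExact(gp, X, y, Xstar)       # predictive mean and variance at Xstar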
def nlml(loghyper, covfunc, X, y, R=None, w=None):
    '''Negative log marginal likelihood of the training data.'''
    n = X.shape[0]
    # compute training set covariance matrix
    if R is None:
        K = feval(covfunc, loghyper, X)
    else:
        K = feval(covfunc, loghyper, X, R, w)
    # cholesky factorization of the covariance
    L = linalg.cholesky(K)                  # lower triangular matrix
    # compute inv(K)*y
    alpha = solve_chol(L.transpose(), y)
    # compute the negative log marginal likelihood
    return (0.5 * dot(y.transpose(), alpha) + (log(diag(L))).sum(axis=0) + 0.5 * n * log(2 * pi))[0][0]
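# Hedged training sketch: minimize the negative log marginal likelihood with
# nlml()/dnlml() via conjugate gradients. The scipy import, the starting point
# (-1 for every hyperparameter, as in the commented demo code below) and the
# iteration count are assumptions, not part of the original code.
#from scipy.optimize import fmin_cg
#loghyper0    = -1. * ones(3)                 # one entry per kernel hyperparameter
#logtheta_opt = fmin_cg(nlml, loghyper0, fprime=dnlml,
#                       args=(covfunc, X, y), maxiter=100, disp=False)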
def gp_pred_precompute_alpha(logtheta, covfunc, X, y):
    '''Precompute alpha = inv(K)*y so repeated predictions only need the cross-covariances.'''
    # compute training set covariance matrix (K)
    K = feval(covfunc, logtheta, X)             # training covariances
    L = nlg.cholesky(K)                         # cholesky factorization of cov (lower triangular matrix)
    alpha = gpr.solve_chol(L.transpose(), y)    # compute inv(K)*y
    return alpha
def nlml(loghyper, covfunc, X, y, R=None, w=None):
    # compute training set covariance matrix
    if R is None:
        K = feval(covfunc, loghyper, X)
    else:
        K = feval(covfunc, loghyper, X, R, w)
    n = X.shape[0]
    K = K + identity(n) * sn2                       # add observation noise for numerical stability
    try:
        L = linalg.cholesky(K, lower=True)          # lower triangular matrix
    except linalg.LinAlgError:
        L = linalg.cholesky(nearPD(K), lower=True)  # fall back to the nearest positive-definite matrix
    # compute inv(K)*y
    alpha = solve_chol(L.transpose(), y)
    return (0.5 * dot(y.transpose(), alpha) + (log(diag(L))).sum(axis=0) + 0.5 * n * log(2 * pi))[0][0]
def get_W(loghyper, covfunc, X, y, R=None, w=None):
    '''Precompute W for convenience.'''
    # compute training set covariance matrix
    if R is None:
        K = feval(covfunc, loghyper, X)
    else:
        K = feval(covfunc, loghyper, X, R, w)
    n = X.shape[0]
    K = K + identity(n) * sn2                       # add observation noise for numerical stability
    try:
        L = linalg.cholesky(K, lower=True)          # lower triangular matrix
    except linalg.LinAlgError:
        L = linalg.cholesky(nearPD(K), lower=True)  # fall back to the nearest positive-definite matrix
    alpha = solve_chol(L.transpose(), y)
    W = linalg.solve(L.transpose(), linalg.solve(L, eye(n))) - dot(alpha, alpha.transpose())
    return W
def get_W(theta, gp, X, y, R=None, w=None):
    '''Precompute W for convenience.'''
    n = X.shape[0]
    mt = len(gp['meantheta'])
    meantheta = theta[:mt]
    covtheta = theta[mt:]
    # compute training set covariance matrix
    if R is None:
        K = feval(gp['covfunc'], covtheta, X)
    else:
        K = feval(gp['covfunc'], covtheta, X, R, w)
    K += sn2 * np.eye(X.shape[0])
    m = feval(gp['meanfunc'], meantheta, X)
    # cholesky factorization of the covariance
    try:
        L = np.linalg.cholesky(K)               # lower triangular matrix
    except np.linalg.linalg.LinAlgError:
        L = np.linalg.cholesky(nearPD(K))       # find the nearest positive-definite matrix to K and factor that
    alpha = solve_chol(L.T, y - m)
    W = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(n))) - np.dot(alpha, alpha.T)
    return W
## GENERATE data from a noisy GP
l = 20    # number of labeled/training data
u = 201   # number of unlabeled/test data
X = array(15 * (random.random((l, 1)) - 0.5))

## DEFINE parameterized covariance function
covfunc = ['kernels.covSum', ['kernels.covSEiso', 'kernels.covNoise']]

## SET (hyper)parameters
#logtheta = array([log(0.3), log(1.08), log(5e-5)])
#logtheta = array([log(3), log(1.16), log(0.89)])
logtheta = array([log(1), log(1), log(sqrt(0.01))])
print 'hyperparameters: ', exp(logtheta)

### GENERATE sample observations from the GP
y = dot(linalg.cholesky(general.feval(covfunc, logtheta, X)).transpose(),
        random.standard_normal((l, 1)))

### TEST POINTS
Xstar = array([linspace(-7.5, 7.5, u)]).transpose()   # u test points evenly distributed in the interval [-7.5, 7.5]

#_________________________________
# STANDARD GP:
# ***UNCOMMENT THE FOLLOWING LINES TO DO TRAINING OF HYPERPARAMETERS***
### TRAINING GP
#print 'GP: ...training'
### INITIALIZE (hyper)parameters by -1
#d = X.shape[1]
#init = -1*ones((d,1))
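# Hedged continuation of the demo above: predict at the test points with the
# sampled hyperparameters (no training step), assuming gp_pred() from this
# listing is in scope.
#[mu, s2] = gp_pred(logtheta, covfunc, X, y, Xstar)   # predictive mean and variance on Xstar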
d = diag(K_R[l:l+u, l:l+u]).transpose()   # self-covariances for unlabeled data (needed in kernels.covMatrix)
B = vstack((B, d))

## SET COVARIANCE FUNCTION
covfunc = ['kernels.covSumMat', ['kernels.covSEiso', 'kernels.covNoise', 'kernels.covMatrix']]   # covMatrix -> no hyperparameters to optimize!!

## SET (hyper)parameters, e.g.:
#logtheta = array([log(0.3), log(1.08), log(5e-5)])
#logtheta = array([log(3), log(1.16), log(0.89)])
logtheta = array([log(1), log(1), log(sqrt(0.01))])

w_used = round(random.random(), 1)
print 'generated mixture weight: ', w_used

### GENERATE sample observations from the XGP
y = dot(linalg.cholesky(general.feval(covfunc, logtheta, X, A, w_used)).transpose(),
        random.standard_normal((l, 1)))

### TEST POINTS
Xstar = array([linspace(-7.5, 7.5, u)]).transpose()   # u test points evenly distributed in the interval [-7.5, 7.5]

#_________________________________
## Relational GP (XGP)
# ***UNCOMMENT THE FOLLOWING LINES TO DO TRAINING OF HYPERPARAMETERS AND MIXTURE WEIGHT***
### TRAINING XGP (learn hyperparameters of GP and tune mixture weight)
## INITIALIZE (hyper)parameters by -1
#d = X.shape[1]
#init = -1*ones((d,1))
#loghyper = array([[-1], [-1]])
#loghyper = vstack((init, loghyper))[:,0]
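# Hedged continuation of the relational (XGP) demo: predictions pass the
# relational information and the mixture weight through the R/w/Rstar arguments
# of gp_pred(). Mapping A to R and B to Rstar follows how the script above
# builds them, but is an assumption.
#[mu, s2] = gp_pred(logtheta, covfunc, X, y, Xstar, R=A, w=w_used, Rstar=B)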
def __gpr_pred_fast(self, X, Xstar, covfunc, alpha, logtheta):
    '''Fast prediction of test means from a precomputed alpha = inv(K)*y.'''
    [Kss, Kstar] = feval(covfunc, logtheta, X, Xstar)
    return np.dot(Kstar.transpose(), alpha)
def gp_pred_fast(logtheta, covfunc, X, alpha, Xstar):
    # (marginal) test predictions (Kss = self-cov; Kstar = cross-cov)
    [Kss, Kstar] = feval(covfunc, logtheta, X, Xstar)   # test covariances (Kss = self covariances, Kstar = cov between train and test cases)
    return np.dot(Kstar.transpose(), alpha)             # predicted means
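# Hedged usage sketch: precompute alpha = inv(K)*y once with
# gp_pred_precompute_alpha() above, then reuse it for repeated mean-only
# predictions at different test sets; covfunc/logtheta as in the demos.
#alpha = gp_pred_precompute_alpha(logtheta, covfunc, X, y)
#mu    = gp_pred_fast(logtheta, covfunc, X, alpha, Xstar)   # predicted means only, no variances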
def infFITC(gp, X, y, Xstar, R=None, w=None, Rstar=None):
    # FITC approximation to the posterior Gaussian process. The function is
    # equivalent to infExact with the covariance function
    #   Kt = Q + G;  G = diag(diag(K-Q));  Q = Ku'*inv(Kuu + snu2*eye(nu))*Ku
    # where Ku and Kuu are covariances w.r.t. the inducing inputs xu and
    # snu2 = sn2/1e6 is the noise of the inducing inputs (their standard
    # deviation is fixed to one per mil of the measurement noise standard
    # deviation).
    # The implementation exploits the Woodbury matrix identity
    #   inv(Kt) = inv(G) - inv(G)*Ku'*inv(Kuu + Ku*inv(G)*Ku')*Ku*inv(G)
    # in order to be applicable to large datasets. The computational complexity
    # is O(n nu^2), where n is the number of data points x and nu the number of
    # inducing inputs in xu.
    cov = gp['covfunc']
    assert isinstance(cov[-1], np.ndarray)               # last element must hold the inducing inputs
    xu = cov[-1]
    covfunc = cov[:-1]
    m = feval(gp['meanfunc'], gp['meantheta'], X)        # mean vector at the training inputs
    ms = feval(gp['meanfunc'], gp['meantheta'], Xstar)   # mean vector at the test inputs
    n, D = X.shape
    nu = len(xu)
    [diagK, Kuu, Ku] = feval(covfunc, xu, gp['covtheta'], X)   # evaluate covariance matrix
    snu2 = 1.e-6 * sn2
    Kuu += snu2 * np.eye(nu)
    try:
        Luu = np.linalg.cholesky(Kuu)                    # Kuu = Luu'*Luu
    except np.linalg.linalg.LinAlgError:
        Luu = np.linalg.cholesky(nearPD(Kuu))            # Kuu = Luu'*Luu, or at least the nearest SPD Kuu
    V = np.linalg.solve(Luu.T, Ku)                       # V = inv(Luu')*Ku  =>  V'*V = Q
    M = np.reshape(np.diag(np.dot(V.T, V)), (diagK.shape[0], 1))
    g_sn2 = diagK + sn2 - M                              # g = diag(K) + sn2 - diag(Q)
    diagG = np.reshape(np.diag(g_sn2), (g_sn2.shape[0], 1))
    V = V / np.tile(diagG.T, (nu, 1))
    Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V, V.T)) # Lu'*Lu = I + V*diag(1/g_sn2)*V'
    r = (y - m) / np.sqrt(diagG)
    be = np.linalg.solve(Lu.T, np.dot(V, r))
    iKuu = solve_chol(Luu, np.eye(nu))                   # inv(Kuu + snu2*I) = iKuu
    LuBe = np.linalg.solve(Lu, be)
    alpha = np.linalg.solve(Luu, LuBe)                   # posterior parameters
    L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu   # Sigma - inv(Kuu)
    if R is None:
        Kss = feval(covfunc[1], gp['covtheta'], Xstar, 'diag')
        Kus = feval(covfunc[1], gp['covtheta'], xu, Xstar)
        Kuf = feval(covfunc[1], gp['covtheta'], xu, X)
    else:
        Kss = feval(covfunc[1], gp['covtheta'], Xstar, R, w, 'diag', Rstar)  # test self-covariances
        Kus = feval(covfunc[1], gp['covtheta'], Xstar, R, w, xu, Rstar)      # test/inducing covariances
    fmu = ms + np.dot(Kus.T, alpha)                      # predicted means
    vv = np.linalg.solve(L.T, Kuf)
    tmp = vv * vv
    fs2 = Kss - np.array([tmp.sum(axis=0)]).T            # predicted variances
    fs2[fs2 < 0.] = 0.                                   # remove numerical noise, i.e. negative variances
    return [fmu, fs2]
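# Note on infFITC(): per the assertion above, gp['covfunc'] must carry the
# inducing inputs xu as its last element (a numpy array), and per nlml()/dnlml()
# below the wrapping kernel is 'kernels.covFITC'. The FITC feval call then
# returns [diagK, Kuu, Ku] (training-set self-covariances, inducing covariances,
# and inducing/training cross-covariances), which is all that the O(n*nu^2)
# computation above needs; the full n x n training covariance is never formed.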
def dnlml(theta, gp, X, y, R=None, w=None):
    '''Gradient of the negative log marginal likelihood w.r.t. mean and covariance hyperparameters.'''
    mt = len(gp['meantheta'])
    ct = len(gp['covtheta'])
    out = np.zeros(mt + ct)
    meantheta = theta[:mt]
    covtheta = theta[mt:]
    if gp['covfunc'][0][0] == 'kernels.covFITC':
        # FITC approximation
        cov = gp['covfunc']
        xu = cov[-1]
        covfunc = cov[1:-1][0]
        nu = len(xu)
        [diagK, Kuu, Ku] = feval(cov[:-1], xu, covtheta, X)     # evaluate covariance matrix
        snu2 = 1.e-6 * sn2
        Kuu += snu2 * np.eye(nu)
        m = feval(gp['meanfunc'], meantheta, X)                 # evaluate mean vector
        n, D = X.shape
        nu = Ku.shape[0]
        try:
            Luu = np.linalg.cholesky(Kuu)                       # Kuu = Luu'*Luu
        except np.linalg.linalg.LinAlgError:
            Luu = np.linalg.cholesky(nearPD(Kuu))               # Kuu = Luu'*Luu, or at least the nearest SPD Kuu
        V = np.linalg.solve(Luu.T, Ku)                          # V = inv(Luu')*Ku  =>  V'*V = Q
        g_sn2 = diagK - (V * V).sum(axis=0).T                   # g = diag(K) - diag(Q)
        diagG = np.reshape(np.diag(g_sn2), (g_sn2.shape[0], 1))
        V = V / np.tile(diagG.T, (nu, 1))
        Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V, V.T))    # Lu'*Lu = I + V*diag(1/g_sn2)*V'
        r = (y - m) / np.sqrt(diagG)
        be = np.linalg.solve(Lu.T, np.dot(V, r))
        V = np.linalg.solve(Luu.T, Ku)                          # V = inv(Luu')*Ku  =>  V'*V = Q
        iKuu = solve_chol(Luu, np.eye(nu))                      # inv(Kuu + snu2*I) = iKuu
        LuBe = np.linalg.solve(Lu, be)
        alpha = np.linalg.solve(Luu, LuBe)                      # posterior parameters
        L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu      # Sigma - inv(Kuu)
        W = Ku / np.tile(diagG.T, (nu, 1))
        W = np.linalg.solve((Kuu + np.dot(W, W.T) + snu2 * np.eye(nu)).T, Ku)
        al = ((y - m) - np.dot(W.T, np.dot(W, (y - m) / diagG))) / diagG
        B = np.dot(iKuu, Ku)
        Wdg = W / np.tile(diagG.T, (nu, 1))
        w_al = np.dot(B, al)                                    # local value; renamed so the mixture weight argument w is not clobbered
        if R is None:
            for ii in range(len(meantheta)):
                out[ii] = -1. * np.dot(feval(gp['meanfunc'], meantheta, X, ii).T, al)
            kk = len(gp['meantheta'])
            for ii in range(len(covtheta)):
                [ddiagKi, dKuui, dKui] = feval(cov[:-1], covtheta, X, None, ii)   # evaluate covariance derivatives
                R = 2. * dKui - np.dot(dKuui, B)
                v = ddiagKi - (R * B).sum(axis=0).T                               # diagonal part of the covariance derivative
                out[ii + kk] = (np.dot(ddiagKi.T, (1. / g_sn2))
                                + np.dot(w_al.T, (np.dot(dKuui, w_al) - 2. * np.dot(dKui, al))
                                         - np.dot(al.T, (v * al)
                                                   - np.dot((Wdg * Wdg).sum(axis=0), v)
                                                   - np.dot(R, Wdg.T) * np.dot(B, Wdg.T)))) / 2.
        else:
            for ii in range(len(meantheta)):
                out[ii] = (W * feval(gp['meanfunc'], meantheta, X, R, w, ii)).sum() / 2.
            kk = len(meantheta)
            for ii in range(len(covtheta)):
                out[ii + kk] = (W * feval(covfunc, covtheta, X, R, w, ii)).sum() / 2.
    else:
        # Exact inference
        W = get_W(theta, gp, X, y, R, w)
        if R is None:
            for ii in range(len(meantheta)):
                out[ii] = (W * feval(gp['meanfunc'], meantheta, X, ii)).sum() / 2.
            kk = len(meantheta)
            for ii in range(len(covtheta)):
                out[ii + kk] = (W * feval(gp['covfunc'], covtheta, X, None, ii)).sum() / 2.
        else:
            for ii in range(len(meantheta)):
                out[ii] = (W * feval(gp['meanfunc'], meantheta, X, R, w, ii)).sum() / 2.
            kk = len(meantheta)
            for ii in range(len(covtheta)):
                out[ii + kk] = (W * feval(gp['covfunc'], covtheta, X, R, w, ii)).sum() / 2.
    return out
def nlml(theta, gp, X, y, R=None, w=None):
    '''Negative log marginal likelihood for exact inference or the FITC approximation.'''
    mt = len(gp['meantheta'])
    ct = len(gp['covtheta'])
    meantheta = theta[:mt]
    covtheta = theta[mt:]
    n, D = X.shape
    if gp['covfunc'][0][0] == 'kernels.covFITC':
        # FITC approximation
        cov = gp['covfunc']
        xu = cov[-1]
        covfunc = cov[:-1]
        [diagK, Kuu, Ku] = feval(covfunc, xu, covtheta, X)      # evaluate covariance matrix
        m = feval(gp['meanfunc'], meantheta, X)                 # evaluate mean vector
        nu = Ku.shape[0]
        snu2 = 1.e-6 * sn2
        Kuu += snu2 * np.eye(nu)
        try:
            Luu = np.linalg.cholesky(Kuu)                       # Kuu = Luu'*Luu
        except np.linalg.linalg.LinAlgError:
            Luu = np.linalg.cholesky(nearPD(Kuu))               # Kuu = Luu'*Luu, or at least the nearest SPD Kuu
        V = np.linalg.solve(Luu.T, Ku)                          # V = inv(Luu')*Ku  =>  V'*V = Q
        g_sn2 = diagK - (V * V).sum(axis=0).T                   # g = diag(K) - diag(Q)
        diagG = np.reshape(np.diag(g_sn2), (g_sn2.shape[0], 1))
        V = V / np.tile(diagG.T, (nu, 1))
        Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V, V.T))    # Lu'*Lu = I + V*diag(1/g_sn2)*V'
        r = (y - m) / np.sqrt(diagG)
        V = np.linalg.solve(Luu.T, Ku)                          # V = inv(Luu')*Ku  =>  V'*V = Q
        be = np.linalg.solve(Lu.T, np.dot(V, r))
        iKuu = solve_chol(Luu, np.eye(nu))                      # inv(Kuu + snu2*I) = iKuu
        LuBe = np.linalg.solve(Lu, be)
        alpha = np.linalg.solve(Luu, LuBe)                      # posterior parameters
        L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu      # Sigma - inv(Kuu)
        # compute the negative log marginal likelihood (FITC)
        aa = (np.log(np.diag(Lu)).sum() + np.log(g_sn2).sum()
              + n * np.log(2. * np.pi) + np.dot(r.T, r) - np.dot(be.T, be)) / 2.
    else:
        # Exact inference
        # compute training set covariance matrix
        if R is None:
            K = feval(gp['covfunc'], covtheta, X)
        else:
            K = feval(gp['covfunc'], covtheta, X, R, w)
        K += sn2 * np.eye(X.shape[0])
        m = feval(gp['meanfunc'], meantheta, X)
        # cholesky factorization of the covariance
        try:
            L = np.linalg.cholesky(K)                           # lower triangular matrix
        except np.linalg.linalg.LinAlgError:
            L = np.linalg.cholesky(nearPD(K))                   # find the nearest positive-definite matrix to K and factor that
        # compute inv(K)*(y - m)
        alpha = np.linalg.solve(K, y - m)
        # compute the negative log marginal likelihood
        aa = (0.5 * np.dot((y - m).T, alpha) + (np.log(np.diag(L))).sum(axis=0) + 0.5 * n * np.log(2. * np.pi))
    return aa[0]