def gp(hyp, inffunc, meanfunc, covfunc, likfunc, x, y, xs=None, ys=None, der=None):
    """Gaussian Process inference and prediction.

    Flexible framework for Bayesian inference and prediction with Gaussian
    processes for scalar targets, i.e. both regression and binary
    classification.  The prior is a Gaussian process defined through its mean
    and covariance functions; the likelihood function is specified separately.
    Both the prior and the likelihood may carry hyperparameters.

    Two modes:

    * training (``xs is None``): the negative log marginal likelihood ``nlZ``
      and, when ``der`` is truthy, its partial derivatives ``dnlZ`` w.r.t. the
      hyperparameters are computed.
    * prediction (``xs`` given): only the (approximate) posterior is computed.

    Parameters
    ----------
    hyp      : object with hyperparameter fields ``mean``, ``cov`` and ``lik``
    inffunc  : inference-method spec, e.g. ``['inf.infExact']`` (default)
    meanfunc : prior mean function spec (default ``['means.meanZero']``)
    covfunc  : prior covariance function spec (required, no default)
    likfunc  : likelihood function spec (default ``['lik.likGauss']``)
    x        : n by D matrix of training inputs
    y        : column vector of length n of training targets
    xs       : ns by D matrix of test inputs, or None for training mode
    ys       : column vector of test targets (unused by this variant)
    der      : flag requesting dnlZ computation in training mode

    Raises
    ------
    Exception : on empty ``covfunc``, on hyperparameter-count mismatches, on
        classification labels outside {+1, -1}, or when the inference method
        itself fails.

    NOTE(review): this variant computes ``post``/``nlZ``/``dnlZ`` but never
    returns them (it implicitly returns None) -- it looks like a truncated
    copy of the full implementation that appears later in this file; that
    behavior is preserved here.

    Ported from the GPML toolbox; Copyright (c) by Carl Edward Rasmussen and
    Hannes Nickisch, 2011-02-18.
    """
    # Fall back to the GPML toolbox defaults where a component is omitted.
    if not inffunc:
        inffunc = ['inf.infExact']        # set default inference method
    if not meanfunc:
        meanfunc = ['means.meanZero']     # set default mean
    if not covfunc:
        raise Exception('Covariance function cannot be empty')  # no default covariance
    if covfunc[0] == 'kernels.covFITC':
        inffunc = ['inf.infFITC']         # only one possible inference alg for covFITC
    if not likfunc:
        likfunc = ['lik.likGauss']        # set default likelihood

    D = np.shape(x)[1]                    # input dimensionality

    # Validate that the supplied hyperparameter vectors match what each
    # functional component expects; report expected vs. actual counts.
    if not checkParameters(meanfunc, hyp.mean, D):
        should, andis = numberOfHyper(meanfunc, hyp.mean, D)
        raise Exception('Number of mean function hyperparameters disagree with mean function'
                        + str(should) + " and " + str(andis))
    if not checkParameters(covfunc, hyp.cov, D):
        should, andis = numberOfHyper(covfunc, hyp.cov, D)
        raise Exception('Number of cov function hyperparameters disagree with cov function: '
                        + str(should) + " and " + str(andis))
    if not checkParameters(likfunc, hyp.lik, D):
        should, andis = numberOfHyper(likfunc, hyp.lik, D)
        raise Exception('Number of lik function hyperparameters disagree with lik function'
                        + str(should) + " and " + str(andis))

    try:  # call the inference method
        # Reject classification targets different from +1 and -1.
        # BUGFIX: the original compared the string likfunc[0] against a
        # one-element *list* (likfunc[0] == ['lik.likErf']), which is always
        # False, so this check never ran.
        if likfunc[0] == 'lik.likErf' or likfunc[0] == 'lik.likLogistic':
            uy = unique(y)
            ind = (uy != 1)
            if any(uy[ind] != -1):
                raise Exception('You attempt classification using labels different from {+1,-1}\n')
        # BUGFIX: identity test instead of '== None' -- equality against None
        # is evaluated elementwise for numpy arrays and breaks under 'not'.
        if xs is not None:
            # Prediction mode: the marginal likelihood is not needed.
            vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 1)
            post = vargout[0]
        elif not der:
            # Training mode: negative log marginal likelihood only.
            vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 2)
            post = vargout[0]
            nlZ = vargout[1]
        else:
            # Training mode: nlZ and its hyperparameter derivatives.
            vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 3)
            post = vargout[0]
            nlZ = vargout[1]
            dnlZ = vargout[2]
    except Exception as e:
        # BUGFIX: 'except Exception, e' is Python-2-only syntax; the 'as'
        # form works on Python 2.6+ and 3.x alike.
        raise Exception('Inference method failed ' + str(e) + '\n')
def gp(hyp, inffunc, meanfunc, covfunc, likfunc, x, y, xs=None, ys=None, der=None):
    """Gaussian Process inference and prediction.

    Flexible framework for Bayesian inference and prediction with Gaussian
    processes for scalar targets, i.e. both regression and binary
    classification.  The prior is a Gaussian process defined through its mean
    and covariance functions; the likelihood function is specified separately.
    Both the prior and the likelihood may carry hyperparameters.

    Two modes:

    * training (``xs is None``): returns ``[nlZ, post]``, or
      ``[nlZ, dnlZ, post]`` when ``der`` is truthy, where ``nlZ`` is the
      negative log marginal likelihood and ``dnlZ`` its partial derivatives
      w.r.t. the hyperparameters.
    * prediction (``xs`` given): returns
      ``[ymu, ys2, fmu, fs2, lp_or_None, post]`` -- predictive output
      means/variances, latent means/variances, log predictive probabilities
      (``None`` unless test targets ``ys`` are supplied), and the
      (approximate) posterior struct.

    Parameters
    ----------
    hyp      : object with hyperparameter fields ``mean``, ``cov`` and ``lik``
    inffunc  : inference-method spec, e.g. ``['inf.infExact']`` (default)
    meanfunc : prior mean function spec (default ``['means.meanZero']``)
    covfunc  : prior covariance function spec (required, no default)
    likfunc  : likelihood function spec (default ``['lik.likGauss']``)
    x        : n by D matrix of training inputs
    y        : column vector of length n of training targets
    xs       : ns by D matrix of test inputs, or None for training mode
    ys       : column vector of test targets, or None
    der      : flag requesting dnlZ computation in training mode

    This is a python implementation of gpml functionality (Copyright (c) by
    Carl Edward Rasmussen and Hannes Nickisch, 2011-02-18).
    Copyright (c) by Marion Neumann and Daniel Marthaler, 20/05/2013.
    """
    # Fall back to the GPML toolbox defaults where a component is omitted.
    if not inffunc:
        inffunc = ['inf.infExact']        # set default inference method
    if not meanfunc:
        meanfunc = ['means.meanZero']     # set default mean
    if not covfunc:
        raise Exception('Covariance function cannot be empty')  # no default covariance
    if covfunc[0] == 'kernels.covFITC':
        inffunc = ['inf.infFITC']         # only one possible inference alg for covFITC
    if not likfunc:
        likfunc = ['lik.likGauss']        # set default likelihood

    D = np.shape(x)[1]                    # input dimensionality

    # Validate the supplied hyperparameter vectors against each component.
    if not checkParameters(meanfunc, hyp.mean, D):
        raise Exception('Number of mean function hyperparameters disagree with mean function')
    if not checkParameters(covfunc, hyp.cov, D):
        raise Exception('Number of cov function hyperparameters disagree with cov function')
    if not checkParameters(likfunc, hyp.lik, D):
        raise Exception('Number of lik function hyperparameters disagree with lik function')

    # Reject classification targets different from +1 and -1.
    # BUGFIX: the original compared the string likfunc[0] against a
    # one-element *list*, which is always False, so this check never ran.
    if likfunc[0] == 'lik.likErf' or likfunc[0] == 'lik.likLogistic':
        uy = unique(y)
        ind = (uy != 1)
        if any(uy[ind] != -1):
            raise Exception('You attempt classification using labels different from {+1,-1}\n')

    # Call the inference method.  BUGFIX throughout: identity tests
    # ('is None' / 'is not None') replace '== None', which is evaluated
    # elementwise for numpy arrays.
    if xs is not None:
        # Prediction mode: marginal likelihood not needed.
        vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 1)
        post = vargout[0]
    elif not der:
        # Training mode: negative log marginal likelihood only.
        vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 2)
        post = vargout[0]
        nlZ = vargout[1]
    else:
        # Training mode: nlZ plus its hyperparameter derivatives.
        vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 3)
        post = vargout[0]
        nlZ = vargout[1]
        dnlZ = vargout[2]

    if xs is None:  # no test cases provided -> report training-mode outputs
        if not der:
            varargout = [nlZ, post]          # -log marg lik and posterior
        else:
            varargout = [nlZ, dnlZ, post]    # -log marg lik, derivatives, posterior
    else:
        alpha = post.alpha
        L = post.L
        sW = post.sW
        # Sparse-representation handling from the MATLAB original is omitted:
        #   if issparse(alpha): nz = alpha != 0; L = full(L(nz,nz)); sW = full(sW(nz))
        nz = list(range(len(alpha[:, 0])))   # non-sparse: use every training point
        # In case L is not provided, compute it from the covariance.
        # BUGFIX: 'L == []' has ambiguous truth for arrays; test emptiness
        # explicitly.  BUGFIX: np.eye() needs an integer size -- the original
        # passed the index sequence itself (TypeError).
        if L is None or np.size(L) == 0:
            K = Tools.general.feval(covfunc, hyp.cov, x[nz, :])
            L = np.linalg.cholesky((np.eye(len(nz)) + np.dot(sW, sW.T) * K).T)
        Ltril = np.all(np.tril(L, -1) == 0)  # is L an upper-triangular (Cholesky) factor?

        ns = xs.shape[0]        # number of test points
        nperbatch = 1000        # test points per minibatch (bounds memory use)
        nact = 0                # number of test points processed so far
        ymu = np.zeros((ns, 1))
        ys2 = np.zeros((ns, 1))
        fmu = np.zeros((ns, 1))
        fs2 = np.zeros((ns, 1))
        lp = np.zeros((ns, 1))
        # BUGFIX: the original looped 'while nact < ns - 1' and then set
        # 'nact = id[-1]', which (a) never processes anything when ns == 1
        # and (b) re-processes the last point of every batch.  Advancing past
        # the last processed index and looping while nact < ns fixes both.
        while nact < ns:  # process minibatches of test cases to save memory
            batch = list(range(nact, min(nact + nperbatch, ns)))  # points in this batch
            kss = Tools.general.feval(covfunc, hyp.cov, xs[batch, :], 'diag')  # self-variances
            Ks = Tools.general.feval(covfunc, hyp.cov, x[nz, :], xs[batch, :])  # cross-covariances
            ms = Tools.general.feval(meanfunc, hyp.mean, xs[batch, :])
            N = alpha.shape[1]  # number of alphas (usually 1; more when sampling)
            Fmu = np.tile(ms, (1, N)) + np.dot(Ks.T, alpha[nz])  # conditional mean fs|f
            fmu[batch] = np.reshape(Fmu.sum(axis=1) / N, (len(batch), 1))  # predictive means
            if Ltril:
                # L is triangular => use Cholesky parameters (alpha, sW, L).
                V = np.linalg.solve(L.T, np.tile(sW, (1, len(batch))) * Ks)
                fs2[batch] = kss - np.array([(V * V).sum(axis=0)]).T  # predictive variances
            else:
                # L is not triangular => alternative parametrization.
                fs2[batch] = kss + np.array([(Ks * np.dot(L, Ks)).sum(axis=0)]).T
            fs2[batch] = np.maximum(fs2[batch], 0)  # clamp numerical noise (negative variances)
            Fs2 = np.tile(fs2[batch], (1, N))       # replicate for each sampled alpha
            if ys is None:
                [Lp, Ymu, Ys2] = Tools.general.feval(likfunc, hyp.lik, None,
                                                     Fmu[:], Fs2[:], None, None, 3)
            else:
                [Lp, Ymu, Ys2] = Tools.general.feval(likfunc, hyp.lik,
                                                     np.tile(ys[batch], (1, N)),
                                                     Fmu[:], Fs2[:], None, None, 3)
            # Sample-average over the N alpha columns.
            lp[batch] = np.reshape(np.reshape(Lp, (np.prod(Lp.shape), N)).sum(axis=1) / N,
                                   (len(batch), 1))   # log predictive probabilities
            ymu[batch] = np.reshape(np.reshape(Ymu, (np.prod(Ymu.shape), N)).sum(axis=1) / N,
                                    (len(batch), 1))  # predictive output means
            ys2[batch] = np.reshape(np.reshape(Ys2, (np.prod(Ys2.shape), N)).sum(axis=1) / N,
                                    (len(batch), 1))  # predictive output variances
            nact = batch[-1] + 1  # advance past the last processed test point

        if ys is None:
            varargout = [ymu, ys2, fmu, fs2, None, post]  # no targets -> no log probs
        else:
            varargout = [ymu, ys2, fmu, fs2, lp, post]
    return varargout
def gp(hyp, inffunc, meanfunc, covfunc, likfunc, x, y, xs=None, ys=None, der=None):
    """Gaussian Process inference and prediction.

    Flexible framework for Bayesian inference and prediction with Gaussian
    processes for scalar targets, i.e. both regression and binary
    classification.  The prior is a Gaussian process defined through its mean
    and covariance functions; the likelihood function is specified separately.
    Both the prior and the likelihood may carry hyperparameters.

    Two modes:

    * training (``xs is None``): returns ``[nlZ, post]``, or
      ``[nlZ, dnlZ, post]`` when ``der`` is truthy, where ``nlZ`` is the
      negative log marginal likelihood and ``dnlZ`` its partial derivatives
      w.r.t. the hyperparameters.
    * prediction (``xs`` given): returns
      ``[ymu, ys2, fmu, fs2, lp_or_None, post]`` -- predictive output
      means/variances, latent means/variances, log predictive probabilities
      (``None`` unless test targets ``ys`` are supplied), and the
      (approximate) posterior struct.

    Parameters
    ----------
    hyp      : object with hyperparameter fields ``mean``, ``cov`` and ``lik``
    inffunc  : inference-method spec, e.g. ``['inf.infExact']`` (default)
    meanfunc : prior mean function spec (default ``['means.meanZero']``)
    covfunc  : prior covariance function spec (required, no default)
    likfunc  : likelihood function spec (default ``['lik.likGauss']``)
    x        : n by D matrix of training inputs
    y        : column vector of length n of training targets
    xs       : ns by D matrix of test inputs, or None for training mode
    ys       : column vector of test targets, or None
    der      : flag requesting dnlZ computation in training mode

    Ported from the GPML toolbox; Copyright (c) by Carl Edward Rasmussen and
    Hannes Nickisch, 2011-02-18.
    """
    # Fall back to the GPML toolbox defaults where a component is omitted.
    if not inffunc:
        inffunc = ['inf.infExact']        # set default inference method
    if not meanfunc:
        meanfunc = ['means.meanZero']     # set default mean
    if not covfunc:
        raise Exception('Covariance function cannot be empty')  # no default covariance
    if covfunc[0] == 'kernels.covFITC':
        inffunc = ['inf.infFITC']         # only one possible inference alg for covFITC
    if not likfunc:
        likfunc = ['lik.likGauss']        # set default likelihood

    D = np.shape(x)[1]                    # input dimensionality

    # Validate the supplied hyperparameter vectors against each component.
    if not checkParameters(meanfunc, hyp.mean, D):
        raise Exception('Number of mean function hyperparameters disagree with mean function')
    if not checkParameters(covfunc, hyp.cov, D):
        raise Exception('Number of cov function hyperparameters disagree with cov function')
    if not checkParameters(likfunc, hyp.lik, D):
        raise Exception('Number of lik function hyperparameters disagree with lik function')

    try:  # call the inference method
        # Reject classification targets different from +1 and -1.
        # BUGFIX: the original compared the string likfunc[0] against a
        # one-element *list*, which is always False, so this check never ran.
        if likfunc[0] == 'lik.likErf' or likfunc[0] == 'lik.likLogistic':
            uy = unique(y)
            ind = (uy != 1)
            if any(uy[ind] != -1):
                raise Exception('You attempt classification using labels different from {+1,-1}\n')
        # BUGFIX: identity tests ('is None' / 'is not None') replace '== None',
        # which is evaluated elementwise for numpy arrays.
        if xs is not None:
            # Prediction mode: marginal likelihood not needed.
            vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 1)
            post = vargout[0]
        elif not der:
            # Training mode: negative log marginal likelihood only.
            vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 2)
            post = vargout[0]
            nlZ = vargout[1]
        else:
            # Training mode: nlZ plus its hyperparameter derivatives.
            vargout = Tools.general.feval(inffunc, hyp, meanfunc, covfunc, likfunc, x, y, 3)
            post = vargout[0]
            nlZ = vargout[1]
            dnlZ = vargout[2]
    except Exception as e:
        # BUGFIX: the original used a bare 'except:', which also traps
        # KeyboardInterrupt/SystemExit and discarded the underlying error;
        # catch Exception and include the cause in the message (matching the
        # sibling implementation in this file).
        raise Exception('Inference method failed ' + str(e) + '\n')

    if xs is None:  # no test cases provided -> report training-mode outputs
        if not der:
            varargout = [nlZ, post]          # -log marg lik and posterior
        else:
            varargout = [nlZ, dnlZ, post]    # -log marg lik, derivatives, posterior
    else:
        alpha = post.alpha
        L = post.L
        sW = post.sW
        # Sparse-representation handling from the MATLAB original is omitted:
        #   if issparse(alpha): nz = alpha != 0; L = full(L(nz,nz)); sW = full(sW(nz))
        nz = list(range(len(alpha[:, 0])))   # non-sparse: use every training point
        # In case L is not provided, compute it from the covariance.
        # BUGFIX: 'L == []' has ambiguous truth for arrays; test emptiness
        # explicitly.  BUGFIX: np.eye() needs an integer size -- the original
        # passed the index sequence itself (TypeError).
        if L is None or np.size(L) == 0:
            K = Tools.general.feval(covfunc, hyp.cov, x[nz, :])
            L = np.linalg.cholesky((np.eye(len(nz)) + np.dot(sW, sW.T) * K).T)
        Ltril = np.all(np.tril(L, -1) == 0)  # is L an upper-triangular (Cholesky) factor?

        ns = xs.shape[0]        # number of test points
        nperbatch = 1000        # test points per minibatch (bounds memory use)
        nact = 0                # number of test points processed so far
        ymu = np.zeros((ns, 1))
        ys2 = np.zeros((ns, 1))
        fmu = np.zeros((ns, 1))
        fs2 = np.zeros((ns, 1))
        lp = np.zeros((ns, 1))
        # BUGFIX: the original looped 'while nact < ns - 1' and then set
        # 'nact = id[-1]', which (a) never processes anything when ns == 1
        # and (b) re-processes the last point of every batch.  Advancing past
        # the last processed index and looping while nact < ns fixes both.
        while nact < ns:  # process minibatches of test cases to save memory
            batch = list(range(nact, min(nact + nperbatch, ns)))  # points in this batch
            kss = Tools.general.feval(covfunc, hyp.cov, xs[batch, :], 'diag')  # self-variances
            Ks = Tools.general.feval(covfunc, hyp.cov, x[nz, :], xs[batch, :])  # cross-covariances
            ms = Tools.general.feval(meanfunc, hyp.mean, xs[batch, :])
            N = alpha.shape[1]  # number of alphas (usually 1; more when sampling)
            Fmu = np.tile(ms, (1, N)) + np.dot(Ks.T, alpha[nz])  # conditional mean fs|f
            fmu[batch] = np.reshape(Fmu.sum(axis=1) / N, (len(batch), 1))  # predictive means
            if Ltril:
                # L is triangular => use Cholesky parameters (alpha, sW, L).
                V = np.linalg.solve(L.T, np.tile(sW, (1, len(batch))) * Ks)
                fs2[batch] = kss - np.array([(V * V).sum(axis=0)]).T  # predictive variances
            else:
                # L is not triangular => alternative parametrization.
                fs2[batch] = kss + np.array([(Ks * np.dot(L, Ks)).sum(axis=0)]).T
            fs2[batch] = np.maximum(fs2[batch], 0)  # clamp numerical noise (negative variances)
            Fs2 = np.tile(fs2[batch], (1, N))       # replicate for each sampled alpha
            if ys is None:
                [Lp, Ymu, Ys2] = Tools.general.feval(likfunc, hyp.lik, None,
                                                     Fmu[:], Fs2[:], None, None, 3)
            else:
                [Lp, Ymu, Ys2] = Tools.general.feval(likfunc, hyp.lik,
                                                     np.tile(ys[batch], (1, N)),
                                                     Fmu[:], Fs2[:], None, None, 3)
            # Sample-average over the N alpha columns.
            lp[batch] = np.reshape(np.reshape(Lp, (np.prod(Lp.shape), N)).sum(axis=1) / N,
                                   (len(batch), 1))   # log predictive probabilities
            ymu[batch] = np.reshape(np.reshape(Ymu, (np.prod(Ymu.shape), N)).sum(axis=1) / N,
                                    (len(batch), 1))  # predictive output means
            ys2[batch] = np.reshape(np.reshape(Ys2, (np.prod(Ys2.shape), N)).sum(axis=1) / N,
                                    (len(batch), 1))  # predictive output variances
            nact = batch[-1] + 1  # advance past the last processed test point

        if ys is None:
            varargout = [ymu, ys2, fmu, fs2, None, post]  # no targets -> no log probs
        else:
            varargout = [ymu, ys2, fmu, fs2, lp, post]
    return varargout