class ChangepointRBF(Kern):
    """RBF kernel with a single change-point at ``xc`` (1-D inputs only).

    The covariance is evaluated as ``a(x) * a(x') * k_rbf(u(x), u(x'))`` where
    ``k_rbf`` is a unit-variance, unit-lengthscale RBF kernel, ``u`` is a
    piecewise-linear warping of the input and ``a`` is a piecewise-constant
    scaling; both switch regime at ``xc``.

    Parameters
    ----------
    input_dim : int
        Input dimension; must be 1.
    variance1, variance2 :
        Divisors applied to the input by ``u`` before / after ``xc``.
    lengthscale1, lengthscale2 :
        Values returned by ``a`` before / after ``xc``.
    xc :
        Location of the change-point.
    active_dims :
        Forwarded unchanged to the ``Kern`` constructor.

    NOTE(review): ``variance*`` act as input scalings and ``lengthscale*`` as
    output scalings, which looks swapped with respect to the usual naming --
    confirm against the paper this kernel is taken from.
    """

    def __init__(self, input_dim, variance1=1., variance2=1., lengthscale1=1.,
                 lengthscale2=1., xc=1, active_dims=None):
        super(ChangepointRBF, self).__init__(input_dim, active_dims, 'chngpt')
        assert input_dim == 1, "For this kernel we assume input_dim = 1"
        self.variance1 = Param('variance1', variance1)
        self.variance2 = Param('variance2', variance2)
        self.lengthscale1 = Param('lengthscale1', lengthscale1)
        self.lengthscale2 = Param('lengthscale2', lengthscale2)
        # base kernel evaluated on the warped inputs; its own hypers stay at 1
        self.rbf = RBF(input_dim=input_dim, lengthscale=1., variance=1.)
        self.xc = Param('xc', xc)
        self.add_parameters(self.variance1, self.variance2,
                            self.lengthscale1, self.lengthscale2, self.xc)

    def parameters_changed(self):
        """Nothing is cached, so there is nothing to refresh."""
        pass

    def K(self, X, X2=None):
        """Covariance matrix between ``X`` and ``X2`` (``X`` itself if None).

        Returns an array with one row per point of ``X`` and one column per
        point of ``X2``.
        """
        u1 = self.u(X)
        a1 = self.a(X)
        if X2 is None:
            u2, a2 = u1, a1
        else:
            u2 = self.u(X2)
            a2 = self.a(X2)
        # outer product a(x_i) * a(x'_j); a plain elementwise product of the two
        # column vectors would only scale rows (and fail for differing lengths)
        scale = a1.reshape(-1, 1) * a2.reshape(1, -1)
        return scale * self.rbf.K(X=u1, X2=u2)

    def Kdiag(self, X):
        """Diagonal of the covariance matrix ``K(X, X)`` as a 1-D array."""
        u = self.u(X)
        a = self.a(X)
        # diag entry is a(x)**2 * k_rbf(u(x), u(x)); flatten a so the product
        # with the 1-D Kdiag of the base kernel does not broadcast to a matrix
        return a.ravel() ** 2 * self.rbf.Kdiag(u)

    def u(self, X: np.ndarray):
        """u operation in the paper: piecewise-linear warping of the input.

        ``X / variance1`` below ``xc``; from ``xc`` onwards
        ``xc / variance1 + (X - xc) / variance2``. The result has the shape
        of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # strip the Param wrappers so the result is a plain float array
        xc = np.asarray(self.xc, dtype=float)
        s1 = np.asarray(self.variance1, dtype=float)
        s2 = np.asarray(self.variance2, dtype=float)
        return np.where(X < xc, X / s1, xc / s1 + (X - xc) / s2)

    def a(self, X: np.ndarray):
        """a operation in the paper: piecewise-constant scaling.

        ``lengthscale1`` below ``xc`` and ``lengthscale2`` from ``xc``
        onwards. The result has the shape of ``X``.
        """
        X = np.asarray(X, dtype=float)
        xc = np.asarray(self.xc, dtype=float)
        a1 = np.asarray(self.lengthscale1, dtype=float)
        a2 = np.asarray(self.lengthscale2, dtype=float)
        return np.where(X < xc, a1, a2)
class GPQuad(BayesianQuadratureTransform):
    """Gaussian-process quadrature moment transform with an RBF kernel.

    Sigma-points are handled column-wise as a ``(d, n)`` array (``d`` input
    dimension, ``n`` number of points), see ``weights_rbf``. ``hypers`` is a
    dict with keys ``'sig_var'``, ``'lengthscale'`` and ``'noise_var'``.
    """

    def __init__(self, dim, unit_sp=None, hypers=None):
        super(GPQuad, self).__init__(dim, unit_sp, hypers)
        # GPy RBF kernel with given hypers
        self.kern = RBF(self.d, variance=self.hypers['sig_var'],
                        lengthscale=self.hypers['lengthscale'], ARD=True)

    def weights_rbf(self, unit_sp, hypers):
        """Compute Bayesian-quadrature weights for the RBF kernel.

        Computations adopted from the GP-ADF code [Deisenroth] with the
        following assumptions:
          (A1) the uncertain input is zero-mean with unit covariance
          (A2) one set of hyper-parameters is used for all output dimensions
               (one GP models all outputs)

        :param unit_sp: sigma-points, array of shape (d, n)
        :param hypers: dict with 'sig_var', 'lengthscale' (length d) and
            'noise_var' (used as jitter on the kernel matrix)
        :return: tuple (wm_f, wc_f, wc_fx) of mean, covariance and
            cross-covariance weights

        Side effects: sets ``self.D`` and ``self.model_var``.
        """
        d, n = unit_sp.shape
        # GP kernel hyper-parameters
        alpha, el, jitter = hypers['sig_var'], hypers['lengthscale'], hypers['noise_var']
        assert len(el) == d
        # pre-allocation for convenience
        eye_d, eye_n = np.eye(d), np.eye(n)
        iLam1 = np.atleast_2d(np.diag(el**-1))  # sqrt(Lambda^-1)
        iLam2 = np.atleast_2d(np.diag(el**-2))  # Lambda^-1

        # sigma-points scaled by the inverse lengthscales, one point per row (n, d)
        inp = unit_sp.T.dot(iLam1)
        K = np.exp(2 * np.log(alpha) - 0.5 * maha(inp, inp))
        iK = cho_solve(cho_factor(K + jitter * eye_n), eye_n)
        B = iLam2 + eye_d  # (d, d)
        c = alpha**2 / np.sqrt(det(B))
        t = inp.dot(inv(B))
        l = np.exp(-0.5 * np.sum(inp * t, 1))  # one value per sigma-point
        zet = 2 * np.log(alpha) - 0.5 * np.sum(inp * inp, 1)
        inp = inp.dot(iLam1)
        R = 2 * iLam2 + eye_d
        t = 1 / np.sqrt(det(R))  # NB: t is reused, from here on it is a scalar
        L = np.exp((zet[:, na] + zet[:, na].T) + maha(inp, -inp, V=0.5 * inv(R)))
        q = c * l  # evaluations of the kernel mean map (from the viewpoint of RKHS methods)
        # mean weights
        wm_f = q.dot(iK)
        iKQ = iK.dot(t * L)
        # covariance weights
        wc_f = iKQ.dot(iK)
        # cross-covariance "weights"
        wc_fx = np.diag(q).dot(iK)
        # used for self.D.dot(x - mean).dot(wc_fx).dot(fx)
        self.D = inv(eye_d + np.diag(el**2))  # S(S+Lam)^-1; for S=I, (I+Lam)^-1
        # model variance; to be added to the covariance
        # this diagonal form assumes independent GP outputs (cov(f^a, f^b) = 0 for all a, b: a neq b)
        # NOTE(review): np.diag applied to a (d, 1) array extracts its length-1
        # diagonal instead of building a (d, d) matrix, so this is a 1-element
        # array that _covariance() broadcasts onto every entry -- confirm intent.
        self.model_var = np.diag((alpha**2 - np.trace(iKQ)) * np.ones((d, 1)))
        return wm_f, wc_f, wc_fx

    def plot_gp_model(self, f, unit_sp, args, test_range=(-5, 5, 50), plot_dims=(0, 0)):
        """Plot the GP fit of one output dimension against one input dimension.

        :param f: function evaluated column-wise as ``f(x, args)``
        :param unit_sp: training inputs (sigma-points), shape (d, n)
        :param args: extra argument forwarded to ``f``
        :param test_range: arguments for ``np.linspace`` defining the test grid
        :param plot_dims: tuple ``(in_dim, out_dim)`` selecting what to plot
        """
        # plot out_dim vs. in_dim
        in_dim, out_dim = plot_dims
        # test input must have the same dimension as specified in kernel;
        # only in_dim is varied, all other coordinates stay at zero
        test = np.linspace(*test_range)
        test_pts = np.zeros((self.d, len(test)))
        test_pts[in_dim, :] = test
        # function value observations at training points (unit sigma-points)
        y = np.apply_along_axis(f, 0, unit_sp, args)
        fx = np.apply_along_axis(f, 0, test_pts, args)  # function values at test points
        K = self.kern.K(unit_sp.T)  # covariances between sigma-points
        k = self.kern.K(test_pts.T, unit_sp.T)  # covariance between test inputs and sigma-points
        kxx = self.kern.Kdiag(test_pts.T)  # prior predictive variance
        k_iK = cho_solve(cho_factor(K), k.T).T
        gp_mean = k_iK.dot(y[out_dim, :])  # GP mean
        gp_var = np.diag(np.diag(kxx) - k_iK.dot(k.T))  # GP predictive variance
        # plot the GP mean, predictive variance and the true function
        plt.figure()
        plt.plot(test, fx[out_dim, :], color='r', ls='--', lw=2, label='true')
        plt.plot(test, gp_mean, color='b', ls='-', lw=2, label='GP mean')
        plt.fill_between(test, gp_mean + 2 * np.sqrt(gp_var), gp_mean - 2 * np.sqrt(gp_var),
                         color='b', alpha=0.25, label='GP variance')
        plt.plot(unit_sp[in_dim, :], y[out_dim, :], color='k', ls='', marker='o', ms=8, label='data')
        plt.legend()
        plt.show()

    def _weights(self, sigma_points, hypers):
        """Return the quadrature weights; delegates to ``weights_rbf``."""
        return self.weights_rbf(sigma_points, hypers)

    def _fcn_eval(self, fcn, x, fcn_pars):
        """Evaluate ``fcn(column, fcn_pars)`` for every column of ``x``."""
        return np.apply_along_axis(fcn, 0, x, fcn_pars)

    def _mean(self, weights, fcn_evals):
        """Transformed mean: function evaluations times the mean weights."""
        return fcn_evals.dot(weights)

    def _covariance(self, weights, fcn_evals, mean_out):
        """Transformed covariance, including the GP model variance term."""
        return fcn_evals.dot(weights).dot(fcn_evals.T) - np.outer(mean_out, mean_out.T) + self.model_var

    def _cross_covariance(self, weights, fcn_evals, x, mean_out, mean_in):
        """Input-output cross-covariance; uses ``self.D`` set by ``weights_rbf``."""
        return fcn_evals.dot(weights.T).dot((x - mean_in).T).dot(self.D)

    def _unit_sp_rows(self):
        """Return ``self.unit_sp`` as an (n, d) array with one point per row.

        The objectives ``_int_var_rbf`` / ``_int_var_rbf_hyp`` reshape their
        sigma-point argument to ``(self.n, self.d)``. Points stored column-wise
        as ``(d, n)`` must therefore be transposed first; reshaping them
        directly would mix coordinates of different points.
        """
        sp = np.asarray(self.unit_sp)
        if sp.ndim == 2 and sp.shape == (self.d, self.n):
            sp = sp.T
        return sp.reshape(self.n, self.d)

    def _int_var_rbf(self, X, hyp, jitter=1e-8):
        """
        Posterior integral variance of the Gaussian Process quadrature, as a
        function of the sigma-point locations.

        Only the data-dependent (negative) term is returned; the constant prior
        term that ``_int_var_rbf_hyp`` includes is omitted here.

        X - flattened sigma-points, reshaped internally to (n, d)
        hyp - kernel hyperparameters [s2, el_1, ... el_d]
        jitter - value added to the diagonal of the kernel matrix
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = hyp[0], hyp[1:]
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el**2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = -ks.dot(solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _int_var_rbf_hyp(self, hyp, X, jitter=1e-8):
        """
        Posterior integral variance as a function of hyper-parameters

        :param hyp: RBF kernel lengthscales [el_1, ..., el_d]; the kernel
            variance is fixed to 1 inside this function
        :param X: sigma-points, reshaped internally to (n, d)
        :param jitter: numerical jitter (for stabilizing computations)
        :return: posterior integral variance
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = 1, hyp  # sig_var hyper always set to 1
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el**2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = s2 * np.sqrt(det(2 * inv(L) + np.eye(self.d)))**-1 - ks.dot(
            solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _min_var_sigmas(self):
        """Find sigma-points minimizing the posterior integral variance.

        :return: optimized sigma-points as a flat array of length n * d
            (row-major over an (n, d) layout)
        """
        # solver options
        op = {'disp': True}
        # bounds based on input unit Gaussian (-2*std, +2std)
        bnds = tuple((-2, 2) for i in range(self.n * self.d))
        hyp = np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # minimize() needs a 1-D initial guess, laid out the way the objective
        # reshapes it, i.e. point after point
        x0 = self._unit_sp_rows().ravel()
        res = minimize(self._int_var_rbf, x0, args=(hyp,), method='L-BFGS-B',
                       bounds=bnds, options=op)
        return res.x

    def _min_var_hypers(self):
        """
        Finds kernel hyper-parameters minimizing the posterior integral variance.

        Only the lengthscales are optimized (see ``_int_var_rbf_hyp``).

        :return: optimized kernel hyper-parameters
        """
        # solver options
        op = {'disp': True}
        hyp = self.hypers['lengthscale']
        # box constraints keeping every lengthscale positive and finite
        bnds = tuple((1e-3, 1000) for i in range(len(hyp)))
        # hand the sigma-points over already in the (n, d) layout the objective uses
        res = minimize(self._int_var_rbf_hyp, hyp, args=(self._unit_sp_rows(),),
                       method='L-BFGS-B', bounds=bnds, options=op)
        return res.x

    def _min_logmarglik_hypers(self):
        # finds hypers by maximizing the marginal likelihood (empirical bayes)
        # the multiple output dimensions should be reflected in the log marglik
        # (not implemented yet)
        pass

    def _min_intvar_logmarglik_hypers(self):
        # finds hypers by minimizing the sum of log-marginal likelihood and the integral variance objectives
        # (not implemented yet)
        pass