def test_par_gradient(self):
    # smoke test: only checks that der_par() evaluates without raising
    dim = 2
    x = np.hstack((np.zeros((dim, 1)), np.eye(dim), -np.eye(dim)))
    y = x[0, :]
    par = np.array([[1, 1, 3]], dtype=float)
    kernel = RBFGauss(dim, par)
    dK_dpar = kernel.der_par(par.squeeze(), x)
@classmethod
def setUpClass(cls):
    cls.par_1d = np.array([[1, 3]])
    cls.par_2d = np.array([[1, 3, 3]])
    cls.kern_rbf_1d = RBFGauss(1, cls.par_1d)
    cls.kern_rbf_2d = RBFGauss(2, cls.par_2d)
    cls.data_1d = np.array([[1, -1, 0]], dtype=float)
    cls.data_2d = np.hstack((np.zeros((2, 1)), np.eye(2), -np.eye(2)))
    cls.test_data_1d = np.atleast_2d(np.linspace(-5, 5, 50))
    cls.test_data_2d = np.random.multivariate_normal(np.zeros((2,)), np.eye(2), 50).T
def exp_x_kxdkx(self, par, x, scaling=False, which_der=None):
    """Expectation E_x[k_ff(x_n, x) k_fd(x, x_m)]."""
    dim, num_pts = x.shape
    which_der = np.arange(num_pts) if which_der is None else which_der
    num_der = len(which_der)
    _, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
    inv_lam = sqrt_inv_lam ** 2
    lam = np.diag(inv_lam.diagonal() ** -1)
    eye_d = np.eye(dim)

    # quantities for covariance weights
    Sig_q = cho_solve(cho_factor(inv_lam + eye_d), eye_d)  # B^-1 * I
    eta = Sig_q.dot(x)  # (D, N) Sig_q * x
    inn = inv_lam.dot(x)  # (D, N) Lam^-1 * x
    Q = self.exp_x_kxkx(par, par, x, scaling)  # (N, N)
    cho_LamSig = cho_factor(lam + Sig_q)
    eta_tilde = inv_lam.dot(cho_solve(cho_LamSig, eta))  # Lam^-1 (Lam + Sig_q)^-1 * eta
    mu_Q = eta_tilde[..., na] + eta_tilde[:, na, :]  # (D, N, N) pairwise sums of pre-multiplied eta's

    E_dfff = np.empty((num_der * dim, num_pts))
    for i in range(num_der):
        for j in range(num_pts):
            istart, iend = i * dim, i * dim + dim
            i_d = which_der[i]
            E_dfff[istart:iend, j] = Q[i_d, j] * (mu_Q[:, i_d, j] - inn[:, i_d])
    return E_dfff.T  # (num_pts, num_der * dim)
def exp_x_xdkx(self, par, x, scaling=False, which_der=None):
    """Expectation E_x[x k_fd(x, x_m)]."""
    dim, num_pts = x.shape
    which_der = np.arange(num_pts) if which_der is None else which_der
    num_der = len(which_der)
    _, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
    inv_lam = sqrt_inv_lam ** 2
    eye_d = np.eye(dim)

    q = self.exp_x_kx(par, x, scaling)
    Sig_q = cho_solve(cho_factor(inv_lam + eye_d), eye_d)  # B^-1 * I
    eta = Sig_q.dot(x)  # (D, N) Sig_q * x
    mu_q = inv_lam.dot(eta)  # (D, N)
    r = q[na, which_der] * inv_lam.dot(mu_q[:, which_der] - x[:, which_der])  # (D, N_der)

    # quantities for cross-covariance "weights"
    iLamSig = inv_lam.dot(Sig_q)  # (D, D)
    r_tilde = np.empty((dim, num_der * dim))
    for i in range(num_der):
        i_d = which_der[i]
        r_tilde[:, i * dim:i * dim + dim] = q[i_d] * iLamSig + np.outer(mu_q[:, i_d], r[:, i].T)
    return r_tilde  # (dim, num_der * dim)
def eval(self, par, x1, x2=None, diag=False, scaling=True, which_der=None):
    if x2 is None:
        x2 = x1.copy()

    alpha, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
    alpha = 1.0 if not scaling else alpha

    x1 = sqrt_inv_lam.dot(x1)  # sqrt(Lam^-1) * x
    x2 = sqrt_inv_lam.dot(x2)
    if diag:  # only the diagonal of the kernel matrix
        assert x1.shape == x2.shape
        dx = x1 - x2
        Kff = np.exp(2 * np.log(alpha) - 0.5 * np.sum(dx * dx, axis=0))
    else:
        Kff = np.exp(2 * np.log(alpha) - 0.5 * maha(x1.T, x2.T))  # cov(f(xi), f(xj))

    x1, x2 = np.atleast_2d(x1), np.atleast_2d(x2)
    D, N = x1.shape
    Ds, Ns = x2.shape
    assert Ds == D
    which_der = np.arange(N) if which_der is None else which_der
    Nd = len(which_der)  # number of points with derivative observations

    x1 = sqrt_inv_lam.dot(x1)  # second scaling; cumulative effect is Lam^-1 * x
    x2 = sqrt_inv_lam.dot(x2)
    inv_lam = sqrt_inv_lam ** 2
    XmX = x2[..., na] - x1[:, na, :]  # (D, Ns, N) pairwise differences, pre-scaled by Lam^-1

    # NOTE: benchmark vs. np.kron(); replace with np.kron() if possible, but which_der complicates the matter
    Kfd = np.zeros((Ns, D * Nd))  # cov(f(xi), df(xj))
    for i in range(Ns):
        for j in range(Nd):
            jstart, jend = j * D, j * D + D
            j_d = which_der[j]
            Kfd[i, jstart:jend] = Kff[i, j_d] * XmX[:, i, j_d]

    Kdd = np.zeros((D * Nd, D * Nd))  # cov(df(xi), df(xj))
    for i in range(Nd):
        for j in range(Nd):
            istart, iend = i * D, i * D + D
            jstart, jend = j * D, j * D + D
            i_d, j_d = which_der[i], which_der[j]  # indices of points with derivatives
            Kdd[istart:iend, jstart:jend] = Kff[i_d, j_d] * (inv_lam - np.outer(XmX[:, i_d, j_d], XmX[:, i_d, j_d]))

    if Ns == N:
        return np.vstack((np.hstack((Kff, Kfd)), np.hstack((Kfd.T, Kdd))))
    else:
        return np.hstack((Kff, Kfd))
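# --- Usage sketch (illustrative, not part of the class) ----------------------
# A minimal example of eval(); parameter values mirror setUpClass above, and
# the (alpha, lengthscales) layout of `par` is an assumption. With derivative
# observations at every input (the default which_der), the vstack/hstack at
# the end yields the (N + N*D) x (N + N*D) joint covariance
# [[Kff, Kfd], [Kfd.T, Kdd]].
import numpy as np

par = np.array([[1.0, 3.0, 3.0]])
kern = RBFGauss(2, par)
x = np.hstack((np.zeros((2, 1)), np.eye(2), -np.eye(2)))  # (D, N) with N = 5
K = kern.eval(par.squeeze(), x)
assert K.shape == (5 + 5 * 2, 5 + 5 * 2)  # Kff (5,5), Kfd (5,10), Kdd (10,10)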
def exp_x_dkx(self, par, x, scaling=False, which_der=None):
    """Expectation E_x[k_fd(x, x_n)]."""
    dim, num_pts = x.shape
    _, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
    inv_lam = sqrt_inv_lam ** 2
    which_der = np.arange(num_pts) if which_der is None else which_der

    q = self.exp_x_kx(par, x, scaling)  # kernel mean E_x[k_ff(x, x_n)]
    eye_d = np.eye(dim)
    Sig_q = cho_solve(cho_factor(inv_lam + eye_d), eye_d)  # B^-1 * I
    eta = Sig_q.dot(x)  # (D, N) Sig_q * x
    mu_q = inv_lam.dot(eta)  # (D, N)
    r = q[na, which_der] * inv_lam.dot(mu_q[:, which_der] - x[:, which_der])  # (D, N_der)
    return r.T.ravel()  # (num_der * dim, )
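# --- Monte Carlo sanity check (illustrative, not part of the test suite) -----
# Closed-form kernel expectations are easy to get wrong, so a quick Monte
# Carlo cross-check is useful. This sketch assumes the weighting density is a
# standard Gaussian N(0, I), which the Sig_q = (Lam^-1 + I)^-1 term implies,
# and uses the k_fd convention from the Kfd blocks in eval(), i.e.
# k_fd(x, x_n) = k_ff(x, x_n) * Lam^-1 (x - x_n). The same pattern applies to
# exp_x_xdkx() and exp_x_kxdkx().
import numpy as np

rng = np.random.default_rng(42)
dim, num_mc = 2, 100000
par = np.array([[1.0, 3.0, 3.0]])
kern = RBFGauss(dim, par)
xn = np.hstack((np.zeros((dim, 1)), np.eye(dim)))  # (D, N) sigma-points
s = rng.standard_normal((dim, num_mc))             # samples x ~ N(0, I)

_, sqrt_inv_lam = RBFGauss._unpack_parameters(par.squeeze())
il = sqrt_inv_lam.diagonal() ** 2                  # diagonal of Lam^-1
dx = s[:, :, None] - xn[:, None, :]                # (D, M, N) differences x - x_n
kff = np.exp(-0.5 * np.sum(il[:, None, None] * dx ** 2, axis=0))  # (M, N)
kfd = kff[None, :, :] * il[:, None, None] * dx     # (D, M, N) k_fd(x, x_n)
mc_estimate = kfd.mean(axis=1).T.ravel()           # (N*D, ), point-major like r.T.ravel()
closed_form = kern.exp_x_dkx(par.squeeze(), xn)
print(np.max(np.abs(mc_estimate - closed_form)))   # MC error decays as O(1/sqrt(num_mc))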
def test_total_nlml_gradient(self):
    # nonlinear vector function from some SSM
    dyn = CoordinatedTurnTransition(GaussRV(5), GaussRV(5))

    # generate inputs
    num_x = 20
    x = 10 + np.random.randn(dyn.dim_in, num_x)

    # evaluate function at inputs
    y = np.apply_along_axis(dyn.dyn_eval, 0, x, None)

    # kernel and its initial log-parameters
    from ssmtoybox.bq.bqkern import RBFGauss
    lhyp = np.log([1.0] + 5 * [3.0])
    kernel = RBFGauss(dyn.dim_in, self.ker_par_5d)

    from scipy.optimize import check_grad
    err = check_grad(self._total_nlml, self._total_nlml_grad, lhyp, kernel, y.T, x)
    print(err)
    self.assertTrue(err <= 1e-5, 'Gradient error: {:.4f}'.format(err))
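# --- Hypothetical helper sketch ----------------------------------------------
# _total_nlml and _total_nlml_grad are defined elsewhere in the test class.
# Purely for illustration, a zero-mean GP negative log marginal likelihood
# summed over independent output dimensions might look like the sketch below;
# the name, the jitter, and slicing off the derivative blocks of eval() are
# assumptions, not the project's actual implementation.
import numpy as np
import scipy.linalg as la

def total_nlml_sketch(log_par, kernel, y, x):
    """log_par: log-hyperparameters, y: (N, dim_out) outputs, x: (D, N) inputs."""
    par = np.exp(log_par)  # optimize in log-space to keep parameters positive
    num_data = x.shape[1]
    K = kernel.eval(par, x)[:num_data, :num_data]  # keep only the Kff block
    L = la.cholesky(K + 1e-8 * np.eye(num_data), lower=True)  # jittered Cholesky
    a = la.cho_solve((L, True), y)  # K^-1 y for all output dimensions at once
    # sum_d [ 0.5 y_d' K^-1 y_d + 0.5 log|K| + 0.5 N log(2 pi) ]
    return (0.5 * np.sum(y * a)
            + y.shape[1] * (np.sum(np.log(L.diagonal())) + 0.5 * num_data * np.log(2 * np.pi)))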