def projection_affine(n_dim, u, n, u_0):
    """Project ``u`` onto the line through ``u_0`` with direction ``n``.

    The map is assembled in homogeneous coordinates as the product of three
    ``(n_dim + 1) x (n_dim + 1)`` matrices: translate by ``-u_0``, apply the
    linear map whose row ``k`` is ``(n[k] / l2_norm(n)**2) * n``, then
    translate back by ``+u_0``.

    Args:
        n_dim: affine transformation space
        u: random point to be projected on n as L
        n: secant normal vector
        u_0: secant starting point

    Returns:
        The first ``n_dim`` entries of the transformed homogeneous vector.
    """
    norm_n = l2_norm(n)
    eye = jnp.eye(n_dim)

    # Row k of the linear part is (n . e_k / |n|^2) * n.
    linear_part = jnp.asarray(
        [(jnp.dot(n, eye[k]) / norm_n ** 2) * n for k in range(n_dim)]
    )

    origin = u_0.reshape(n_dim, 1)
    bottom = jnp.zeros(shape=(1, n_dim))

    shift_to_origin = jnp.block([[eye, -1 * origin], [bottom, 1.0]])
    project = jnp.block(
        [[linear_part, jnp.zeros(shape=(n_dim, 1))], [bottom, 1.0]]
    )
    shift_back = jnp.block([[eye, origin], [bottom, 1.0]])

    # Right-most factor acts first: shift, project, shift back.
    transform = jnp.matmul(jnp.matmul(shift_back, project), shift_to_origin)
    homogeneous = jnp.matmul(transform, jnp.hstack([u, 1.0]))
    return lax.slice(homogeneous, [0], [n_dim])
def variational_expectation(self, y, post_mean, post_cov, cubature=None):
    """
    Cubature approximation of the expected log-likelihood and its derivatives.

    post_mean / post_cov are split in half: the leading num_components entries
    are treated as the subbands and the trailing ones as the modulators. Sigma
    points are generated for the modulators only.

    :param y: observation
    :param post_mean: stacked mean; first half subbands, second half modulators
    :param post_cov: covariance; only the two diagonal blocks are read
    :param cubature: optional callable mapping a dimension to (sigma points, weights);
        when None, gauss_hermite(num_components, 20) is used
    :return:
        exp_log_lik: weighted sum of (lognormpdf + const) over the sigma points
        dE_dm: [dE1, dE2] stacked, with a trailing axis added
        dE_dv: diagonal matrix built from [d2E1, d2E2]
    """
    num_components = int(post_mean.shape[0] / 2)
    if cubature is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature(num_components)

    # Earlier variant (kept for reference) applied the link function to the modulator mean:
    # subband_mean, modulator_mean = post_mean[:num_components], self.link_fn(post_mean[num_components:])
    subband_mean, modulator_mean = post_mean[:num_components], post_mean[num_components:]  # TODO: CHECK
    subband_cov, modulator_cov = post_cov[:num_components, :num_components], post_cov[num_components:, num_components:]
    # scale/shift the unit sigma points by the modulator marginal
    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    # NOTE(review): despite its name this holds the diagonal of subband_cov; it is
    # also the quantity dividing the modulator terms in dE2 / d2E2 below - confirm
    # that this is intended.
    modulator_var = np.diag(subband_cov)[..., None]
    # likelihood mean at each sigma point: link(modulators)' subband_mean
    mu = (self.link_fn(sigma_points).T @ subband_mean)[:, 0]
    lognormpdf = -0.5 * np.log(2 * np.pi * self.variance) - 0.5 * (y - mu) ** 2 / self.variance
    # correction term from the subband variances, evaluated per sigma point
    const = -0.5 / self.variance * (self.link_fn(sigma_points).T ** 2 @ modulator_var)[:, 0]
    exp_log_lik = np.sum(w * (lognormpdf + const))

    # first-derivative terms: dE1 for the subband half, dE2 for the modulator half
    dE1 = np.sum(w * self.link_fn(sigma_points) * (y - mu) / self.variance, axis=-1)
    dE2 = np.sum(w * (sigma_points - modulator_mean) * modulator_var ** -1 * (lognormpdf + const), axis=-1)
    dE_dm = np.block([dE1, dE2])[..., None]

    # second-derivative terms, placed on the diagonal of dE_dv
    d2E1 = np.sum(w * - 0.5 * self.link_fn(sigma_points) ** 2 / self.variance, axis=-1)
    d2E2 = np.sum(w * 0.5 * (
        ((sigma_points - modulator_mean) * modulator_var ** -1) ** 2
        - modulator_var ** -1
    ) * (lognormpdf + const), axis=-1)
    dE_dv = np.diag(np.block([d2E1, d2E2]))
    return exp_log_lik, dE_dm, dE_dv
def build_joint(ind, mean, cov, smoother_gain):
    """
    Stack the smoothed statistics of two neighbouring states [u_, u+]
    (indices ind and ind + 1) into one joint mean and covariance.

    :param ind: index of the left-hand state
    :param mean: per-state means, indexed along the first axis
    :param cov: per-state covariances, indexed along the first axis
    :param smoother_gain: per-state smoother gains, indexed along the first axis
    :return: (mean_joint, cov_joint); the off-diagonal covariance block is
        smoother_gain[ind] @ cov[ind + 1] and its transpose
    """
    left, right = ind, ind + 1
    cov_left_right = smoother_gain[left] @ cov[right]
    joint_mean = np.block([[mean[left]], [mean[right]]])
    joint_cov = np.block([
        [cov[left], cov_left_right],
        [cov_left_right.T, cov[right]],
    ])
    return joint_mean, joint_cov
def moment_match(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    Cubature-based moment matching producing an approximate likelihood (site).

    cav_mean / cav_cov are split in half: leading num_components entries are the
    subbands, trailing ones the modulators. The normaliser Z and its first and
    second derivatives are approximated by a weighted sum over sigma points.

    :param y: observation
    :param cav_mean: stacked cavity mean (subbands first, modulators second)
    :param cav_cov: cavity covariance; only the two diagonal blocks are read
    :param hyp: observation noise variance used in const and var.
        NOTE(review): defaults to None but is used arithmetically below, so
        callers presumably always pass it - confirm.
    :param power: EP power
    :param cubature_func: optional callable mapping a dimension to (sigma points, weights);
        when None, gauss_hermite(num_components, 20) is used
    :return:
        lZ: log(Z + 1e-8)
        site_mean: cav_mean + id2lZ @ dlZ
        site_cov: power * (-cav_cov + id2lZ)
    """
    num_components = int(cav_mean.shape[0] / 2)
    if cubature_func is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(num_components)

    # NOTE(review): the link function is applied to the modulator mean here and
    # again to the sigma points built from it below - confirm this is intended.
    subband_mean, modulator_mean = cav_mean[:num_components], self.link_fn(
        cav_mean[num_components:])
    subband_cov, modulator_cov = cav_cov[:num_components, :
                                         num_components], cav_cov[
                                             num_components:,
                                             num_components:]
    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    const = power**-0.5 * (2 * pi * hyp)**(0.5 - 0.5 * power)
    # per-sigma-point predictive mean and variance of y
    mu = (self.link_fn(sigma_points).T @ subband_mean)[:, 0]
    var = hyp / power + (self.link_fn(sigma_points).T**2 @ np.diag(subband_cov)[..., None])[:, 0]
    normpdf = const * (2 * pi * var)**-0.5 * np.exp(-0.5 * (y - mu)**2 / var)
    Z = np.sum(w * normpdf)
    # 1e-8 keeps the reciprocal and the log finite when Z underflows
    Zinv = 1. / (Z + 1e-8)
    lZ = np.log(Z + 1e-8)

    # first derivatives: dZ1 for the subband half, dZ2 for the modulator half
    dZ1 = np.sum(w * self.link_fn(sigma_points) * (y - mu) / var * normpdf, axis=-1)
    dZ2 = np.sum(w * (sigma_points - modulator_mean) * np.diag(modulator_cov)[..., None]**-1 * normpdf, axis=-1)
    dlZ = Zinv * np.block([dZ1, dZ2])

    # second derivatives; only the diagonal of the Hessian of log Z is kept
    d2Z1 = np.sum(w * self.link_fn(sigma_points)**2 * (((y - mu) / var)**2 - var**-1) * normpdf, axis=-1)
    d2Z2 = np.sum(w * (((sigma_points - modulator_mean) * np.diag(modulator_cov)[..., None]**-1)**2 - np.diag(modulator_cov)[..., None]**-1) * normpdf, axis=-1)
    d2lZ = np.diag(-dlZ**2 + Zinv * np.block([d2Z1, d2Z2]))
    id2lZ = inv(
        ensure_positive_precision(-d2lZ) - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ[
        ..., None]  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ
                        )  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov
def predict_from_state_(x_test, ind, x, post_mean, post_cov, gain, kernel):
    """
    Predict the state distribution at x_test by projecting from the two
    neighbouring inducing states (indices ind and ind + 1).

    :param x_test: test input, forwarded to compute_conditional_statistics
    :param ind: index of the left-hand neighbouring state
    :param x: training inputs, forwarded to compute_conditional_statistics
    :param post_mean: posterior (smoothed) state means
    :param post_cov: posterior (smoothed) state covariances
    :param gain: smoother gains
    :param kernel: kernel object, forwarded to compute_conditional_statistics
    :return: (P @ mean_joint, P @ cov_joint @ P.T + T)
    """
    P, T = compute_conditional_statistics(x_test, x, kernel, ind)

    # joint posterior (i.e. smoothed) statistics of the states [u_, u+]
    left, right = ind, ind + 1
    cov_left_right = gain[left] @ post_cov[right]
    joint_mean = np.block([[post_mean[left]], [post_mean[right]]])
    joint_cov = np.block([
        [post_cov[left], cov_left_right],
        [cov_left_right.T, post_cov[right]],
    ])

    test_mean = P @ joint_mean
    test_cov = P @ joint_cov @ P.T + T
    return test_mean, test_cov
def moment_match(self, y, cav_mean, cav_cov, hyp=None, power=1.0, cubature_func=None):
    """
    Cubature-based moment matching for a two-component cavity.

    Component 0 enters the Gaussian density of y directly (mean cav_mean[0, 0],
    variance cav_cov[0, 0]); component 1 is integrated numerically with
    1-dimensional sigma points, and link_fn(.)**2 of it acts as the noise
    variance.

    :param y: observation
    :param cav_mean: cavity mean, indexed as [0, 0] and [1, 0]
    :param cav_cov: cavity covariance; only entries [0, 0] and [1, 1] are read
    :param hyp: not used in this method
    :param power: EP power
    :param cubature_func: optional callable mapping a dimension to (sigma points, weights);
        when None, gauss_hermite(1, 20) is used
    :return:
        lZ: log of the normaliser (floored at 1e-8)
        site_mean: cav_mean + id2lZ @ dlZ
        site_cov: power * (-cav_cov + id2lZ)
    """
    if cubature_func is None:
        x, w = gauss_hermite(1, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(1)
    # sigma_points = np.sqrt(2) * np.sqrt(v) * x + m  # scale locations according to cavity dist.
    # NOTE(review): cav_mean[1] is used here while cav_mean[1, 0] is used
    # everywhere else - presumably equivalent under broadcasting; confirm.
    sigma_points = np.sqrt(cav_cov[1, 1]) * x + cav_mean[
        1]  # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to cavity

    f2 = self.link_fn(sigma_points)**2. / power
    obs_var = f2 + cav_cov[0, 0]
    const = power**-0.5 * (2 * pi * self.link_fn(sigma_points)**2.)**(
        0.5 - 0.5 * power)
    normpdf = const * (2 * pi * obs_var)**-0.5 * np.exp(
        -0.5 * (y - cav_mean[0, 0])**2 / obs_var)
    Z = np.sum(w * normpdf)
    # floor Z so the reciprocal and the log stay finite
    Zinv = 1. / np.maximum(Z, 1e-8)
    lZ = np.log(np.maximum(Z, 1e-8))

    # first derivative of log Z w.r.t. the mean of component 0
    dZ_integrand1 = (y - cav_mean[0, 0]) / obs_var * normpdf
    dlZ1 = Zinv * np.sum(w * dZ_integrand1)

    # first derivative of log Z w.r.t. the mean of component 1
    dZ_integrand2 = (sigma_points - cav_mean[1, 0]) / cav_cov[1, 1] * normpdf
    dlZ2 = Zinv * np.sum(w * dZ_integrand2)

    # second derivatives (diagonal entries only)
    d2Z_integrand1 = (-(f2 + cav_cov[0, 0])**-1 +
                      ((y - cav_mean[0, 0]) / obs_var)**2) * normpdf
    d2lZ1 = -dlZ1**2 + Zinv * np.sum(w * d2Z_integrand1)

    d2Z_integrand2 = (-cav_cov[1, 1]**-1 + (
        (sigma_points - cav_mean[1, 0]) / cav_cov[1, 1])**2) * normpdf
    d2lZ2 = -dlZ2**2 + Zinv * np.sum(w * d2Z_integrand2)

    dlZ = np.block([[dlZ1], [dlZ2]])
    d2lZ = np.block([[d2lZ1, 0], [0., d2lZ2]])
    id2lZ = inv(
        ensure_positive_precision(-d2lZ) - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ
                        )  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov
def statistical_linear_regression(self, cav_mean, cav_cov, hyp=None, cubature_func=None):
    """
    Cubature-based statistical linear regression for the subband/modulator model.

    NOTE: the original author marked this method as redundant ("This gives the
    same result as above - delete"); the method it duplicates is not named here.

    :param cav_mean: stacked mean (subbands first, modulators second)
    :param cav_cov: covariance; only the two diagonal blocks are read
    :param hyp: used directly as the likelihood covariance
    :param cubature_func: optional callable mapping a dimension to (sigma points, weights);
        when None, gauss_hermite(num_components, 20) is used
    :return:
        mu: cubature estimate of the expected observation
        S: cubature estimate of the observation variance
        C: cubature estimate of the cross covariance
        omega: cubature estimate of the derivative of mu
    """
    num_components = int(cav_mean.shape[0] / 2)
    if cubature_func is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(num_components)

    # NOTE(review): link_fn is applied to the modulator mean here and again to
    # the sigma points built from it - confirm this is intended.
    subband_mean, modulator_mean = cav_mean[:num_components], self.link_fn(
        cav_mean[num_components:])
    subband_cov, modulator_cov = cav_cov[:num_components, :
                                         num_components], cav_cov[
                                             num_components:,
                                             num_components:]
    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    lik_expectation, lik_covariance = (
        self.link_fn(sigma_points).T @ subband_mean).T, hyp
    # Compute zₙ via cubature:
    # muₙ = ∫ E[yₙ|fₙ] 𝓝(fₙ|mₙ,vₙ) dfₙ
    #     ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
    mu = np.sum(w * lik_expectation, axis=-1)[:, None]
    # Compute variance S via cubature:
    # S = ∫ [(E[yₙ|fₙ]-zₙ) (E[yₙ|fₙ]-zₙ)' + Cov[yₙ|fₙ]] 𝓝(fₙ|mₙ,vₙ) dfₙ
    #   ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-zₙ) (E[yₙ|fsigᵢ]-zₙ)' + Cov[yₙ|fₙ]]
    S = np.sum(w * ((lik_expectation - mu) * (lik_expectation - mu) +
                    lik_covariance),
               axis=-1)[:, None]
    # Compute cross covariance C via cubature:
    # C = ∫ (fₙ-mₙ) (E[yₙ|fₙ]-zₙ)' 𝓝(fₙ|mₙ,vₙ) dfₙ
    #   ≈ ∑ᵢ wᵢ (fsigᵢ -mₙ) (E[yₙ|fsigᵢ]-zₙ)'
    # The subband rows use link(sigma points) scaled by the subband variances;
    # the modulator rows use the centred sigma points.
    C = np.sum(w * np.block([[
        self.link_fn(sigma_points) * np.diag(subband_cov)[..., None]
    ], [sigma_points - modulator_mean]]) * (lik_expectation - mu),
               axis=-1)[:, None]
    # Compute derivative of mu via cubature:
    omega = np.sum(
        w * np.block([[self.link_fn(sigma_points)], [
            np.diag(modulator_cov)[..., None]**-1 *
            (sigma_points - modulator_mean) * lik_expectation
        ]]),
        axis=-1)[None, :]
    return mu, S, C, omega
def lqr_continuous_time_infinite_horizon(A, B, Q, R, N):
  """Solves the continuous-time, infinite-horizon LQR problem.

  The cost is the integral of x'Qx + u'Ru + 2x'Nu subject to x' = Ax + Bu. The
  algebraic Riccati equation is solved through the stable invariant subspace
  of the associated Hamiltonian matrix.

  Args:
    A: (x_dim, x_dim) state matrix.
    B: (x_dim, u_dim) input matrix.
    Q: (x_dim, x_dim) state cost.
    R: (u_dim, u_dim) input cost (must be invertible).
    N: (x_dim, u_dim) cross cost.

  Returns:
    K: (u_dim, x_dim) feedback gain, u = -K x.
    P: (x_dim, x_dim) real solution of the Riccati equation.
    eigvals: real parts of the x_dim selected (stable) Hamiltonian
      eigenvalues.
  """
  # Take the last dimension, in case we try to do some kind of broadcasting
  # thing in the future.
  x_dim = A.shape[-1]

  # pylint: disable=line-too-long
  # See https://en.wikipedia.org/wiki/Linear%E2%80%93quadratic_regulator#Infinite-horizon,_continuous-time_LQR.
  R_inv_Nt = jp.linalg.solve(R, N.T)
  A1 = A - B @ R_inv_Nt
  Q1 = Q - N @ R_inv_Nt

  # See https://en.wikipedia.org/wiki/Algebraic_Riccati_equation#Solution.
  # The lower-right block of the Hamiltonian is -A1^T (not -A1); without the
  # transpose the result is only correct for symmetric A1.
  H = jp.block([[A1, -B @ jp.linalg.solve(R, B.T)], [-Q1, -A1.T]])
  eigvals, eigvectors = jp.linalg.eig(H)

  # The Hamiltonian's eigenvalues may legitimately come in complex-conjugate
  # pairs (oscillatory closed-loop dynamics). Dropping the imaginary part of
  # the eigenvectors would make the two vectors of a pair identical and U
  # singular, so keep everything complex until P has been formed; P itself is
  # real up to round-off.
  stable = jp.argsort(eigvals.real)[:x_dim]
  U = eigvectors[:, stable]
  P = (U[x_dim:, :] @ jp.linalg.inv(U[:x_dim, :])).real

  K = jp.linalg.solve(R, (B.T @ P + N.T))
  return K, P, eigvals.real[stable]
def materialize_matrix(symmetric_matrix):
  """Returns a materialized symmetric matrix.

  Row k of `symmetric_matrix.block_rows` stores the k + 1 lower-triangular
  (and diagonal) blocks of block-row k side by side along the last axis. The
  missing upper-triangular blocks are filled in with the transposes of their
  mirror images.

  Args:
    symmetric_matrix: the matrix represented by lower-triangular block slices.
  """
  rows = symmetric_matrix.block_rows
  size = rows[0].shape[-2]
  count = len(rows)

  def block(k, i):
    # Block (k, i) of the lower triangle, i <= k.
    return rows[k][Ellipsis, i * size:(i + 1) * size]

  lower = [[block(k, i) for i in range(k + 1)] for k in range(count)]

  # Upper block (i, k), k > i, is the transpose of lower block (k, i).
  upper = [
      [jnp.swapaxes(block(k, i), -1, -2) for k in range(i + 1, count)]
      for i in range(count - 1)
  ]

  full_rows = [lower[i] + upper[i] for i in range(count - 1)]
  full_rows.append(lower[-1])
  return jnp.block(full_rows)
def endptChart(self, p):
    """Evaluate the endpoint map at ``p`` and return it through ``chartFInv``.

    The start point ``self.XStart`` is passed through ``self.chartS`` and ``p``
    is multiplied by ``self.DchartS(self.XStart)``; both are stacked and
    handed to ``self.endpt``. The first ``self.n`` entries of that result are
    then passed to ``self.chartFInv``.
    """
    start = self.chartS(self.XStart)
    direction = jnp.matmul(self.DchartS(self.XStart), p)
    initial_state = jnp.block([start, direction])
    final_state = self.endpt(initial_state)
    return self.chartFInv(final_state[:self.n])
def measurement_model(self):
    """
    Concatenate the measurement models of all component kernels.

    The component kernels are the attributes kernel0 ... kernel{num_kernels - 1};
    their measurement models are joined side by side in that order.

    :return: H, the horizontally stacked measurement model
    """
    H = self.kernel0.measurement_model()
    for i in range(1, self.num_kernels):
        # plain attribute lookup; no need to evaluate a code string
        kerneli = getattr(self, "kernel" + str(i))
        H_ = kerneli.measurement_model()
        H = np.block([H, H_])
    return H
def test_nFS():
    """Compare nFS.H against the reference implementation pOP.nFS.

    Two basis objects are built (one with every entry of nC set to -1, one
    with explicit nC2 rows) and their H matrices must match the reference
    output in Frobenius norm.
    """
    from pOP import nFS as pnFS

    dim = 2
    # First configuration: nC filled with -1, derivative orders all zero.
    nC = -1 * np.ones((dim, 1), dtype=np.int32)
    d = np.zeros(dim, dtype=np.int32)
    c = np.ones(dim)  # overwritten below before it is read
    # Second configuration: derivative orders (2, 3); the rows of nC2 are
    # arange(4) padded with -1 to length 7, and arange(7).
    d2 = np.array([2, 3], dtype=np.int32)
    nC2Py = np.array([4, 7], dtype=np.int32)
    nC2 = np.block([[np.arange(4), -1. * np.ones(3)],
                    [np.arange(7)]]).astype(np.int32)

    # Sample points: 10 values per dimension on [0, 2*pi], N = 100 rows.
    n = np.array([10] * dim)
    N = np.prod(n)
    z = np.linspace(0, 2. * np.pi, num=n[0])
    x = onp.zeros((N, dim))
    # NOTE(review): for dim = 2 both columns come out as z tiled ten times
    # (hstack of copies for k = 0, flattened stack of copies for k = 1), so the
    # points lie on the diagonal rather than on a full grid - confirm intent.
    for k in range(dim):
        nProd = np.prod(n[k + 1:])
        nStack = np.prod(n[0:k])
        dark = np.hstack([z] * nProd)
        x[:, k] = onp.array([dark] * nStack).flatten()

    # Affine map from [x[0], x[-1]] onto [-pi, pi] for the reference call.
    c = (2. * np.pi) / (x[-1, :] - x[0, :])
    z = (x - x[0, :]) * c - np.pi

    nfs1 = nFS(x[0, :], x[-1, :], nC, 5)
    nfs2 = nFS(x[0, :], x[-1, :], nC2, 10)
    Fc1 = nfs1.H(x.T, d, False)
    Fc2 = nfs2.H(x.T, d2, False)

    Fp1 = pnFS(z, 4, d, nC.flatten() * 0.)
    Fp2 = pnFS(z, 9, d2, nC2Py)

    assert (np.linalg.norm(Fc1 - Fp1, ord='fro') < 1e-14)
    assert (np.linalg.norm(Fc2 - Fp2, ord='fro') < 5e-13)
def analytical_linearisation(self, m, sigma=None, hyp=None):
    """
    Compute the Jacobian of the state space observation model w.r.t. the
    function fₙ and the noise term σₙ.

    :param m: mean; only m[1] is read
    :param sigma: multiplies the link derivative in the first returned block
    :param hyp: unused
    :return: (np.block([[1, dlink_fn(m[1]) * sigma]]), link_fn(np.array([m[1]])))
    """
    second_component = m[1]
    jacobian_f = np.block(
        [[np.array(1.0), self.dlink_fn(second_component) * sigma]]
    )
    jacobian_sigma = self.link_fn(np.array([second_component]))
    return jacobian_f, jacobian_sigma
def gamma_f(p: jnp.ndarray) -> jnp.ndarray:
    """Stack three block rows into one matrix.

    The middle row is ``[I_nn, final_weight(p), zeros6]``; the first and last
    rows are the pre-built ``zeros5`` and ``zeros51`` taken from the enclosing
    scope.
    """
    middle_row = [
        jnp.identity(nn),
        self.objective_object.final_weight(p),
        zeros6,
    ]
    return jnp.block([[zeros5], middle_row, [zeros51]])
def predict_from_state_infinite_horizon_(x_test, ind, x, post_mean, kernel):
    """
    Predict the state mean at x_test by projecting from the two neighbouring
    inducing states (indices ind and ind + 1). Only the mean is returned.

    :param x_test: test input, forwarded to compute_conditional_statistics
    :param ind: index of the left-hand neighbouring state
    :param x: training inputs, forwarded to compute_conditional_statistics
    :param post_mean: posterior (smoothed) state means
    :param kernel: kernel object, forwarded to compute_conditional_statistics
    :return: P @ [post_mean[ind]; post_mean[ind + 1]]
    """
    # the second output of the helper is not needed for the mean
    P, _ = compute_conditional_statistics(x_test, x, kernel, ind)
    # joint posterior (i.e. smoothed) mean of the states [u_, u+]
    stacked_mean = np.block([[post_mean[ind]], [post_mean[ind + 1]]])
    return P @ stacked_mean
def analytical_linearisation(self, m, sigma=None):
    """
    Compute the Jacobian of the state space observation model w.r.t. the
    function fₙ and the noise term σₙ.

    m is split in half: the leading entries are the subbands, the trailing
    entries the modulators.

    :param m: stacked mean (subbands first, modulators second)
    :param sigma: unused
    :return: (Jf, Jsigma) where Jf is the transposed stack of
        [link_fn(modulators); subbands * dlink_fn(modulators)] and
        Jsigma = [[sqrt(self.variance)]]
    """
    half = m.shape[0] // 2
    subbands, modulators = m[:half], m[half:]
    d_wrt_subbands = self.link_fn(modulators)
    d_wrt_modulators = subbands * self.dlink_fn(modulators)
    Jf = np.block([[d_wrt_subbands], [d_wrt_modulators]]).T
    Jsigma = np.array([[self.variance ** 0.5]])
    return Jf, Jsigma
def mat(self, s, n):
    """Build an ``n x n`` block-diagonal matrix from two parent-class matrices.

    The size is split as ``p = n // 2`` and ``n - p``; each diagonal block is
    produced by the parent class's ``mat`` and the off-diagonal blocks are
    zero. The split point is stored in ``self._p``.
    """
    p = n // 2
    rest = n - p
    top_left = super().mat(s, p)
    bottom_right = super().mat(s, rest)
    self._p = p
    top_right = jnp.zeros((p, rest))
    bottom_left = jnp.zeros((rest, p))
    return jnp.block([
        [top_left, top_right],
        [bottom_left, bottom_right],
    ])
def integral_weights(self, theta: np.ndarray, t: float) -> np.ndarray:
    """Assemble the block weight matrix ``[[q, s], [s^H, r]]``.

    ``q``, ``r`` and ``s`` are evaluated at ``(theta, t)`` through
    ``self._q``, ``self._r`` and ``self._s`` and cached on the instance as
    ``_current_q``, ``_current_r`` and ``_current_s``.
    """
    self._current_q = self._q(theta, t)
    self._current_r = self._r(theta, t)
    self._current_s = self._s(theta, t)

    top_row = [self._current_q, self._current_s]
    # lower-left block is the conjugate transpose of s
    bottom_row = [self._current_s.T.conj(), self._current_r]
    return jnp.block([top_row, bottom_row])
def analytical_linearisation(self, m, sigma=None, hyp=None):
    """
    Jacobians of the observation model w.r.t. the state and the noise term.

    m is split in half: the leading entries are the subbands, the trailing
    entries the modulators (passed through link_fn).

    :param m: stacked mean (subbands first, modulators second)
    :param sigma: unused
    :param hyp: observation noise variance; falls back to self.hyp when None
    :return: (Jf', Jsigma') - both made at least 2-D and transposed, with
        Jf = [link_fn(modulators); subbands * dlink_fn(modulators)] and
        Jsigma = [[sqrt(noise variance)]]
    """
    noise_var = self.hyp if hyp is None else hyp
    half = m.shape[0] // 2
    raw_modulators = m[half:]
    subbands = m[:half]
    modulators = self.link_fn(raw_modulators)
    Jf = np.block([[modulators],
                   [subbands * self.dlink_fn(raw_modulators)]])
    Jsigma = np.array([[np.sqrt(noise_var)]])
    return np.atleast_2d(Jf).T, np.atleast_2d(Jsigma).T
def kernel_to_state_space(self, R=None):
    """
    Combine the state space models of all component kernels.

    The component kernels are the attributes kernel0 ... kernel{num_kernels - 1}.
    F, L, Qc and Pinf are combined block-diagonally; the measurement models H
    are joined side by side.

    :param R: forwarded unchanged to every component kernel
    :return: F, L, Qc, H, Pinf of the combined model
    """
    F, L, Qc, H, Pinf = self.kernel0.kernel_to_state_space(R)
    for i in range(1, self.num_kernels):
        # plain attribute lookup; no need to evaluate a code string
        kerneli = getattr(self, "kernel" + str(i))
        F_, L_, Qc_, H_, Pinf_ = kerneli.kernel_to_state_space(R)
        F = block_diag(F, F_)
        L = block_diag(L, L_)
        Qc = block_diag(Qc, Qc_)
        H = np.block([H, H_])
        Pinf = block_diag(Pinf, Pinf_)
    return F, L, Qc, H, Pinf
def __init__(self, d):
    """Store the dimension ``d`` and the moments available for it.

    ``mean`` is a zero vector of length ``d``; ``xcov`` is a scaled identity
    of size ``d - 1`` and ``ycov`` is 9. A full covariance ``cov`` (block
    diagonal of ``xcov`` and ``ycov``) is only assembled for ``d == 2``;
    otherwise ``cov`` is ``None``.
    """
    super().__init__()
    self.d = d
    self.mean = np.zeros(d)
    self.xcov = np.eye(d - 1) * np.exp(9 / 2)
    self.ycov = 9

    if d != 2:
        self.cov = None
        return

    top_row = [self.xcov, np.zeros((d - 1, 1))]
    bottom_row = [np.zeros((1, d - 1)), self.ycov]
    self.cov = np.block([top_row, bottom_row])
def _rotation_q(pos: jnp.ndarray, indices: jnp.ndarray,
                refpos: jnp.ndarray) -> float:
    """Return the sign-normalised result of ``eigh_rightmost`` for the 4x4 matrix F.

    The selected positions (all of ``indices`` except the last entry) are
    centred on their mean and correlated with ``refpos`` to give a 3x3 matrix
    ``R``. ``F`` is the symmetric 4x4 matrix built from the trace and the
    antisymmetric and symmetric parts of ``R``. The result is multiplied by
    the sign of its first component.
    """
    selected = pos[indices[:-1]]
    centred = selected - selected.mean(0)
    R = centred.T @ refpos

    trace = jnp.trace(R)
    # components of the antisymmetric part of R
    antisym = jnp.array([
        R[1, 2] - R[2, 1],
        R[2, 0] - R[0, 2],
        R[0, 1] - R[1, 0],
    ])
    lower_right = -trace * jnp.eye(3) + R + R.T

    F = jnp.block([
        [trace, antisym[None, :]],
        [antisym[:, None], lower_right],
    ])
    q = eigh_rightmost(F)
    return q * jnp.sign(q[0])
def Scheme(self, z):
    """Advance the two-step state ``z = [q_prev, q]`` by one step.

    The first ``self.n`` entries of ``z`` are the previous point and the rest
    the current point. A scalar multiplier ``lam`` is obtained as a root of a
    quadratic whose coefficients depend on ``self.b`` and ``self.dt``.

    Returns:
        ``[q, 2 q - q_prev + dt^2 * b * q * lam]`` as one flat array.
    """
    prev, cur = z[:self.n], z[self.n:]

    # compute Lagrange multipliers
    denom = self.dt**2 * jnp.dot(self.b**3, cur**2)
    step = 2 * cur - prev
    half_lin = jnp.dot(self.b**2 * cur, step) / denom
    const = 1 / self.dt**2 * (jnp.dot(self.b * step, step) - 1) / denom
    lam = -half_lin + jnp.sqrt(half_lin**2 - const)

    nxt = 2 * cur - prev + self.dt**2 * self.b * cur * lam
    return jnp.block([cur, nxt])
def cuspCond(f1, Xa, ds):
    """Stack three conditions built from directional derivatives of ``f1``.

    ``Xa`` holds a point ``x`` (first three entries) and a direction ``a``
    (remaining entries).

    Returns:
        ``[c1, c2, c3]`` flattened, where ``c1`` is the first directional
        derivative of ``f1`` at ``x`` along ``a``, ``c2 = (|a|^2 - 1) / ds``
        and ``c3`` is the second directional derivative contracted with ``a``.
    """
    # shorthands
    x, a = Xa[:3], Xa[3:]

    def first_derivative(pt):
        # 1st derivative in direction a
        return jvp(f1, (pt, ), (a, ))[1]

    def second_derivative(pt):
        # 2nd derivative in direction a
        return jvp(first_derivative, (pt, ), (a, ))[1]

    c1 = first_derivative(x)
    c2 = (sum(a**2) - 1) / ds
    c3 = jnp.matmul(second_derivative(x), a)
    return jnp.block([c1, c2, c3])
def rp_to_se3(R: jnp.ndarray, p: jnp.ndarray) -> jnp.ndarray:
  """Rotation and translation to homogeneous transform.

  Args:
    R: (3, 3) An orthonormal rotation matrix.
    p: (3,) A 3-vector representing an offset.

  Returns:
    X: (4, 4) The homogeneous transformation matrix described by rotating by R
      and translating by p.
  """
  translation = jnp.reshape(p, (3, 1))
  bottom_row = jnp.array([[0.0, 0.0, 0.0, 1.0]])
  top_rows = [R, translation]
  return jnp.block([top_rows, [bottom_row]])
def projection_affine(n_dim, u, n, u_0):
    """Project ``u`` onto the line through ``u_0`` with direction ``n``.

    The map is assembled in homogeneous coordinates as the product of three
    ``(n_dim + 1) x (n_dim + 1)`` matrices: translate by ``-u_0``, apply the
    linear map ``(n / l2_norm(n)**2)[:, None] * n``, then translate back by
    ``+u_0``.

    Args:
        n_dim: affine transformation space
        u: random point to be projected on n as L
        n: secant normal vector
        u_0: secant starting point

    Returns:
        The first ``n_dim`` entries of the transformed homogeneous vector.
    """
    norm_n = l2_norm(n)
    eye = jnp.eye(n_dim)

    # Column of n, scaled by 1 / |n|^2, broadcast against n as a row.
    n_column = jnp.dot(eye, n)[:, None]
    linear_part = n_column / norm_n**2 * n

    origin = lax.reshape(u_0, (n_dim, 1))
    bottom = jnp.zeros(shape=(1, n_dim))

    shift_to_origin = jnp.block([[eye, -1 * origin], [bottom, 1.0]])
    project = jnp.block([[linear_part, jnp.zeros(shape=(n_dim, 1))],
                         [bottom, 1.0]])
    shift_back = jnp.block([[eye, origin], [bottom, 1.0]])

    # Right-most factor acts first: shift, project, shift back.
    transform = jnp.matmul(jnp.matmul(shift_back, project), shift_to_origin)
    homogeneous = jnp.matmul(transform, jnp.hstack([u, 1.0]))
    return lax.slice(homogeneous, [0], [n_dim])
def kalman_filter_pairs(dt, kernel, y, noise_cov, use_sequential=True):
    """
    A Kalman filter over pairs of states, in which y is [2state_dim, 1] and
    noise_cov is [2state_dim, 2state_dim].

    Each augmented state is [previous state; current state]: the pair
    transition copies the current state into the first slot and propagates it
    with A into the second, and process noise only enters the second slot.

    :param dt: step sizes [N, 1]
    :param kernel: an instantiation of the kernel class, used to determine the state space model
    :param y: observations [N, 2state_dim, 1]
    :param noise_cov: observation noise covariances [N, 2state_dim, 2state_dim]
    :param use_sequential: flag to switch between parallel and sequential implementation of Kalman filter
    :return:
        ell: the log-marginal likelihood log p(y), for hyperparameter optimisation (learning) [scalar]
        (means, covs): a tuple holding the filtering means and covariances with
            the first time step dropped and only the leading state_dim
            entries of each pair kept
    :raises NotImplementedError: if use_sequential is False
    """
    Pinf = kernel.stationary_covariance()
    state_dim = Pinf.shape[0]
    zero_block = np.zeros([state_dim, state_dim])
    zero_mean = np.zeros([state_dim, 1])

    # prior over a pair of states
    prior_cov = np.block([[Pinf, zero_block], [zero_block, Pinf]])
    prior_mean = np.block([[zero_mean], [zero_mean]])

    As = vmap(kernel.state_transition)(dt)
    Qs = vmap(process_noise_covariance, [0, None])(As, Pinf)

    def pair_model(A, Q):
        # transition / process noise for the augmented [previous; current] state
        A_pair = np.block([[zero_block, np.eye(state_dim)], [zero_block, A]])
        Q_pair = np.block([[zero_block, zero_block], [zero_block, Q]])
        return A_pair, Q_pair

    pair_As, pair_Qs = vmap(pair_model)(As, Qs)
    H = np.eye(2 * state_dim)
    masks = np.zeros_like(y, dtype=bool)

    if not use_sequential:
        raise NotImplementedError("Parallel KF not implemented yet")

    ell, means, covs = _sequential_kf(pair_As, pair_Qs, H, y, noise_cov,
                                      prior_mean, prior_cov, masks)
    return ell, (means[1:, :state_dim], covs[1:, :state_dim, :state_dim])
def symmetric_cubature_third_order(dim=1, kappa=None):
    """
    Return weights and sigma-points for the symmetric cubature rule of order 3,
    for dimension dim with parameter kappa (default 0).

    The rule uses 2 * dim + 1 points: the origin (weight kappa / (dim + kappa))
    and +/- sqrt(dim + kappa) along every axis (weight 1 / (2 (dim + kappa)) each).

    Output shapes are kept as they always were for backward compatibility:
      - dim == 1, kappa == 0: sigma_pts [3], weights [3]
      - dim in (2, 3), kappa == 0: sigma_pts [dim, 2 dim + 1], weights [2 dim + 1]
      - otherwise: sigma_pts [dim, 2 dim + 1], weights [1, 2 dim + 1]

    :param dim: dimension of the integration domain
    :param kappa: rule parameter; None is treated as 0 (the CKF choice)
    :return: sigma_pts, weights
    """
    if kappa is None:
        # kappa = 1 - dim
        kappa = 0  # CKF
    if (dim == 1) and (kappa == 0):
        weights = np.array([0., 0.5, 0.5])
        sigma_pts = np.array([0., 1., -1.])
    elif (dim == 2) and (kappa == 0):
        # exact sqrt(2) instead of a 4-digit truncation
        u = 2.0 ** 0.5
        weights = np.array([0., 0.25, 0.25, 0.25, 0.25])
        sigma_pts = np.block([[0., u, 0., -u, 0.],
                              [0., 0., u, 0., -u]])
    elif (dim == 3) and (kappa == 0):
        # exact sqrt(3) and 1/6 so that the weights sum to one
        u = 3.0 ** 0.5
        wm = 1.0 / 6.0
        weights = np.array([0., wm, wm, wm, wm, wm, wm])
        sigma_pts = np.block([[0., u, 0., 0., -u, 0., 0.],
                              [0., 0., u, 0., 0., -u, 0.],
                              [0., 0., 0., u, 0., 0., -u]])
    else:
        # weights: built directly rather than through in-place index updates,
        # which are not available in current JAX releases
        w0 = kappa / (dim + kappa)
        wm = 1 / (2 * (dim + kappa))
        weights = np.hstack([np.full((1, 1), w0), np.full((1, 2 * dim), wm)])
        # Sigma points
        sigma_pts = np.block([np.zeros([dim, 1]), np.eye(dim), -np.eye(dim)])
        sigma_pts = np.sqrt(dim + kappa) * sigma_pts
    return sigma_pts, weights
def create_spatiotemporal_grid(X, Y):
    """
    Create a grid of data sized [T, R1, R2].

    Column 0 of X is time, the remaining columns are spatial coordinates (any
    number >= 1). Every (time, location) combination that is missing from the
    data is filled with NaN. Full duplicates (i.e. rows where all dimensions
    match) are removed; the first occurrence is kept.

    :param X: inputs [N, 1 + num_spatial_dims]
    :param Y: observations [N] or [N, 1]
    :return:
        unique_time: sorted unique time points [T, 1]
        R_grid: spatial locations on the grid [T, R, num_spatial_dims]
        Y_grid: observations on the grid, NaN where missing [T, R, 1]
    :raises NotImplementedError: if X has no spatial columns
    """
    if Y.ndim < 2:
        Y = Y[:, None]
    num_spatial_dims = X.shape[1] - 1
    if num_spatial_dims < 1:
        raise NotImplementedError

    # lexsort treats the LAST key as the primary one, so pass the columns in
    # reverse order to sort by time first, then by each spatial column in turn
    sort_ind = nnp.lexsort(
        tuple(X[:, d] for d in range(num_spatial_dims, -1, -1)))
    X = X[sort_ind]
    Y = Y[sort_ind]

    unique_time = np.unique(X[:, 0])
    unique_space = nnp.unique(X[:, 1:], axis=0)
    N_t = unique_time.shape[0]
    N_r = unique_space.shape[0]

    # all (time, location) combinations: the location list repeated once per
    # time step, next to each time repeated once per location
    R_flat = np.tile(unique_space, [N_t, 1]).reshape(-1, num_spatial_dims)
    Y_dummy = np.nan * np.zeros([N_t * N_r, 1])
    time_duplicate = np.tile(unique_time, [N_r, 1]).T.flatten()
    X_dummy = np.block([time_duplicate[:, None], R_flat])

    # real data goes first so that it wins over the NaN placeholders when
    # duplicates are dropped (unique returns the first occurrence)
    X_all = np.vstack([X, X_dummy])
    Y_all = np.vstack([Y, Y_dummy])
    X_unique, ind = nnp.unique(X_all, axis=0, return_index=True)
    Y_unique = Y_all[ind]

    grid_shape = (unique_time.shape[0], ) + unique_space.shape
    R_grid = X_unique[:, 1:].reshape(grid_shape)
    Y_grid = Y_unique.reshape(grid_shape[:-1] + (1, ))
    return unique_time[:, None], R_grid, Y_grid
def variational_expectation(self, y, m, v, hyp=None, cubature_func=None):
    """
    Expected log-likelihood of a two-component model and its derivatives.

    Component 0 (mean m0, variance v0) is the mean of y and is integrated in
    closed form; component 1 (mean m1, variance v1) sets the noise variance
    through link_fn(.)**2 and is integrated with 1-dimensional cubature:

        E[log p(y|f)] = -0.5 log(2π) - 0.5 ∑ᵢ wᵢ [log varᵢ + ((y-m0)² + v0) / varᵢ]

    :param y: observation
    :param m: mean, indexed as m[0, 0] and m[1, 0]
    :param v: covariance; only v[0, 0] and v[1, 1] are read
    :param hyp: unused
    :param cubature_func: optional callable mapping a dimension to (sigma points, weights);
        when None, gauss_hermite(1, 20) is used
    :return:
        exp_log_lik: the expected log-likelihood
        dE_dm: derivatives w.r.t. the two means [2, 1]
        dE_dv: derivatives w.r.t. the two variances, on the diagonal [2, 2]
    """
    if cubature_func is None:
        x, w = gauss_hermite(1, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(1)
    m0, m1, v0, v1 = m[0, 0], m[1, 0], v[0, 0], v[1, 1]
    # fsigᵢ = xᵢ√v1 + m1: scale locations according to the second component
    sigma_points = np.sqrt(v1) * x + m1
    # noise variance at every sigma point
    var = self.link_fn(sigma_points)**2
    # component 0 is integrated analytically: E[(y - f0)²] = (y - m0)² + v0
    log_lik = np.log(var) + var**-1 * ((y - m0)**2 + v0)
    weighted_log_likelihood_eval = w * log_lik
    # Compute expected log likelihood via cubature:
    exp_log_lik = -0.5 * np.log(
        2 * pi) - 0.5 * np.sum(weighted_log_likelihood_eval)
    # Derivative w.r.t. m0: d/dm0 of -0.5 ((y-m0)² + v0)/var is (y-m0)/var.
    # (v0 does not depend on m0, so it must not appear here.)
    dE_dm1 = np.sum((var**-1 * (y - m0)) * w)
    # Derivative w.r.t. m1 via the Gaussian identity
    # d/dm E[g(f)] = E[(f-m) v⁻¹ g(f)]
    dE_dm2 = -0.5 * np.sum(weighted_log_likelihood_eval * v1**-1 *
                           (sigma_points - m1))
    # Derivative w.r.t. v0: the integrand is linear in v0
    dE_dv1 = -0.5 * np.sum(var**-1 * w)
    # Derivative w.r.t. v1 via the Gaussian identity
    # d/dv E[g(f)] = E[((f-m)² v⁻² - v⁻¹)/2 g(f)]
    dE_dv2 = -0.25 * np.sum(
        (v1**-2 * (sigma_points - m1)**2 - v1**-1) *
        weighted_log_likelihood_eval)
    dE_dm = np.block([[dE_dm1], [dE_dm2]])
    dE_dv = np.block([[dE_dv1, 0], [0., dE_dv2]])
    return exp_log_lik, dE_dm, dE_dv