def getInitialInformationMatrix(data):
    I = np.zeros((4, 4))
    i = np.zeros((4, 1))
    for _ in range(10):
        H, y, R = data.getNextBatch()
        # information-form measurement update: I += Hᵀ R⁻¹ H, i += Hᵀ R⁻¹ y
        I = I + H.T @ inv(R) @ H
        i = i + H.T @ inv(R) @ y
    return I, i
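# A minimal usage sketch for getInitialInformationMatrix, assuming plain NumPy.
# The _StubData class is hypothetical: it serves scalar measurement batches
# (H, y, R) of a fixed 4-dimensional state, standing in for the real data source.
class _StubData:
    def __init__(self, seed=0):
        self.rng = np.random.default_rng(seed)
        self.x_true = self.rng.standard_normal((4, 1))  # hidden state

    def getNextBatch(self):
        H = self.rng.standard_normal((1, 4))   # scalar measurement row
        R = np.array([[0.01]])                 # measurement noise variance
        y = H @ self.x_true + self.rng.normal(0., 0.1, (1, 1))
        return H, y, R

I0, i0 = getInitialInformationMatrix(_StubData())
mu0 = np.linalg.inv(I0) @ i0  # recover the state estimate from information form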
def compute_cavity(post_mean, post_cov, site_mean, site_cov, power):
    """ remove local likelihood approximation from the posterior to obtain
    the marginal cavity distribution """
    post_precision, site_precision = inv(post_cov), inv(site_cov)
    cav_cov = inv(post_precision - power * site_precision)  # cavity covariance
    cav_mean = cav_cov @ (post_precision @ post_mean
                          - power * site_precision @ site_mean)  # cavity mean
    return cav_mean, cav_cov
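# A small sanity check for compute_cavity (illustrative; the 2x2 numbers are
# made up). With power=1, recombining the cavity with the site in natural
# (information) form should recover the original posterior exactly.
def _check_compute_cavity():
    post_cov = np.array([[1.0, 0.2], [0.2, 0.5]])
    post_mean = np.array([[0.3], [-0.1]])
    site_cov = np.array([[2.0, 0.0], [0.0, 3.0]])
    site_mean = np.array([[0.5], [0.0]])
    cav_mean, cav_cov = compute_cavity(post_mean, post_cov,
                                       site_mean, site_cov, power=1.0)
    recon_cov = inv(inv(cav_cov) + inv(site_cov))
    recon_mean = recon_cov @ (inv(cav_cov) @ cav_mean + inv(site_cov) @ site_mean)
    assert np.allclose(recon_cov, post_cov) and np.allclose(recon_mean, post_mean)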
def update(self, likelihood, y, post_mean, post_cov, hyp=None,
           site_params=None):
    """
    The update function takes a likelihood as input, and uses analytical
    linearisation (first order Taylor series expansion) to update the site
    parameters
    """
    power = 1. if site_params is None else self.power
    if (site_params is None) or (power == 0):
        # avoid cavity calc if power is 0
        cav_mean, cav_cov = post_mean, post_cov
    else:
        site_mean, site_cov = site_params
        # --- Compute the cavity distribution ---
        cav_mean, cav_cov = compute_cavity(post_mean, post_cov,
                                           site_mean, site_cov, power)
    # calculate the Jacobian of the observation model w.r.t. function fₙ and noise term rₙ
    Jf, Jsigma = likelihood.analytical_linearisation(
        cav_mean, np.zeros_like(y), hyp)  # evaluate at mean
    obs_cov = np.eye(y.shape[0])  # observation noise scale is w.l.o.g. 1
    likelihood_expectation, _ = likelihood.conditional_moments(cav_mean, hyp)
    residual = y - likelihood_expectation  # residual, yₙ-E[yₙ|fₙ]
    sigma = Jsigma @ obs_cov @ Jsigma.T + power * Jf @ cav_cov @ Jf.T
    site_nat2 = Jf.T @ inv(Jsigma @ obs_cov @ Jsigma.T) @ Jf
    site_cov = inv(site_nat2 + 1e-10 * np.eye(Jf.shape[1]))
    site_mean = cav_mean + (site_cov + power * cav_cov) @ Jf.T @ inv(sigma) @ residual
    # now compute the marginal likelihood approx.
    sigma_marg_lik = Jsigma @ obs_cov @ Jsigma.T + Jf @ cav_cov @ Jf.T
    chol_sigma, low = cho_factor(sigma_marg_lik)
    log_marg_lik = -1 * (.5 * site_cov.shape[0] * np.log(2 * pi)
                         + np.sum(np.log(np.diag(chol_sigma)))
                         + .5 * (residual.T @ cho_solve((chol_sigma, low), residual)))
    if (site_params is not None) and (self.damping != 1.):
        site_mean_prev, site_cov_prev = site_params  # previous site params
        site_nat2_prev = inv(site_cov_prev + 1e-10 * np.eye(Jf.shape[1]))
        site_nat1 = site_nat2 @ site_mean
        site_nat1_prev = site_nat2_prev @ site_mean_prev
        site_cov = inv((1. - self.damping) * site_nat2_prev
                       + self.damping * site_nat2
                       + 1e-10 * np.eye(Jf.shape[1]))
        site_mean = site_cov @ ((1. - self.damping) * site_nat1_prev
                                + self.damping * site_nat1)
    return log_marg_lik, site_mean, site_cov
def moment_match_unstable(self, y, cav_mean, cav_cov, hyp=None, power=1.0,
                          cubature_func=None):
    """
    TODO: Attempt to compute the full site covariance, including cross terms.
    However, this makes things unstable.
    """
    if cubature_func is None:
        x, w = gauss_hermite(1, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(1)
    lZ = self.log_expected_likelihood(y, x, w, np.squeeze(cav_mean),
                                      np.squeeze(np.diag(cav_cov)), power)
    dlZ = self.dlZ_dm(y, x, w, np.squeeze(cav_mean),
                      np.squeeze(np.diag(cav_cov)), power)[:, None]
    d2lZ = jacrev(self.dlZ_dm, argnums=3)(y, x, w, np.squeeze(cav_mean),
                                          np.squeeze(np.diag(cav_cov)), power)
    # d2lZ = np.diag(np.diag(d2lZ))  # discard cross terms
    id2lZ = inv(ensure_positive_precision(-d2lZ)
                - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ)  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov
def cay_test_3(FU, U):
    m, k = U.shape
    D = np.concatenate([U, FU], axis=1)
    C = np.concatenate([FU, -U], axis=1)
    K4 = inv(I(k) - 0.25 * FU.T @ FU)
    K3 = 2 * (K4 - I(k))
    K2 = inv(-2 * I(k) - 0.5 * FU.T @ FU)
    K1 = -2 * K2
    K12 = np.concatenate([K1, K2], axis=1)
    K34 = np.concatenate([K3, K4], axis=1)
    K = np.concatenate([K12, K34], axis=0)
    return I(m) + C @ K @ D.T
def cay_test_1(FU, U):
    m, k = U.shape
    D = np.concatenate([U, FU], axis=1)
    C = np.concatenate([FU, -U], axis=1)
    DTC = D.T @ C
    return I(m) + C @ inv(I(2 * k) - 0.5 * DTC) @ D.T
def add_satellite(data, I, i, fusion):
    H, y, R = data.getNextBatch()
    # recover the state-space mean and covariance from the information form
    C = np.linalg.inv(I)
    mu = C @ i
    # use the measurement row H as the projection direction u
    u = H
    # project the mean/covariance onto the direction u
    mu_a = u @ mu
    C_a = u @ C @ u.T
    # set up the b distribution (the new satellite measurement)
    mu_b = np.array(y).reshape(1, 1)
    C_b = np.array(R).reshape(1, 1)
    x_f, C_f = fusion.fuse(mu_a, mu_b, C_a, C_b)
    # compute the additional information gained by the fusion
    D = inv(inv(C_f) - inv(C_a))
    x_d = D @ (inv(C_f) @ x_f - inv(C_a) @ mu_a)
    fused_I = I + inv(D) * u.T @ u
    fused_i = i + u.T * inv(D) * x_d
    return fused_I, fused_i
def cay_test_2(FU, U):
    m, k = U.shape
    Uperp, R22 = np.linalg.qr(FU)
    R = np.zeros((2 * k, 2 * k))
    R[0:k, k:] = -R22.T
    R[k:, 0:k] = R22
    W = np.concatenate([U, Uperp], axis=1)
    return I(m) + W @ R @ inv(I(2 * k) - 0.5 * R) @ W.T
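# A consistency sketch (illustrative): the low-cost variants cay_test_1/2/3
# should reproduce cay_naive up to numerical error whenever UᵀFU = 0, as is
# the case for the tangent terms arising in the integrator below. Assumes the
# identity helper I(n) = np.eye(n) used throughout this file.
def _compare_cay_variants(m=8, k=2, seed=0):
    rng = np.random.default_rng(seed)
    U, _ = np.linalg.qr(rng.standard_normal((m, k)))           # orthonormal columns
    FU = (np.eye(m) - U @ U.T) @ rng.standard_normal((m, k))   # satisfies UᵀFU = 0
    Q = cay_naive(FU, U)
    for variant in (cay_test_1, cay_test_2, cay_test_3):
        print(variant.__name__, np.linalg.norm(Q - variant(FU, U), ord='fro'))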
def moment_match(self, y, cav_mean, cav_cov, hyp=None, power=1.0,
                 cubature_func=None):
    """
    Perform moment matching via cubature for a model with subband and
    modulator components.
    """
    num_components = int(cav_mean.shape[0] / 2)
    if cubature_func is None:
        x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(num_components)
    subband_mean = cav_mean[:num_components]
    modulator_mean = self.link_fn(cav_mean[num_components:])
    subband_cov = cav_cov[:num_components, :num_components]
    modulator_cov = cav_cov[num_components:, num_components:]
    sigma_points = cholesky(modulator_cov) @ x + modulator_mean
    const = power**-0.5 * (2 * pi * hyp)**(0.5 - 0.5 * power)
    mu = (self.link_fn(sigma_points).T @ subband_mean)[:, 0]
    var = hyp / power + (self.link_fn(sigma_points).T**2
                         @ np.diag(subband_cov)[..., None])[:, 0]
    normpdf = const * (2 * pi * var)**-0.5 * np.exp(-0.5 * (y - mu)**2 / var)
    Z = np.sum(w * normpdf)
    Zinv = 1. / (Z + 1e-8)
    lZ = np.log(Z + 1e-8)
    dZ1 = np.sum(w * self.link_fn(sigma_points) * (y - mu) / var * normpdf,
                 axis=-1)
    dZ2 = np.sum(w * (sigma_points - modulator_mean)
                 * np.diag(modulator_cov)[..., None]**-1 * normpdf, axis=-1)
    dlZ = Zinv * np.block([dZ1, dZ2])
    d2Z1 = np.sum(w * self.link_fn(sigma_points)**2
                  * (((y - mu) / var)**2 - var**-1) * normpdf, axis=-1)
    d2Z2 = np.sum(w * (((sigma_points - modulator_mean)
                        * np.diag(modulator_cov)[..., None]**-1)**2
                       - np.diag(modulator_cov)[..., None]**-1) * normpdf,
                  axis=-1)
    d2lZ = np.diag(-dlZ**2 + Zinv * np.block([d2Z1, d2Z2]))
    id2lZ = inv(ensure_positive_precision(-d2lZ)
                - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ[..., None]  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ)  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov
def get_Ydot(U, S, V, Adot):
    """
    Approximation of the derivative (at a given time defined outside this
    function) of A(t), \dot Y
    INPUT:
    U, S, V: the dynamic rank-k decomposition of A(t)
    Adot: the derivative of A
    OUTPUT:
    Ydot: the approximation of \dot A at the given time
    """
    m = U.shape[0]
    n = V.shape[0]
    Sdot = U.T @ Adot @ V
    Udot = (I(m) - U @ U.T) @ Adot @ V @ inv(S)
    Vdot = (I(n) - V @ V.T) @ Adot.T @ U @ inv(S).T
    Ydot = Udot @ S @ V.T + U @ Sdot @ V.T + U @ S @ Vdot.T
    return Ydot
def update(self, likelihood, y, post_mean, post_cov, hyp=None,
           site_params=None):
    """
    The update function takes a likelihood as input, and uses moment matching
    to update the site parameters
    """
    if site_params is None:
        # if no site is provided, use the predictions/posterior as the cavity with ep_fraction=1
        # calculate log marginal likelihood and the new sites via moment matching:
        lml, site_mean, site_cov = likelihood.moment_match(
            y, post_mean, post_cov, hyp, 1.0, self.cubature_func)
        site_mean, site_cov = np.atleast_2d(site_mean), np.atleast_2d(site_cov)
        site_cov = ensure_positive_variance(site_cov)
        return lml, site_mean, site_cov
    else:
        site_mean_prev, site_cov_prev = site_params  # previous site params
        # --- Compute the cavity distribution ---
        cav_mean, cav_cov = compute_cavity(post_mean, post_cov,
                                           site_mean_prev, site_cov_prev,
                                           self.power)
        # check that the cavity variances are positive
        cav_cov = ensure_positive_variance(cav_cov)
        # calculate log marginal likelihood and the new sites via moment matching:
        lml, site_mean, site_cov = likelihood.moment_match(
            y, cav_mean, cav_cov, hyp, self.power, self.cubature_func)
        site_mean, site_cov = np.atleast_2d(site_mean), np.atleast_2d(site_cov)
        site_cov = ensure_positive_variance(site_cov)
        if self.damping != 1.:
            # damped update in the natural parameter space
            site_nat2, site_nat2_prev = inv(site_cov), inv(site_cov_prev)
            site_nat1 = site_nat2 @ site_mean
            site_nat1_prev = site_nat2_prev @ site_mean_prev
            site_cov = inv((1. - self.damping) * site_nat2_prev
                           + self.damping * site_nat2)
            site_mean = site_cov @ ((1. - self.damping) * site_nat1_prev
                                    + self.damping * site_nat1)
        return lml, site_mean, site_cov
def moment_match(self, y, cav_mean, cav_cov, hyp=None, power=1.0,
                 cubature_func=None):
    """
    Perform moment matching via cubature for a likelihood whose observation
    noise scale is modulated by the second latent component.
    """
    if cubature_func is None:
        x, w = gauss_hermite(1, 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(1)
    # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to the cavity dist.
    sigma_points = np.sqrt(cav_cov[1, 1]) * x + cav_mean[1]
    f2 = self.link_fn(sigma_points)**2. / power
    obs_var = f2 + cav_cov[0, 0]
    const = power**-0.5 * (2 * pi * self.link_fn(sigma_points)**2.)**(0.5 - 0.5 * power)
    normpdf = const * (2 * pi * obs_var)**-0.5 * np.exp(
        -0.5 * (y - cav_mean[0, 0])**2 / obs_var)
    Z = np.sum(w * normpdf)
    Zinv = 1. / np.maximum(Z, 1e-8)
    lZ = np.log(np.maximum(Z, 1e-8))
    dZ_integrand1 = (y - cav_mean[0, 0]) / obs_var * normpdf
    dlZ1 = Zinv * np.sum(w * dZ_integrand1)
    dZ_integrand2 = (sigma_points - cav_mean[1, 0]) / cav_cov[1, 1] * normpdf
    dlZ2 = Zinv * np.sum(w * dZ_integrand2)
    d2Z_integrand1 = (-(f2 + cav_cov[0, 0])**-1
                      + ((y - cav_mean[0, 0]) / obs_var)**2) * normpdf
    d2lZ1 = -dlZ1**2 + Zinv * np.sum(w * d2Z_integrand1)
    d2Z_integrand2 = (-cav_cov[1, 1]**-1
                      + ((sigma_points - cav_mean[1, 0]) / cav_cov[1, 1])**2) * normpdf
    d2lZ2 = -dlZ2**2 + Zinv * np.sum(w * d2Z_integrand2)
    dlZ = np.block([[dlZ1], [dlZ2]])
    d2lZ = np.block([[d2lZ1, 0.], [0., d2lZ2]])
    id2lZ = inv(ensure_positive_precision(-d2lZ)
                - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ)  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov
def update(self, likelihood, y, post_mean, post_cov, hyp=None,
           site_params=None):
    """
    The update function takes a likelihood as input, and uses CVI to update
    the site parameters
    """
    if site_params is None:
        _, dE_dm, dE_dv = likelihood.variational_expectation(
            y, post_mean, post_cov, hyp, self.cubature_func)
        dE_dm, dE_dv = np.atleast_2d(dE_dm), np.atleast_2d(dE_dv)
        site_cov = 0.5 * inv(ensure_positive_precision(-dE_dv)
                             + 1e-10 * np.eye(dE_dv.shape[0]))
        site_mean = post_mean + site_cov @ dE_dm
    else:
        site_mean, site_cov = site_params
        _, dE_dm, dE_dv = likelihood.variational_expectation(
            y, post_mean, post_cov, hyp, self.cubature_func)
        dE_dm, dE_dv = np.atleast_2d(dE_dm), np.atleast_2d(dE_dv)
        dE_dv = -ensure_positive_precision(-dE_dv)
        # damped update of the natural parameters λ₁, λ₂
        lambda_t_2 = inv(site_cov + 1e-10 * np.eye(site_cov.shape[0]))
        lambda_t_1 = lambda_t_2 @ site_mean
        lambda_t_1 = (1 - self.damping) * lambda_t_1 + self.damping * (
            dE_dm - 2 * dE_dv @ post_mean)
        lambda_t_2 = (1 - self.damping) * lambda_t_2 + self.damping * (
            -2 * dE_dv)
        site_cov = inv(lambda_t_2 + 1e-10 * np.eye(site_cov.shape[0]))
        site_mean = site_cov @ lambda_t_1
    log_marg_lik, _, _ = likelihood.moment_match(y, post_mean, post_cov, hyp,
                                                 1.0, self.cubature_func)
    return log_marg_lik, site_mean, site_cov
def update(self, likelihood, y, post_mean, post_cov, hyp=None,
           site_params=None):
    """
    The update function takes a likelihood as input, and uses statistical
    linear regression (SLR) w.r.t. the cavity distribution to update the site
    parameters.
    """
    power = 1. if site_params is None else self.power
    log_marg_lik, _, _ = likelihood.moment_match(y, post_mean, post_cov, hyp,
                                                 1.0, self.cubature_func)
    if (site_params is None) or (power == 0):
        cav_mean, cav_cov = post_mean, post_cov
    else:
        site_mean_prev, site_cov_prev = site_params  # previous site params
        # --- Compute the cavity distribution ---
        cav_mean, cav_cov = compute_cavity(post_mean, post_cov,
                                           site_mean_prev, site_cov_prev,
                                           power)
    # SLR gives a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ])
    mu, S, C, omega = likelihood.statistical_linear_regression(
        cav_mean, cav_cov, hyp, self.cubature_func)
    # convert to a Gaussian site (a function of fₙ):
    residual = y - mu
    sigma = S + (power - 1) * C.T @ inv(
        cav_cov + 1e-10 * np.eye(cav_cov.shape[0])) @ C
    om_sig_om = omega.T @ inv(sigma) @ omega
    om_sig_om = np.diag(np.diag(om_sig_om))  # discard cross terms
    osigo = inv(om_sig_om + 1e-10 * np.eye(omega.shape[1]))
    site_mean = cav_mean + osigo @ omega.T @ inv(sigma) @ residual  # approx. likelihood (site) mean
    site_cov = -power * cav_cov + osigo  # approx. likelihood (site) variance
    if (site_params is not None) and (self.damping != 1.):
        site_mean_prev, site_cov_prev = site_params  # previous site params
        jitter = 1e-10 * np.eye(site_cov.shape[0])
        site_nat2 = inv(site_cov + jitter)
        site_nat2_prev = inv(site_cov_prev + jitter)
        site_nat1 = site_nat2 @ site_mean
        site_nat1_prev = site_nat2_prev @ site_mean_prev
        site_cov = inv((1. - self.damping) * site_nat2_prev
                       + self.damping * site_nat2 + jitter)
        site_mean = site_cov @ ((1. - self.damping) * site_nat1_prev
                                + self.damping * site_nat1)
    return log_marg_lik, site_mean, site_cov
def statistical_linear_regression_cubature(self, cav_mean, cav_cov, hyp=None,
                                           cubature_func=None):
    """
    Perform statistical linear regression (SLR) using cubature.
    We aim to find a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ]).
    TODO: this currently assumes an additive noise model (ok for our current
          applications), make more general
    """
    if cubature_func is None:
        x, w = gauss_hermite(cav_mean.shape[0], 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(cav_mean.shape[0])
    # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to cavity dist.
    sigma_points = cholesky(cav_cov) @ np.atleast_2d(x) + cav_mean
    lik_expectation, lik_covariance = self.conditional_moments(sigma_points, hyp)
    # Compute zₙ via cubature:
    # zₙ = ∫ E[yₙ|fₙ] 𝓝(fₙ|mₙ,vₙ) dfₙ
    #    ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
    mu = np.sum(w * lik_expectation, axis=-1)[:, None]
    # Compute variance S via cubature:
    # S = ∫ [(E[yₙ|fₙ]-zₙ) (E[yₙ|fₙ]-zₙ)' + Cov[yₙ|fₙ]] 𝓝(fₙ|mₙ,vₙ) dfₙ
    #   ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-zₙ) (E[yₙ|fsigᵢ]-zₙ)' + Cov[yₙ|fₙ]]
    # TODO: allow for multi-dim cubature
    S = np.sum(w * ((lik_expectation - mu) * (lik_expectation - mu)
                    + lik_covariance), axis=-1)[:, None]
    # Compute cross covariance C via cubature:
    # C = ∫ (fₙ-mₙ) (E[yₙ|fₙ]-zₙ)' 𝓝(fₙ|mₙ,vₙ) dfₙ
    #   ≈ ∑ᵢ wᵢ (fsigᵢ-mₙ) (E[yₙ|fsigᵢ]-zₙ)'
    C = np.sum(w * (sigma_points - cav_mean) * (lik_expectation - mu),
               axis=-1)[:, None]
    # Compute derivative of z via cubature:
    # omega = ∫ E[yₙ|fₙ] vₙ⁻¹ (fₙ-mₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    #       ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ] vₙ⁻¹ (fsigᵢ-mₙ)
    omega = np.sum(w * lik_expectation * (inv(cav_cov) @ (sigma_points - cav_mean)),
                   axis=-1)[None, :]
    return mu, S, C, omega
def rk2(U0, S0, V0, Adot, h, t, select_cay="naive"):
    """
    Performs one step of the second order Runge-Kutta scheme for the (rank k)
    approximation of a matrix A(t), using an initial SVD decomposition and the
    derivative \dot A(t). Also performs a first order step, yielding an
    approximation of the local error.
    INPUT:
    U0, S0, V0: the SVD of A(t = 0)
    Adot: the derivative of A(t)
    h: the step length in time
    t: the current time. We approximate A(t + h)
    select_cay: string for selecting the Cayley transformation, or for
                testing accuracy
    OUTPUT:
    S1est, U1est, V1est: the approximated rank-k decomposition of A(t) by
                         first order Runge-Kutta
    S1, U1, V1: the approximated rank-k decomposition of A(t) by second order
                Runge-Kutta
    """
    # placeholder that will be assigned a proper Cayley transformation below
    cay = lambda d: 0
    m = U0.shape[0]
    n = V0.shape[0]
    FU = (I(m) - U0 @ U0.T) @ Adot(t) @ V0 @ inv(S0)
    FV = (I(n) - V0 @ V0.T) @ Adot(t).T @ U0 @ inv(S0).T
    # select a Cayley transformation (for testing), or test accuracy
    if select_cay == "naive":
        cay = cay_naive
    elif select_cay == "test_1":
        cay = cay_test_1
    elif select_cay == "test_2":
        cay = cay_test_2
    elif select_cay == "test_3":
        cay = cay_test_3
    elif select_cay == "test_accuracy":
        n_t1 = 0.5 * np.linalg.norm(cay_naive(FU, U0) - cay_test_1(FU, U0), ord='fro')
        n_t1 += 0.5 * np.linalg.norm(cay_naive(FV, V0) - cay_test_1(FV, V0), ord='fro')
        n_t2 = 0.5 * np.linalg.norm(cay_naive(FU, U0) - cay_test_2(FU, U0), ord='fro')
        n_t2 += 0.5 * np.linalg.norm(cay_naive(FV, V0) - cay_test_2(FV, V0), ord='fro')
        n_t3 = 0.5 * np.linalg.norm(cay_naive(FU, U0) - cay_test_3(FU, U0), ord='fro')
        n_t3 += 0.5 * np.linalg.norm(cay_naive(FV, V0) - cay_test_3(FV, V0), ord='fro')
        print("Accuracy of method 1: ", n_t1)
        print("Accuracy of method 2: ", n_t2)
        print("Accuracy of method 3: ", n_t3)
        return
    # RK2 scheme
    K1S = h * U0.T @ Adot(t) @ V0
    Shlf = S0 + 0.5 * K1S
    # these are constructed inside the cay() function:
    # K1U = h*([email protected] - [email protected])
    # K1V = h*([email protected] - [email protected])
    S1est = Shlf + 0.5 * K1S
    U1est = cay(h * FU, U0) @ U0
    V1est = cay(h * FV, V0) @ V0
    Uhlf = cay(h * 0.5 * FU, U0) @ U0
    Vhlf = cay(h * 0.5 * FV, V0) @ V0
    FUhlf = (I(m) - Uhlf @ Uhlf.T) @ Adot(t + 0.5 * h) @ Vhlf @ inv(Shlf)
    FVhlf = (I(n) - Vhlf @ Vhlf.T) @ Adot(t + 0.5 * h).T @ Uhlf @ inv(Shlf).T
    K2S = h * Uhlf.T @ Adot(t + 0.5 * h) @ Vhlf
    # these are constructed inside the cay() function:
    # K2U = h*([email protected] - [email protected])
    # K2V = h*([email protected] - [email protected])
    S1 = S0 + K2S
    U1 = cay(h * FUhlf, Uhlf) @ U0
    V1 = cay(h * FVhlf, Vhlf) @ V0
    return S1est, U1est, V1est, S1, U1, V1
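# A minimal usage sketch for rk2 (illustrative; the rank-2 test matrix A(t)
# below is made up). One RK2 step should track A(t + h) closely for small h.
def _demo_rk2(h=1e-2, t0=0.0, m=6, n=5, k=2, seed=0):
    rng = np.random.default_rng(seed)
    B = rng.standard_normal((m, k))
    C = rng.standard_normal((n, k))
    A = lambda t: np.exp(t) * B @ C.T      # exactly rank-k for all t
    Adot = lambda t: np.exp(t) * B @ C.T   # its time derivative
    U, s, Vt = np.linalg.svd(A(t0), full_matrices=False)
    U0, S0, V0 = U[:, :k], np.diag(s[:k]), Vt[:k, :].T
    S1est, U1est, V1est, S1, U1, V1 = rk2(U0, S0, V0, Adot, h, t0)
    print("RK2 error:", np.linalg.norm(U1 @ S1 @ V1.T - A(t0 + h)))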
def check_x_equal_y(ra, b3, pa, pb, p):
    rab = pow(ra, b3, p)
    painvpb = (pa * utils.inv(pb, p)) % p
    return rab == painvpb  # condition to check
def compute_rb(qa, qb, b3, p):
    return pow((qa * utils.inv(qb, p)) % p, b3, p)
def cay_naive(FU, U):
    B = FU @ U.T - U @ FU.T
    n = B.shape[0]
    return inv(I(n) - 0.5 * B) @ (I(n) + 0.5 * B)
def compute_mu(g, lam, n):
    # Paillier decryption constant: μ = (L(g^λ mod n²))⁻¹ mod n
    return utils.inv(l_func(pow(g, lam, n**2), n), n) % n
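# A toy key-generation sketch for compute_mu (illustrative; insecure toy
# primes). Assumes l_func(x, n) = (x - 1) // n and that utils.inv is a modular
# inverse, as used above. With the common choice g = n + 1, g^λ mod n² = 1 + λn,
# so μ = λ⁻¹ mod n.
import math

p, q = 11, 13                  # toy primes; real keys use large primes
n = p * q
lam = math.lcm(p - 1, q - 1)   # Carmichael's λ(n)
g = n + 1                      # standard generator choice
mu = compute_mu(g, lam, n)
assert (mu * l_func(pow(g, lam, n**2), n)) % n == 1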
def cay(B):
    n = B.shape[0]
    return inv(I(n) - 0.5 * B) @ (I(n) + 0.5 * B)
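# A quick property check (illustrative): for a skew-symmetric B, the Cayley
# transform cay(B) is orthogonal, which is what keeps the factors U and V on
# the Stiefel manifold during integration. Assumes I(n) = np.eye(n).
def _check_cay_orthogonal(n=5, seed=0):
    rng = np.random.default_rng(seed)
    M = rng.standard_normal((n, n))
    B = M - M.T  # skew-symmetric
    Q = cay(B)
    assert np.allclose(Q.T @ Q, np.eye(n))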
def moment_match_cubature(self, y, cav_mean, cav_cov, hyp=None, power=1.0,
                          cubature_func=None):
    """
    TODO: N.B. THIS VERSION ALLOWS MULTI-DIMENSIONAL MOMENT MATCHING, BUT CAN
    BE UNSTABLE
    Perform moment matching via cubature.
    Moment matching involves computing the log partition function, logZₙ, and
    its derivatives w.r.t. the cavity mean
        logZₙ = log ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    with EP power a.
    :param y: observed data (yₙ) [scalar]
    :param cav_mean: cavity mean (mₙ) [scalar]
    :param cav_cov: cavity covariance (cₙ) [scalar]
    :param hyp: likelihood hyperparameter [scalar]
    :param power: EP power / fraction (a) [scalar]
    :param cubature_func: the function to compute sigma points and weights to use during cubature
    :return:
        lZ: the log partition function, logZₙ [scalar]
        dlZ: first derivative of logZₙ w.r.t. mₙ (if derivatives=True) [scalar]
        d2lZ: second derivative of logZₙ w.r.t. mₙ (if derivatives=True) [scalar]
    """
    if cubature_func is None:
        x, w = gauss_hermite(cav_mean.shape[0], 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(cav_mean.shape[0])
    cav_cho, low = cho_factor(cav_cov)
    # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to cavity dist.
    sigma_points = cav_cho @ np.atleast_2d(x) + cav_mean
    # pre-compute wᵢ pᵃ(yₙ|xᵢ√(2vₙ) + mₙ)
    weighted_likelihood_eval = w * self.evaluate_likelihood(y, sigma_points, hyp)**power
    # Compute partition function via cubature:
    # Zₙ = ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ pᵃ(yₙ|fsigᵢ)
    Z = np.sum(weighted_likelihood_eval, axis=-1)
    lZ = np.log(np.maximum(Z, 1e-8))
    Zinv = 1.0 / np.maximum(Z, 1e-8)
    # Compute derivative of partition function via cubature:
    # dZₙ/dmₙ = ∫ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    #         ≈ ∑ᵢ wᵢ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fsigᵢ)
    d1 = vmap(gaussian_first_derivative_wrt_mean,
              (1, None, None, 1))(sigma_points[..., None], cav_mean, cav_cov,
                                  weighted_likelihood_eval)
    dZ = np.sum(d1, axis=0)
    # dlogZₙ/dmₙ = (dZₙ/dmₙ) / Zₙ
    dlZ = Zinv * dZ
    # Compute second derivative of partition function via cubature:
    # d²Zₙ/dmₙ² = ∫ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    #           ≈ ∑ᵢ wᵢ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fsigᵢ)
    d2 = vmap(gaussian_second_derivative_wrt_mean,
              (1, None, None, 1))(sigma_points[..., None], cav_mean, cav_cov,
                                  weighted_likelihood_eval)
    d2Z = np.sum(d2, axis=0)
    # d²logZₙ/dmₙ² = d[(dZₙ/dmₙ) / Zₙ]/dmₙ
    #              = (d²Zₙ/dmₙ² * Zₙ - (dZₙ/dmₙ)²) / Zₙ²
    #              = d²Zₙ/dmₙ² / Zₙ - (dlogZₙ/dmₙ)²
    d2lZ = -dlZ @ dlZ.T + Zinv * d2Z
    id2lZ = inv(ensure_positive_precision(-d2lZ)
                - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ)  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov
def gaussian_second_derivative_wrt_mean(f, m, C, w):
    invC = inv(C)
    return (invC @ (f - m) @ (f - m).T @ invC - invC) * w
def gaussian_first_derivative_wrt_mean(f, m, C, w):
    invC = inv(C)
    return invC @ (f - m) * w
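# A finite-difference sanity check (illustrative) for the first-derivative
# identity ∂𝓝(f|m,C)/∂m = C⁻¹(f-m) 𝓝(f|m,C) used above. Assumes SciPy is
# available for the Gaussian density; the 2-d numbers are made up.
from scipy.stats import multivariate_normal

def _check_gaussian_first_derivative(eps=1e-6):
    m = np.array([[0.1], [-0.2]])
    C = np.array([[1.0, 0.3], [0.3, 0.8]])
    f = np.array([[0.5], [0.4]])
    pdf = lambda mean: multivariate_normal.pdf(f.ravel(), mean.ravel(), C)
    g_analytic = gaussian_first_derivative_wrt_mean(f, m, C, pdf(m))
    # central finite differences of 𝓝(f|m,C) w.r.t. each component of m
    g_fd = np.array([
        (pdf(m + eps * basis) - pdf(m - eps * basis)) / (2 * eps)
        for basis in np.eye(2)[..., None]
    ])[:, None]
    assert np.allclose(g_analytic, g_fd, atol=1e-8)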
def moment_match_cubature(self, y, cav_mean, cav_cov, hyp=None, power=1.0,
                          cubature_func=None):
    """
    TODO: N.B. THIS VERSION IS SUPERSEDED BY THE FUNCTION BELOW. HOWEVER THIS
    ONE MAY BE MORE STABLE.
    Perform moment matching via cubature.
    Moment matching involves computing the log partition function, logZₙ, and
    its derivatives w.r.t. the cavity mean
        logZₙ = log ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    with EP power a.
    :param y: observed data (yₙ) [scalar]
    :param cav_mean: cavity mean (mₙ) [scalar]
    :param cav_cov: cavity covariance (cₙ) [scalar]
    :param hyp: likelihood hyperparameter [scalar]
    :param power: EP power / fraction (a) [scalar]
    :param cubature_func: the function to compute sigma points and weights to use during cubature
    :return:
        lZ: the log partition function, logZₙ [scalar]
        dlZ: first derivative of logZₙ w.r.t. mₙ (if derivatives=True) [scalar]
        d2lZ: second derivative of logZₙ w.r.t. mₙ (if derivatives=True) [scalar]
    """
    if cubature_func is None:
        x, w = gauss_hermite(cav_mean.shape[0], 20)  # Gauss-Hermite sigma points and weights
    else:
        x, w = cubature_func(cav_mean.shape[0])
    cav_cho, low = cho_factor(cav_cov)
    # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to cavity dist.
    sigma_points = cav_cho @ np.atleast_2d(x) + cav_mean
    # pre-compute wᵢ pᵃ(yₙ|xᵢ√(2vₙ) + mₙ)
    weighted_likelihood_eval = w * self.evaluate_likelihood(y, sigma_points, hyp)**power
    # a different approach, based on the log-likelihood, which can be more stable:
    # ll = self.evaluate_log_likelihood(y, sigma_points)
    # lmax = np.max(ll)
    # weighted_likelihood_eval = np.exp(lmax * power) * w * np.exp(power * (ll - lmax))
    # Compute partition function via cubature:
    # Zₙ = ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ pᵃ(yₙ|fsigᵢ)
    Z = np.sum(weighted_likelihood_eval, axis=-1)
    lZ = np.log(Z)
    Zinv = 1.0 / Z
    # Compute derivative of partition function via cubature:
    # dZₙ/dmₙ = ∫ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    #         ≈ ∑ᵢ wᵢ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fsigᵢ)
    covinv_f_m = cho_solve((cav_cho, low), sigma_points - cav_mean)
    dZ = np.sum(covinv_f_m  # (sigma_points - cav_mean) / cav_cov
                * weighted_likelihood_eval, axis=-1)
    # dlogZₙ/dmₙ = (dZₙ/dmₙ) / Zₙ
    dlZ = Zinv * dZ
    # Compute second derivative of partition function via cubature:
    # d²Zₙ/dmₙ² = ∫ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
    #           ≈ ∑ᵢ wᵢ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fsigᵢ)
    d2Z = np.sum(((sigma_points - cav_mean)**2 / cav_cov**2 - 1.0 / cav_cov)
                 * weighted_likelihood_eval)
    # d²logZₙ/dmₙ² = d[(dZₙ/dmₙ) / Zₙ]/dmₙ
    #              = (d²Zₙ/dmₙ² * Zₙ - (dZₙ/dmₙ)²) / Zₙ²
    #              = d²Zₙ/dmₙ² / Zₙ - (dlogZₙ/dmₙ)²
    d2lZ = -dlZ @ dlZ.T + Zinv * d2Z
    id2lZ = inv(ensure_positive_precision(-d2lZ)
                - 1e-10 * np.eye(d2lZ.shape[0]))
    site_mean = cav_mean + id2lZ @ dlZ  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
    site_cov = power * (-cav_cov + id2lZ)  # approx. likelihood (site) variance
    return lZ, site_mean, site_cov