def collapsed_predict(self, Z_new, full_output=True, full_cov=False):
    """
    Predict the multinomial probability vector at a grid of points, Z_new,
    by first integrating out the value of psi at the training inputs Z,
    given omega and the kernel parameters.
    """
    assert len(self.data_list) == 1, "Must have one data list in order to predict."
    data = self.data_list[0]
    Z = data["Z"]

    assert Z_new is not None and Z_new.ndim == 2 and Z_new.shape[1] == self.D
    M_new = Z_new.shape[0]

    # Compute the kernel for Z_new
    C = self.kernel.K(Z, Z)
    Cnn = self.kernel.K(Z_new, Z_new)
    Cnv = self.kernel.K(Z_new, Z)

    # Predict the psis
    mu_psis_new = np.zeros((self.K-1, M_new))
    Sig_psis_new = np.zeros((self.K-1, M_new, M_new))
    for k in xrange(self.K-1):
        sys.stdout.write(".")
        sys.stdout.flush()

        # Throw out inputs where N[:,k] == 0
        Omegak = data["omega"][:,k]
        kappak = data["kappa"][:,k]

        # Give invalid points (zero precision) a tiny positive precision
        Omegak[Omegak == 0] = 1e-16

        # Account for the mean from the omega potentials
        y = kappak/Omegak - self.mu[k]

        # The y's are noisy observations at inputs Z
        # with diagonal covariance Omegak^{-1}
        Cvv_noisy = C + np.diag(1./Omegak)
        Lvv_noisy = np.linalg.cholesky(Cvv_noisy)

        # Compute the conditional mean given noisy observations
        psik_pred = Cnv.dot(dpotrs(Lvv_noisy, y, lower=True)[0])

        # Save these into the combined arrays
        mu_psis_new[k] = psik_pred + self.mu[k]

        if full_cov:
            Sig_psis_new[k] = Cnn - Cnv.dot(dpotrs(Lvv_noisy, Cnv.T, lower=True)[0])

    sys.stdout.write("\n")
    sys.stdout.flush()

    # Convert these to pis
    pis_new = psi_to_pi(mu_psis_new)

    if full_output:
        return pis_new, mu_psis_new, Sig_psis_new
    else:
        return pis_new
def collapsed_predict(self, Z_new, full_output=True, full_cov=False):
    """
    Predict the multinomial probability vector at a grid of points, Z_new,
    by first integrating out the value of psi at the training inputs Z,
    given omega and the kernel parameters.
    """
    assert len(self.data_list) == 1, "Must have one data list in order to predict."
    data = self.data_list[0]
    Z = data["Z"]

    assert Z_new is not None and Z_new.ndim == 2 and Z_new.shape[1] == self.D
    M_new = Z_new.shape[0]

    # Compute the kernel for Z_new
    C = self.kernel.K(Z, Z)
    Cnn = self.kernel.K(Z_new, Z_new)
    Cnv = self.kernel.K(Z_new, Z)

    # Predict the psis
    mu_psis_new = np.zeros((self.K-1, M_new))
    Sig_psis_new = np.zeros((self.K-1, M_new, M_new))
    for k in range(self.K-1):
        sys.stdout.write(".")
        sys.stdout.flush()

        # Throw out inputs where N[:,k] == 0
        Omegak = data["omega"][:,k]
        kappak = data["kappa"][:,k]

        # Give invalid points (zero precision) a tiny positive precision
        Omegak[Omegak == 0] = 1e-16

        # Account for the mean from the omega potentials
        y = kappak/Omegak - self.mu[k]

        # The y's are noisy observations at inputs Z
        # with diagonal covariance Omegak^{-1}
        Cvv_noisy = C + np.diag(1./Omegak)
        Lvv_noisy = np.linalg.cholesky(Cvv_noisy)

        # Compute the conditional mean given noisy observations
        psik_pred = Cnv.dot(dpotrs(Lvv_noisy, y, lower=True)[0])

        # Save these into the combined arrays
        mu_psis_new[k] = psik_pred + self.mu[k]

        if full_cov:
            Sig_psis_new[k] = Cnn - Cnv.dot(dpotrs(Lvv_noisy, Cnv.T, lower=True)[0])

    sys.stdout.write("\n")
    sys.stdout.flush()

    # Convert these to pis
    pis_new = psi_to_pi(mu_psis_new)

    if full_output:
        return pis_new, mu_psis_new, Sig_psis_new
    else:
        return pis_new
def solve_cholesky(A, B):
    """
    Solve the system Mx = B where A is the lower-triangular Cholesky
    decomposition of the matrix M.
    """
    X, _ = lapack.dpotrs(A, B, lower=1)
    return X
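A minimal usage sketch (not from the original snippet; the test values are assumptions): build a random symmetric positive-definite matrix, factor it with numpy, and check solve_cholesky against numpy's dense solver.

# Illustrative check only; solve_cholesky is the wrapper defined above.
import numpy as np

rng = np.random.default_rng(0)
G = rng.standard_normal((5, 5))
M = G @ G.T + 5 * np.eye(5)      # symmetric positive definite
b = rng.standard_normal(5)

L = np.linalg.cholesky(M)        # lower-triangular factor, M = L @ L.T
x = solve_cholesky(L, b)         # dpotrs back-substitution with the factor
assert np.allclose(M @ x, b)
assert np.allclose(x, np.linalg.solve(M, b))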
def __init__(self, X, Y, tuning):
    self.X = X
    self.Y = Y
    self.tuning = tuning
    self.K_uf = tuning(X).T
    KK = self.K_uf.dot(self.K_uf.T)
    Ky = self.K_uf.dot(Y)
    self.w_mean = dpotrs(dpotrf(KK)[0], Ky)[0]  # faster than np.linalg.solve(KK, Ky)
    if np.any(np.isnan(self.w_mean)):
        try:
            self.w_mean = np.linalg.solve(KK, Ky)
        except:
            jitter = np.diag(KK).mean() * 1e-6
            num_tries = 1
            while num_tries <= 5 and np.isfinite(jitter):
                try:
                    self.w_mean = np.linalg.solve(
                        KK + np.eye(KK.shape[0]) * jitter, Ky)
                except:
                    jitter *= 10
                finally:
                    num_tries += 1
    self.wb_var = np.transpose([
        scipy.optimize.nnls(
            np.vstack([self._SNRinv(X), np.ones(len(X))]).T,
            (self.mean(X) - Y)[:, a]**2)[0]
        for a in range(Y.shape[1])
    ])
    self.Gaussian_noise = Foo()
    self.variance, self.Gaussian_noise.variance = self.wb_var
def LML_se(self, theta, returnGradients=False):
    self.setTheta(theta)
    K, r = self.cov(self.X, retr=True)
    Ky = K.copy()
    Ky += np.eye(self.X.shape[0])*self.var_n + np.eye(self.X.shape[0])*1e-8
    L = self.cholSafe(Ky)
    WlogDet = 2.*np.sum(np.log(np.diag(L)))
    alpha, status = dpotrs(L, self.Y, lower=1)
    dataFit = - np.sum(alpha * self.Y)
    modelComplexity = -self.Y.shape[1] * WlogDet
    normalizer = -self.Y.size * log2pi
    logMarginalLikelihood = 0.5*(dataFit + modelComplexity + normalizer)
    if returnGradients == False:
        return logMarginalLikelihood
    else:
        Wi, status = dpotri(-L, lower=1)
        Wi = np.asarray(Wi)
        # copy bottom triangle to top triangle
        triu = np.triu_indices_from(Wi, k=1)
        Wi[triu] = Wi.T[triu]
        # dL = change in LML, dK is change in Kernel(K)
        dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.Y.shape[1] * Wi)
        dL_dVarn = np.diag(dL_dK).sum()
        varfGradient = np.sum(K * dL_dK)/self.var_f
        dK_dr = -r*K
        dL_dr = dK_dr * dL_dK
        lengthscaleGradient = -np.sum(dL_dr*r)/self.charLen
        grads = np.array([varfGradient, lengthscaleGradient, dL_dVarn])
        return logMarginalLikelihood, grads
def pd_solve(a, b):
    """ Fast matrix solve for a positive definite matrix a. """
    L, info = dpotrf(a)
    if info == 0:
        return dpotrs(L, b)[0]
    else:
        return np.linalg.solve(a, b)
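A small usage sketch, assumed rather than taken from the source: on a well-conditioned SPD system pd_solve should agree with numpy's general solver, and the dpotrf info check lets it fall back to np.linalg.solve when the factorization fails.

# Illustrative check only; pd_solve is the helper defined above.
import numpy as np

rng = np.random.default_rng(1)
G = rng.standard_normal((4, 4))
A = G @ G.T + 4 * np.eye(4)          # positive definite, so dpotrf succeeds
b = rng.standard_normal((4, 2))

x = pd_solve(A, b)
assert np.allclose(A @ x, b)
assert np.allclose(x, np.linalg.solve(A, b))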
def predict(self, Z_new, full_output=True, full_cov=False):
    """
    Predict the multinomial probability vector at a grid of points, Z_new.

    :param Z_new:
    :return:
    """
    assert len(self.data_list) == 1, "Must have one data list in order to predict."
    data = self.data_list[0]
    M = data["M"]
    Z = data["Z"]

    assert Z_new is not None and Z_new.ndim == 2 and Z_new.shape[1] == self.D
    M_new = Z_new.shape[0]

    # Compute the kernel for Z_new
    C = self.kernel.K(Z, Z)
    Cvv = C + np.diag(1e-6 * np.ones(M))
    Lvv = np.linalg.cholesky(Cvv)
    Cnn = self.kernel.K(Z_new, Z_new)

    # Compute the kernel between the new and valid points
    Cnv = self.kernel.K(Z_new, Z)

    # Predict the psis
    mu_psis_new = np.zeros((self.K, M_new))
    Sig_psis_new = np.zeros((self.K, M_new, M_new))
    for k in xrange(self.K):
        sys.stdout.write(".")
        sys.stdout.flush()

        psik = data["psi"][:, k]

        # Compute the predictive parameters
        y = solve_triangular(Lvv, psik, lower=True)
        x = solve_triangular(Lvv.T, y, lower=False)
        psik_pred = Cnv.dot(x)

        # Save these into the combined arrays
        mu_psis_new[k] = psik_pred + self.mu[k]

        if full_cov:
            # Sig_pred = Cnn - Cnv.dot(np.linalg.solve(Cvv, Cnv.T))
            Sig_psis_new[k] = Cnn - Cnv.dot(dpotrs(Lvv, Cnv.T, lower=True)[0])

    sys.stdout.write("\n")
    sys.stdout.flush()

    # Convert these to pis
    pis_new = np.array([ln_psi_to_pi(psi) for psi in mu_psis_new])

    if full_output:
        return pis_new, mu_psis_new, Sig_psis_new
    else:
        return pis_new
def dpotrs(A, B, lower=0):
    """
    Wrapper for lapack dpotrs function

    :param A: Matrix A
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns:
    """
    return lapack.dpotrs(A, B, lower=lower)
def dpotrs(A, B, lower=0):
    """Wrapper for lapack dpotrs function

    :param A: Matrix A
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns:
    """
    return lapack.dpotrs(A, B, lower=lower)
def dpotrs(A, B, lower=1):
    """
    Wrapper for lapack dpotrs function

    :param A: Matrix A
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns:
    """
    A = force_F_ordered(A)
    return lapack.dpotrs(A, B, lower=lower)
def cho_solve(chol, y):
    """
    Solves the system chol * chol^T * x = y

    :param chol: np.array(nxn)
    :param y: np.array(n)
    :return: np.array(n)
    """
    chol = np.asfortranarray(chol)
    return lapack.dpotrs(chol, y, lower=1)[0]
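A hedged usage sketch (the test values are made up): the wrapper solves S x = y given the lower Cholesky factor of S, so the result should satisfy the original system.

# Illustrative check only; cho_solve is the wrapper defined above.
import numpy as np

rng = np.random.default_rng(2)
G = rng.standard_normal((6, 6))
S = G @ G.T + 6 * np.eye(6)
y = rng.standard_normal(6)

chol = np.linalg.cholesky(S)          # S = chol @ chol.T
x = cho_solve(chol, y)
assert np.allclose(S @ x, y)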
def predict(self, Z_new, full_output=True, full_cov=False):
    """
    Predict the multinomial probability vector at a grid of points, Z_new.

    :param Z_new:
    :return:
    """
    assert len(self.data_list) == 1, "Must have one data list in order to predict."
    data = self.data_list[0]
    M = data["M"]
    Z = data["Z"]

    assert Z_new is not None and Z_new.ndim == 2 and Z_new.shape[1] == self.D
    M_new = Z_new.shape[0]

    # Compute the kernel for Z_new
    C = self.kernel.K(Z, Z)
    Cvv = C + np.diag(1e-6 * np.ones(M))
    Lvv = np.linalg.cholesky(Cvv)
    Cnn = self.kernel.K(Z_new, Z_new)

    # Compute the kernel between the new and valid points
    Cnv = self.kernel.K(Z_new, Z)

    # Predict the psis
    mu_psis_new = np.zeros((self.K, M_new))
    Sig_psis_new = np.zeros((self.K, M_new, M_new))
    for k in xrange(self.K):
        sys.stdout.write(".")
        sys.stdout.flush()

        psik = data["psi"][:,k]

        # Compute the predictive parameters
        y = solve_triangular(Lvv, psik, lower=True)
        x = solve_triangular(Lvv.T, y, lower=False)
        psik_pred = Cnv.dot(x)

        # Save these into the combined arrays
        mu_psis_new[k] = psik_pred + self.mu[k]

        if full_cov:
            # Sig_pred = Cnn - Cnv.dot(np.linalg.solve(Cvv, Cnv.T))
            Sig_psis_new[k] = Cnn - Cnv.dot(dpotrs(Lvv, Cnv.T, lower=True)[0])

    sys.stdout.write("\n")
    sys.stdout.flush()

    # Convert these to pis
    pis_new = np.array([ln_psi_to_pi(psi) for psi in mu_psis_new])

    if full_output:
        return pis_new, mu_psis_new, Sig_psis_new
    else:
        return pis_new
def _solve_cholesky(L, b, lower=True):
    '''
    Solves `Ax = b` given the Cholesky decomposition of `A` using `dpotrs`.
    '''
    if any(i == 0 for i in b.shape):
        return np.zeros(b.shape, dtype=float)

    x, info = dpotrs(L, b, lower=lower)
    if info < 0:
        raise ValueError('The %s-th argument has an illegal value.' % -info)

    return x
def sample_gaussian(mu=None, Sigma=None, J=None, h=None):
    mean_params = mu is not None and Sigma is not None
    info_params = J is not None and h is not None
    assert mean_params or info_params

    if not any_none(mu, Sigma):
        return np.random.multivariate_normal(mu, Sigma)
    else:
        from scipy.linalg.lapack import dpotrs
        L = np.linalg.cholesky(J)
        x = np.random.randn(h.shape[0])
        return scipy.linalg.solve_triangular(L, x, lower=True, trans='T') \
            + dpotrs(L, h, lower=True)[0]
def sample_gaussian(mu=None, Sigma=None, J=None, h=None):
    # Copied from pybasicbayes
    mean_params = mu is not None and Sigma is not None
    info_params = J is not None and h is not None
    assert mean_params or info_params

    if mu is not None and Sigma is not None:
        return np.random.multivariate_normal(mu, Sigma)
    else:
        L = np.linalg.cholesky(J)
        x = np.random.randn(h.shape[0])
        return solve_triangular(L, x, lower=True, trans='T') \
            + dpotrs(L, h, lower=True)[0]
def solve_psd(A, b, chol=None, lower=True, overwrite_b=False, overwrite_A=False):
    if chol is None:
        return lapack.dposv(A, b, overwrite_b=overwrite_b, overwrite_a=overwrite_A)[1]
    else:
        return lapack.dpotrs(chol, b, lower, overwrite_b)[0]
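A short sketch (assumed, not from the original project) exercising both branches of solve_psd: the dposv path factors A itself, while the dpotrs path reuses a precomputed lower Cholesky factor; the two solutions should coincide.

# Illustrative check only; solve_psd is the helper defined above.
import numpy as np

rng = np.random.default_rng(3)
G = rng.standard_normal((5, 5))
A = G @ G.T + 5 * np.eye(5)
b = rng.standard_normal(5)

x_direct = solve_psd(A, b)                   # dposv: factor and solve in one call
L = np.linalg.cholesky(A)
x_reuse = solve_psd(A, b, chol=L)            # dpotrs: reuse the existing factor
assert np.allclose(x_direct, x_reuse)
assert np.allclose(A @ x_direct, b)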
def sample_gaussian(mu=None, Sigma=None, J=None, h=None):
    mean_params = mu is not None and Sigma is not None
    info_params = J is not None and h is not None
    assert mean_params or info_params

    if mu is not None and Sigma is not None:
        return np.random.multivariate_normal(mu, Sigma)
    else:
        from scipy.linalg.lapack import dpotrs
        L = np.linalg.cholesky(J)
        x = np.random.randn(h.shape[0])
        return solve_triangular(L, x, lower=True, trans='T') \
            + dpotrs(L, h, lower=True)[0]
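A quick consistency sketch (an assumption, not part of the original code): in information form the target is N(J^{-1} h, J^{-1}), so the empirical mean of many draws should approach np.linalg.solve(J, h) and the sample covariance should approach inv(J).

# Monte Carlo sanity check for the information-form branch above (illustrative only).
import numpy as np

rng = np.random.default_rng(4)
G = rng.standard_normal((3, 3))
J = G @ G.T + 3 * np.eye(3)               # precision matrix
h = rng.standard_normal(3)

draws = np.array([sample_gaussian(J=J, h=h) for _ in range(20000)])
assert np.allclose(draws.mean(axis=0), np.linalg.solve(J, h), atol=0.05)
assert np.allclose(np.cov(draws.T), np.linalg.inv(J), atol=0.05)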
def _marginal_likelihood(A_col, J_prior, h_prior, J_post, h_post):
    """
    Compute the marginal likelihood as the ratio of log normalizers
    """
    Aeff = np.concatenate(([1], np.repeat(A_col, B))).astype(np.bool)

    # Extract the entries for which A=1
    J0 = J_prior[np.ix_(Aeff, Aeff)]
    h0 = h_prior[Aeff]
    Jp = J_post[np.ix_(Aeff, Aeff)]
    hp = h_post[Aeff]

    # Compute the marginal likelihood
    L0 = np.linalg.cholesky(J0)
    Lp = np.linalg.cholesky(Jp)

    ml = 0
    ml -= np.sum(np.log(np.diag(Lp)))
    ml += np.sum(np.log(np.diag(L0)))
    ml += 0.5 * hp.T.dot(dpotrs(Lp, hp, lower=True)[0])
    ml -= 0.5 * h0.T.dot(dpotrs(L0, h0, lower=True)[0])

    return ml
def _marginal_likelihood(A_col, J_prior, h_prior, J_post, h_post):
    """
    Compute the marginal likelihood as the ratio of log normalizers
    """
    Aeff = np.concatenate(([1], np.repeat(A_col, B))).astype(np.bool)

    # Extract the entries for which A=1
    J0 = J_prior[np.ix_(Aeff, Aeff)]
    h0 = h_prior[Aeff]
    Jp = J_post[np.ix_(Aeff, Aeff)]
    hp = h_post[Aeff]

    # Compute the marginal likelihood
    L0 = np.linalg.cholesky(J0)
    Lp = np.linalg.cholesky(Jp)

    ml = 0
    ml -= np.sum(np.log(np.diag(Lp)))
    ml += np.sum(np.log(np.diag(L0)))
    ml += 0.5*hp.T.dot(dpotrs(Lp, hp, lower=True)[0])
    ml -= 0.5*h0.T.dot(dpotrs(L0, h0, lower=True)[0])

    return ml
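The Cholesky shortcut above computes 0.5*(log|J0| - log|Jp|) + 0.5*(hp^T Jp^{-1} hp - h0^T J0^{-1} h0). A reference implementation with slogdet/solve (a sketch; the function name and test values are assumptions) should agree numerically:

# Reference computation of the log-normalizer ratio, for comparison only.
import numpy as np
from scipy.linalg.lapack import dpotrs

def log_normalizer_ratio(J0, h0, Jp, hp):
    ml = 0.5 * np.linalg.slogdet(J0)[1] - 0.5 * np.linalg.slogdet(Jp)[1]
    ml += 0.5 * hp @ np.linalg.solve(Jp, hp)
    ml -= 0.5 * h0 @ np.linalg.solve(J0, h0)
    return ml

rng = np.random.default_rng(5)
G0, Gp = rng.standard_normal((2, 4, 4))
J0 = G0 @ G0.T + 4 * np.eye(4)
Jp = Gp @ Gp.T + 4 * np.eye(4)
h0, hp = rng.standard_normal((2, 4))

L0, Lp = np.linalg.cholesky(J0), np.linalg.cholesky(Jp)
ml_chol = (np.log(np.diag(L0)).sum() - np.log(np.diag(Lp)).sum()
           + 0.5 * hp @ dpotrs(Lp, hp, lower=True)[0]
           - 0.5 * h0 @ dpotrs(L0, h0, lower=True)[0])
assert np.isclose(ml_chol, log_normalizer_ratio(J0, h0, Jp, hp))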
def update(self, x, y):
    """Update the model with a single input/output sample."""
    assert x.shape[0] == self.n
    assert y.shape[0] == self.p
    pred_y = self.predict(x)

    xp = self.mapping.evaluate(x)
    self.B += numpy.outer(xp, y)
    choluprk1(self.L, xp)
    # self.L, cvec, svec = dchud(self.L, xp)
    self.W, info = dpotrs(self.L, self.B)
    assert info == 0
    return pred_y
def _marginal_likelihood(self, J_prior, h_prior, J_post, h_post):
    """
    Compute the marginal likelihood as the ratio of log normalizers
    """
    a = np.concatenate((np.repeat(self.a, self.B), [1])).astype(np.bool)

    # Extract the entries for which A=1
    J0 = J_prior[np.ix_(a, a)]
    h0 = h_prior[a]
    Jp = J_post[np.ix_(a, a)]
    hp = h_post[a]

    # This relates to the mean/covariance parameterization as follows
    # log |C| = log |J^{-1}| = -log |J|
    # and
    # mu^T C^{-1} mu = mu^T h
    #                = mu C^{-1} C h
    #                = h^T C h
    #                = h^T J^{-1} h

    # ml = 0
    # ml -= 0.5*np.linalg.slogdet(Jp)[1]
    # ml += 0.5*np.linalg.slogdet(J0)[1]
    # ml += 0.5*hp.dot(np.linalg.solve(Jp, hp))
    # ml -= 0.5*h0.T.dot(np.linalg.solve(J0, h0))

    # Now compute it even faster using the Cholesky!
    L0 = np.linalg.cholesky(J0)
    Lp = np.linalg.cholesky(Jp)

    ml = 0
    ml -= np.sum(np.log(np.diag(Lp)))
    ml += np.sum(np.log(np.diag(L0)))
    ml += 0.5 * hp.T.dot(dpotrs(Lp, hp, lower=True)[0])
    ml -= 0.5 * h0.T.dot(dpotrs(L0, h0, lower=True)[0])

    return ml
def _marginal_likelihood(self, J_prior, h_prior, J_post, h_post):
    """
    Compute the marginal likelihood as the ratio of log normalizers
    """
    a = np.concatenate((np.repeat(self.a, self.B), [1])).astype(np.bool)

    # Extract the entries for which A=1
    J0 = J_prior[np.ix_(a, a)]
    h0 = h_prior[a]
    Jp = J_post[np.ix_(a, a)]
    hp = h_post[a]

    # This relates to the mean/covariance parameterization as follows
    # log |C| = log |J^{-1}| = -log |J|
    # and
    # mu^T C^{-1} mu = mu^T h
    #                = mu C^{-1} C h
    #                = h^T C h
    #                = h^T J^{-1} h

    # ml = 0
    # ml -= 0.5*np.linalg.slogdet(Jp)[1]
    # ml += 0.5*np.linalg.slogdet(J0)[1]
    # ml += 0.5*hp.dot(np.linalg.solve(Jp, hp))
    # ml -= 0.5*h0.T.dot(np.linalg.solve(J0, h0))

    # Now compute it even faster using the Cholesky!
    L0 = np.linalg.cholesky(J0)
    Lp = np.linalg.cholesky(Jp)

    ml = 0
    ml -= np.sum(np.log(np.diag(Lp)))
    ml += np.sum(np.log(np.diag(L0)))
    ml += 0.5*hp.T.dot(dpotrs(Lp, hp, lower=True)[0])
    ml -= 0.5*h0.T.dot(dpotrs(L0, h0, lower=True)[0])

    return ml
def resample_psi(self, verbose=False):
    for data in self.data_list:
        # import pdb; pdb.set_trace()
        M = data["M"]
        Z = data["Z"]

        # Invert once for all k
        if "C_inv" in data:
            C_inv = data["C_inv"]
        else:
            C = self.kernel.K(Z)
            C += 1e-6 * np.eye(M)
            C_inv = np.linalg.inv(C)

        # Compute the posterior covariance
        psi = np.zeros((M, self.K - 1))
        for k in xrange(self.K - 1):
            if verbose:
                sys.stdout.write(".")
                sys.stdout.flush()

            # Throw out inputs where N[:,k] == 0
            Omegak = data["omega"][:, k]
            kappak = data["kappa"][:, k]

            # Give invalid points (zero precision) a tiny positive precision
            Omegak[Omegak == 0] = 1e-32

            # Account for the mean
            lkhd_mean = kappak / Omegak - self.mu[k]

            # Compute the posterior parameters
            L_post = np.linalg.cholesky(C_inv + np.diag(Omegak))
            mu_post = dpotrs(L_post, Omegak * lkhd_mean, lower=True)[0]

            # Go through each GP and resample psi given the likelihood
            rand_vec = np.random.randn(M)
            psi[:, k] = mu_post + solve_triangular(
                L_post, rand_vec, lower=True, trans='T')
            assert np.all(np.isfinite(psi[:, k]))

        if verbose:
            sys.stdout.write("\n")
            sys.stdout.flush()

        data["psi"] = psi
def _sample_gaussian(self, J, h):
    """Copied from Linderman's `PyPolyaGamma`, who copied `pybasicbayes`.

    We actually want to compute

        V = inv(J)
        m = V @ h
        s ~ Normal(m, V)

    This function handles that computation more efficiently. See:

        https://stats.stackexchange.com/questions/32169/
    """
    L = np.linalg.cholesky(J)
    x = self.rng.randn(h.shape[0])
    A = solve_triangular(L, x, lower=True, trans='T')
    B = dpotrs(L, h, lower=True)[0]
    return A + B
def resample_psi(self, verbose=False):
    for data in self.data_list:
        # import pdb; pdb.set_trace()
        M = data["M"]
        Z = data["Z"]

        # Invert once for all k
        if "C_inv" in data:
            C_inv = data["C_inv"]
        else:
            C = self.kernel.K(Z)
            C += 1e-6 * np.eye(M)
            C_inv = np.linalg.inv(C)

        # Compute the posterior covariance
        psi = np.zeros((M, self.K-1))
        for k in xrange(self.K-1):
            if verbose:
                sys.stdout.write(".")
                sys.stdout.flush()

            # Throw out inputs where N[:,k] == 0
            Omegak = data["omega"][:,k]
            kappak = data["kappa"][:,k]

            # Give invalid points (zero precision) a tiny positive precision
            Omegak[Omegak == 0] = 1e-32

            # Account for the mean
            lkhd_mean = kappak/Omegak - self.mu[k]

            # Compute the posterior parameters
            L_post = np.linalg.cholesky(C_inv + np.diag(Omegak))
            mu_post = dpotrs(L_post, Omegak * lkhd_mean, lower=True)[0]

            # Go through each GP and resample psi given the likelihood
            rand_vec = np.random.randn(M)
            psi[:,k] = mu_post + solve_triangular(L_post, rand_vec, lower=True, trans='T')
            assert np.all(np.isfinite(psi[:,k]))

        if verbose:
            sys.stdout.write("\n")
            sys.stdout.flush()

        data["psi"] = psi
def solve_cholesky(L, b, lower=True):
    '''
    Solves the system of equations *Ax = b* given the Cholesky decomposition
    of *A*. Uses the routine *dpotrs*.

    Parameters
    ----------
    L : (N, N) float array
    b : (N, *) float array
    '''
    if any(i == 0 for i in b.shape):
        return np.zeros(b.shape)

    x, info = dpotrs(L, b, lower=lower)
    if info < 0:
        raise ValueError('The %s-th argument has an illegal value.' % (-info))

    return x
def mvn_loglike(y, cov):
    """
    Evaluate the multivariate-normal log-likelihood for difference vector `y`
    and covariance matrix `cov`:

        log_p = -1/2*[(y^T).(C^-1).y + log(det(C))] + const.

    The likelihood is NOT NORMALIZED, since this does not affect MCMC.
    The normalization const = -n/2*log(2*pi), where n is the dimensionality.

    Arguments `y` and `cov` MUST be np.arrays with dtype == float64 and
    shapes (n) and (n, n), respectively.  These requirements are NOT CHECKED.

    The calculation follows algorithm 2.1 in Rasmussen and Williams (Gaussian
    Processes for Machine Learning).
    """
    # Compute the Cholesky decomposition of the covariance.
    # Use bare LAPACK function to avoid scipy.linalg wrapper overhead.
    L, info = lapack.dpotrf(cov, clean=False)

    if info < 0:
        raise ValueError(
            'lapack dpotrf error: '
            'the {}-th argument had an illegal value'.format(-info)
        )
    elif info > 0:
        raise np.linalg.LinAlgError(
            'lapack dpotrf error: '
            'the leading minor of order {} is not positive definite'
            .format(info)
        )

    # Solve for alpha = cov^-1.y using the Cholesky decomp.
    alpha, info = lapack.dpotrs(L, y)

    if info != 0:
        raise ValueError(
            'lapack dpotrs error: '
            'the {}-th argument had an illegal value'.format(-info)
        )

    return -.5*np.dot(y, alpha) - np.log(L.diagonal()).sum()
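A usage sketch (assumed, not from the source): adding the omitted constant -n/2*log(2*pi) back in should reproduce scipy's normalized log-density for a zero-mean Gaussian.

# Illustrative check only; mvn_loglike is the function defined above.
import numpy as np
from scipy import stats

rng = np.random.default_rng(6)
G = rng.standard_normal((4, 4))
cov = G @ G.T + 4 * np.eye(4)
y = rng.standard_normal(4)

lp = mvn_loglike(y, cov)
n = y.size
assert np.isclose(lp - 0.5 * n * np.log(2 * np.pi),
                  stats.multivariate_normal(np.zeros(n), cov).logpdf(y))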
def block_determinant_add_rows(Pinv, Q, R, S, symm=False):
    """
    Compute the determinant of the matrix
        A = [[P, Q],
             [R, S]]
    given that we already know P^{-1} and det{P}.
    We follow the notation of Numerical Recipes S2.7

    :param symm: If True, Q=R.T
    :return: A^{-1}
    """
    # Let A^{-1} = [[Pt, Qt],
    #               [Rt, St]]
    # where t is short for tilde

    # Precompute reusable pieces
    PiQ = Pinv.dot(Q)
    RPi = PiQ.T if symm else R.dot(Pinv)

    # Compute the outputs
    if symm:
        raise Exception("Broken!")
        F = S-R.dot(PiQ)
        L = np.linalg.cholesky(F)
        St = dpotrs(L, np.eye(F.shape[0]), lower=True)[0]
        Rt = -solve_triangular(L, RPi, lower=False)
        Pt = Pinv - PiQ.dot(solve_triangular(L, RPi))
        Qt = Rt.T
    else:
        St = np.linalg.inv(S - R.dot(PiQ))
        Pt = Pinv + PiQ.dot(St).dot(RPi)
        Qt = -PiQ.dot(St)
        Rt = Qt.T if symm else -St.dot(RPi)

    Ainv = np.vstack([np.hstack((Pt, Qt)),
                      np.hstack((Rt, St))])
    return Ainv
def block_determinant_add_rows(Pinv, Q, R, S, symm=False):
    """
    Compute the determinant of the matrix
        A = [[P, Q],
             [R, S]]
    given that we already know P^{-1} and det{P}.
    We follow the notation of Numerical Recipes S2.7

    :param symm: If True, Q=R.T
    :return: A^{-1}
    """
    # Let A^{-1} = [[Pt, Qt],
    #               [Rt, St]]
    # where t is short for tilde

    # Precompute reusable pieces
    PiQ = Pinv.dot(Q)
    RPi = PiQ.T if symm else R.dot(Pinv)

    # Compute the outputs
    if symm:
        raise Exception("Broken!")
        F = S - R.dot(PiQ)
        L = np.linalg.cholesky(F)
        St = dpotrs(L, np.eye(F.shape[0]), lower=True)[0]
        Rt = -solve_triangular(L, RPi, lower=False)
        Pt = Pinv - PiQ.dot(solve_triangular(L, RPi))
        Qt = Rt.T
    else:
        St = np.linalg.inv(S - R.dot(PiQ))
        Pt = Pinv + PiQ.dot(St).dot(RPi)
        Qt = -PiQ.dot(St)
        Rt = Qt.T if symm else -St.dot(RPi)

    Ainv = np.vstack([np.hstack((Pt, Qt)),
                      np.hstack((Rt, St))])
    return Ainv
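A numerical sketch of the non-symmetric branch (the test values are assumptions): the returned block matrix should match np.linalg.inv applied to the assembled A.

# Illustrative check of the symm=False branch only (the symm=True branch raises).
import numpy as np

rng = np.random.default_rng(7)
P = rng.standard_normal((4, 4)); P = P @ P.T + 4 * np.eye(4)
Q = rng.standard_normal((4, 2))
R = rng.standard_normal((2, 4))
S = rng.standard_normal((2, 2)); S = S @ S.T + 2 * np.eye(2)

A = np.vstack([np.hstack([P, Q]), np.hstack([R, S])])
Ainv = block_determinant_add_rows(np.linalg.inv(P), Q, R, S, symm=False)
assert np.allclose(Ainv, np.linalg.inv(A))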
def _sample_beta_and_sigma_y(self):
    """Gibbs sample `beta` and noise parameter `sigma_y`.
    """
    phi_X = self.phi(self.X, self.W, add_bias=True)
    cov_j = self.B0 + phi_X.T @ phi_X
    mu_j = np.tile((self.B0 @ self.b0), (self.J, 1)).T + \
           (phi_X.T @ self.Y)

    # multi-output generalization of mvn sample code
    L = np.linalg.cholesky(cov_j)
    Z = self.rng.normal(size=self.beta.shape).T
    LZ = solve_triangular(L, Z, lower=True, trans='T')
    L_mu = dpotrs(L, mu_j, lower=True)[0]
    self.beta[:] = (LZ + L_mu).T

    # sample from inverse gamma
    a_post = self.gamma_a0 + .5 * self.N
    b_post = self.gamma_b0 + .5 * np.diag(
        (self.Y.T @ self.Y) +
        (self.b0 @ self.B0 @ self.b0.T) +
        (mu_j.T @ np.linalg.solve(cov_j, mu_j))
    )
    self.sigma_y = 1. / self.rng.gamma(a_post, 1. / b_post)
def test_cho_solver():
    from scipy.linalg.lapack import dpotrs
    N = 5
    y = np.random.uniform(size=N)
    Y = np.random.uniform(size=[N, 2])
    a = np.random.uniform(size=[N, N])
    a = a.T.dot(a)
    L = np.linalg.cholesky(a)

    X = cho_solve(L, Y, False)
    xa = cho_solve(L, Y[:, 0], False)
    xb = cho_solve(L, Y[:, 1], False)
    assert np.alltrue(np.isclose(X[:, 0], xa)), "a fails"
    assert np.alltrue(np.isclose(X[:, 1], xb)), "b fails"

    # with y vec mod (no copy)
    # built in
    # x1 = cho_solve((L, True), y)
    x1 = dpotrs(L, y, 1, 0)
    x2 = cho_solve(L, y, False)
    # x1 = dpotrs(L, y, 1, 1)
    assert np.all(np.isclose(x1[0], x2))
def __init__(self, X, Y, theta):
    theta = theta ** [2, 1, 2]
    [sf2, l2, sn2] = theta

    # evaluate RBF kernel for our given X
    r = dist.pdist(X) / l2
    K = dist.squareform(sf2 * np.exp(-0.5 * r**2))
    np.fill_diagonal(K, sf2)

    # add in Gaussian noise (+ a bit for numerical stability)
    Ky = K.copy()
    np.fill_diagonal(Ky, sf2 + sn2 + 1e-8)

    # compute the Cholesky factorization of our covariance matrix
    LW, info = lapack.dpotrf(Ky, lower=True)
    assert info == 0

    # calculate lower half of inverse of K (assumes real symmetric positive definite)
    Wi, info = lapack.dpotri(LW, lower=True)
    assert info == 0

    # make symmetric by filling in the upper half
    Wi += np.tril(Wi, -1).T

    # and solve
    alpha, info = lapack.dpotrs(LW, Y, lower=True)
    assert info == 0

    # save these for later
    self.X = X
    self.Y = Y
    self.theta = theta
    self.r = r
    self.K = K
    self.Ky = Ky
    self.LW = LW
    self.Wi = Wi
    self.alpha = alpha
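A hedged sketch of how the cached alpha would typically be used for the GP predictive mean, following Rasmussen & Williams Algorithm 2.1; predict_mean and the use of dist.cdist are assumptions, not part of the original class.

def predict_mean(self, Xs):
    # Posterior mean at test inputs: k(X*, X) @ alpha, where alpha = Ky^{-1} Y
    # (assumed helper; mirrors the kernel construction in __init__ above)
    sf2, l2, _ = self.theta
    r_star = dist.cdist(Xs, self.X) / l2
    K_star = sf2 * np.exp(-0.5 * r_star**2)
    return K_star.dot(self.alpha)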
def __init__(self, X, Y, inducing_inputs, lengthscale):
    self.X = X
    self.Y = Y
    self.inducing_inputs = inducing_inputs
    self.lengthscale = lengthscale
    self.rectify = rectify
    self.kern = GPy.kern.RBF(input_dim=X.shape[1], variance=1.,
                             lengthscale=lengthscale, ARD=True)
    K_uf = self.kern.K(inducing_inputs, X)
    KK = K_uf.dot(K_uf.T)
    Ky = K_uf.dot(Y)
    self.w_mean = dpotrs(dpotrf(KK)[0], Ky)[0]  # faster than np.linalg.solve(KK, Ky)
    if np.any(np.isnan(self.w_mean)):
        try:
            self.w_mean = np.linalg.solve(KK, Ky)
        except:
            jitter = np.diag(KK).mean() * 1e-6
            num_tries = 1
            while num_tries <= 5 and np.isfinite(jitter):
                try:
                    self.w_mean = np.linalg.solve(
                        KK + np.eye(KK.shape[0]) * jitter, Ky)
                except:
                    jitter *= 10
                finally:
                    num_tries += 1
    self.wb_var = np.transpose([
        scipy.optimize.nnls(
            np.vstack([self._SNRinv(X), np.ones(len(X))]).T,
            (self.mean(X) - Y)[:, a]**2)[0]
        for a in range(Y.shape[1])
    ])
    self.Gaussian_noise = Foo()
    self.variance, self.Gaussian_noise.variance = self.wb_var
def mvn_loglike(y, cov):
    """
    Calculate multivariate-normal log-likelihood

        log_p = -1/2 * [(y^T).(C^-1).y + log(det(C))] + const

    To normalize the likelihood, const = -n/2*log(2*pi), which is omitted here.

    Args:
        y   -- shape (n)
        cov -- shape (n, n)

    Returns:
        log_p
    """
    L, info = lapack.dpotrf(cov, clean=False)
    if info != 0:
        raise ValueError('lapack dpotrf error: illegal value for info!')
    alpha, info = lapack.dpotrs(L, y)
    if info != 0:
        raise ValueError(
            'lapack dpotrs error: illegal value for info! {}'.format(info))
    return -.5 * np.dot(y, alpha) - np.log(L.diagonal()).sum()
def _solve_cholesky(L, b, lower=True):
    '''
    Solves the system of equations `Ax = b` given the Cholesky decomposition
    of `A`. Uses the routine `dpotrs`.

    Parameters
    ----------
    L : (n, n) float array
    b : (n, *) float array

    Returns
    -------
    (n, *) float array
    '''
    if any(i == 0 for i in b.shape):
        return np.zeros(b.shape)

    x, info = dpotrs(L, b, lower=lower)
    if info < 0:
        raise ValueError('The %s-th argument has an illegal value.' % -info)

    return x
def solve_psd(A, b, chol=None, lower=True, overwrite_b=False, overwrite_A=False):
    if chol is None:
        return lapack.dposv(A, b, overwrite_b=overwrite_b, overwrite_a=overwrite_A)[1]
    else:
        return lapack.dpotrs(chol, b, lower, overwrite_b)[0]
def kalman_filter(model, return_loglike=False):
    # Parameters
    dtype = model.dtype

    # Kalman filter properties
    filter_method = model.filter_method
    inversion_method = model.inversion_method
    stability_method = model.stability_method
    conserve_memory = model.conserve_memory
    tolerance = model.tolerance
    loglikelihood_burn = model.loglikelihood_burn

    # Check for acceptable values
    if not filter_method == FILTER_CONVENTIONAL:
        warn('The pure Python version of the kalman filter only supports the'
             ' conventional Kalman filter')
    implemented_inv_methods = INVERT_NUMPY | INVERT_UNIVARIATE | SOLVE_CHOLESKY
    if not inversion_method & implemented_inv_methods:
        warn('The pure Python version of the kalman filter only performs'
             ' inversion using `numpy.linalg.inv`.')
    if not tolerance == 0:
        warn('The pure Python version of the kalman filter does not check'
             ' for convergence.')

    # Convergence (this implementation does not consider convergence)
    converged = False
    period_converged = 0

    # Dimensions
    nobs = model.nobs
    k_endog = model.k_endog
    k_states = model.k_states
    k_posdef = model.k_posdef

    # Allocate memory for variables
    filtered_state = np.zeros((k_states, nobs), dtype=dtype)
    filtered_state_cov = np.zeros((k_states, k_states, nobs), dtype=dtype)
    predicted_state = np.zeros((k_states, nobs+1), dtype=dtype)
    predicted_state_cov = np.zeros((k_states, k_states, nobs+1), dtype=dtype)
    forecast = np.zeros((k_endog, nobs), dtype=dtype)
    forecast_error = np.zeros((k_endog, nobs), dtype=dtype)
    forecast_error_cov = np.zeros((k_endog, k_endog, nobs), dtype=dtype)
    loglikelihood = np.zeros((nobs+1,), dtype=dtype)

    # Selected state covariance matrix
    selected_state_cov = (
        np.dot(
            np.dot(model.selection[:, :, 0], model.state_cov[:, :, 0]),
            model.selection[:, :, 0].T
        )
    )

    # Initial values
    if model.initialization == 'known':
        initial_state = model._initial_state.astype(dtype)
        initial_state_cov = model._initial_state_cov.astype(dtype)
    elif model.initialization == 'approximate_diffuse':
        initial_state = np.zeros((k_states,), dtype=dtype)
        initial_state_cov = (
            np.eye(k_states).astype(dtype) * model._initial_variance
        )
    elif model.initialization == 'stationary':
        initial_state = np.zeros((k_states,), dtype=dtype)
        initial_state_cov = solve_discrete_lyapunov(
            np.array(model.transition[:, :, 0], dtype=dtype),
            np.array(selected_state_cov[:, :], dtype=dtype),
        )
    else:
        raise RuntimeError('Statespace model not initialized.')

    # Copy initial values to predicted
    predicted_state[:, 0] = initial_state
    predicted_state_cov[:, :, 0] = initial_state_cov
    # print(predicted_state_cov[:, :, 0])

    # Setup indices for possibly time-varying matrices
    design_t = 0
    obs_intercept_t = 0
    obs_cov_t = 0
    transition_t = 0
    state_intercept_t = 0
    selection_t = 0
    state_cov_t = 0

    # Iterate forwards
    time_invariant = model.time_invariant
    for t in range(nobs):
        # Get indices for possibly time-varying arrays
        if not time_invariant:
            if model.design.shape[2] > 1:
                design_t = t
            if model.obs_intercept.shape[1] > 1:
                obs_intercept_t = t
            if model.obs_cov.shape[2] > 1:
                obs_cov_t = t
            if model.transition.shape[2] > 1:
                transition_t = t
            if model.state_intercept.shape[1] > 1:
                state_intercept_t = t
            if model.selection.shape[2] > 1:
                selection_t = t
            if model.state_cov.shape[2] > 1:
                state_cov_t = t

        # Selected state covariance matrix
        if model.selection.shape[2] > 1 or model.state_cov.shape[2] > 1:
            selected_state_cov = (
                np.dot(
                    np.dot(model.selection[:, :, selection_t],
                           model.state_cov[:, :, state_cov_t]),
                    model.selection[:, :, selection_t].T
                )
            )

        # #### Forecast for time t
        # `forecast` $= Z_t a_t + d_t$
        #
        # *Note*: $a_t$ is given from the initialization (for $t = 0$) or
        # from the previous iteration of the filter (for $t > 0$).
        forecast[:, t] = (
            np.dot(model.design[:, :, design_t], predicted_state[:, t]) +
            model.obs_intercept[:, obs_intercept_t]
        )

        # *Intermediate calculation* (used just below and then once more)
        # `tmp1` array used here, dimension $(m \times p)$
        # $\\#_1 = P_t Z_t'$
        # $(m \times p) = (m \times m) (p \times m)'$
        tmp1 = np.dot(predicted_state_cov[:, :, t],
                      model.design[:, :, design_t].T)

        # #### Forecast error for time t
        # `forecast_error` $\equiv v_t = y_t -$ `forecast`
        forecast_error[:, t] = model.obs[:, t] - forecast[:, t]

        # #### Forecast error covariance matrix for time t
        # $F_t \equiv Z_t P_t Z_t' + H_t$
        forecast_error_cov[:, :, t] = (
            np.dot(model.design[:, :, design_t], tmp1) +
            model.obs_cov[:, :, obs_cov_t]
        )

        # Store the inverse
        if k_endog == 1 and inversion_method & INVERT_UNIVARIATE:
            forecast_error_cov_inv = 1.0 / forecast_error_cov[0, 0, t]
            determinant = forecast_error_cov[0, 0, t]
            tmp2 = forecast_error_cov_inv * forecast_error[:, t]
            tmp3 = forecast_error_cov_inv * model.design[:, :, design_t]
        elif inversion_method & SOLVE_CHOLESKY:
            U, info = lapack.dpotrf(forecast_error_cov[:, :, t])
            determinant = np.product(U.diagonal())**2
            tmp2, info = lapack.dpotrs(U, forecast_error[:, t])
            tmp3, info = lapack.dpotrs(U, model.design[:, :, design_t])
        else:
            forecast_error_cov_inv = np.linalg.inv(forecast_error_cov[:, :, t])
            determinant = np.linalg.det(forecast_error_cov[:, :, t])
            tmp2 = np.dot(forecast_error_cov_inv, forecast_error[:, t])
            tmp3 = np.dot(forecast_error_cov_inv, model.design[:, :, design_t])

        # #### Filtered state for time t
        # $a_{t|t} = a_t + P_t Z_t' F_t^{-1} v_t$
        # $a_{t|t} = 1.0 * \\#_1 \\#_2 + 1.0 a_t$
        filtered_state[:, t] = (
            predicted_state[:, t] + np.dot(tmp1, tmp2)
        )

        # #### Filtered state covariance for time t
        # $P_{t|t} = P_t - P_t Z_t' F_t^{-1} Z_t P_t$
        # $P_{t|t} = P_t - \\#_1 \\#_3 P_t$
        filtered_state_cov[:, :, t] = (
            predicted_state_cov[:, :, t] -
            np.dot(
                np.dot(tmp1, tmp3),
                predicted_state_cov[:, :, t]
            )
        )

        # #### Loglikelihood
        loglikelihood[t] = -0.5 * (
            np.log((2*np.pi)**k_endog * determinant) +
            np.dot(forecast_error[:, t], tmp2)
        )

        # #### Predicted state for time t+1
        # $a_{t+1} = T_t a_{t|t} + c_t$
        predicted_state[:, t+1] = (
            np.dot(model.transition[:, :, transition_t], filtered_state[:, t]) +
            model.state_intercept[:, state_intercept_t]
        )

        # #### Predicted state covariance matrix for time t+1
        # $P_{t+1} = T_t P_{t|t} T_t' + Q_t^*$
        predicted_state_cov[:, :, t+1] = (
            np.dot(
                np.dot(model.transition[:, :, transition_t],
                       filtered_state_cov[:, :, t]),
                model.transition[:, :, transition_t].T
            ) + selected_state_cov
        )

        # Enforce symmetry of predicted covariance matrix
        predicted_state_cov[:, :, t+1] = (
            predicted_state_cov[:, :, t+1] + predicted_state_cov[:, :, t+1].T
        ) / 2

    if return_loglike:
        return np.array(loglikelihood)
    else:
        kwargs = dict(
            (k, v) for k, v in locals().items()
            if k in _kalman_filter._fields
        )
        kwargs['model'] = _statespace(
            initial_state=initial_state,
            initial_state_cov=initial_state_cov
        )
        kfilter = _kalman_filter(**kwargs)
        return FilterResults(model, kfilter)
def kalman_filter(model, return_loglike=False):
    # Parameters
    dtype = model.dtype

    # Kalman filter properties
    filter_method = model.filter_method
    inversion_method = model.inversion_method
    stability_method = model.stability_method
    conserve_memory = model.conserve_memory
    tolerance = model.tolerance
    loglikelihood_burn = model.loglikelihood_burn

    # Check for acceptable values
    if not filter_method == FILTER_CONVENTIONAL:
        warn('The pure Python version of the kalman filter only supports the'
             ' conventional Kalman filter')
    implemented_inv_methods = INVERT_NUMPY | INVERT_UNIVARIATE | SOLVE_CHOLESKY
    if not inversion_method & implemented_inv_methods:
        warn('The pure Python version of the kalman filter only performs'
             ' inversion using `numpy.linalg.inv`.')
    if not tolerance == 0:
        warn('The pure Python version of the kalman filter does not check'
             ' for convergence.')

    # Convergence (this implementation does not consider convergence)
    converged = False
    period_converged = 0

    # Dimensions
    nobs = model.nobs
    k_endog = model.k_endog
    k_states = model.k_states
    k_posdef = model.k_posdef

    # Allocate memory for variables
    filtered_state = np.zeros((k_states, nobs), dtype=dtype)
    filtered_state_cov = np.zeros((k_states, k_states, nobs), dtype=dtype)
    predicted_state = np.zeros((k_states, nobs + 1), dtype=dtype)
    predicted_state_cov = np.zeros((k_states, k_states, nobs + 1), dtype=dtype)
    forecast = np.zeros((k_endog, nobs), dtype=dtype)
    forecast_error = np.zeros((k_endog, nobs), dtype=dtype)
    forecast_error_cov = np.zeros((k_endog, k_endog, nobs), dtype=dtype)
    loglikelihood = np.zeros((nobs + 1, ), dtype=dtype)

    # Selected state covariance matrix
    selected_state_cov = (np.dot(
        np.dot(model.selection[:, :, 0], model.state_cov[:, :, 0]),
        model.selection[:, :, 0].T))

    # Initial values
    if model.initialization == 'known':
        initial_state = model._initial_state.astype(dtype)
        initial_state_cov = model._initial_state_cov.astype(dtype)
    elif model.initialization == 'approximate_diffuse':
        initial_state = np.zeros((k_states, ), dtype=dtype)
        initial_state_cov = (np.eye(k_states).astype(dtype) *
                             model._initial_variance)
    elif model.initialization == 'stationary':
        initial_state = np.zeros((k_states, ), dtype=dtype)
        initial_state_cov = solve_discrete_lyapunov(
            np.array(model.transition[:, :, 0], dtype=dtype),
            np.array(selected_state_cov[:, :], dtype=dtype),
        )
    else:
        raise RuntimeError('Statespace model not initialized.')

    # Copy initial values to predicted
    predicted_state[:, 0] = initial_state
    predicted_state_cov[:, :, 0] = initial_state_cov
    # print(predicted_state_cov[:, :, 0])

    # Setup indices for possibly time-varying matrices
    design_t = 0
    obs_intercept_t = 0
    obs_cov_t = 0
    transition_t = 0
    state_intercept_t = 0
    selection_t = 0
    state_cov_t = 0

    # Iterate forwards
    time_invariant = model.time_invariant
    for t in range(nobs):
        # Get indices for possibly time-varying arrays
        if not time_invariant:
            if model.design.shape[2] > 1:
                design_t = t
            if model.obs_intercept.shape[1] > 1:
                obs_intercept_t = t
            if model.obs_cov.shape[2] > 1:
                obs_cov_t = t
            if model.transition.shape[2] > 1:
                transition_t = t
            if model.state_intercept.shape[1] > 1:
                state_intercept_t = t
            if model.selection.shape[2] > 1:
                selection_t = t
            if model.state_cov.shape[2] > 1:
                state_cov_t = t

        # Selected state covariance matrix
        if model.selection.shape[2] > 1 or model.state_cov.shape[2] > 1:
            selected_state_cov = (np.dot(
                np.dot(model.selection[:, :, selection_t],
                       model.state_cov[:, :, state_cov_t]),
                model.selection[:, :, selection_t].T))

        # #### Forecast for time t
        # `forecast` $= Z_t a_t + d_t$
        #
        # *Note*: $a_t$ is given from the initialization (for $t = 0$) or
        # from the previous iteration of the filter (for $t > 0$).
        forecast[:, t] = (
            np.dot(model.design[:, :, design_t], predicted_state[:, t]) +
            model.obs_intercept[:, obs_intercept_t])

        # *Intermediate calculation* (used just below and then once more)
        # `tmp1` array used here, dimension $(m \times p)$
        # $\\#_1 = P_t Z_t'$
        # $(m \times p) = (m \times m) (p \times m)'$
        tmp1 = np.dot(predicted_state_cov[:, :, t],
                      model.design[:, :, design_t].T)

        # #### Forecast error for time t
        # `forecast_error` $\equiv v_t = y_t -$ `forecast`
        forecast_error[:, t] = model.obs[:, t] - forecast[:, t]

        # #### Forecast error covariance matrix for time t
        # $F_t \equiv Z_t P_t Z_t' + H_t$
        forecast_error_cov[:, :, t] = (np.dot(model.design[:, :, design_t],
                                              tmp1) +
                                       model.obs_cov[:, :, obs_cov_t])

        # Store the inverse
        if k_endog == 1 and inversion_method & INVERT_UNIVARIATE:
            forecast_error_cov_inv = 1.0 / forecast_error_cov[0, 0, t]
            determinant = forecast_error_cov[0, 0, t]
            tmp2 = forecast_error_cov_inv * forecast_error[:, t]
            tmp3 = forecast_error_cov_inv * model.design[:, :, design_t]
        elif inversion_method & SOLVE_CHOLESKY:
            U, info = lapack.dpotrf(forecast_error_cov[:, :, t])
            determinant = np.product(U.diagonal())**2
            tmp2, info = lapack.dpotrs(U, forecast_error[:, t])
            tmp3, info = lapack.dpotrs(U, model.design[:, :, design_t])
        else:
            forecast_error_cov_inv = np.linalg.inv(forecast_error_cov[:, :, t])
            determinant = np.linalg.det(forecast_error_cov[:, :, t])
            tmp2 = np.dot(forecast_error_cov_inv, forecast_error[:, t])
            tmp3 = np.dot(forecast_error_cov_inv, model.design[:, :, design_t])

        # #### Filtered state for time t
        # $a_{t|t} = a_t + P_t Z_t' F_t^{-1} v_t$
        # $a_{t|t} = 1.0 * \\#_1 \\#_2 + 1.0 a_t$
        filtered_state[:, t] = (predicted_state[:, t] + np.dot(tmp1, tmp2))

        # #### Filtered state covariance for time t
        # $P_{t|t} = P_t - P_t Z_t' F_t^{-1} Z_t P_t$
        # $P_{t|t} = P_t - \\#_1 \\#_3 P_t$
        filtered_state_cov[:, :, t] = (
            predicted_state_cov[:, :, t] -
            np.dot(np.dot(tmp1, tmp3), predicted_state_cov[:, :, t]))

        # #### Loglikelihood
        loglikelihood[t] = -0.5 * (np.log((2 * np.pi)**k_endog * determinant) +
                                   np.dot(forecast_error[:, t], tmp2))

        # #### Predicted state for time t+1
        # $a_{t+1} = T_t a_{t|t} + c_t$
        predicted_state[:, t + 1] = (np.dot(model.transition[:, :, transition_t],
                                            filtered_state[:, t]) +
                                     model.state_intercept[:, state_intercept_t])

        # #### Predicted state covariance matrix for time t+1
        # $P_{t+1} = T_t P_{t|t} T_t' + Q_t^*$
        predicted_state_cov[:, :, t + 1] = (np.dot(
            np.dot(model.transition[:, :, transition_t],
                   filtered_state_cov[:, :, t]),
            model.transition[:, :, transition_t].T) + selected_state_cov)

        # Enforce symmetry of predicted covariance matrix
        predicted_state_cov[:, :, t + 1] = (predicted_state_cov[:, :, t + 1] +
                                            predicted_state_cov[:, :, t + 1].T) / 2

    if return_loglike:
        return np.array(loglikelihood)
    else:
        kwargs = dict(
            (k, v) for k, v in locals().items() if k in _kalman_filter._fields)
        kwargs['model'] = _statespace(initial_state=initial_state,
                                      initial_state_cov=initial_state_cov)
        kfilter = _kalman_filter(**kwargs)
        return FilterResults(model, kfilter)
def lnLL(dy, cov):
    L, info = lapack.dpotrf(cov, clean=False)
    alpha, info = lapack.dpotrs(L, dy)
    return -.5 * np.dot(dy, alpha) - np.log(L.diagonal()).sum()