def is_pos_def(A):
    """Check if matrix A is positive definite."""
    try:
        la.cholesky(A)
        return True
    except np.linalg.LinAlgError:
        return False

def is_pd(K):
    from autograd.numpy.linalg import cholesky
    try:
        cholesky(K)
        print('Matrix IS positive definite')
        return 1
    except np.linalg.LinAlgError:  # catch only the Cholesky failure, not a bare except
        print('Matrix is NOT positive definite')
        return 0

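# A minimal usage sketch for the checks above, assuming `la` is an alias for
# numpy.linalg; `A` and `B` are hypothetical test matrices.
import numpy as np
import numpy.linalg as la

A = np.array([[2.0, 0.5], [0.5, 1.0]])  # symmetric, positive definite
B = np.array([[1.0, 2.0], [2.0, 1.0]])  # symmetric, indefinite (eigenvalues 3, -1)

print(is_pos_def(A))  # True  -- Cholesky succeeds
print(is_pos_def(B))  # False -- Cholesky raises LinAlgError
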
def qa_posterior_moments(m, K_ff, y, noise):
    B = cholesky(K_ff + 1e-7 * np.eye(K_ff.shape[0]))
    Sigma = inv(np.dot(B.T, B) + noise * np.eye(B.shape[0])) / noise
    mu = np.dot(Sigma, np.dot(B.T, (y - m).T))
    print(mu.shape, Sigma.shape, y.shape)
    return mu, Sigma

def ensure_psd(mtx_list):
    '''Check the positive-definiteness of a list of matrices.
    If a matrix is not positive definite, it is replaced by a "similar"
    positive-definite matrix.
    mtx_list (list of 2d-arrays/3d-arrays): The list of matrices to check
    ---------------------------------------------------------------------
    returns (list of 2d-arrays/3d-arrays): A list of matrices that are all
    positive definite.
    '''
    L = len(mtx_list)
    for l in range(L):
        for idx, X in enumerate(mtx_list[l]):
            try:
                cholesky(X)
            except LinAlgError:
                mtx_list[l][idx] = make_positive_definite(make_symm(X), tol=10E-5)
    return mtx_list

def sample_gpp_multi(x, kernel='rbf', noise=1e-6):
    """Samples from the GP prior. x shape [N_data, 1]."""
    covariance = kernel_dict[kernel]
    j = noise * np.eye(x.shape[0])
    K = covariance(x, x) + j[None, :]
    L = cholesky(K)
    e = rs.randn(x.shape[0])
    return np.dot(L, e)  # [ns, nd]

def gen_point_source_psf_image(pixel_grid, image, loc,
                               psf_weights, psf_means, psf_covars):
    # use image PSF
    icovs = np.array([npla.inv(c) for c in psf_covars])
    dets = np.array([npla.det(c) for c in psf_covars])
    chols = np.array([npla.cholesky(c) for c in psf_covars])  # computed but unused below
    return mog_like(pixel_grid, psf_means, icovs, dets, psf_weights)

def sample_gpp(x, n_samples, kernel='rbf', noise=1e-10):
    """Samples from the GP prior. x shape [N_data, 1]."""
    covariance = kernel_dict[kernel]
    K = covariance(x, x) + noise * np.eye(x.shape[0])
    L = cholesky(K)
    e = rs.randn(n_samples, x.shape[0])
    return np.dot(e, L.T)  # [ns, nd]

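# Hedged usage sketch for sample_gpp, assuming `kernel_dict` maps 'rbf' to an
# RBF covariance function and `rs` is a numpy RandomState; `xs` is hypothetical.
import numpy as np
from numpy.linalg import cholesky

rs = np.random.RandomState(0)
xs = np.linspace(-3, 3, 50)[:, None]   # [N_data, 1]
f_prior = sample_gpp(xs, n_samples=5)  # [5, 50]: five draws from the GP prior
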
def informative_priors(centres, covs, K):
    a = np.ones(2) * (10 ** 0.5)    # large alpha means pi values are ~=
    b = np.ones(2) * (1000 ** 0.5)  # large beta keeps Gaussian from which mu is drawn small
    V = [inv(cholesky(covs[k])) / (1000 ** 0.5) for k in range(K)]
    m = centres
    u = np.ones(2) * 1000 - 2
    return a, b, V, m, u

def sample_normal(params, N_samples, full_cov=False):
    mean, cov = params
    if full_cov:
        jitter = 1e-7 * np.eye(mean.shape[0])
        L = cholesky(cov + jitter)
        e = rs.randn(N_samples, mean.shape[0])
        # e @ L.T has covariance L L^T = cov (e @ L would give L^T L instead)
        return np.dot(e, L.T) + mean
    else:
        # here `cov` is treated as a per-dimension scale
        return rs.randn(N_samples, mean.shape[0]) * cov + mean  # [ns, nw]

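# A quick sanity check (a sketch, numpy only): the empirical covariance of
# full-covariance samples should approach the requested `cov`.
import numpy as np
from numpy.linalg import cholesky

rs = np.random.RandomState(0)
cov = np.array([[1.0, 0.8], [0.8, 2.0]])
samples = sample_normal((np.zeros(2), cov), N_samples=200000, full_cov=True)
print(np.cov(samples.T))  # ~ [[1.0, 0.8], [0.8, 2.0]]
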
def sample_gpp(x, n_samples):
    """Samples from the GP prior.
    x : inputs with shape [N_data]
    returns : samples from the GP prior [N_data, N_samples]
    """
    x = np.ravel(x)
    n_data = len(x)
    K = covariance(x[:, None], x[:, None])
    L = cholesky(K + 1e-7 * np.eye(n_data))
    e = rs.randn(n_data, n_samples)
    return np.dot(L, e)

def sample_gp_prior(x, n_samples):
    """Samples from the GP prior.
    x : inputs with shape [N_data]
    returns : samples from the GP prior [N_samples, N_data]
    """
    x = np.ravel(x)
    n_data = len(x)
    K = covariance(x[:, None], x[:, None])
    L = cholesky(K + 1e-4 * np.eye(n_data))
    e = np.random.normal(size=(n_data, n_samples))
    f_gp_prior = np.dot(L, e)
    return f_gp_prior.T

def update_params(self, means, covs, pis):
    assert covs.shape[1] == covs.shape[2] == self.D
    assert self.K == covs.shape[0] == len(pis), \
        "%d != %d != %d" % (self.K, covs.shape[0], len(pis))
    # assert np.isclose(np.sum(pis), 1.)
    self.means = means
    self.covs = covs
    self.pis = pis
    self.dets = np.array([npla.det(c) for c in self.covs])
    self.icovs = np.array([npla.inv(c) for c in self.covs])
    self.chols = np.array([npla.cholesky(c) for c in self.covs])

def log_gp_prior(y_bnn, x):
    """Computes the expectation of the log GP prior, E[log p_gp(f)],
    where p_gp(f) = N(f | 0, K) and f ~ p_BNN(f):
        E[log p_gp(f)] = -0.5 * E[(L^-1 f)^T (L^-1 f)] + const,  K = L L^T (Cholesky)
    (constants are ignored since we are not optimizing the covariance hyperparams)
    bnn_weights | dim = [N_weights_samples, N_weights]
    K = covariance/kernel matrix | dim = [N_data, N_data]; dim L = dim K
    y_bnn : output of a BNN      | dim = [N_data, N_weights_samples]
    returns : E[log p_gp(y)]     | dim = [N_function_samples]
    """
    K = covariance(x, x) + noise_var * np.eye(len(x))  # shape [N_data, N_data]
    L = cholesky(K)                          # K = L L^T; shape L = shape K
    a = solve(L, y_bnn)                      # a = L^-1 y_bnn; shape = shape y_bnn
    log_gp = -0.5 * np.mean(a ** 2, axis=0)  # compute E[a^2]
    return log_gp

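# A self-contained sketch (numpy/scipy only) verifying the identity behind
# log_gp_prior: with a = L^{-1} f and K = L L^T, we have f^T K^{-1} f = a^T a,
# so -0.5 * sum(a**2) equals log N(f | 0, K) up to the normalizing constant.
import numpy as np
from numpy.linalg import cholesky
from scipy.linalg import solve_triangular
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
K = np.cov(rng.randn(3, 20)) + 1e-6 * np.eye(3)  # a random PD 3x3 matrix
f = rng.randn(3)
L = cholesky(K)
a = solve_triangular(L, f, lower=True)           # a = L^{-1} f
quad = -0.5 * np.sum(a ** 2)
const = -0.5 * np.log(np.linalg.det(2 * np.pi * K))
print(np.isclose(quad + const, multivariate_normal.logpdf(f, cov=K)))  # True
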
def log_pdf(self, hyp):
    x = np.atleast_2d(self.inputs)
    y = np.atleast_2d(self.targets)
    n, D = x.shape
    n, E = y.shape
    hyp = hyp.reshape(E, -1)
    K = self.kernel(hyp, x)  # [E, n, n]
    L = cholesky(K)
    alpha = np.hstack([solve(K[i], y[:, i]) for i in range(E)])
    y = y.flatten(order='F')
    logp = 0.5 * n * E * log(2 * np.pi) + 0.5 * np.dot(y, alpha) \
        + np.sum([log(np.diag(L[i])) for i in range(E)])
    return logp

def log_gp_prior(f_bnn, x, t):
    """Computes the expectation of the log GP prior, E_{X~p(X)}[log p_gp(f)],
    where p_gp(f) = N(f | 0, K) and f ~ p_BNN(f):
        E_{X~p(X)}[log p_gp(f)] = -0.5 * E_{X~p(X)}[(L^-1 f)^T (L^-1 f)] + const,
    with K = L L^T (Cholesky decomposition).
    (constants are ignored since we are not optimizing the covariance hyperparams)
    bnn_weights | dim = [N_weights_samples, N_weights]
    K = covariance/kernel matrix | dim = [N_data, N_data]; dim L = dim K
    f_bnn : output of a BNN      | dim = [N_data, N_weights_samples]
    returns : E[log p_gp(f)]     | dim = [N_function_samples]
    """
    s = 1e-6 * np.eye(len(x))
    K = covariance(x, x) + s     # shape [N_data, N_data]
    L = cholesky(K)              # K = L L^T (the jitter belongs in K, not in L)
    a = solve(L, f_bnn)          # shape = shape f_bnn (L^-1 f_bnn)
    log_gp = -0.5 * np.mean(a ** 2, axis=0)  # compute E_{X~p(X)}
    return log_gp

def compute_z_moments(w_s, eta_old, H_old, psi_old):
    '''Compute the first moment and the variance of the latent variable
    w_s (list of length s1): The path probabilities for all s in S1
    eta_old (list of nb_layers elements of shape (K_l x r_{l-1}, 1)):
        eta estimators of the previous iteration for each layer
    H_old (list of nb_layers elements of shape (K_l x r_{l-1}, r_l)):
        Lambda estimators of the previous iteration for each layer
    psi_old (list of nb_layers elements of shape (K_l x r_{l-1}, r_{l-1})):
        Psi estimators of the previous iteration for each layer
    -------------------------------------------------------------------------
    returns (tuple of length 2): E(z^{(l)}) and Var(z^{(l)})
    '''
    k = [eta.shape[0] for eta in eta_old]
    L = len(eta_old)
    Ez = [[] for l in range(L)]
    AT = [[] for l in range(L)]
    w_reshaped = w_s.reshape(*k, order='C')
    for l in reversed(range(L)):
        # Compute E(z^{(l)})
        idx_to_sum = tuple(set(range(L)) - set([l]))
        wl = w_reshaped.sum(idx_to_sum)[..., n_axis, n_axis]
        Ezl = (wl * eta_old[l]).sum(0, keepdims=True)
        Ez[l] = Ezl
        etaTeta = eta_old[l] @ t(eta_old[l], (0, 2, 1))
        HlHlT = H_old[l] @ t(H_old[l], (0, 2, 1))
        E_zlzlT = (wl * (HlHlT + psi_old[l] + etaTeta)).sum(0, keepdims=True)
        var_zl = E_zlzlT - Ezl @ t(Ezl, (0, 2, 1))
        try:
            var_zl = ensure_psd([var_zl])[0]  # numeric stability check
        except Exception:
            print(var_zl)
            raise RuntimeError('Var z1 was not psd')
        AT_l = cholesky(var_zl)
        AT[l] = AT_l
    return Ez, AT

def logevidence(self, params):
    kern_params, wnoise, mean_params = self.unpack_params(params, fudge=self.fudge)
    Kxx = self.build_Kxx(self.xt, self.xt, params, prior=True)
    L = cholesky(Kxx)
    iL = inv(L)
    inv_Kxx = iL.T @ iL
    if self.mean:
        mu = self.mean(self.xt, params)[None]  # D x T
        yc = (self.ykdt - mu).reshape([self.k, -1])
    else:
        yc = self.ykt_
    logdet = self.k * np.sum(np.log(np.diag(L))) * 2
    ll = -1 / 2 * np.sum((yc @ inv_Kxx) * yc) - 1 / 2 * logdet \
        - self.k / 2 * np.log(2 * np.pi) * self.t * self.d
    # lp = mvn.logpdf(yc, yc[0].squeeze()*0, Kxx).sum()  # check marg-log-likelihood
    return ll.sum()

def gen_prof_mog_params(image, loc, gal_sig, gal_rho, gal_phi,
                        psf_weights, psf_means, psf_covars,
                        prof_amp, prof_sig):
    v_s = image.equa2pixel(loc)
    R = galaxies.gen_galaxy_transformation(gal_sig, gal_rho, gal_phi)
    W = np.dot(R, R.T)
    K_psf = psf_weights.shape[0]
    K_prof = prof_amp.shape[0]

    # compute MOG components
    num_components = K_psf * K_prof
    weights = np.zeros(num_components, dtype=float)
    means = np.zeros((num_components, 2), dtype=float)
    covars = np.zeros((num_components, 2, 2), dtype=float)
    cnt = 0
    for k in range(K_psf):       # num PSF components
        for j in range(K_prof):  # galaxy type components
            # compute weights and component mean/variances
            weights[cnt] = psf_weights[k] * prof_amp[j]
            # compute means
            means[cnt, 0] = v_s[0] + psf_means[k, 0]
            means[cnt, 1] = v_s[1] + psf_means[k, 1]
            # compute covariance matrices
            for ii in range(2):
                for jj in range(2):
                    covars[cnt, ii, jj] = psf_covars[k, ii, jj] + \
                        prof_sig[j] * W[ii, jj]
            # increment index
            cnt += 1
    icovs = np.array([npla.inv(c) for c in covars])
    dets = np.array([npla.det(c) for c in covars])
    chols = np.array([npla.cholesky(c) for c in covars])
    return means, covars, icovs, dets, chols, weights

def jitchol(mat, jitter=0):
    """Run Cholesky decomposition with an increasing jitter,
    until the jitter becomes too large.

    Arguments
    ---------
    mat : (m, m) np.ndarray
        Positive-definite matrix
    jitter : float
        Initial jitter
    """
    try:
        chol = cholesky(mat)
        return chol
    except np.linalg.LinAlgError:
        new_jitter = jitter * 10.0 if jitter > 0.0 else 1e-15
        if new_jitter > 1.0:
            raise RuntimeError('Matrix not positive definite even with jitter')
        warnings.warn(
            'Matrix not positive-definite, adding jitter {:e}'.format(new_jitter),
            RuntimeWarning)
        return jitchol(mat + new_jitter * np.eye(mat.shape[-1]), new_jitter)

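# Hedged usage sketch for jitchol (numpy only): a rank-one matrix of ones is
# positive semi-definite but not positive definite, so a plain Cholesky fails;
# jitchol warns and retries with growing jitter until the factorization succeeds.
import warnings
import numpy as np
from numpy.linalg import cholesky

K = np.ones((3, 3))  # rank 1: PSD, not PD -- cholesky(K) raises LinAlgError
L = jitchol(K)       # emits RuntimeWarnings, then returns a valid factor
print(np.allclose(L @ L.T, K, atol=1e-6))  # True up to the added jitter
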
def log_gp_prior(y_bnn, K):  # [nf, nd], [nd, nd]
    """Computes log p_gp(f), f ~ p_BNN(f)."""
    L = cholesky(K)
    a = solve(L, y_bnn.T)                   # a = L^-1 y_bnn^T, shape [nd, nf]
    return -0.5 * np.mean(a ** 2, axis=0)   # [nf]

def plot_gp_posterior(x, xtest, y, s=1e-4, samples=10, title="", plot='gp'):
    N = len(x)
    n = len(xtest)
    K = covariance(x, x) + s * np.eye(N)
    L = cholesky(K)

    # compute the mean at our test points.
    Lk = solve(L, covariance(x, xtest))
    mu = np.dot(Lk.T, solve(L, y))

    # compute the variance at our test points.
    K_ = covariance(xtest, xtest)
    var = np.diag(K_) - np.sum(Lk ** 2, axis=0)
    std = np.sqrt(var)

    # draw samples from the prior and posterior at our test points.
    L = cholesky(K_ + s * np.eye(n))
    f_prior = np.dot(L, np.random.normal(size=(n, samples)))
    L = cholesky(K_ + s * np.eye(n) - np.dot(Lk.T, Lk))
    f_post = mu + np.dot(L, np.random.normal(size=(n, samples)))

    # -------------------------- PLOTTING --------------------------
    # PLOT PRIOR
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(x, y, 'ko', ms=4)

    # get critical values for the deciles
    lvls = 0.1 * np.linspace(1, 9, 9)
    alphas = 1 - 0.5 * lvls
    zs = norm.ppf(alphas)
    pal = pal_col[plot]
    cols = colors[plot]

    # plot samples, mean and deciles
    mean = np.mean(f_prior, axis=1)
    std = np.std(f_prior, axis=1)
    ax.plot(xtest, f_prior, sns.xkcd_rgb[sample_col[plot]], lw=1)
    ax.plot(xtest, mean, sns.xkcd_rgb[cols[0]], lw=1)
    for z, col in zip(zs, pal):
        ax.fill_between(xtest.ravel(), mean - z * std, mean + z * std, color=col)
    plt.tick_params(labelbottom=False)
    plt.xlim([-8, 8])
    plt.legend()
    plt.savefig(title + "GP prior_draws.pdf", bbox_inches='tight')

    # PLOT POSTERIOR
    plt.clf()
    std = np.sqrt(var)
    fig = plt.figure()
    bx = fig.add_subplot(111)
    bx.plot(x, y, 'ko', ms=4)

    # plot samples, mean and deciles
    bx.plot(xtest, f_post, sns.xkcd_rgb[sample_col[plot]], lw=1)
    # bx.plot(xtest, mu, sns.xkcd_rgb[cols[0]], lw=1)
    mu = mu.ravel()
    # for z, col in zip(zs, pal):
    #     bx.fill_between(xtest.ravel(), mu - z * std, mu + z * std, color=col)
    plt.tick_params(labelbottom=False)
    plt.xlim([-8, 8])
    plt.ylim([-2, 3])
    plt.legend()
    plt.savefig(title + "GP post_draws.pdf", bbox_inches='tight')

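# The linear algebra in plot_gp_posterior implements the standard GP posterior
# identities: with K = L L^T and Lk = L^{-1} K(x, x*), the predictive mean is
#   mu = Lk^T L^{-1} y = K(x*, x) K^{-1} y,
# and the predictive covariance is
#   K(x*, x*) - Lk^T Lk = K(x*, x*) - K(x*, x) K^{-1} K(x, x*).
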
def sample_gpp(ker_params, x, n_samples):  # x shape [nd, 1]
    K = covariance(ker_params, x, x) + 1e-7 * np.eye(x.shape[0])
    L = cholesky(K)
    e = rs.randn(n_samples, x.shape[0])
    return np.dot(e, L.T)  # [ns, nd]