def mat_cosine_dist(X, Y):
    prod = np.diagonal(np.dot(X, Y.T), offset=0, axis1=-1, axis2=-2)
    len1 = np.sqrt(np.diagonal(np.dot(X, X.T), offset=0, axis1=-1, axis2=-2))
    len2 = np.sqrt(np.diagonal(np.dot(Y, Y.T), offset=0, axis1=-1, axis2=-2))
    return np.divide(np.divide(prod, len1), len2)
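# Hedged usage sketch (assumes np is numpy or autograd.numpy): despite the
# name, this returns the row-wise cosine *similarity* of matching rows.
import numpy as np
Xe = np.array([[1.0, 0.0], [0.0, 2.0]])
Ye = np.array([[1.0, 0.0], [0.0, -1.0]])
print(mat_cosine_dist(Xe, Ye))  # -> [ 1. -1.]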
def predict(self, x):
    pys = np.zeros((self.outdim, x.shape[1]))
    ps2s = np.zeros((self.outdim, x.shape[1]))
    pys[0], ps2 = self.main_function.predict(x)
    ps2s[0] = np.diagonal(ps2)
    for i in range(1, self.outdim):
        pys[i], ps2 = self.constr_list[i - 1].predict(x)
        ps2s[i] = np.diagonal(ps2)
    return pys, ps2s
def logZ(natparam):
    neghalfJ, h, a, b = unpack_dense(natparam)
    J = -2 * neghalfJ
    L = np.linalg.cholesky(J)
    return 1./2 * np.sum(h * np.linalg.solve(J, h)) \
        - np.sum(np.log(np.diagonal(L, axis1=-1, axis2=-2))) \
        + np.sum(a + b)
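# Background note (my gloss, not from the source): with natural parameters
# (J, h) of a Gaussian exp(-1/2 x^T J x + h^T x - logZ), the log-partition
# function is
#     logZ = 1/2 h^T J^{-1} h - 1/2 log|J| + n/2 log(2*pi),
# and -sum(log(diag(chol(J)))) equals -1/2 log|J|.  The n/2 log(2*pi) term is
# absent here; presumably it cancels in the surrounding objective or is
# carried by the extra (a, b) statistics.  The logZ variants below differ only
# in how the natural parameters are packed.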
def diag_inv(X):
    """ returns diagonal matrix with reciprocal of diagonal of X """
    return np.eye(X.shape[0]) * (1 / np.diagonal(X, offset=0, axis1=-1, axis2=-2))
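# Hedged sanity check (standalone numpy): for a diagonal X, diag_inv matches
# the true inverse; for general X it only reciprocates the diagonal entries.
import numpy as np
Xd = np.diag([2.0, 4.0, 5.0])
assert np.allclose(diag_inv(Xd), np.linalg.inv(Xd))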
def logZ(natparam):
    J, h = natparam[:2]
    J = -2 * J
    L = np.linalg.cholesky(J)
    return 1./2 * np.sum(h * np.linalg.solve(J, h)) \
        - np.sum(np.log(np.diagonal(L, axis1=-1, axis2=-2))) \
        - sum(map(np.sum, natparam[2:]))
def predict(self, x):
    pys = np.zeros((self.outdim, x.shape[1]))
    ps2s = np.zeros((self.outdim, x.shape[1]))
    for i in range(self.outdim):
        pys[i], ps2 = self.model[i].predict(x)
        ps2s[i] = np.diagonal(ps2)
    return pys, ps2s
def cost(X):
    mu = 0.132
    global D2
    global V1
    global V2
    global Cor1
    global Cor2
    global k_
    coup = (np.linalg.norm(Cor1.T @ V1[:, 0:k_] - Cor2.T @ V2[:, 0:k_] @ X, 'fro'))**2
    res = (X.T @ np.diag(D2[0:k_]) @ X)**2
    diag_res = np.diagonal(res, offset=0, axis1=-1, axis2=-2)
    diag_res = np.sum(diag_res)
    sumres = np.sum(res)
    val = sumres - diag_res
    # val = np.linalg.norm(X.T @ diag2 @ X - diag2, 'fro') ** 2
    # print(coup)
    res = val + mu * coup
    return res
def _unvectorize_symmetric_matrix(vec_val):
    ld_mat = _unvectorize_ld_matrix(vec_val)
    mat_val = ld_mat + ld_mat.transpose()
    # We have double counted the diagonal.  For some reason the autograd
    # diagonal functions require axis1=-1 and axis2=-2
    mat_val = mat_val - \
        np.make_diagonal(np.diagonal(ld_mat, axis1=-1, axis2=-2),
                         axis1=-1, axis2=-2)
    return mat_val
def compute_stats(Ex, ExxT, ExnxT, inhomog):
    T = Ex.shape[-1]
    E_init_stats = ExxT[:, :, 0], Ex[:, 0], 1., 1.
    E_pair_stats = np.transpose(ExxT, (2, 0, 1))[:-1], \
        ExnxT.T, np.transpose(ExxT, (2, 0, 1))[1:], np.ones(T - 1)
    E_node_stats = np.diagonal(ExxT.T, axis1=-1, axis2=-2), Ex.T, np.ones(T)
    if not inhomog:
        # `map` would return a lazy iterator on Python 3; materialize a tuple
        # so downstream code can index and reuse the summed statistics.
        E_pair_stats = tuple(np.sum(x, axis=0) for x in E_pair_stats)
    return E_init_stats, E_pair_stats, E_node_stats
def mvnlogpdf(x, mu, L):
    """
    Not really the logpdf: we need to use the weights to keep track of
    normalizing factors that differ across clusters.

    L: Cholesky decomposition of the covariance matrix
    """
    D = L.shape[0]
    logdet = 2 * np.sum(np.log(np.diagonal(L)))
    quad = np.inner(x - mu, solve(L.T, solve(L, (x - mu))))
    return -0.5 * (D * np.log(2 * np.pi) + logdet + quad)
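# Hedged usage sketch: assumes `solve` above is numpy.linalg.solve (or an
# equivalent); evaluates a 2-D Gaussian log-density at its mean, where the
# quadratic term vanishes.
import numpy as np
from numpy.linalg import solve
cov = np.array([[2.0, 0.3], [0.3, 1.0]])
L_chol = np.linalg.cholesky(cov)
x0 = np.zeros(2)
print(mvnlogpdf(x0, x0, L_chol))
# expected: -0.5 * (2 * np.log(2 * np.pi) + np.log(np.linalg.det(cov)))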
def _square_dist(self, X, basis=None):
    if basis is None:
        n, d = X.shape
        dist = np.matmul(X, X.T)
        diag_dist = np.outer(np.diagonal(dist), np.ones([1, n]))
        dist = diag_dist + diag_dist.T - 2 * dist
    else:
        n_x, d = X.shape
        n_y, d = basis.shape
        dist = -2 * np.matmul(X, basis.T) \
            + np.outer(np.sum(np.square(X), axis=1), np.ones([1, n_y])) \
            + np.outer(np.ones([n_x, 1]), np.sum(np.square(basis), axis=1))
    return dist
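# Hedged sanity check of the ||x_i||^2 + ||x_j||^2 - 2 x_i.x_j expansion used
# in the basis=None branch, written standalone with plain numpy:
import numpy as np
Xs = np.random.randn(5, 3)
G = Xs @ Xs.T
fast = np.add.outer(np.diagonal(G), np.diagonal(G)) - 2 * G
brute = np.array([[np.sum((xi - xj) ** 2) for xj in Xs] for xi in Xs])
assert np.allclose(fast, brute)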
def _compute_inverse_coefficient_innovation(self, k, mu_bar, P_bar):
    Rbar = self._R[k - 1] + np.kron(mu_bar.T @ self._V[k - 1] @ mu_bar,
                                    np.eye(self._d))
    if np.all(Rbar == np.diag(np.diagonal(Rbar))) and Rbar.sum() > 0:
        # Rbar is diagonal: invert the innovation covariance cheaply via the
        # Woodbury identity instead of a dense inverse.
        Ri = np.diag(1 / np.diag(Rbar))
        RiC = Ri @ self._C[k - 1]
        Pi = np.linalg.inv(P_bar)
        PiCRiC = Pi + self._C[k - 1].T @ RiC
        Skinv = Ri - RiC @ np.linalg.inv(PiCRiC) @ RiC.T
    else:
        Sk = self._C[k - 1] @ P_bar @ self._C[k - 1].T + Rbar
        Skinv = np.linalg.inv(Sk)
    return Skinv
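# Hedged standalone check (hypothetical shapes) of the Woodbury identity the
# diagonal branch relies on:
#     (C P C^T + R)^{-1} = R^{-1} - R^{-1} C (P^{-1} + C^T R^{-1} C)^{-1} C^T R^{-1}
import numpy as np
rng = np.random.default_rng(0)
Cm = rng.normal(size=(4, 3))
Pm = np.eye(3)
Rm = np.diag(rng.uniform(1.0, 2.0, size=4))
Ri = np.diag(1 / np.diag(Rm))
RiC = Ri @ Cm
Skinv = Ri - RiC @ np.linalg.inv(np.linalg.inv(Pm) + Cm.T @ RiC) @ RiC.T
assert np.allclose(Skinv, np.linalg.inv(Cm @ Pm @ Cm.T + Rm))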
def predict(self, test_x):
    log_sn = self.theta[0]
    log_sp = self.theta[1]
    log_lscales = self.theta[2:2 + self.dim]
    w = self.theta[2 + self.dim:]
    sn = np.exp(log_sn)
    sn2 = np.exp(2 * log_sn)
    sp = np.exp(log_sp)
    sp2 = np.exp(2 * log_sp)
    Phi_test = self.calc_Phi(w, scale_x(test_x, log_lscales))
    py = self.mean + Phi_test.T.dot(self.alpha)
    ps2 = sn2 + sn2 * np.diagonal(Phi_test.T.dot(chol_solve(self.LA, Phi_test)))
    return py, ps2
def score_estimator(alpha, m, x, K, alphaz, S=100):
    """
    Form score function estimator based on samples lmbda.
    """
    N = x.shape[0]
    if x.ndim == 1:
        D = 1
    else:
        D = x.shape[1]
    num_z = N * np.sum(K)
    L = K.shape[0]
    gradient = np.zeros((alpha.shape[0], 2))
    f = np.zeros((2 * S, alpha.shape[0], 2))
    h = np.zeros((2 * S, alpha.shape[0], 2))
    for s in range(2 * S):
        lmbda = npr.gamma(alpha, 1.)
        lmbda[lmbda < 1e-300] = 1e-300
        zw = m * lmbda / alpha
        lQ = logQ(zw, alpha, m)
        gradLQ = grad_logQ(zw, alpha, m)
        lP = logp(zw, K, x, alphaz)
        temp = lP - np.sum(lQ)
        f[s, :, :] = temp * gradLQ
        h[s, :, :] = gradLQ
    # CV
    covFH = np.zeros((alpha.shape[0], 2))
    covFH[:, 0] = np.diagonal(
        np.cov(f[S:, :, 0], h[S:, :, 0],
               rowvar=False)[:alpha.shape[0], alpha.shape[0]:])
    covFH[:, 1] = np.diagonal(
        np.cov(f[S:, :, 1], h[S:, :, 1],
               rowvar=False)[:alpha.shape[0], alpha.shape[0]:])
    a = covFH / np.var(h[S:, :, :], axis=0)
    return np.mean(f[:S, :, :], axis=0) - a * np.mean(h[:S, :, :], axis=0)
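# Background note (my gloss): this is the score-function (REINFORCE) gradient
# with a control variate.  Since E[h] = E[grad log q] = 0, subtracting a*h
# keeps the estimator unbiased, and the variance-minimizing scale per
# coordinate is a* = Cov(f, h) / Var(h) -- exactly what covFH / np.var(...)
# computes.  One half of the samples (f[S:], h[S:]) fits a*, the other half
# (f[:S], h[:S]) forms the estimate, avoiding bias from reusing samples.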
def create_degeneracy_data(model, delta_beta=None, points=5000):
    if delta_beta is None:
        try:
            DB = np.sqrt(np.diagonal(model.inv_fisher))[-1]
        except Exception:
            print("Issue with fisher - please calculate fisher before "
                  "calling this function")
            return 0
    else:
        DB = delta_beta
    x = np.linspace(10e-5 * model.DL / 2, .9999999 * model.DL / 2, points)
    y = degeneracy_function_lambda(model, DB, x)
    return x, y
def cost(X):
    mu = 0.132
    # mu = 100.0
    # graph_name = 'CA-GrQc'
    # q = 100
    # k = 20
    # k_ = 20
    # # plt.imshow(A)
    # # plt.show()
    # # plt.imshow(B)
    # # plt.show()
    # t = np.linspace(1, 50, q)
    # V1 = np.load('eigens_small/' + graph_name + '/' + graph_name + '_evectors_orig.npy')
    # V2 = np.load('eigens_small/' + graph_name + '/5/evectors_2.npy')
    # n = np.shape(V1)[0]
    # D1 = np.load('eigens_small/' + graph_name + '/' + graph_name + '_evalues_orig.npy')
    # D2 = np.load('eigens_small/' + graph_name + '/5/evalues_2.npy')
    # # Cor1 = fu.calc_corresponding_functions(n, q, t, D1, V1)
    # # Cor2 = fu.calc_corresponding_functions(n, q, t, D2, V2)
    # Cor1 = np.load('zwischenspeicher/Cor1.npy')
    # Cor2 = np.load('zwischenspeicher/Cor2.npy')
    # diag2 = np.diag(D2[0:k_])
    global D1
    global D2
    global V1
    global V2
    global Cor1
    global Cor2
    global k_
    # print(V1[:, 0:k_])
    coup = (np.linalg.norm(Cor1.T @ V1[:, 0:k_] - Cor2.T @ V2[:, 0:k_] @ X, 'fro'))**2
    # print('coup: %f' % coup)
    res = (X.T @ np.diag(D2[0:k_]) @ X) ** 2
    diag_res = np.diagonal(res, offset=0, axis1=-1, axis2=-2)
    diag_res = np.sum(diag_res)
    sumres = np.sum(res)
    val = sumres - diag_res
    # print('val: %f' % val)
    # val = np.linalg.norm(X.T @ diag2 @ X - diag2, 'fro') ** 2
    # print(coup)
    res = val + mu * coup
    # print(res)
    return res
def log_observed_spikes(N, mu, Sig):
    """
    log probability of observed spikes given eta

    N: (T, U)
    mu: (T, U)
    Sig: (T, U, U) or (T, U)
    """
    out = N * mu
    if len(Sig.shape) == 3:
        out += -np.exp(mu + 0.5 * np.diagonal(Sig, 0, 1, 2))
    else:
        out += -np.exp(mu + 0.5 * Sig)
    return np.sum(out)
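# Hedged consistency check: the (T, U, U) and (T, U) branches should agree
# when Sig is diagonal, since only diag(Sig) enters the rate term.
import numpy as np
T_, U_ = 5, 3
counts = np.ones((T_, U_))
eta = np.zeros((T_, U_))
sig_diag = np.full((T_, U_), 0.1)
sig_full = np.stack([np.diag(row) for row in sig_diag])
assert np.isclose(log_observed_spikes(counts, eta, sig_full),
                  log_observed_spikes(counts, eta, sig_diag))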
def square_corrcoeff_full_cost(V, X, grad=True):
    '''
    The cost function for the correlation analysis. This effectively measures
    the square difference in correlation coefficients after transforming to an
    orthonormal basis given by V.

    Args:
        V: 2D array of shape (N, K) with V.T * V = I
        X: 2D array of shape (P, N) containing centers of P manifolds in an
           N=P-1 dimensional orthonormal basis
    '''
    # Verify that the shapes are correct
    P, N = X.shape
    N_v, K = V.shape
    assert N_v == N

    # Calculate the cost
    C = np.matmul(X, X.T)
    c = np.matmul(X, V)
    c0 = np.diagonal(C).reshape(P, 1) - np.sum(np.square(c), axis=1, keepdims=True)
    Fmn = np.square(C - np.matmul(c, c.T)) / np.matmul(c0, c0.T)
    cost = np.sum(Fmn) / 2

    if grad is False:
        # skip gradient calc since not needed, or autograd is used
        gradient = None
    else:
        # Calculate the gradient
        X1 = np.reshape(X, [1, P, N, 1])
        X2 = np.reshape(X, [P, 1, N, 1])
        C1 = np.reshape(c, [P, 1, 1, K])
        C2 = np.reshape(c, [1, P, 1, K])

        # Sum the terms in the gradient
        PF1 = ((C - np.matmul(c, c.T)) / np.matmul(c0, c0.T)).reshape(P, P, 1, 1)
        PF2 = (np.square(C - np.matmul(c, c.T)) /
               np.square(np.matmul(c0, c0.T))).reshape(P, P, 1, 1)
        Gmni = -PF1 * C1 * X1
        Gmni += -PF1 * C2 * X2
        Gmni += PF2 * c0.reshape(P, 1, 1, 1) * C2 * X1
        Gmni += PF2 * (c0.T).reshape(1, P, 1, 1) * C1 * X2
        gradient = np.sum(Gmni, axis=(0, 1))

    return cost, gradient
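# Hedged finite-difference probe of the analytic gradient above (small random
# instance; a sketch, not a test suite -- tolerances are loose):
import numpy as np
P_, K_ = 4, 2
Xc = np.random.randn(P_, P_ - 1)
Vq, _ = np.linalg.qr(np.random.randn(P_ - 1, K_))
c_val, g = square_corrcoeff_full_cost(Vq, Xc, grad=True)
eps = 1e-6
num = np.zeros_like(Vq)
for i in range(Vq.shape[0]):
    for j in range(Vq.shape[1]):
        Vp = Vq.copy()
        Vp[i, j] += eps
        num[i, j] = (square_corrcoeff_full_cost(Vp, Xc, grad=False)[0] - c_val) / eps
print(np.max(np.abs(num - g)))  # should be near zero if the gradient is right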
def leapfrog_friction(M, C, V, q, p, dVdq, path_len, step_size):
    """Leapfrog integrator for Stochastic Gradient Hamiltonian Monte Carlo.

    Includes friction term per https://arxiv.org/abs/1402.4102

    Parameters
    ----------
    M : np.matrix
        Mass of the Euclidean-Gaussian kinetic energy of shape D x D
    C : matrix
        Upper bound parameter for friction term
    V : matrix
        Covariance of the stochastic gradient noise, such that B = V/2
    q : np.floatX
        Initial position
    p : np.floatX
        Initial momentum
    dVdq : callable
        Gradient of the velocity
    path_len : float
        How long to integrate for
    step_size : float
        How long each integration step should be

    Returns
    -------
    q, p : np.floatX, np.floatX
        New position and momentum
    """
    q, p = np.copy(q), np.copy(p)
    Minv = np.linalg.inv(M)
    B = 0.5 * V
    D = np.sqrt(2 * (C - B))
    D = np.diagonal(D)

    p -= step_size * (dVdq(q) + np.dot(C, np.dot(Minv, p))
                      - np.random.normal() * D) / 2  # half step
    for _ in range(int(path_len / step_size) - 1):
        q += step_size * np.dot(Minv, p)  # whole step
        p -= step_size * (dVdq(q) + np.dot(C, np.dot(Minv, p))
                          - np.random.normal() * D)  # whole step
    q += step_size * np.dot(Minv, p)  # whole step
    p -= step_size * (dVdq(q) + np.dot(C, np.dot(Minv, p))
                      - np.random.normal() * D) / 2  # half step

    # momentum flip at end
    return q, -p
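# Hedged usage sketch (standalone, assumes numpy as np): a single SGHMC
# leapfrog trajectory on a 1-D standard normal target, where dVdq(q) = q.
import numpy as np
Dq = 1
Mmass = np.eye(Dq)
Cfric = np.eye(Dq)          # friction upper bound
Vnoise = 0.5 * np.eye(Dq)   # assumed gradient-noise covariance, so B = V/2
q0 = np.zeros(Dq)
p0 = np.random.normal(size=Dq)
q1, p1 = leapfrog_friction(Mmass, Cfric, Vnoise, q0, p0, dVdq=lambda q: q,
                           path_len=1.0, step_size=0.1)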
def log_bottleneck_variables(tau, mu_eta, Sig_eta, mu_a, sig_a, mu_b, Sig_b,
                             X, mu_c, Sig_c, xi):
    r"""
    E[log p(eta)] where eta_{.t} ~ MvNormal(m, S)
    m_{ut} = a_u + \sum_r b_{ru} x_{tr} + \sum_k c_{ku} z_{tk}
    """
    T, R = X.shape
    xi1 = xi[:, 1, :]
    out = np.einsum('u, tuu ->', tau, Sig_eta)
    out += np.sum(tau * (mu_eta - mu_a - np.dot(X, mu_b.T) - np.dot(xi1, mu_c.T))**2)
    out += T * np.sum(tau * sig_a)
    out += np.einsum('u,tr,ts,urs', tau, X, X, Sig_b)
    out += np.einsum('u,tk,tj,ukj', tau, xi1, xi1, Sig_c)
    v = xi1 * (1 - xi1)
    W = np.diagonal(Sig_c, axis1=1, axis2=2) + mu_c**2
    out += np.einsum('u,tk,uk->', tau, v, W)
    out += np.sum(np.linalg.slogdet(Sig_eta)[1])
    return -0.5 * out
def get_node_stats(gaussian_stats):
    ExxT, Ex, En, En = gaussian_stats
    return np.diagonal(ExxT, axis1=-1, axis2=-2), Ex, En
def fun(D):
    return to_scalar(np.diagonal(D, axis1=-1, axis2=-2))
logmargres2 = [model1bopt.fun, model2bopt.fun, model3bopt.fun]
plt.bar(x, logmargres2 / np.sum(logmargres2))

# In[32]:

plt.scatter(x2, y2)

# Calculating posterior mean of the weights for D2

# In[24]:

a2h, sig2h = np.exp(model1bopt.x)
p1 = 6
invpostcov = 1 / a2h * np.identity(p1) + 1 / sig2h * np.dot(phi21.T, phi21)
postcov = np.linalg.inv(invpostcov)
postvar = np.diagonal(postcov)
postmean = (1 / sig2h) * np.dot(postcov, np.dot(phi21.T, y2))
print("posterior mean:", postmean)
print("posterior variance:", postvar)

# In[30]:

a2h, sig2h = np.exp(model2bopt.x[0:2])
p2 = 2
phi22 = phi_2(x2, model2bopt.x[2:4])
invpostcov = 1 / a2h * np.identity(p2) + 1 / sig2h * np.dot(phi22.T, phi22)
postcov = np.linalg.inv(invpostcov)
postvar = np.diagonal(postcov)
postmean = (1 / sig2h) * np.dot(postcov, np.dot(phi22.T, y2))
print("posterior mean:", postmean)
print("posterior variance:", postvar)
def lds_logZ(Y, A, C, Q, R, mu0, Q0):
    """ Log-partition function computed via Kalman filter that broadcasts over
    the first dimension.

    Note: This function doesn't handle control inputs (yet).

    Y : ndarray, shape=(N, T, D)
        Observations
    A : ndarray, shape=(T, D, D)
        Time-varying dynamics matrices
    C : ndarray, shape=(p, D)
        Observation matrix
    mu0 : ndarray, shape=(D,)
        Mean of initial state variable
    Q0 : ndarray, shape=(D, D)
        Covariance of initial state variable
    Q : ndarray, shape=(T, D, D)
        Covariance of latent states
    R : ndarray, shape=(T, D, D)
        Covariance of observations
    """
    N = Y.shape[0]
    T, D, _ = A.shape
    p = C.shape[0]

    mu_predict = np.stack([mu0 for _ in range(N)], axis=0)
    sigma_predict = np.stack([Q0 for _ in range(N)], axis=0)

    mus_filt = np.zeros((N, D))
    sigmas_filt = np.zeros((N, D, D))

    ll = 0.

    for t in range(T):
        # condition
        # sigma_pred = dot3(C, sigma_predict, C.T) + R
        tmp1 = einsum2('ik,nkj->nij', C, sigma_predict)
        sigma_y = einsum2('nik,jk->nij', tmp1, C) + R
        sigma_y = sym(sigma_y)
        L = np.linalg.cholesky(sigma_y)

        # res[n] = Y[n,t,:] - np.dot(C, mu_predict[n])
        # the transpose works b/c of how dot broadcasts
        res = Y[..., t, :] - einsum2('ik,nk->ni', C, mu_predict)
        v = solve_triangular(L, res, lower=True)

        # log-likelihood over all trials
        ll += (-0.5 * np.sum(v * v)
               - np.sum(np.log(np.diagonal(L, axis1=-1, axis2=-2)))
               - p / 2. * np.log(2. * np.pi))

        # mus_filt = mu_predict + np.dot(tmp1, solve_triangular(L, v, 'T'))
        mus_filt = mu_predict + einsum2(
            'nki,nk->ni', tmp1, solve_triangular(L, v, trans='T', lower=True))

        tmp2 = solve_triangular(L, tmp1, lower=True)
        # sigmas_filt = sigma_predict - np.dot(tmp2, tmp2.T)
        sigmas_filt = sigma_predict - einsum2('nki,nkj->nij', tmp2, tmp2)
        sigmas_filt = sym(sigmas_filt)

        # prediction
        # mu_predict = np.dot(A[t], mus_filt[t])
        mu_predict = einsum2('ik,nk->ni', A[t], mus_filt)

        # sigma_predict = dot3(A[t], sigmas_filt[t], A[t].T) + Q[t]
        sigma_predict = einsum2('ik,nkl->nil', A[t], sigmas_filt)
        # sigma_predict = einsum2('nil,jl->nij', sigma_predict, A[t]) + Q[t]
        sigma_predict = einsum2('nil,jl->nij', sigma_predict, A[t]) + Q
        sigma_predict = sym(sigma_predict)

    return ll
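# Background note (my gloss): each per-step term above is the Gaussian
# log-density evaluated through the Cholesky factor L of the innovation
# covariance S = C P C^T + R:
#     log N(y; C m, S) = -1/2 v^T v - sum(log(diag(L))) - p/2 log(2*pi),
# where v = L^{-1} (y - C m), since v^T v = (y - Cm)^T S^{-1} (y - Cm) and
# log|S| = 2 * sum(log(diag(L))).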
def diag_inv(y):
    """ returns diagonal matrix with reciprocal of diagonal of y """
    return np.diag(1 / np.diagonal(y))
def fun(D):
    return np.diagonal(D, axis1=-1, axis2=-2)
"tau": tau, "Xcif": Xcif, "Y": Y, "E": E, "r": r, "D": D, "W": W, "M": M } # Note: log distance (plus 1) ### TEST B ESTIMATOR ### m = M / np.ones_like(tau) m = m.T m_diag = np.diagonal(m) m_frac = m / m_diag # m = np.diag(M) # sigma_epsilon = .1 # epsilon = np.reshape(np.random.normal(0, sigma_epsilon, N ** 2), (N, N)) # np.fill_diagonal(epsilon, 0) theta_dict = dict() # theta_dict["b"] = b theta_dict["alpha"] = .5 theta_dict["c_hat"] = .2 theta_dict["sigma_epsilon"] = 1 theta_dict["gamma"] = .1 imp.reload(policies)
"W": W, "M": M, "ccodes": ccodes } # Note: log distance # v = np.ones(N) # v = np.array([1.08, 1.65, 1.61, 1.05, 1.05, 1.30]) # v = np.repeat(1.4, N) # TODO try just running inner loop, problem is that values of v change with theta as well, no reason we should run theta until covergence rather than iterating on v first. imp.reload(policies) imp.reload(economy) pecmy = policies.policies(data, params, ROWname) pecmy.W m_diag = np.diagonal(pecmy.m) m_frac = pecmy.m / m_diag m_frac[:, N - 1] tau_min_mat = copy.deepcopy(pecmy.ecmy.tau) np.fill_diagonal(tau_min_mat, 5) theta_dict = dict() theta_dict["eta"] = 1. theta_dict["c_hat"] = 25. theta_dict["alpha1"] = 0. theta_dict["alpha2"] = 0. theta_dict["gamma"] = 0. theta_dict["C"] = np.repeat(25., pecmy.N) theta_x = pecmy.unwrap_theta(theta_dict)
def hess_k(ws, fdensity, alpha, sig, psf_k):
    print('hess_k begin')
    mo = np.exp(-4.)
    ws = real_to_complex(ws)
    ws = ws.reshape((n_grid, n_grid))
    ws = np.real(fft.ifft2(ws))
    # calc l1: we only get diagonals here
    l1 = -1 * (psf_k**2 / sig_noise**2 / n_grid**2).flatten()
    # calc l2: the hessian of the prior is messy
    xsi = (1. - fdensity) * gaussian(np.log(ws), loc=np.log(mo), scale=sig) / ws \
        + fdensity * (ws**alpha / w_norm)
    dxsi = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2 \
        - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        / np.sqrt(2 * np.pi) / ws**2 / sig**3 \
        + fdensity * alpha * ws**(alpha - 1) / w_norm
    dxsi_st = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2 \
        - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        / np.sqrt(2 * np.pi) / ws**2 / sig**3
    ddxsi_st = -1 * dxsi_st / ws - dxsi_st * np.log(ws / mo) / ws / sig**2 \
        - (1. - fdensity) * (1 / np.sqrt(2 * np.pi) / sig) \
        * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        * (1 / sig**2 - np.log(ws / mo) / sig**2 - 1) / ws**3
    ddxsi = ddxsi_st + fdensity * alpha * (alpha - 1) * ws**(alpha - 2) / w_norm
    l2 = -1 * (dxsi / xsi)**2 + ddxsi / np.absolute(xsi)
    # this is the hessian of the prior wrt m_x, not m_k
    l2_k = fft.ifft2(l2).flatten() / n_grid**2
    # we assume that the hessian of l2 is diagonal.  Under the assumption
    # k = -k', we only get the zeroth element along the diag.
    # let's fill the entire matrix and see what's up
    hess_m = np.zeros((n_grid**2, n_grid**2), dtype=complex)
    hess_l1 = np.zeros((n_grid**2, n_grid**2), dtype=complex)
    np.fill_diagonal(hess_l1, l1)
    off = []
    # print(l2_k[0])
    for i in range(0, n_grid**2):
        for j in range(0, n_grid**2):
            hess_m[i, j] = l2_k[int(np.absolute(i - j))]
            # check the off diagonals to make sure they are small
            if i != j:
                off.append(l2_k[int(np.absolute(i - j))])
    hess_m = hess_l1 + hess_m
    '''
    print('Sigma Real is:')
    print(np.std(np.real(off)))
    print('Sigma Imag is:')
    print(np.std(np.imag(off)))
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow(np.real(hess_m))
    ax[0].set_title('Real Hessian')
    # ax[1].imshow(data3[:-4, :-4])
    ax[1].imshow(np.imag(hess_m))
    ax[1].set_title('Imaginary Hessian')
    plt.show()
    '''
    l_tot = np.diagonal(hess_m)
    l_minr = min(np.real(l_tot))
    l_mini = min(np.imag(l_tot))
    # print(l_tot - l1)
    if l_minr < 0:
        l_tot = l_tot - l_minr + 0.1
    if l_mini < 0:
        l_tot = l_tot - 1j * (l_mini + 0.1)
    '''
    print('diag is:')
    print(l2_k[0])
    print('other is:')
    print(l1)
    '''
    '''
    hess_m = np.zeros((n_grid**2, n_grid**2))
    np.fill_diagonal(hess_m, l_tot)
    return hess_m
    '''
    # return l1, l2_k[0]
    l_tot = complex_to_real(l_tot)
    # print('hess is')
    # print(l_tot)
    return l_tot
def diag_extract(a: Numeric):
    return anp.diagonal(a, axis1=-2, axis2=-1)
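# Hedged usage sketch (assumes anp is autograd.numpy and Numeric is a type
# alias defined elsewhere in this module): diagonals are taken over the last
# two axes, so any leading axes batch.
import autograd.numpy as anp
batch = anp.arange(2 * 3 * 3).reshape(2, 3, 3)
print(diag_extract(batch).shape)  # -> (2, 3)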