def test_blocks_to_banded(T=5, D=3):
    """
    Test the blocks_to_bands / bands_to_blocks round trip for correctness.
    """
    Ad = np.zeros((T, D, D))
    Aod = np.zeros((T-1, D, D))
    M = np.arange(1, D+1)[:, None] * 10 + np.arange(1, D+1)
    for t in range(T):
        Ad[t, :, :] = 100 * ((t+1)*10 + (t+1)) + M
    for t in range(T-1):
        Aod[t, :, :] = 100 * ((t+2)*10 + (t+1)) + M

    # print("Lower")
    # L = blocks_to_bands(Ad, Aod, lower=True)
    # print(L)

    # print("Upper")
    # U = blocks_to_bands(Ad, Aod, lower=False)
    # print(U)

    # Check inverse with random symmetric matrices
    Ad = npr.randn(T, D, D)
    Ad = (Ad + np.swapaxes(Ad, -1, -2)) / 2
    Aod = npr.randn(T-1, D, D)

    Ad2, Aod2 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=True), lower=True)
    assert np.allclose(np.tril(Ad), np.tril(Ad2))
    assert np.allclose(Aod, Aod2)

    Ad3, Aod3 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=False), lower=False)
    assert np.allclose(np.triu(Ad), np.triu(Ad3))
    assert np.allclose(Aod, Aod3)
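# Hedged aside (an assumption, not something the test above shows): the
# banded layout that blocks_to_bands targets is scipy.linalg's symmetric
# banded storage. For block size D = 1 this reduces to stacking diagonals
# row by row, which this self-contained check demonstrates.
import numpy as np
from scipy.linalg import solveh_banded

A_dense = np.array([[2., -1., 0.],
                    [-1., 2., -1.],
                    [0., -1., 2.]])    # SPD tridiagonal matrix, dense form
A_banded = np.array([[2., 2., 2.],     # row 0: main diagonal
                     [-1., -1., 0.]])  # row 1: first subdiagonal, zero-padded
b = np.ones(3)
assert np.allclose(solveh_banded(A_banded, b, lower=True),
                   np.linalg.solve(A_dense, b))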
def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError
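# Why lower_half halves the diagonal, in one check: for a symmetric S,
# lower_half(S) + lower_half(S).T reconstructs S exactly. A minimal sketch
# using plain numpy:
import numpy as np

S = np.random.randn(4, 4)
S = S + S.T                               # make it symmetric
H = 0.5 * (np.tril(S) + np.triu(S, 1).T)  # lower_half(S)
assert np.allclose(H + H.T, S)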
def cost(X):
    Y = np.dot(X, X.T)
    # Shift the exponentials by the maximum value to reduce numerical
    # trouble due to possible overflows.
    s = np.triu(Y, 1).max()
    expY = np.exp((Y - s) / epsilon)
    # Zero out the diagonal
    np.fill_diagonal(expY, np.zeros(n))
    u = np.triu(expY, 1).sum()
    return s + epsilon * np.log(u)
def cost(X):
    Y = X @ X.T
    # Shift the exponentials by the maximum value to reduce numerical
    # trouble due to possible overflows.
    s = np.triu(Y, 1).max()
    expY = np.exp((Y - s) / epsilon)
    # Zero out the diagonal
    expY -= np.diag(np.diag(expY))
    u = np.triu(expY, 1).sum()
    return s + epsilon * np.log(u)
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3    # Dimension of the embedding space, i.e. R^k
    num_points = 24  # Points on the sphere
    # This value should be as close to 0 as affordable. If it is too close
    # to zero, optimization first becomes much slower, then simply stops
    # working because of floating point overflow errors (NaNs and Infs
    # start to appear). If it is too large, then log-sum-exp is a poor
    # approximation of the max function, and the spread will be less
    # uniform. An okay value seems to be 0.01 or 0.001, for example. Note
    # that a better strategy than using a small epsilon straightaway is to
    # reduce epsilon bit by bit and to warm-start subsequent optimizations
    # that way. Trust-region methods are more appropriate for this kind of
    # fine-tuning.
    epsilon = 0.0015

    cost = create_cost(backend, dimension, num_points, epsilon)
    manifold = Elliptope(num_points, dimension)
    problem = pymanopt.Problem(manifold, cost)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)
    Yopt = solver.solve(problem)

    if quiet:
        return

    Xopt = Yopt @ Yopt.T
    maxdot = np.triu(Xopt, 1).max()
    print("Maximum inner product between any two points:", maxdot)
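# A small numerical illustration of the epsilon trade-off described in the
# comment above: epsilon * logsumexp(y / epsilon) approaches max(y) from
# above as epsilon shrinks (the values here are just an example).
import numpy as np

y = np.array([0.1, 0.5, 0.9])
for epsilon in (1.0, 0.1, 0.01):
    s = y.max()
    smooth_max = s + epsilon * np.log(np.sum(np.exp((y - s) / epsilon)))
    print(epsilon, smooth_max)  # tends to 0.9 = max(y) as epsilon -> 0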
def KL_two_gaussians(params):
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d+1]
    # Exponentiate the stored diagonal, then mirror the upper triangle onto
    # the lower one to obtain a symmetric Sigma.
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    return 0.5 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) - d
                  + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior))
                  + np.dot(np.transpose(mu - muPrior),
                           np.dot(np.linalg.inv(Sigma), mu - muPrior)))
def loss_fun(self, y):
    """
    Loss function for intrinsic coordinates y

    y: (n, dim)
    """
    if self.D is None:
        self.compute_distances()
    eps = 1e-100
    D = self.D
    n = D.shape[0]
    y = y.reshape((n, self.manifold_dim))
    dif = np.sqrt(
        np.sum((y[:, np.newaxis] - y[np.newaxis, :])**2, axis=-1) + eps)
    error = 1 / (D + np.eye(n)) * (
        D - dif)**2  # diag prevents 1/0, not counted in triu
    dist = np.sum(np.triu(error, k=1))
    return dist
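# Sanity check (a sketch, not part of the class above) that the
# broadcasting expression in loss_fun computes pairwise Euclidean
# distances, here verified against scipy's cdist:
import numpy as np
from scipy.spatial.distance import cdist

y = np.random.randn(6, 2)
dif = np.sqrt(np.sum((y[:, np.newaxis] - y[np.newaxis, :])**2, axis=-1) + 1e-100)
assert np.allclose(dif, cdist(y, y), atol=1e-6)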
def rbf_kernel_median(data: np.ndarray, *args, without_two=False):
    """RBF kernel matrices for the given data sets, with each bandwidth
    chosen by the median heuristic."""
    outs = []
    for x in [data, *args]:
        D_squared = euclidean_distances(x, squared=True)
        # Mask the upper triangle and the diagonal so the median is taken
        # over distinct pairs only.
        mask = np.triu(np.ones(D_squared.shape), 0)
        median_squared_distance = ma.median(ma.array(D_squared, mask=mask))
        if without_two:
            kx = exp(-D_squared / median_squared_distance)
        else:
            kx = exp(-0.5 * D_squared / median_squared_distance)
        outs.append(kx)

    if len(outs) == 1:
        return outs[0]
    else:
        return outs
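# Hedged usage sketch of rbf_kernel_median above (assumes the function and
# its imports — euclidean_distances, ma, exp — are in scope): two data sets
# yield two kernel matrices, each with its own median-heuristic bandwidth.
import numpy as np

X = np.random.randn(100, 3)
Y = np.random.randn(50, 3)
Kx, Ky = rbf_kernel_median(X, Y)
assert Kx.shape == (100, 100) and Ky.shape == (50, 50)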
def expectation(params, y, X, eps, N, u):
    # For each sample of theta, calculate the likelihood. The likelihood
    # has participants; for each participant we have N particles. With L
    # samples, n participants, and N particles per participant and sample,
    # we have L*n*N particles.
    # The first column of params is mu.
    d = np.shape(X)[-1] + 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d+1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    print(mu)
    print(Sigma)
    n = X.shape[0]
    E = 0
    # Iterate over the samples of theta.
    for j in range(np.shape(eps)[0]):
        beta = mu + np.dot(Sigma, eps[j, :])
        # This log likelihood iterates over both the participants and the
        # particles.
        E += log_likelihood(beta, y, X, u[j*(n*N):(j+1)*(n*N)])
    return E / len(beta)
import numpy as np
from scipy.sparse import coo_matrix


def load_graph_laplacian(coo, wfunc=np.ones_like):
    if len(coo.shape) < 2:
        coo = np.reshape(coo, (-1, 3))
    if coo.shape[0] <= 1:
        return np.array([[1.0]])
    coo = coo[coo[:, 0] != 0.0, :]
    row = coo[:, 1].astype('int')
    col = coo[:, 2].astype('int')
    data = wfunc(coo[:, 0].astype('float'))
    aapr = coo_matrix((data, (row, col))).toarray()
    aa = np.triu(aapr, k=1)
    aa = aa + aa.T  # avoid `aa += aa.T`, which aliases aa with its own transpose
    dd = np.diagflat(np.sum(np.abs(aa), axis=-1))
    ll = aa - dd  # note: returns A - D, the negative of the usual Laplacian sign
    return ll
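# Hedged usage sketch of load_graph_laplacian above, for a 3-node path
# graph given as (weight, row, col) triplets. coo_matrix infers the shape
# from the largest indices, so the last edge is listed in the reverse
# direction as well to make the matrix square.
coo = np.array([[1.0, 0, 1],
                [1.0, 1, 2],
                [1.0, 2, 1]])
print(load_graph_laplacian(coo))  # A - D for the path graph 0-1-2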
def fun(x):
    return to_scalar(np.triu(x, k=2))

d_fun = lambda x: to_scalar(grad(fun)(x))
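# A sketch of the kind of check this snippet supports, assuming autograd
# is available: the gradient of a triu-masked sum is exactly the triu mask
# itself.
import autograd.numpy as anp
import numpy.random as npr
from autograd import grad

fun_sum = lambda x: anp.sum(anp.triu(x, k=2))
x = npr.randn(5, 5)
assert anp.allclose(grad(fun_sum)(x), anp.triu(anp.ones((5, 5)), k=2))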
def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
def packing_on_the_sphere(n, k, epsilon):
    # The head of this fragment (construction of `manifold` and `solver`,
    # and the opening of `cost`) was cut off; the cost body resumes below,
    # reconstructed from the identical cost function earlier in this file.
    def cost(X):
        Y = np.dot(X, X.T)
        s = np.triu(Y, 1).max()
        expY = np.exp((Y - s) / epsilon)
        # Zero out the diagonal
        np.fill_diagonal(expY, np.zeros(n))
        u = np.triu(expY, 1).sum()
        return s + epsilon * np.log(u)

    problem = Problem(manifold, cost)
    return solver.solve(problem)


if __name__ == "__main__":
    k = 3   # Dimension of the embedding space, i.e. R^k
    n = 24  # Points on the sphere
    # This value should be as close to 0 as affordable. If it is too close
    # to zero, optimization first becomes much slower, then simply stops
    # working because of floating point overflow errors (NaNs and Infs
    # start to appear). If it is too large, then log-sum-exp is a poor
    # approximation of the max function, and the spread will be less
    # uniform. An okay value seems to be 0.01 or 0.001, for example. Note
    # that a better strategy than using a small epsilon straightaway is to
    # reduce epsilon bit by bit and to warm-start subsequent optimizations
    # that way. Trust-region methods are more appropriate for this kind of
    # fine-tuning.
    epsilon = 0.0015

    # Evaluate the maximum inner product between any two points of X.
    Yopt = packing_on_the_sphere(n, k, epsilon)
    Xopt = Yopt.dot(Yopt.T)
    maxdot = np.triu(Xopt, 1).max()
    print(maxdot)
def fun(x):
    return to_scalar(np.triu(x, k=2))
def _symmetrize(a):
    # assumes 0-diags
    bott = np.tril(a) + np.tril(a).T
    top = np.triu(a) + np.triu(a).T
    # return (bott+top)/2. + infs
    return np.fmax(bott, top)
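# Behavior of _symmetrize in one example: for each (i, j) pair the larger
# of a[i, j] and a[j, i] wins (assuming a zero diagonal, as noted above).
import numpy as np

a = np.array([[0., 1.],
              [3., 0.]])
bott = np.tril(a) + np.tril(a).T
top = np.triu(a) + np.triu(a).T
assert np.allclose(np.fmax(bott, top), [[0., 3.], [3., 0.]])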
anp.hsplit.defjvp(lambda g, ans, gvs, vs, ary, idxs: anp.hsplit(g, idxs))
anp.dsplit.defjvp(lambda g, ans, gvs, vs, ary, idxs: anp.dsplit(g, idxs))
anp.ravel.defjvp(
    lambda g, ans, gvs, vs, x, order=None: anp.ravel(g, order=order))
anp.expand_dims.defjvp(
    lambda g, ans, gvs, vs, x, axis: anp.expand_dims(g, axis))
anp.squeeze.defjvp(lambda g, ans, gvs, vs, x, axis=None: anp.squeeze(g, axis))
anp.diag.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.diag(g, k))
anp.flipud.defjvp(lambda g, ans, gvs, vs, x,: anp.flipud(g))
anp.fliplr.defjvp(lambda g, ans, gvs, vs, x,: anp.fliplr(g))
anp.rot90.defjvp(lambda g, ans, gvs, vs, x, k=1: anp.rot90(g, k))
anp.trace.defjvp(lambda g, ans, gvs, vs, x, offset=0: anp.trace(g, offset))
anp.full.defjvp(
    lambda g, ans, gvs, vs, shape, fill_value, dtype=None:
        anp.full(shape, g, dtype),
    argnum=1)
anp.triu.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.triu(g, k=k))
anp.tril.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.tril(g, k=k))
anp.clip.defjvp(lambda g, ans, gvs, vs, x, a_min, a_max:
                g * anp.logical_and(ans != a_min, ans != a_max))
anp.swapaxes.defjvp(
    lambda g, ans, gvs, vs, x, axis1, axis2: anp.swapaxes(g, axis1, axis2))
anp.rollaxis.defjvp(
    lambda g, ans, gvs, vs, a, axis, start=0: anp.rollaxis(g, axis, start))
anp.real_if_close.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(vs, g))
anp.real.defjvp(lambda g, ans, gvs, vs, x: anp.real(g))
anp.imag.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(vs, -1j * g))
anp.conj.defjvp(lambda g, ans, gvs, vs, x: anp.conj(g))
anp.angle.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(
    vs, g * anp.conj(x * 1j) / anp.abs(x)**2))
anp.where.defjvp(lambda g, ans, gvs, vs, c, x=None, y=None: anp.where(
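# Hedged forward-mode check (a sketch using the current autograd API,
# which differs from the older gvs/vs defjvp signature above): the JVP of
# triu simply masks the tangent the same way the primal output is masked.
import autograd.numpy as anp
from autograd import make_jvp

f = lambda x: anp.triu(x, k=1)
x = anp.ones((3, 3))
v = anp.ones((3, 3))
ans, tangent = make_jvp(f)(x)(v)
assert anp.allclose(tangent, anp.triu(v, k=1))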
import logging

import numpy as np
from numpy.linalg import LinAlgError


def boxQP(H, g, lower, upper, x0):
    n = H.shape[0]
    clamped = np.zeros(n)
    free = np.ones(n)
    Hfree = np.zeros(n)
    oldvalue = 0
    result = 0
    nfactor = 0
    clamp = lambda value: np.maximum(lower, np.minimum(upper, value))

    maxIter = 100
    minRelImprove = 1e-8
    minGrad = 1e-8
    stepDec = 0.6
    minStep = 1e-22
    Armijo = 0.1

    if x0.shape[0] == n:
        x = clamp(x0)
    else:
        # Start from the per-coordinate midpoint of the box (NaN bounds
        # are ignored by nanmean).
        lu = np.array([lower, upper])
        x = np.nanmean(lu, axis=0)
    value = np.dot(x.T, g) + 0.5 * np.dot(x.T, np.dot(H, x))

    for iteration in range(maxIter):
        if result != 0:
            break
        if iteration > 1 and (oldvalue - value) < minRelImprove * abs(oldvalue):
            result = 4
            logging.info("[QP info] Improvement smaller than tolerance")
            break
        oldvalue = value
        grad = g + np.dot(H, x)
        old_clamped = clamped
        clamped = np.zeros(n)
        clamped[np.logical_and(x == lower, grad > 0)] = 1
        clamped[np.logical_and(x == upper, grad < 0)] = 1
        free = np.logical_not(clamped)
        if np.all(clamped):
            result = 6
            logging.info("[QP info] All dimensions are clamped")
            break
        if iteration == 0:
            factorize = True
        else:
            factorize = np.any(old_clamped != clamped)
        if factorize:
            try:
                if not np.allclose(H, H.T):
                    H = 0.5 * (H + H.T)  # enforce symmetry before factorizing
                # Upper-triangular Cholesky factor (MATLAB chol convention),
                # so that H_ff^{-1} b = Hfree^{-1} (Hfree^{-T} b) below.
                Hfree = np.linalg.cholesky(H[np.ix_(free, free)]).T
            except LinAlgError:
                eigs, _ = np.linalg.eig(H[np.ix_(free, free)])
                print(eigs)
                result = -1
                logging.info("[QP info] Hessian is not positive definite")
                break
            nfactor += 1
        gnorm = np.linalg.norm(grad[free])
        if gnorm < minGrad:
            result = 5
            logging.info("[QP info] Gradient norm smaller than tolerance")
            break
        grad_clamped = g + np.dot(H, x * clamped)
        search = np.zeros(n)
        y = np.linalg.lstsq(Hfree.T, grad_clamped[free], rcond=None)[0]
        search[free] = -np.linalg.lstsq(Hfree, y, rcond=None)[0] - x[free]
        sdotg = np.sum(search * grad)
        if sdotg >= 0:
            print(f"[QP info] No descent direction found. Should not happen. Grad is {grad}")
            break
        # Armijo linesearch
        step = 1
        nstep = 0
        xc = clamp(x + step * search)
        vc = np.dot(xc.T, g) + 0.5 * np.dot(xc.T, np.dot(H, xc))
        while (vc - oldvalue) / (step * sdotg) < Armijo:
            step *= stepDec
            nstep += 1
            xc = clamp(x + step * search)
            vc = np.dot(xc.T, g) + 0.5 * np.dot(xc.T, np.dot(H, xc))
            if step < minStep:
                result = 2
                break
        # accept candidate
        x = xc
        value = vc
        # print(f"[QP info] Iteration {iteration}, value of the cost: {vc}")
    else:
        # No break: the iteration limit was reached.
        if result == 0:
            result = 1
    return x, result, Hfree, free
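# Hedged usage sketch of boxQP above: minimize 0.5 x'Hx + g'x over a box.
# The unconstrained optimum is (-1, -1); the first coordinate is clipped
# to its lower bound -0.5 while the free coordinate settles at -1.
H = np.eye(2)
g = np.array([1.0, 1.0])
lower = np.array([-0.5, -2.0])
upper = np.array([2.0, 2.0])
x, result, Hfree, free = boxQP(H, g, lower, upper, x0=np.zeros(2))
print(x, result)  # expected roughly [-0.5, -1.0] with a convergence code > 0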
def backward(self, x, u, iter_number, lambda_):
    print("[INFO] Start backward pass, iteration:", iter_number)
    v = np.array([0.0, 0.0])
    v_x = np.zeros((self.pred_time + 1, self.state_dim))
    v_xx = np.zeros((self.pred_time + 1, self.state_dim, self.state_dim))
    # self.v[-1] += self.lf(x_seq[-1])
    v_x[-1] = self.lf_x(x[-1])
    v_xx[-1] = self.lf_xx(x[-1])
    k = np.zeros((self.pred_time, self.action_dim))
    K = np.zeros((self.pred_time, self.action_dim, self.state_dim))
    diverged_iteration = 0
    for t in range(self.pred_time - 1, -1, -1):
        f_x_t = self.f_x(x[t], u[t])
        f_u_t = self.f_u(x[t], u[t])
        q_x = self.l_x(x[t], u[t]) + np.dot(f_x_t.T, v_x[t + 1])
        q_u = self.l_u(x[t], u[t]) + np.dot(f_u_t.T, v_x[t + 1])
        q_xx = self.l_xx(x[t], u[t]) + np.dot(np.dot(f_x_t.T, v_xx[t + 1]), f_x_t) + \
            np.einsum("i,ijk->jk", v_x[t + 1], self.f_xx(x[t], u[t]))
        q_ux = self.l_ux(x[t], u[t]) + np.dot(np.dot(f_u_t.T, v_xx[t + 1]), f_x_t) + \
            np.einsum('i,ijk->jk', v_x[t + 1], self.f_ux(x[t], u[t]))
        q_uu = self.l_uu(x[t], u[t]) + np.dot(np.dot(f_u_t.T, v_xx[t + 1]), f_u_t) + \
            np.einsum('i,ijk->jk', v_x[t + 1], self.f_uu(x[t], u[t]))

        # Regularized terms (reg_type 1: on Q_uu, reg_type 2: on V_xx).
        v_xx_reg = v_xx[t + 1] + (self.reg_type == 2) * lambda_ * np.eye(self.state_dim)
        q_ux_reg = self.l_ux(x[t], u[t]) + np.dot(np.dot(f_u_t.T, v_xx_reg), f_x_t) + \
            np.einsum('i,ijk->jk', v_x[t + 1], self.f_ux(x[t], u[t]))
        q_uu_F = self.l_uu(x[t], u[t]) + np.dot(np.dot(f_u_t.T, v_xx_reg), f_u_t) + \
            np.einsum('i,ijk->jk', v_x[t + 1], self.f_uu(x[t], u[t])) + \
            (self.reg_type == 1) * lambda_ * np.eye(self.action_dim)

        if not np.allclose(q_uu_F, q_uu_F.T):
            q_uu_F = 0.5 * (q_uu_F + q_uu_F.T)  # enforce symmetry

        if not self.apply_control_constrains:
            try:
                L = np.linalg.cholesky(q_uu_F)
            except LinAlgError:
                logging.warning(
                    f"Q_uu_F is not positive-definite, Q_uu_F is {q_uu_F}, "
                    f"Q_uu_F eigenvalues are {np.linalg.eigvals(q_uu_F)}")
                diverged_iteration = t
                break
            # k = -Q_uu^{-1} q_u and K = -Q_uu^{-1} Q_ux via two triangular
            # solves with the lower Cholesky factor.
            kK = -np.linalg.lstsq(
                L.T,
                np.linalg.lstsq(
                    L, np.concatenate([q_u[:, np.newaxis], q_ux_reg], axis=1),
                    rcond=None)[0],
                rcond=None)[0]
            k_i = kK[:, 0]
            K_i = kK[:, 1:]
            assert k_i.shape == (self.action_dim,), f"k_i.shape is {k_i.shape}"
            assert K_i.shape == (self.action_dim, self.state_dim), f"K_i.shape is {K_i.shape}"
        else:
            lower = self.control_limits[:, 0] - u[t]
            upper = self.control_limits[:, 1] - u[t]
            k_i, result, L, free = boxQP(q_uu_F, q_u, lower, upper,
                                         x0=k[min(t + 1, self.pred_time - 1), :])
            if result < 1:
                print(f"[INFO] Backward pass was diverged at iteration {t}! Lambda is:", lambda_)
                diverged_iteration = t
                break
            K_i = np.zeros((self.action_dim, self.state_dim))
            if np.any(free):
                Lfree = -np.linalg.lstsq(
                    L, np.linalg.lstsq(L.T, q_ux_reg[free, :], rcond=None)[0],
                    rcond=None)[0]
                K_i[free, :] = Lfree

        dv = np.array([np.dot(k_i.T, q_u), 0.5 * np.dot(k_i.T, np.dot(q_uu, k_i))])
        v += dv
        v_x[t] = q_x + np.dot(K_i.T, np.dot(q_uu, k_i)) + np.dot(K_i.T, q_u) + np.dot(q_ux.T, k_i)
        v_xx[t] = q_xx + np.dot(K_i.T, np.dot(q_uu, K_i)) + np.dot(K_i.T, q_ux) + np.dot(q_ux.T, K_i)
        # v_xx[t] = 0.5*(v_xx[t] + v_xx[t].T)
        k[t, :] = k_i
        K[t, :, :] = K_i
    return diverged_iteration, k, K, v
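# For reference, the standard iLQR expansion the loop above implements
# (primes denote values at t + 1):
#   Q_x  = l_x  + f_x' V_x'
#   Q_u  = l_u  + f_u' V_x'
#   Q_xx = l_xx + f_x' V_xx' f_x + V_x' . f_xx
#   Q_ux = l_ux + f_u' V_xx' f_x + V_x' . f_ux
#   Q_uu = l_uu + f_u' V_xx' f_u + V_x' . f_uu
# with gains k = -Q_uu^{-1} Q_u and K = -Q_uu^{-1} Q_ux, and value updates
#   V_x  = Q_x  + K' Q_uu k + K' Q_u  + Q_ux' k
#   V_xx = Q_xx + K' Q_uu K + K' Q_ux + Q_ux' K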
def fun(x):
    return np.triu(x, k=2)
def param_func(param, matrix):
    param = (anp.tril(param)
             if matrix.factor.lower
             else anp.triu(param))
    return param @ param.T
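# Hedged sketch of why this parameterization is used: a (lower or upper)
# triangular factor always maps to a positive semidefinite matrix under
# param @ param.T.
import numpy as np

param = np.tril(np.array([[1.0, 0.7],
                          [0.5, 2.0]]))
S = param @ param.T
assert np.all(np.linalg.eigvalsh(S) >= -1e-12)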
def fun(x):
    return np.triu(x, k=2)

mat = npr.randn(5, 5)
def decompose(A):
    '''
    Decompose homogeneous affine transformation matrix `A` into parts.

    The parts are translations, rotations, zooms, shears. `A` can be any
    square matrix, but is typically shape (4,4).

    Decomposes A into ``T, R, Z, S``, such that, if A is shape (4,4)::

       Smat = np.array([[1, S[0], S[1]],
                        [0,    1, S[2]],
                        [0,    0,    1]])
       RZS = np.dot(R, np.dot(np.diag(Z), Smat))
       A = np.eye(4)
       A[:3,:3] = RZS
       A[:-1,-1] = T

    The order of transformations is therefore shears, followed by zooms,
    followed by rotations, followed by translations.

    The case above (A.shape == (4,4)) is the most common, and corresponds
    to a 3D affine, but in fact A need only be square.

    Parameters
    ----------
    A : array shape (N,N)

    Returns
    -------
    T : array, shape (N-1,)
        Translation vector
    R : array, shape (N-1, N-1)
        Rotation matrix
    Z : array, shape (N-1,)
        Zoom vector. May have one negative zoom to prevent need for a
        negative-determinant R matrix above
    S : array, shape (P,)
        Shear vector, such that shears fill the upper triangle above the
        diagonal to form the shear matrix. P is the (N-2)th triangular
        number, which happens to be 3 for a 4x4 affine.

    Examples
    --------
    >>> T = [20, 30, 40] # translations
    >>> R = [[0, -1, 0], [1, 0, 0], [0, 0, 1]] # rotation matrix
    >>> Z = [2.0, 3.0, 4.0] # zooms
    >>> S = [0.2, 0.1, 0.3] # shears
    >>> # Now we make an affine matrix
    >>> A = np.eye(4)
    >>> Smat = np.array([[1, S[0], S[1]],
    ...                  [0,    1, S[2]],
    ...                  [0,    0,    1]])
    >>> RZS = np.dot(R, np.dot(np.diag(Z), Smat))
    >>> A[:3,:3] = RZS
    >>> A[:-1,-1] = T # set translations
    >>> Tdash, Rdash, Zdash, Sdash = decompose(A)
    >>> np.allclose(T, Tdash)
    True
    >>> np.allclose(R, Rdash)
    True
    >>> np.allclose(Z, Zdash)
    True
    >>> np.allclose(S, Sdash)
    True

    Notes
    -----
    We have used a nice trick from SPM to get the shears. Let us call the
    starting N-1 by N-1 matrix ``RZS``, because it is the composition of
    the rotations on the zooms on the shears. The rotation matrix ``R``
    must have the property ``np.dot(R.T, R) == np.eye(N-1)``. Thus
    ``np.dot(RZS.T, RZS)`` will, by the transpose rules, be equal to
    ``np.dot((ZS).T, (ZS))``. Because we are doing shears with the upper
    right part of the matrix, that means that the Cholesky decomposition
    of ``np.dot(RZS.T, RZS)`` will give us our ``ZS`` matrix, from which
    we take the zooms from the diagonal, and the shear values from the
    off-diagonal elements.
    '''
    A = np.asarray(A)
    T = A[:-1, -1]
    RZS = A[:-1, :-1]
    ZS = np.linalg.cholesky(np.dot(RZS.T, RZS)).T
    Z = np.diag(ZS).copy()
    shears = ZS / Z[:, np.newaxis]
    n = len(Z)
    S = shears[np.triu(np.ones((n, n)), 1).astype(bool)]
    R = np.dot(RZS, np.linalg.inv(ZS))
    if np.linalg.det(R) < 0:
        Z[0] *= -1
        ZS[0] *= -1
        R = np.dot(RZS, np.linalg.inv(ZS))
    return T, R, Z, S