def inverse(params, state, log_pz, z, condition, **kwargs):
    """Map a latent ``z`` to data space, optionally sampling from N(x | Az, Sigma).

    ``z`` is assumed to have been sampled from N(0, I) already.

    Args:
        params: Pair ``(A, Sigma_chol_flat)``. ``Sigma_chol_flat`` is gathered
            with the enclosing-scope ``triangular_indices`` into the Cholesky
            factor of Sigma, whose diagonal is stored in log space.
        state: Returned unchanged.
        log_pz: Returned unchanged (no density correction is applied here).
        z: Latent input, 1-D or 2-D with a leading batch axis.
        condition: Unused.
        **kwargs: ``key`` - optional PRNG key; when missing or ``None`` no
            noise is added and ``x = A z``. ``sigma`` - noise scale
            (default ``1.0``).

    Returns:
        Tuple ``(log_pz, x, state)``.

    Raises:
        AssertionError: If ``z`` is neither 1-D nor 2-D.
    """
    A, Sigma_chol_flat = params

    # Compute Az
    if z.ndim == 1:
        x = jnp.einsum('ij,j->i', A, z)
    elif z.ndim == 2:
        x = jnp.einsum('ij,bj->bi', A, z)
    else:
        # Explicit raise (same exception type as the former ``assert 0``) so
        # the check is not stripped under ``python -O``.
        raise AssertionError('Got an invalid shape. z.shape: %s' % (str(z.shape)))

    # The stored diagonal is a log; exponentiate it so the factor has a
    # strictly positive diagonal.
    Sigma_chol = Sigma_chol_flat[triangular_indices]
    diag = jnp.diag(Sigma_chol)
    diag_idx = jnp.diag_indices(Sigma_chol.shape[0])
    Sigma_chol = jnp.asarray(Sigma_chol).at[diag_idx].set(jnp.exp(diag))

    key = kwargs.pop('key', None)
    if key is not None:
        sigma = kwargs.get('sigma', 1.0)
        noise = random.normal(key, x.shape) * sigma
        # noise @ L^T has covariance sigma^2 * L L^T.
        x += jnp.dot(noise, Sigma_chol.T)

    return log_pz, x, state
def forward(params, state, log_px, x, condition, **kwargs):
    """Map data ``x`` to a latent ``z`` and accumulate a log-density term.

    With ``S = Sigma^{-1} A`` (from ``cho_solve`` on the scaled Cholesky
    factor), the latent is ``z = (I + A^T S)^{-1} S^T x`` and the returned
    log term is::

        -0.5 * x^T (a - S z) - 0.5 * log|I + A^T S| - sum(log_diag)
        - 0.5 * x_dim * log(2 pi)

    where ``a = util.upper_cho_solve(Sigma_chol, x)`` (presumably
    ``Sigma^{-1} x`` - confirm against ``util``).

    Args:
        params: Pair ``(A, Sigma_chol_flat)``; ``A`` has shape
            ``(x_dim, z_dim)``. ``Sigma_chol_flat`` is gathered with the
            enclosing-scope ``triangular_indices`` into a Cholesky factor
            whose diagonal is stored in log space.
        state: Returned unchanged.
        log_px: Running log-density; the term above is added to it.
        x: Data input, 1-D or 2-D with a leading batch axis.
        condition: Unused.
        **kwargs: ``sigma`` - scale applied to the Cholesky factor
            (default ``1.0``).

    Returns:
        Tuple ``(log_px + log_hx, z, state)``.

    Raises:
        AssertionError: If ``x`` is neither 1-D nor 2-D.
    """
    A, Sigma_chol_flat = params
    x_dim, z_dim = A.shape

    # Need to make the diagonal positive
    Sigma_chol = Sigma_chol_flat[triangular_indices]
    diag = jnp.diag(Sigma_chol)
    diag_idx = jnp.diag_indices(Sigma_chol.shape[0])
    Sigma_chol = jnp.asarray(Sigma_chol).at[diag_idx].set(jnp.exp(diag))

    # In case we want to change the noise model
    sigma = kwargs.get('sigma', 1.0)
    Sigma_chol = sigma * Sigma_chol

    Sigma_inv_A = jax.scipy.linalg.cho_solve((Sigma_chol, True), A)
    ATSA = jnp.eye(z_dim) + A.T @ Sigma_inv_A
    ATSA_inv = jnp.linalg.inv(ATSA)

    if x.ndim == 1:
        z = jnp.einsum('ij,j->i', ATSA_inv @ Sigma_inv_A.T, x)
        x_proj = jnp.einsum('ij,j->i', Sigma_inv_A, z)
        a = util.upper_cho_solve(Sigma_chol, x)
    elif x.ndim == 2:
        z = jnp.einsum('ij,bj->bi', ATSA_inv @ Sigma_inv_A.T, x)
        x_proj = jnp.einsum('ij,bj->bi', Sigma_inv_A, z)
        a = vmap(partial(util.upper_cho_solve, Sigma_chol))(x)
    else:
        # Explicit raise (same exception type as the former ``assert 0``) so
        # the check is not stripped under ``python -O``.
        raise AssertionError('Got an invalid shape. x.shape: %s' % (str(x.shape)))

    log_hx = -0.5 * jnp.sum(x * (a - x_proj), axis=-1)
    log_hx -= 0.5 * jnp.linalg.slogdet(ATSA)[1]
    # NOTE(review): ``diag`` is the log-diagonal of the *unscaled* factor, so
    # this term does not include ``x_dim * log(sigma)`` when ``sigma != 1``.
    # Left unchanged; confirm whether that is intended.
    log_hx -= diag.sum()
    log_hx -= 0.5 * x_dim * jnp.log(2 * jnp.pi)

    return log_px + log_hx, z, state
def make_cholesky_factor(l_param: np.ndarray) -> np.ndarray:
    """Get the actual cholesky factor from our parameterization of L.

    The strictly upper triangle of ``l_param`` and its ``[0, 0]`` entry are
    masked to zero, then the diagonal is exponentiated. The result is lower
    triangular with a positive diagonal whose first entry is ``exp(0) == 1``.

    Args:
        l_param: Square parameter matrix.

    Returns:
        The lower-triangular factor, same shape as ``l_param``.
    """
    n = l_param.shape[0]
    # Lower-triangular mask with the top-left entry switched off, which pins
    # the first diagonal element of the factor to 1 after exponentiation.
    lmask = np.tri(n).at[0, 0].set(0)
    tmp = l_param * lmask
    idx = np.diag_indices(n)
    return tmp.at[idx].set(np.exp(tmp[idx]))
def chol_sample(key, d):
    """Sample a random ``d x d`` lower-triangular matrix with non-negative diagonal.

    Strictly-lower entries are standard normal draws; diagonal entries are
    squared standard normal draws.

    Args:
        key: JAX PRNG key. It is split so the off-diagonal and diagonal draws
            use independent keys (a key must not be consumed twice).
        d: Matrix dimension.

    Returns:
        Array of shape ``(d, d)``.
    """
    k_off, k_diag = random.split(key)
    # float64 is requested as in the original; JAX falls back to float32
    # unless x64 mode is enabled.
    L = jnp.tril(random.normal(k_off, (d, d), dtype=jnp.float64), k=-1)
    diag = random.normal(k_diag, (d,), dtype=L.dtype) ** 2
    return L.at[jnp.diag_indices(d)].set(diag)
def f_Coulomb_Matrix(x):
    """Return the flattened Coulomb matrix for flat Cartesian coordinates ``x``.

    Off-diagonal entries are ``Z_i * Z_j / |r_i - r_j|`` and diagonal entries
    are ``0.5 * Z_i ** 2.4``, with hard-coded charges ``(7, 1, 1, 1)``.
    ``self.n_atoms`` comes from the enclosing scope and must match the four
    hard-coded charges.

    Args:
        x: Coordinates reshapeable to ``(self.n_atoms, 3)``.

    Returns:
        1-D array with ``self.n_atoms ** 2`` entries (row-major matrix).
    """
    z_atoms = jnp.array([7., 1., 1., 1.])
    diag_idx = jnp.diag_indices(self.n_atoms)

    charges = jnp.multiply(z_atoms[:, None], z_atoms[None, :])
    charges = charges.at[diag_idx].set(0.5 * z_atoms**2.4)

    coords = jnp.reshape(x, (self.n_atoms, 3))
    disp = jnp.asarray(coords[:, None] - coords[None, :])
    # Replace the zero self-displacements with a *unit* vector before taking
    # the norm: this keeps the norm differentiable and makes the diagonal
    # distance exactly 1. Filling all three components with 1 (as before)
    # gave a distance of sqrt(3) and scaled the diagonal by 1/sqrt(3).
    disp = disp.at[diag_idx].set(jnp.array([1., 0., 0.]))
    dist = jnp.linalg.norm(disp, axis=2)

    Z = jnp.multiply(charges, 1. / dist)
    return Z.ravel()
def CM_full_unsorted_matrix(Z, R, N=0, size=23):
    '''
    Calculates the unsorted Coulomb matrix, zero-padded to ``size x size``.

    Parameters
    ----------
    Z : array of length n
        contains nuclear charges
    R : n x 3 dimensional array
        contains nuclear positions, one row per atom (the code indexes
        ``R[i, :]``)
    N : unused
        kept for interface compatibility
    size : int
        side length of the returned (padded) matrix

    Return
    ------
    D : 2D array (matrix)
        ``size x size`` matrix whose top-left ``n x n`` block holds
        ``Z_i**2.4 / 2`` on the diagonal and ``Z_i * Z_j / |R_i - R_j|``
        elsewhere; all remaining entries are zero.
    '''
    Z = jnp.asarray(Z)
    n = Z.shape[0]
    R = jnp.asarray(R)[:n]
    diag_idx = jnp.diag_indices(n)

    # Pairwise displacements. The self-displacements are overwritten with a
    # non-zero value so the norm (and its gradient) is finite; the diagonal
    # of the result is replaced below anyway.
    disp = R[:, None, :] - R[None, :, :]
    disp = disp.at[diag_idx].set(1.0)
    dist = jnp.linalg.norm(disp, axis=-1)

    cm = jnp.outer(Z, Z) / dist
    cm = cm.at[diag_idx].set(Z**2.4 / 2)

    # Scatter with index arrays (not slices) so that, as before, entries
    # falling outside ``size`` are dropped instead of raising.
    idx = jnp.arange(n)
    D = jnp.zeros((size, size))
    return D.at[idx[:, None], idx[None, :]].set(cm)
def add_jitter(kern, jitter=DEFAULT_JITTER, diag_only=False):
    """Add ``jitter`` to the diagonal of a kernel matrix.

    Args:
        kern: Kernel values. With ``diag_only=True`` every element receives
            the jitter (presumably ``kern`` then holds only the diagonal -
            confirm with callers); otherwise ``kern`` is a 2-D matrix, which
            may be rectangular.
        jitter: Value to add.
        diag_only: Selects the element-wise path described above.

    Returns:
        A new array; ``kern`` is not modified.
    """
    if diag_only:
        return kern + jitter
    n_diag = min(kern.shape[0], kern.shape[1])
    return jnp.asarray(kern).at[jnp.diag_indices(n_diag)].add(jitter)
def unpack_triu(x, n, hermi=0):
    """Unpack a flat upper-triangular vector into an ``n x n`` matrix.

    Args:
        x: Flat values filling the upper triangle (diagonal included) in
            ``triu_indices`` order.
        n: Matrix dimension.
        hermi: ``0`` returns the upper-triangular matrix; ``1`` returns the
            Hermitian completion (``R + R^H`` with the diagonal halved so it
            is not counted twice); ``2`` returns the anti-Hermitian
            completion ``R - R^H``.

    Returns:
        Array of shape ``(n, n)``.

    Raises:
        KeyError: If ``hermi`` is not 0, 1 or 2.
    """
    jnp = jax.numpy
    x = jnp.asarray(x)
    R = jnp.zeros((n, n))
    # Promote the buffer to x's dtype. A plain float buffer would drop the
    # imaginary part of complex input and make the conj() branches pointless.
    R = R.astype(jnp.promote_types(R.dtype, x.dtype))
    R = R.at[jnp.triu_indices(n)].set(x)

    if hermi == 0:
        return R
    if hermi == 1:
        R = R + R.conj().T
        return R.at[jnp.diag_indices(n)].multiply(0.5)
    if hermi == 2:
        return R - R.conj().T
    raise KeyError(f'unsupported hermi value: {hermi!r}')
def chain(key, p):
    """Build the cliques of a chain on ``p`` nodes and a random matrix ``U @ U.T``.

    ``U`` is a ``p x p`` matrix with squared-normal draws on its diagonal and
    normal draws at the positions collected in ``idx_n``.

    Args:
        key: JAX PRNG key; split into independent keys for the diagonal and
            the off-diagonal draws.
        p: Number of nodes.

    Returns:
        Tuple ``(cliques, U @ U.T)`` where ``cliques`` is the list
        ``[[0, 1], [1, 2], ..., [p - 2, p - 1]]``.
    """
    k1, k2 = random.split(key)
    idx_d = jnp.diag_indices(p)
    idx_n = ([], [])
    cliques = []
    for i in range(p - 1):
        cliques.append([i, i + 1])
        # presumably records position (i, i + 1) in idx_n - confirm in setidx
        setidx(idx_n, i, i + 1)

    U = jnp.zeros((p, p))
    U = U.at[idx_d].set(random.normal(k1, (p,)) ** 2)
    U = U.at[idx_n].set(random.normal(k2, (p - 1,)))
    return cliques, U @ U.T
def mmd2_estimate(x, y, sigma=0.2):
    """Estimate the squared MMD between samples ``x`` and ``y``.

    The within-sample terms exclude the kernel diagonal (self-similarities)
    and are normalised by ``N * (N - 1)`` and ``M * (M - 1)``; the cross term
    is the plain mean of the cross-kernel matrix.

    Args:
        x: First sample, ``N`` items along axis 0.
        y: Second sample, ``M`` items along axis 0.
        sigma: Forwarded to ``kernel`` as its ``sigma`` keyword.

    Returns:
        Scalar estimate ``term_xx + term_yy - 2 * mean(k(x, y))``.
    """
    N, M = x.shape[0], y.shape[0]
    # Pairwise kernel evaluation: inner vmap runs over the first argument,
    # outer vmap over the second.
    k = vmap(vmap(partial(kernel, sigma=sigma), in_axes=(0, None)),
             in_axes=(None, 0))

    kxy = k(x, y)
    kxx = k(x, x)
    kyy = k(y, y)

    term1 = (kxx.sum() - jnp.diag(kxx).sum()) / (N * (N - 1))
    term2 = (kyy.sum() - jnp.diag(kyy).sum()) / (M * (M - 1))
    return term1 + term2 - 2 * kxy.mean()
def wrapped(theta, X, Y):
    """Compute normalized kernel matrix and do chain rule.

    The kernel matrix is normalised as ``k(x, y) / sqrt(k(x, x) k(y, y))``
    and its parameter gradients are transformed accordingly.

    Args:
        theta: Kernel parameters (first argument of ``pure_kernel_fn``).
        X: First collection of instances.
        Y: Second collection of instances, or ``None`` to use ``X``.

    Returns:
        Tuple ``(normalized_kmatrix, normalized_grads)``.
    """
    kmatrix, grads = kernel_matrix_with_grad(pure_kernel_fn, theta, X, Y)

    if Y is None:
        # The self-similarities are the diagonal of the kernel matrix.
        self_xx = np.diag(kmatrix)
        self_xx_grad = grads[np.diag_indices(kmatrix.shape[0])]
        self_yy, self_yy_grad = self_xx, self_xx_grad
    else:
        # If Y is defined we need to compute the self similarity of each
        # instance explicitly.
        kernel_fn_with_grad = partial(value_and_grad(pure_kernel_fn), theta)
        self_xx, self_xx_grad = vmap(lambda x: kernel_fn_with_grad(x, x))(X)
        self_yy, self_yy_grad = vmap(lambda y: kernel_fn_with_grad(y, y))(Y)

    # Build the normaliser from the 1-D self-similarities so that it has the
    # same 2-D shape as ``kmatrix``. (Building it after the reshape below
    # yields a 4-D array.)
    normalizer = np.sqrt(self_xx[:, None] * self_yy[None, :])

    # Add dimensions for broadcasting against the trailing parameter axis of
    # ``grads`` (assumed shape: (len(X), len(Y), n_params) - TODO confirm).
    K_xx = self_xx[:, None, None]
    K_yy = self_yy[None, :, None]
    K_xx_grad = self_xx_grad[:, None, :]
    K_yy_grad = self_yy_grad[None, :, :]

    # d/dw(k(x, y, w)/sqrt(k(x, x, w) k(y, y, w))) =
    #   (2 k(x, x, w) k(y, y, w) k^(0, 0, 1)(x, y, w)
    #    - k(x, y, w) (k^(0, 0, 1)(x, x, w) k(y, y, w)
    #                  + k(x, x, w) k^(0, 0, 1)(y, y, w)))
    #   / (2 (k(x, x, w) k(y, y, w))^(3/2))
    grads = ((2 * K_xx * K_yy * grads
              - kmatrix * (K_xx_grad * K_yy + K_xx * K_yy_grad))
             / (2 * (K_xx * K_yy)**(3 / 2)))
    return kmatrix / normalizer, grads
def _kernel_matrix_without_gradients(kernel_fn, theta, X, Y):
    """Evaluate the kernel matrix between ``X`` and ``Y`` (or ``X`` and itself).

    When ``Y`` is ``None`` or is the same object as ``X`` only the lower
    triangle is evaluated and mirrored. The ``KERNEL_MATRIX_USE_LOOP`` config
    value selects a ``loops.Scope`` implementation over the ``vmap`` one.

    Args:
        kernel_fn: Callable ``kernel_fn(theta, x, y)``.
        theta: Kernel parameters, bound as the first argument.
        X: First collection of instances (indexable along axis 0).
        Y: Second collection of instances, or ``None``.

    Returns:
        Kernel matrix of shape ``(len(X), len(X))`` or ``(len(X), len(Y))``.
    """
    kernel_fn = partial(kernel_fn, theta)

    if Y is None or (Y is X):
        n = len(X)
        if config_value('KERNEL_MATRIX_USE_LOOP'):
            with loops.Scope() as s:
                # Lower triangle including the diagonal, one entry per step.
                s.index1, s.index2 = np.tril_indices(n, k=0)
                s.output = np.empty(len(s.index1))
                for i in s.range(s.index1.shape[0]):
                    i1, i2 = s.index1[i], s.index2[i]
                    s.output = s.output.at[i].set(kernel_fn(X[i1], X[i2]))
                # Scatter into the lower triangle, then mirror to the upper.
                lower = np.empty((n, n)).at[(s.index1, s.index2)].set(s.output)
                return lower.at[(s.index2, s.index1)].set(s.output)

        # Strictly-lower triangle in one vmap, mirrored; the diagonal is
        # evaluated separately.
        index1, index2 = np.tril_indices(n, k=-1)
        values = vmap(kernel_fn)(X[index1], X[index2])
        values_scattered = np.empty((n, n))
        values_scattered = values_scattered.at[(index1, index2)].set(values)
        values_scattered = values_scattered.at[(index2, index1)].set(values)
        return values_scattered.at[np.diag_indices(n)].set(
            vmap(lambda x: kernel_fn(x, x))(X))

    if config_value('KERNEL_MATRIX_USE_LOOP'):
        with loops.Scope() as s:
            s.output = np.empty((X.shape[0], Y.shape[0]))
            for i in s.range(X.shape[0]):
                x = X[i]
                s.output = s.output.at[i].set(
                    vmap(lambda y: kernel_fn(x, y))(Y))
            return s.output

    return vmap(lambda x: vmap(lambda y: kernel_fn(x, y))(Y))(X)
def _kernel_matrix_with_gradients(kernel_fn, theta, X, Y):
    """Evaluate the kernel matrix and its gradients with respect to ``theta``.

    When ``Y`` is ``None`` or is the same object as ``X`` only the lower
    triangle is evaluated and mirrored. The ``KERNEL_MATRIX_USE_LOOP`` config
    value selects a ``loops.Scope`` implementation over the ``vmap`` one.

    Args:
        kernel_fn: Callable ``kernel_fn(theta, x, y)``; differentiated with
            respect to its first argument.
        theta: Kernel parameters; ``len(theta)`` sizes the gradient axis.
        X: First collection of instances (indexable along axis 0).
        Y: Second collection of instances, or ``None``.

    Returns:
        Tuple ``(values, grads)``; in the symmetric case the shapes are
        ``(n, n)`` and ``(n, n, len(theta))``.
    """
    kernel_fn = value_and_grad(kernel_fn)
    kernel_fn = partial(kernel_fn, theta)

    if Y is None or (Y is X):
        n = len(X)
        if config_value('KERNEL_MATRIX_USE_LOOP'):
            with loops.Scope() as s:
                s.scattered_values = np.empty((n, n))
                s.scattered_grads = np.empty((n, n, len(theta)))
                index1, index2 = np.tril_indices(n, k=0)
                for i in s.range(index1.shape[0]):
                    i1, i2 = index1[i], index2[i]
                    value, grads = kernel_fn(X[i1], X[i2])
                    # Write (i1, i2) and its mirror (i2, i1) in one update.
                    indexes = (np.stack([i1, i2]), np.stack([i2, i1]))
                    s.scattered_values = s.scattered_values.at[indexes].set(value)
                    s.scattered_grads = s.scattered_grads.at[indexes].set(grads)
                return s.scattered_values, s.scattered_grads

        # Strictly-lower triangle in one vmap, mirrored; the diagonal is
        # evaluated separately.
        index1, index2 = np.tril_indices(n, k=-1)
        values, grads = vmap(kernel_fn)(X[index1], X[index2])
        diag_values, diag_grads = vmap(lambda x: kernel_fn(x, x))(X)
        diag_indices = np.diag_indices(n)

        # Scatter computed values into matrix
        values_scattered = np.empty((n, n))
        values_scattered = values_scattered.at[(index1, index2)].set(values)
        values_scattered = values_scattered.at[(index2, index1)].set(values)
        values_scattered = values_scattered.at[diag_indices].set(diag_values)

        grads_scattered = np.empty((n, n, len(theta)))
        grads_scattered = grads_scattered.at[(index1, index2)].set(grads)
        grads_scattered = grads_scattered.at[(index2, index1)].set(grads)
        grads_scattered = grads_scattered.at[diag_indices].set(diag_grads)
        return values_scattered, grads_scattered

    return vmap(lambda x: vmap(lambda y: kernel_fn(x, y))(Y))(X)
def cycle_with_one_chord(key, p):
    """Build the cliques of a ``p``-cycle with one chord and a random ``U @ U.T``.

    ``U`` is a ``p x p`` matrix with squared-normal draws on its diagonal and
    normal draws at the ``p`` positions collected in ``idx_n`` (the chain
    positions ``(i, i + 1)`` plus ``(0, p - 1)``).

    Args:
        key: JAX PRNG key; split into independent keys for the diagonal and
            the off-diagonal draws.
        p: Number of nodes.

    Returns:
        Tuple ``(cliques, U @ U.T)``. ``cliques`` is the chain's edge list
        with its last edge replaced by the triangle ``[0, p - 2, p - 1]``.
    """
    k1, k2 = random.split(key)
    idx_d = jnp.diag_indices(p)
    idx_n = ([], [])
    cliques = []
    for i in range(p - 1):
        cliques.append([i, i + 1])
        # presumably records position (i, i + 1) in idx_n - confirm in setidx
        setidx(idx_n, i, i + 1)
    setidx(idx_n, 0, p - 1)

    # The last chain edge is absorbed into the triangle that closes the cycle.
    cliques.pop()
    cliques.append([0, p - 2, p - 1])

    U = jnp.zeros((p, p))
    U = U.at[idx_d].set(random.normal(k1, (p,)) ** 2)
    U = U.at[idx_n].set(random.normal(k2, (p,)))
    return cliques, U @ U.T
def _fill_diagonal(X, vals):
    """Return a copy of ``X`` with its main diagonal replaced by ``vals``.

    Args:
        X: 2-D array; the diagonal length is taken from ``X.shape[0]``.
        vals: Scalar or length-``X.shape[0]`` values for the diagonal.

    Returns:
        New array; ``X`` itself is not modified.
    """
    # ``x.at[idx].set(y)`` is the replacement for the removed
    # ``jax.ops.index_update(x, idx, y)``.
    return jnp.asarray(X).at[jnp.diag_indices(X.shape[0])].set(vals)
def _gen_associated_legendre(l_max: int, x: jnp.ndarray, is_normalized: bool) -> jnp.ndarray:
    r"""Computes associated Legendre functions (ALFs) of the first kind.

    The ALFs of the first kind are used in spherical harmonics. The spherical
    harmonic of degree `l` and order `m` can be written as
    `Y_l^m(θ, φ) = N_l^m * P_l^m(cos(θ)) * exp(i m φ)`, where `N_l^m` is the
    normalization factor and θ and φ are the colatitude and longitude,
    respectively. `N_l^m` is chosen in the way that the spherical harmonics form
    a set of orthonormal basis function of L^2(S^2). For the computational
    efficiency of spherical harmonics transform, the normalization factor is
    used in the computation of the ALFs. In addition, normalizing `P_l^m`
    avoids overflow/underflow and achieves better numerical stability. Three
    recurrence relations are used in the computation.

    Args:
      l_max: The maximum degree of the associated Legendre function. Both the
        degrees and orders are `[0, 1, 2, ..., l_max]`.
      x: A vector of type `float32`, `float64` containing the sampled points in
        spherical coordinates, at which the ALFs are computed; `x` is
        essentially `cos(θ)`. For the numerical integration used by the
        spherical harmonics transforms, `x` contains the quadrature points in
        the interval of `[-1, 1]`. There are several approaches to provide the
        quadrature points: Gauss-Legendre method
        (`scipy.special.roots_legendre`), Gauss-Chebyshev method
        (`scipy.special.roots_chebyu`), and Driscoll & Healy method (Driscoll,
        James R., and Dennis M. Healy. "Computing Fourier transforms and
        convolutions on the 2-sphere." Advances in applied mathematics 15,
        no. 2 (1994): 202-250.). The Gauss-Legendre quadrature points are
        nearly equal-spaced along θ and provide exact discrete orthogonality,
        (P^m)^T W P_m = I, where `T` represents the transpose operation, `W` is
        a diagonal matrix containing the quadrature weights, and `I` is the
        identity matrix. The Gauss-Chebyshev points are equally spaced, which
        only provide approximate discrete orthogonality. The Driscoll & Healy
        quadrature points are equally spaced and provide the exact discrete
        orthogonality. The number of sampling points is required to be twice as
        the number of frequency points (modes) in the Driscoll & Healy
        approach, which enables FFT and achieves a fast spherical harmonics
        transform.
      is_normalized: True if the associated Legendre functions are normalized.
        With normalization, `N_l^m` is applied such that the spherical
        harmonics form a set of orthonormal basis functions of L^2(S^2).

    Returns:
      The 3D array of shape `(l_max + 1, l_max + 1, len(x))` containing the
      values of the ALFs at `x`; the dimensions in the sequence of order,
      degree, and evaluation points.
    """
    p = jnp.zeros((l_max + 1, l_max + 1, x.shape[0]))

    # Index vectors and per-index factors for the diagonal (f_a) and first
    # off-diagonal (f_b) recurrences; they differ between the normalized and
    # unnormalized variants.
    a_idx = jnp.arange(1, l_max + 1)
    b_idx = jnp.arange(l_max)
    if is_normalized:
        initial_value = 0.5 / jnp.sqrt(jnp.pi)  # The initial value p(0,0).
        f_a = jnp.cumprod(-1 * jnp.sqrt(1.0 + 0.5 / a_idx))
        f_b = jnp.sqrt(2.0 * b_idx + 3.0)
    else:
        initial_value = 1.0  # The initial value p(0,0).
        f_a = jnp.cumprod(1.0 - 2.0 * a_idx)
        f_b = 2.0 * b_idx + 1.0

    p = p.at[(0, 0)].set(initial_value)

    # Compute the diagonal entries p(l,l) with recurrence.
    # Row k of `y` is sqrt(1 - x^2) raised to the power k + 1 (cumulative
    # product of identical rows).
    y = jnp.cumprod(jnp.broadcast_to(jnp.sqrt(1.0 - x * x), (l_max, x.shape[0])), axis=0)
    p_diag = initial_value * jnp.einsum('i,ij->ij', f_a, y)
    diag_indices = jnp.diag_indices(l_max + 1)
    # Entry (0, 0) was set above, so only (1, 1) .. (l_max, l_max) are written.
    p = p.at[(diag_indices[0][1:], diag_indices[1][1:])].set(p_diag)

    # Compute the off-diagonal entries with recurrence.
    # Entry (i, i + 1) is f_b[i] * x * p(i, i).
    p_offdiag = jnp.einsum('ij,ij->ij', jnp.einsum('i,j->ij', f_b, x), p[jnp.diag_indices(l_max)])
    offdiag_indices = (diag_indices[0][:l_max], diag_indices[1][:l_max] + 1)
    p = p.at[offdiag_indices].set(p_offdiag)

    # Compute the remaining entries with recurrence.
    d0_mask_3d, d1_mask_3d = _gen_recurrence_mask(l_max, is_normalized=is_normalized)

    def body_fun(i, p_val):
        # One recurrence step: combine p rolled by one and by two positions
        # along axis 1, weighted by the i-th slices of the two masks.
        coeff_0 = d0_mask_3d[i]
        coeff_1 = d1_mask_3d[i]
        h = (jnp.einsum(
            'ij,ijk->ijk', coeff_0,
            jnp.einsum('ijk,k->ijk', jnp.roll(p_val, shift=1, axis=1), x)) -
             jnp.einsum('ij,ijk->ijk', coeff_1, jnp.roll(
                 p_val, shift=2, axis=1)))
        p_val = p_val + h
        return p_val

    # TODO(jakevdp): use some sort of fixed-point procedure here instead?
    # Cast up front so the loop carry keeps one dtype across iterations.
    p = p.astype(jnp.result_type(p, x, d0_mask_3d))
    if l_max > 1:
        p = lax.fori_loop(lower=2, upper=l_max + 1, body_fun=body_fun, init_val=p)

    return p
def _fill_diagonal(X, vals):
    """Return a copy of ``X`` whose main-diagonal entries are set to ``vals``.

    The diagonal length is taken from ``X.shape[0]``.
    """
    rows, cols = jnp.diag_indices(X.shape[0])
    return X.at[rows, cols].set(vals)
def fill_diagonal(a, val):
    """Return a copy of ``a`` with the diagonal of its last two axes set to ``val``.

    Leading axes are treated as batch axes. Adapted from
    https://github.com/google/jax/issues/2680#issuecomment-804269672
    """
    assert a.ndim >= 2
    diag_len = min(a.shape[-2:])
    rows, cols = jnp.diag_indices(diag_len)
    diagonal_slots = a.at[..., rows, cols]
    return diagonal_slots.set(val)
def fill_diagonal(mat, vec):
    """Return a copy of the 2-D array ``mat`` with its diagonal set to ``vec``.

    The diagonal length is the number of rows of ``mat``.
    """
    n_rows, _ = mat.shape
    diag = jnp.diag_indices(n_rows)
    return mat.at[diag].set(vec)
def diag_indices(n, ndim=2):
    """Wrap the result of ``jnp.diag_indices(n, ndim)`` in a ``JaxArray``."""
    index_arrays = jnp.diag_indices(n, ndim)
    return JaxArray(index_arrays)
def fill_diagonal(a, val):
    """Return a ``JaxArray`` copy of ``a`` with the diagonal of its last two axes set to ``val``.

    ``a`` may be a ``JaxArray`` (its ``.value`` is used) or a raw array.
    """
    raw = a.value if isinstance(a, JaxArray) else a
    assert raw.ndim >= 2
    rows, cols = jnp.diag_indices(_min(raw.shape[-2:]))
    filled = raw.at[..., rows, cols].set(val)
    return JaxArray(filled)
def _add_to_diagonal(X, val):
    """Return a copy of ``X`` with ``val`` added to its main diagonal.

    Args:
        X: 2-D array; the diagonal length is taken from ``X.shape[0]``.
        val: Scalar or per-entry values to add.

    Returns:
        New array; ``X`` itself is not modified.
    """
    X = jnp.asarray(X)
    new_diagonal = X.diagonal() + val
    # ``x.at[idx].set(y)`` is the replacement for the removed
    # ``jax.ops.index_update(x, idx, y)``.
    return X.at[jnp.diag_indices(X.shape[0])].set(new_diagonal)
def diag_shift(mat: jnp.ndarray, val: Union[float, jnp.ndarray]) -> jnp.ndarray:
    """
    Shifts the diagonal of mat by val.

    Args:
        mat: 2-D matrix (``jnp.diag`` is used to read its diagonal).
        val: Scalar or per-entry shift.

    Returns:
        A new array equal to ``mat`` with ``val`` added on the diagonal.
    """
    mat = jnp.asarray(mat)
    diag_idx = jnp.diag_indices(mat.shape[-1], mat.ndim)
    # ``x.at[idx].set(y)`` is the replacement for the removed
    # ``jax.ops.index_update(x, idx, y)``.
    return mat.at[diag_idx].set(jnp.diag(mat) + val)
def _ridge_cov(X, λ) -> np.ndarray:
    """Return the shrunk covariance ``(1 - λ) * cov(X) + λ * I``.

    Args:
        X: Data with observations in rows and variables in columns
            (``rowvar=False``).
        λ: Shrinkage weight added to the diagonal.

    Returns:
        Square covariance matrix, one row/column per variable.
    """
    # The return annotation is ``np.ndarray`` because ``DeviceArray`` no
    # longer exists in current JAX and broke the definition itself.
    cov = (1 - λ) * np.cov(X, rowvar=False)
    return cov.at[np.diag_indices(cov.shape[0])].add(λ)
def chol_logdet(L):
    """Return twice the sum of the logs of the diagonal entries of ``L``.

    For a Cholesky factor ``L`` this is the log-determinant of ``L @ L.T``.
    """
    rows, cols = jnp.diag_indices(L.shape[0])
    log_diag = jnp.log(L[rows, cols])
    return 2.0 * log_diag.sum()
def _add_to_diagonal(X, val):
    """Return a copy of ``X`` with ``val`` added to its main diagonal.

    The diagonal length is taken from ``X.shape[0]``.
    """
    rows, cols = jnp.diag_indices(X.shape[0])
    shifted = X.diagonal() + val
    return X.at[rows, cols].set(shifted)
def flatten_scale(scale):
    """Flatten a square scale matrix into its lower-triangular entries.

    The diagonal is replaced by its logarithm first; entries are returned in
    ``jnp.tril_indices`` order. The upper triangle is discarded.
    """
    n = scale.shape[-1]
    diag_logs = jnp.log(jnp.diag(scale))
    rows, cols = jnp.diag_indices(n)
    with_log_diag = scale.at[rows, cols].set(diag_logs)
    lower_rows, lower_cols = jnp.tril_indices(n)
    return with_log_diag[lower_rows, lower_cols]
def unflatten_scale(flat_scale, original_dim):
    """Rebuild a lower-triangular scale matrix from its flat representation.

    ``flat_scale`` fills the lower triangle in ``jnp.tril_indices`` order; the
    diagonal is then exponentiated. The result keeps ``flat_scale``'s dtype.
    """
    shape = [original_dim, original_dim]
    lower = jnp.zeros(shape, dtype=flat_scale.dtype)
    lower = lower.at[jnp.tril_indices(original_dim)].set(flat_scale)
    rows, cols = jnp.diag_indices(original_dim)
    return lower.at[rows, cols].set(jnp.exp(jnp.diag(lower)))
def add_to_diagonal(K: Array, constant: float) -> Array:
    """Return a copy of ``K`` with ``constant`` added to its main diagonal.

    Args:
        K: 2-D array; the diagonal length is taken from ``K.shape[0]``.
        constant: Value added to every diagonal entry.

    Returns:
        New array; ``K`` itself is not modified.
    """
    # ``x.at[idx].add(y)`` is the replacement for the removed
    # ``jax.ops.index_add(x, idx, y)``.
    return jnp.asarray(K).at[jnp.diag_indices(K.shape[0])].add(constant)