def jitter_cholesky(A):
    """Cholesky factorization with a small diagonal jitter for stability.

    Tries a small jitter first and falls back to a larger one if the
    factorization fails.
    """
    try:
        jitter1 = linalg.diag(1e-7 * tf.ones(A.shape[-1], dtype='float64'))
        return linalg.cholesky(A + jitter1)
    except Exception:  # Cholesky failed: matrix not numerically positive definite.
        jitter2 = linalg.diag(1e-5 * tf.ones(A.shape[-1], dtype='float64'))
        return linalg.cholesky(A + jitter2)

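# A hypothetical, self-contained sketch (not part of the original module) of
# how jitter_cholesky above behaves on a nearly singular Gram matrix; the RBF
# kernel and inputs are purely illustrative.
def _demo_jitter_cholesky():
    import tensorflow as tf

    # Two duplicate inputs make the plain Gram matrix rank deficient.
    x = tf.constant([[0.0], [1.0], [1.0]], dtype=tf.float64)
    gram = tf.exp(-0.5 * tf.square(x - tf.transpose(x)))

    chol = jitter_cholesky(gram)  # succeeds thanks to the diagonal jitter
    recon = tf.matmul(chol, chol, transpose_b=True)
    # Reconstruction error is on the order of the jitter that was added.
    print(tf.reduce_max(tf.abs(recon - gram)).numpy())
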
def _build_interim_vals(self, train_inputs, train_outputs):
    # Get the observation noise variance from the likelihood.
    _, var = self.lik(0, variances=0)
    # kxx: (num_train, num_train) kernel matrix plus noise on the diagonal.
    kxx = self.cov[0](train_inputs) + var * tf.eye(
        tf.shape(input=train_inputs)[-2])
    jitter = JITTER * tf.eye(tf.shape(input=train_inputs)[-2])
    # chol (same size as kxx); jitter has to be added for numerical stability.
    chol = tfl.cholesky(kxx + jitter)
    # alpha = chol.T \ (chol \ train_outputs) = kxx^{-1} train_outputs
    alpha = tfl.cholesky_solve(chol, train_outputs)
    return chol, alpha

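# A hypothetical sketch (not part of the original module) of the same algebra
# as _build_interim_vals above, written in plain TensorFlow: `rbf` stands in
# for self.cov[0], `noise_var` for the likelihood variance, and the use of
# `alpha` for the exact-GP predictive mean is illustrative.
def _demo_interim_vals():
    import tensorflow as tf

    def rbf(a, b, lengthscale=1.0):
        sq_dist = tf.reduce_sum(
            tf.square(a[:, None, :] - b[None, :, :]), axis=-1)
        return tf.exp(-0.5 * sq_dist / lengthscale ** 2)

    jitter = 1e-6    # same role as the module-level JITTER constant
    noise_var = 0.1  # stand-in for the likelihood variance
    x = tf.random.normal([50, 3], dtype=tf.float64)
    y = tf.random.normal([50, 1], dtype=tf.float64)

    kxx = rbf(x, x) + (noise_var + jitter) * tf.eye(50, dtype=tf.float64)
    chol = tf.linalg.cholesky(kxx)
    alpha = tf.linalg.cholesky_solve(chol, y)  # = kxx^{-1} y

    # Exact-GP predictive mean at new inputs: k(x_*, x) @ alpha.
    x_star = tf.random.normal([5, 3], dtype=tf.float64)
    return tf.matmul(rbf(x_star, x), alpha)
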
def _transform_variables(self, inputs=1):
    """Transform variables that were stored in a more compact form.

    Doing it like this allows us to put certain constraints on the variables.
    """
    # Use softmax(raw_weights) to keep all weights normalized.
    weights, chol_covars, means, inducing_inputs = self.store(inputs)

    # Build the matrices of covariances between inducing inputs.
    kernel_mat = tf.stack([
        self.cov[i](inducing_inputs[i, :, :])
        for i in range(self.num_latents)
    ], 0)
    jitter = JITTER * tf.eye(tf.shape(input=inducing_inputs)[-2])
    kernel_chol = tfl.cholesky(kernel_mat + jitter)
    return weights, chol_covars, kernel_chol, means, inducing_inputs

def solve_det_conditional(x, sigma, A, Q):
    """
    Use matrix inversion lemma for the solve:

    .. math::
        (\\Sigma - AQ^{-1}A^T)^{-1} X =\\
        (\\Sigma^{-1} + \\Sigma^{-1} A (Q - A^T \\Sigma^{-1} A)^{-1}
        A^T \\Sigma^{-1}) X

    Use matrix determinant lemma for determinant:

    .. math::
        \\log|(\\Sigma - AQ^{-1}A^T)| = \\log|Q - A^T \\Sigma^{-1} A| -
        \\log|Q| + \\log|\\Sigma|

    Parameters
    ----------
    x: tf.Tensor
        Tensor to multiply the solve by
    sigma: brainiak.matnormal.CovBase
        Covariance object implementing solve and logdet
    A: tf.Tensor
        Factor multiplying the variable we conditioned on
    Q: brainiak.matnormal.CovBase
        Covariance object of conditioning variable, implementing solve
        and logdet
    """
    # Cholesky of (Q - A^T Sigma^{-1} A), the "lemma factor" used in both the
    # Woodbury solve and the matrix determinant lemma.
    lemma_factor = tlinalg.cholesky(
        Q._cov - tf.matmul(A, sigma.solve(A), transpose_a=True))

    logdet = (-Q.logdet + sigma.logdet + 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor))))

    # A^T Sigma^{-1}
    Atrp_Sinv = tf.matmul(A, sigma._prec, transpose_a=True)
    # (Q - A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}
    prod_term = tlinalg.cholesky_solve(lemma_factor, Atrp_Sinv)

    solve = tf.matmul(
        sigma.solve(scaled_I(1.0, sigma.size) + tf.matmul(A, prod_term)), x)

    return solve, logdet

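# A small, self-contained NumPy check (not part of the original module) of the
# two identities in the docstring of solve_det_conditional above; the random
# matrices are stand-ins for the CovBase objects the function actually takes.
def _check_conditional_identities():
    import numpy as np

    rng = np.random.default_rng(0)
    n, k = 6, 3
    sigma = np.cov(rng.standard_normal((n, 3 * n)))          # SPD (n, n)
    q = np.cov(rng.standard_normal((k, 3 * k))) + np.eye(k)  # SPD (k, k)
    a = 0.1 * rng.standard_normal((n, k))  # small, keeps everything well conditioned

    m = sigma - a @ np.linalg.solve(q, a.T)
    sigma_inv = np.linalg.inv(sigma)
    lemma = q - a.T @ sigma_inv @ a

    # Matrix inversion (Woodbury) identity.
    direct = np.linalg.inv(m)
    woodbury = sigma_inv + sigma_inv @ a @ np.linalg.solve(lemma, a.T @ sigma_inv)
    assert np.allclose(direct, woodbury)

    # Matrix determinant lemma.
    logdet_m = np.linalg.slogdet(m)[1]
    logdet = (np.linalg.slogdet(lemma)[1] - np.linalg.slogdet(q)[1]
              + np.linalg.slogdet(sigma)[1])
    assert np.isclose(logdet_m, logdet)
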
def _build_entropy(self, weights, means, chol_covars):
    """Construct entropy.

    Args:
        weights: shape: (num_components)
        means: shape: (num_components, num_latents, num_inducing)
        chol_covars: shape: (num_components, num_latents,
                             num_inducing[, num_inducing])
    Returns:
        Entropy (scalar)
    """

    # This part is to compute the product of the pdf of normal distributions
    """
    chol_component_covar = []
    component_mean = []
    component_covar = []
    covar_shape = tf.shape(chol_covars)[-2:]
    mean_shape = tf.shape(means)[-1:]

    # \Sigma_new = (\sum_{i=1}^{num_latents} \Sigma_i^{-1})^{-1}
    # \Mu_new = \Sigma_new * (\sum_{i=1}^{num_latents} \Sigma_i^{-1} * \mu_i)
    for i in range(self.num_components):
        temp_cov = tf.zeros(covar_shape)
        temp_mean = tf.zeros(mean_shape)[..., tf.newaxis]

        for k in range(self.num_latents):
            # Compute the sum of (\Sigma_i)^{-1}
            temp_cov += tf.cholesky_solve(chol_covars[i, k, :, :],
                                          tf.eye(covar_shape[0]))
            # Compute the sum of (\Sigma_i)^{-1} * \mu_i
            temp_mean += tf.cholesky_solve(chol_covars[i, k, :, :],
                                           means[i, k, :, tf.newaxis])

        # Compute \Sigma_new = temp_cov^{-1}
        temp_chol_covar = tf.cholesky(temp_cov)
        temp_component_covar = tf.cholesky_solve(temp_chol_covar,
                                                 tf.eye(covar_shape[0]))
        component_covar.append(temp_component_covar)

        # Compute \Mu_new = \Sigma_new * (\sum_{i=1}^{num_latents}
        #                                 \Sigma_i^{-1} * \mu_i)
        temp_component_mean = temp_component_covar @ temp_mean
        component_mean.append(temp_component_mean)

        # Some functions need cholesky of \Sigma_new
        chol_component_covar.append(tf.cholesky(temp_component_covar))

    chol_component_covar = tf.stack(chol_component_covar, 0)
    component_covar = tf.stack(component_covar, 0)
    component_mean = tf.squeeze(tf.stack(component_mean, 0), -1)
    """

    # First build a square matrix of normals.
    if self.args['diag_post']:
        # Construct normal distributions for all combinations of components.
        variational_dist = tfd.MultivariateNormalDiag(
            means, tf.sqrt(chol_covars[tf.newaxis, ...] +
                           chol_covars[:, tf.newaxis, ...]))
    else:
        if self.args['num_components'] == 1:
            # Use the fact that chol(S + S) = sqrt(2) * chol(S).
            chol_covars_sum = tf.sqrt(2.) * chol_covars[tf.newaxis, ...]
        else:
            # Here we use the original component_covar directly.
            # TODO: Can we just stay in cholesky space somehow?
            component_covar = util.mat_square(chol_covars)
            chol_covars_sum = tfl.cholesky(
                component_covar[tf.newaxis, ...] +
                component_covar[:, tf.newaxis, ...])
        # The class MultivariateNormalTriL only accepts cholesky
        # decompositions of covariances.
        variational_dist = tfd.MultivariateNormalTriL(
            means[tf.newaxis, ...], chol_covars_sum)

    # Compute the log probability of all means under all normal
    # distributions, then sum over all latent functions.
    # shape of log_normal_probs: (num_components, num_components)
    log_normal_probs = tf.reduce_sum(
        input_tensor=variational_dist.log_prob(means[:, tf.newaxis, ...]),
        axis=-1)

    # Now compute the entropy.
    # Broadcast `weights` into dimension 1, then do `logsumexp` in that
    # dimension.
    weighted_logsumexp_probs = tf.reduce_logsumexp(
        input_tensor=tfm.log(weights) + log_normal_probs, axis=1)

    # Multiply with the weights again and then sum over it all.
    return -util.mul_sum(weights, weighted_logsumexp_probs)

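# For reference, the quantity returned by _build_entropy above matches the
# standard Jensen lower bound on the entropy of a mixture of Gaussians (this
# reading is inferred from the code; the docstring only says "Construct
# entropy"):
#
#   H[q] >= -\sum_i w_i \log \sum_j w_j
#               \prod_{k=1}^{num_latents} N(m_{ik} | m_{jk}, S_{ik} + S_{jk}),
#
# where S_{ik} = L_{ik} L_{ik}^T with L_{ik} the Cholesky factors stored in
# `chol_covars` (in the `diag_post` case `chol_covars` holds the variances
# directly, which is why the code adds them and takes a square root).
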
def solve_det_marginal(x, sigma, A, Q):
    """
    Use matrix inversion lemma for the solve:

    .. math::
        (\\Sigma + AQA^T)^{-1} X =\\
        (\\Sigma^{-1} - \\Sigma^{-1} A (Q^{-1} + A^T \\Sigma^{-1} A)^{-1}
        A^T \\Sigma^{-1}) X

    Use matrix determinant lemma for determinant:

    .. math::
        \\log|(\\Sigma + AQA^T)| = \\log|Q^{-1} + A^T \\Sigma^{-1} A| +
        \\log|Q| + \\log|\\Sigma|

    Parameters
    ----------
    x: tf.Tensor
        Tensor to multiply the solve by
    sigma: brainiak.matnormal.CovBase
        Covariance object implementing solve and logdet
    A: tf.Tensor
        Factor multiplying the variable we marginalized out
    Q: brainiak.matnormal.CovBase
        Covariance object of marginalized variable, implementing solve
        and logdet
    """
    # For diagnostics, we want to check condition numbers of things we
    # invert. This includes Q and Sigma, as well as the "lemma factor",
    # for lack of a better definition.
    logging.log(logging.DEBUG, "Printing diagnostics for solve_det_marginal")
    lemma_cond = _condition(
        Q._prec + tf.matmul(A, sigma.solve(A), transpose_a=True))
    logging.log(logging.DEBUG, f"lemma_factor condition={lemma_cond}")
    logging.log(logging.DEBUG, f"Q condition={_condition(Q._cov)}")
    logging.log(logging.DEBUG, f"sigma condition={_condition(sigma._cov)}")
    logging.log(
        logging.DEBUG,
        f"A max={tf.reduce_max(input_tensor=A)}, "
        + f"A min={tf.reduce_min(input_tensor=A)}",
    )

    # Cholesky of (Q^{-1} + A^T Sigma^{-1} A), which looks sort of like a
    # Schur complement but isn't, so we call it the "lemma factor" since we
    # use it in the Woodbury and matrix determinant lemmas.
    lemma_factor = tlinalg.cholesky(
        Q._prec + tf.matmul(A, sigma.solve(A), transpose_a=True))

    logdet = (Q.logdet + sigma.logdet + 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor))))

    logging.log(logging.DEBUG, f"Log-determinant of Q={Q.logdet}")
    logging.log(logging.DEBUG, f"sigma logdet={sigma.logdet}")
    lemma_logdet = 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor)))
    logging.log(logging.DEBUG, f"lemma factor logdet={lemma_logdet}")

    # A^T Sigma^{-1}
    Atrp_Sinv = tf.matmul(A, sigma._prec, transpose_a=True)
    # (Q^{-1} + A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}
    prod_term = tlinalg.cholesky_solve(lemma_factor, Atrp_Sinv)

    solve = tf.matmul(
        sigma.solve(scaled_I(1.0, sigma.size) - tf.matmul(A, prod_term)), x)

    return solve, logdet

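# The analogous NumPy check (not part of the original module) for the
# identities in the docstring of solve_det_marginal above. It also shows why
# the rearrangement pays off: the only new factorization needed, the lemma
# factor, is (k, k), and Q is typically much smaller than Sigma.
def _check_marginal_identities():
    import numpy as np

    rng = np.random.default_rng(1)
    n, k = 8, 2
    sigma = np.cov(rng.standard_normal((n, 3 * n)))          # SPD (n, n)
    q = np.cov(rng.standard_normal((k, 3 * k))) + np.eye(k)  # SPD (k, k)
    a = rng.standard_normal((n, k))

    m = sigma + a @ q @ a.T
    sigma_inv = np.linalg.inv(sigma)
    lemma = np.linalg.inv(q) + a.T @ sigma_inv @ a  # only (k, k)

    # Matrix inversion (Woodbury) identity applied to a right-hand side.
    x = rng.standard_normal((n, 4))
    direct = np.linalg.solve(m, x)
    woodbury = (sigma_inv
                - sigma_inv @ a @ np.linalg.solve(lemma, a.T @ sigma_inv)) @ x
    assert np.allclose(direct, woodbury)

    # Matrix determinant lemma.
    logdet_m = np.linalg.slogdet(m)[1]
    logdet = (np.linalg.slogdet(lemma)[1] + np.linalg.slogdet(q)[1]
              + np.linalg.slogdet(sigma)[1])
    assert np.isclose(logdet_m, logdet)
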