def sample_value_posterior_GIBBS(self, X, Y, V):
    # with the basic priors, the conditional posterior for the values reduces
    # to a Bayesian linear regression
    sigma = torch.cholesky_inverse(
        torch.cholesky(torch.matmul(torch.matmul(X.T, torch.diag(1 / V)), X)))
    mu = torch.matmul(sigma, torch.matmul(torch.matmul(X.T, torch.diag(1 / V)), Y))
    return torch.distributions.MultivariateNormal(mu, sigma).sample()
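# Hedged usage sketch (not from the original source): the posterior above is the
# standard weighted Bayesian linear regression with a flat coefficient prior,
#   Sigma = (X^T V^{-1} X)^{-1},  mu = Sigma X^T V^{-1} Y.
# A minimal standalone version on synthetic data; all names below are illustrative.
import torch

X = torch.randn(100, 3)
true_beta = torch.tensor([1.0, -2.0, 0.5])
V = torch.full((100,), 0.01)                       # per-observation noise variances
Y = X @ true_beta + V.sqrt() * torch.randn(100)
XtVinv = X.T * (1.0 / V)                           # X^T diag(1/V) without forming the diagonal
sigma = torch.cholesky_inverse(torch.linalg.cholesky(XtVinv @ X))
mu = sigma @ (XtVinv @ Y)
beta_sample = torch.distributions.MultivariateNormal(mu, sigma).sample()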
def log_probs_cholesky(self, z):
    # log-probs by default, since raw probs can easily be intractable
    N, M, D = z.size(0), self._mu.size(0), z.size(-1)
    inv = []
    log_det_l = []
    # compute the log-determinant of each component covariance via its Cholesky factor
    for i in range(M):
        try:
            cholesky_root = torch.cholesky(self._sigma[i])
        except RuntimeError:
            print("There is a negative eigenvalue for cov mat " + str(i))
            eigen_value, eigen_vector = torch.symeig(self._sigma[i], eigenvectors=True)
            print("Rule: if min(eig) > -1e-5, clamp tiny eigenvalues to a small positive value")
            if eigen_value.min() > -1e-5:
                print("Negative minimum eigenvalue is", eigen_value.min().item(), "- clamping")
                eigen_value[eigen_value < 1e-15] = 1e-10
                self._sigma[i] = eigen_vector.mm(torch.diag(eigen_value).mm(eigen_vector.t()))
                cholesky_root = torch.cholesky(self._sigma[i])
            else:
                print("Negative minimum eigenvalue is", eigen_value.min().item(), "- exiting")
                quit()
        # the factor's diagonal gives half the log-determinant: log|Sigma| = 2 * sum(log(diag(L)))
        log_det = cholesky_root.diag().log().sum()
        inv.append(torch.cholesky_inverse(cholesky_root).unsqueeze(0))
        log_det_l.append(log_det.unsqueeze(0))
    log_det = torch.cat(log_det_l, 0).float()   # M
    inv_sig = torch.cat(inv, 0).float()         # M x D x D
    dtm = z.unsqueeze(1).expand(N, M, D) - self._mu.unsqueeze(0).expand(N, M, D)
    dtm_mm = []
    for i in range(M):
        dtm_k = dtm[:, i, :].unsqueeze(1)
        inv_sig_k = inv_sig[i, :, :].unsqueeze(0).expand(N, D, D)
        exp_dist = dtm_k.bmm(inv_sig_k).bmm(dtm_k.transpose(-1, 1)).squeeze(-1)
        dtm_mm.append(exp_dist)
    dtm_mm = torch.cat(dtm_mm, -1)
    # normalizer uses the event dimension D, not the batch size, and the full log|Sigma|
    log_norm = D * math.log(2 * math.pi) + 2 * log_det
    log_pdf = -0.5 * (log_norm.unsqueeze(0).expand(N, M) + dtm_mm)
    weighted_pdf = torch.log(self._w.unsqueeze(0).expand(N, M)) + log_pdf
    return weighted_pdf
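# Hedged reference implementation (standard torch.distributions route, not the
# original code): the same weighted per-component log-densities, useful for
# checking log_probs_cholesky. All argument names below are illustrative.
import torch

def log_probs_reference(z, mu, sigma, w):
    # z: N x D, mu: M x D, sigma: M x D x D, w: M (mixture weights)
    comp = torch.distributions.MultivariateNormal(mu, covariance_matrix=sigma)
    log_pdf = comp.log_prob(z.unsqueeze(1))   # broadcasts to N x M
    return torch.log(w).unsqueeze(0) + log_pdf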
def toMahalanobisDistance(cls, target, mean, pred_cov, clamp_covariance=False):
    """
    Generic function that can be used once vec2Cov is implemented.
    Can be reimplemented if a better way exists for one parametrization.

    Args:
        mean [n x 3] : vx, vy, vz
        pred_cov [n x params] : xx, yy, zz
    Returns:
        err [n x 1] : squared Mahalanobis distance
    """
    cov_matrix = cls.vec2Cov(pred_cov)
    # compute the inverse of the covariance matrices
    CovInv = torch.zeros_like(cov_matrix)
    N = target.shape[0]
    for i in range(N):
        u = torch.cholesky(cov_matrix[i, :, :])
        CovInv[i, :, :] = torch.cholesky_inverse(u)
    # compute the error
    err = mean - target
    loss_part1 = torch.einsum("ki,kij,kj->k", err, CovInv, err)
    if clamp_covariance:
        loss_part2 = torch.log(cov_matrix.det().clamp(min=1e-10))
    else:
        loss_part2 = torch.logdet(cov_matrix)
    loss = loss_part1 + loss_part2
    return loss.reshape((N, -1))
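# Hedged batched alternative (assumes explicit [n x 3 x 3] covariances rather
# than the vec2Cov parametrization above): cholesky_solve avoids both the
# per-sample inversion loop and the explicit determinant.
import torch

def mahalanobis_loss(target, mean, cov_matrix):
    L = torch.linalg.cholesky(cov_matrix)               # n x 3 x 3 lower factors
    err = (mean - target).unsqueeze(-1)                 # n x 3 x 1
    sol = torch.cholesky_solve(err, L)                  # Sigma^{-1} (mean - target)
    loss_part1 = (err * sol).sum(dim=(-2, -1))
    # log|Sigma| = 2 * sum(log(diag(L)))
    loss_part2 = 2.0 * torch.log(torch.diagonal(L, dim1=-2, dim2=-1)).sum(-1)
    return (loss_part1 + loss_part2).reshape(target.shape[0], -1)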
def update(self, g, prec, iteration, *, ForceUpdate=False):
    if not ForceUpdate:
        raise RuntimeError
    g = g.to(dtype=torch.double)
    prec = prec.to(dtype=torch.double)
    # dirty solution: pull the current factors from the query object
    self._Gamma = self._querry.Gamma.t()
    self._GammaTransposed = self._querry.GammaTransposed.t()
    self._alpha = self._querry.alpha
    cov = 1 / prec
    Lambda = torch.einsum('im, m, sm -> is',
                          [self._GammaTransposed, cov, self._GammaTransposed])  # checked this; seems to be okay
    Lambda += torch.diag(self.vo_variances)
    L = torch.cholesky(Lambda)
    LambdaInv = torch.cholesky_inverse(L)
    solvec = LambdaInv @ (self._GammaTransposed @ g - self._alpha)
    mean = g - torch.einsum('i, mi, m -> i', [cov, self._GammaTransposed, solvec])
    A = self._GammaTransposed * cov
    postcov_diag_subtractor = torch.einsum('si, sm, mi -> i', [A, LambdaInv, A])
    self._mean = mean
    self._vars = cov - postcov_diag_subtractor
def complexity(sigma, c, samples):
    n = sigma.shape[0]
    device = sigma.device
    identity = torch.eye(n, device=device)
    assert (sigma == sigma.t()).all()
    u = torch.cholesky(sigma)
    inv = torch.cholesky_inverse(u)
    assert (torch.mm(sigma, inv) - identity).abs().max() < 1e-03
    nth_root_det = u.diag().pow(2 / n).prod()
    inv_trace = inv.diag().sum()
    inv_proj = torch.dot(c, torch.mv(inv, c))
    formula_1 = nth_root_det * ((0.5 - 1 / math.pi) * inv_trace + inv_proj / math.pi)
    mat = nth_root_det * inv - identity
    contribs = []
    for _ in range(samples):
        z = torch.randn(n, device=device).abs()
        contrib = -0.5 * torch.dot(c * z, torch.mv(mat, c * z))
        contribs.append(contrib.item())
    contribs = np.array(contribs)
    # max-shift for numerical stability (a manual log-sum-exp)
    cmax = np.max(contribs)
    contribs = contribs - cmax
    formula_0 = n * math.log(2) + math.log(samples) - cmax - math.log(np.sum(np.exp(contribs)))
    return formula_0.item(), formula_1.item()
def complexity(sigma, c, samples):
    n = sigma.shape[0]
    device = sigma.device
    identity = torch.eye(n, device=device)
    assert (sigma == sigma.t()).all()
    u = torch.cholesky(sigma)
    inv = torch.cholesky_inverse(u)
    assert (torch.mm(sigma, inv) - identity).abs().max() < 1e-03
    nth_root_det = u.diag().pow(2 / n).prod()
    inv_trace = inv.diag().sum()
    inv_proj = torch.dot(c, torch.mv(inv, c))
    formula_1 = n / 5.0 + nth_root_det * (
        (0.5 - 1 / math.pi) * inv_trace + inv_proj / math.pi)
    mat = nth_root_det * inv - identity
    # vectorized version of the sampling loop above, using torch.logsumexp
    z = torch.randn((n, samples), device=device).abs()
    cz = torch.mul(c.unsqueeze(1), z)
    contribs = -0.5 * (cz * torch.matmul(mat, cz)).sum(dim=0)
    formula_0 = n * math.log(2) + math.log(samples) - torch.logsumexp(contribs, dim=0)
    return formula_0.item(), formula_1.item()
def test_cholesky_inverse_no_batch():
    """Checks that our Cholesky inverse matches `torch.cholesky_inverse()`."""
    torch.autograd.set_detect_anomaly(True)
    matrix_dim = 3
    L = fannypack.utils.tril_from_vector(
        torch.randn(fannypack.utils.tril_count_from_matrix_dim(matrix_dim))
    )
    torch.testing.assert_allclose(
        fannypack.utils.cholesky_inverse(L), torch.cholesky_inverse(L)
    )
def convert_model_params_to_ssm_params(
    A, B, C, D, m0,
    LV0inv_tril, LV0inv_logdiag,
    LRinv_tril, LRinv_logdiag,
    LQinv_tril, LQinv_logdiag,
):
    R = torch.cholesky_inverse(
        torch.tril(LRinv_tril, -1) + torch.diag(torch.exp(LRinv_logdiag)))
    Q = torch.cholesky_inverse(
        torch.tril(LQinv_tril, -1) + torch.diag(torch.exp(LQinv_logdiag)))
    V0 = torch.cholesky_inverse(
        torch.tril(LV0inv_tril, -1) + torch.diag(torch.exp(LV0inv_logdiag)))
    return Box(A=A, B=B, C=C, D=D, R=R, Q=Q, m0=m0, V0=V0)
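# Hedged illustration (hypothetical shapes): parametrizing the *inverse* noise
# covariance by a strict lower triangle plus a log-diagonal guarantees the
# reconstructed factor is a valid Cholesky factor, so cholesky_inverse always
# yields a symmetric positive-definite R.
import torch

d = 3
LRinv_tril = torch.randn(d, d, dtype=torch.float64)   # only the strict lower triangle is used
LRinv_logdiag = torch.zeros(d, dtype=torch.float64)   # log-diagonal of zeros -> unit diagonal
Linv = torch.tril(LRinv_tril, -1) + torch.diag(torch.exp(LRinv_logdiag))
R = torch.cholesky_inverse(Linv)                      # R = (Linv @ Linv.T)^{-1}
assert torch.allclose(R, torch.inverse(Linv @ Linv.t()), atol=1e-8)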
def blas_lapack_ops(self):
    m = torch.randn(3, 3)
    a = torch.randn(10, 3, 4)
    b = torch.randn(10, 4, 3)
    v = torch.randn(3)
    return (
        torch.addbmm(m, a, b),
        torch.addmm(torch.randn(2, 3), torch.randn(2, 3), torch.randn(3, 3)),
        torch.addmv(torch.randn(2), torch.randn(2, 3), torch.randn(3)),
        torch.addr(torch.zeros(3, 3), v, v),
        torch.baddbmm(m, a, b),
        torch.bmm(a, b),
        torch.chain_matmul(torch.randn(3, 3), torch.randn(3, 3), torch.randn(3, 3)),
        # torch.cholesky(a),  # deprecated
        torch.cholesky_inverse(torch.randn(3, 3)),
        torch.cholesky_solve(torch.randn(3, 3), torch.randn(3, 3)),
        torch.dot(v, v),
        torch.eig(m),
        torch.geqrf(a),
        torch.ger(v, v),
        torch.inner(m, m),
        torch.inverse(m),
        torch.det(m),
        torch.logdet(m),
        torch.slogdet(m),
        torch.lstsq(m, m),
        torch.lu(m),
        torch.lu_solve(m, *torch.lu(m)),
        torch.lu_unpack(*torch.lu(m)),
        torch.matmul(m, m),
        torch.matrix_power(m, 2),
        # torch.matrix_rank(m),
        torch.matrix_exp(m),
        torch.mm(m, m),
        torch.mv(m, v),
        # torch.orgqr(a, m),
        # torch.ormqr(a, m, v),
        torch.outer(v, v),
        torch.pinverse(m),
        # torch.qr(a),
        torch.solve(m, m),
        torch.svd(a),
        # torch.svd_lowrank(a),
        # torch.pca_lowrank(a),
        # torch.symeig(a),  # deprecated
        # torch.lobpcg(a, b),  # not supported
        torch.trapz(m, m),
        torch.trapezoid(m, m),
        torch.cumulative_trapezoid(m, m),
        # torch.triangular_solve(m, m),
        torch.vdot(v, v),
    )
def gradientFn(P, mu, v):
    """
    Compute vector-Jacobian product DJ(M) = DJ(P) DP(M) [b x m*n]
    DP(M) = (H^-1 * A^T * (A * H^-1 * A^T)^-1 * A * H^-1 - H^-1) * B
    H = D_YY^2 f(x, y) = diag(mu / vec(P))
    B = D_XY^2 f(x, y) = I

    Using: Lemma 4.4 from
    Stephen Gould, Richard Hartley, and Dylan Campbell, 2019
    "Deep Declarative Networks: A New Hope", arXiv:1909.04866

    Arguments:
        P: (b, m, n) Torch tensor, batch of transport matrices
        mu: float, regularisation factor
        v: (b, m*n) Torch tensor, batch of gradients of J with respect to P

    Return Values:
        gradient: (b, m*n) Torch tensor, batch of gradients of J with respect to M
    """
    with torch.no_grad():
        b, m, n = P.size()
        B = P / mu
        hinv = B.flatten(start_dim=-2)
        d1inv = B.sum(-1)[:, 1:].reciprocal()  # remove first element
        d2 = B.sum(-2)
        B = B[:, 1:, :]  # remove top row
        S = -B.transpose(-2, -1).matmul(d1inv.unsqueeze(-1) * B)
        S[:, range(n), range(n)] += d2
        Su = torch.cholesky(S)
        Sinv = torch.zeros_like(S)
        for i in range(b):  # cholesky_inverse currently cannot handle batches
            Sinv[i, ...] = torch.cholesky_inverse(Su[i, ...])
        R = -B.matmul(Sinv) * d1inv.unsqueeze(-1)
        Q = -R.matmul(B.transpose(-2, -1) * d1inv.unsqueeze(-2))
        Q[:, range(m - 1), range(m - 1)] += d1inv
        # Build vector-Jacobian product from left to right:
        vHinv = v * hinv  # bxmn * bxmn -> bxmn
        # Break vHinv into m blocks of n elements:
        u1 = vHinv.reshape((-1, m, n)).sum(-1)[:, 1:].unsqueeze(-2)  # remove first element
        u2 = vHinv.reshape((-1, m, n)).sum(-2).unsqueeze(-2)
        u3 = u1.matmul(Q) + u2.matmul(R.transpose(-2, -1))
        u4 = u1.matmul(R) + u2.matmul(Sinv)
        u5 = u3.expand(-1, n, -1).transpose(-2, -1) + u4.expand(-1, m - 1, -1)
        uHinv = torch.cat((u4, u5), dim=-2).flatten(start_dim=-2) * hinv
        gradient = uHinv - vHinv
    return gradient
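# Hedged batched alternative to the Sinv loop above: torch.cholesky_solve
# accepts batched factors, so the identity-matrix trick avoids the Python loop.
import torch

def batched_cholesky_inverse(Su):
    n = Su.shape[-1]
    eye = torch.eye(n, device=Su.device, dtype=Su.dtype).expand(Su.shape)
    return torch.cholesky_solve(eye, Su)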
def inverse(self, method='torch'):
    assert method in ['cholesky', 'torch', 'compress_inverse']
    if method == 'cholesky':
        u = torch.cholesky(-self.mat)
        matinv = -torch.cholesky_inverse(u)
    elif method == 'torch':
        matinv = torch.inverse(self.mat)
    else:  # compress inverse
        matinv = self.smat.inverse()
    self.matinv = matinv
    return matinv
def _update_inv(self, m, damping):
    """Do Cholesky decomposition for computing the inverse of the damped factors.

    :param m: The layer
    :return: no returns.
    """
    n = self.m_a[m].size(0)
    a, g = self.a[m], self.g[m]
    if self.subsample == 'true' and m.__class__.__name__ == 'Conv2d':
        n *= self.num_ss_patches
    # torch.cholesky_inverse expects a Cholesky factor, so factor the damped
    # Gram matrices before inverting
    aaT_damped = self.aaT[m] + n * math.sqrt(damping) * torch.eye(n).to(self.aaT[m].device)
    self.H_a[m] = a.t() @ torch.cholesky_inverse(torch.cholesky(aaT_damped)) @ a
    self.H_a[m] = (torch.eye(self.H_a[m].size(0)).to(self.H_a[m].device)
                   - self.H_a[m]) / math.sqrt(damping)
    ggT_damped = self.ggT[m] + n * math.sqrt(damping) * torch.eye(n).to(self.ggT[m].device)
    self.H_g[m] = g.t() @ torch.cholesky_inverse(torch.cholesky(ggT_damped)) @ g
    self.H_g[m] = (torch.eye(self.H_g[m].size(0)).to(self.H_g[m].device)
                   - self.H_g[m]) / math.sqrt(damping)
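# Hedged check of the Woodbury form used above: with c = n * sqrt(damping),
#   (a^T a + c I)^{-1} = (I - a^T (a a^T + c I)^{-1} a) / c,
# which is why the code inverts the small n x n Gram matrix instead of the
# d x d one. All shapes below are illustrative.
import math
import torch

n, d, damping = 8, 32, 1e-2
a = torch.randn(n, d)
c = n * math.sqrt(damping)
woodbury = (torch.eye(d) - a.t() @ torch.inverse(a @ a.t() + c * torch.eye(n)) @ a) / c
direct = torch.inverse(a.t() @ a + c * torch.eye(d))
assert torch.allclose(woodbury, direct, atol=1e-4)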
def test_cholesky_inverse():
    """Checks that our Cholesky inverse matches `torch.cholesky_inverse()`."""
    torch.autograd.set_detect_anomaly(True)
    batch_dims = (5,)
    matrix_dim = 3
    L = fannypack.utils.tril_from_vector(
        torch.randn(
            batch_dims + (fannypack.utils.tril_count_from_matrix_dim(matrix_dim),),
        )
    )
    for i, our_inverse in enumerate(fannypack.utils.cholesky_inverse(L)):
        torch.testing.assert_allclose(our_inverse, torch.cholesky_inverse(L[i]))
def _batch_chol_inv(self, mat_chol: Tensor) -> Tensor:
    r"""Wrapper to perform (batched) Cholesky inverse."""
    # TODO: get rid of this once cholesky_inverse supports batch mode
    batch_eye = torch.eye(mat_chol.shape[-1], **self.tkwargs)
    if len(mat_chol.shape) == 2:
        mat_inv = torch.cholesky_inverse(mat_chol)
    elif len(mat_chol.shape) > 2 and (mat_chol.shape[-1] == mat_chol.shape[-2]):
        batch_eye = batch_eye.repeat(*(mat_chol.shape[:-2]), 1, 1)
        chol_inv = torch.triangular_solve(batch_eye, mat_chol, upper=False).solution
        mat_inv = chol_inv.transpose(-1, -2) @ chol_inv
    return mat_inv
def chol_invld(X, get_ld=False):
    try:
        A = torch.linalg.cholesky(X)
        Xi = torch.cholesky_inverse(A)
        if get_ld:
            ld = 2 * torch.diag(A).log().sum()
            return Xi, ld
        return Xi
    except RuntimeError:
        # factorization failed: X is not positive definite
        if get_ld:
            return -1, -1
        return -1
def fit(self, X, y):
    y = self.kernel.cast(y)
    X = self.kernel.cast(X)
    if y.shape[-1] != 1:
        raise ValueError(
            'Expected vector of dimension 1, but got vector of dimension {}'
            .format(y.shape[-1]))
    self.kernel.fit(X)
    # torch.cholesky_inverse expects a Cholesky factor, not the matrix itself,
    # so factor the noisy kernel matrix first
    K_noisy = self.kernel.kernel + torch.eye(self.kernel.n_dim_in) * (self.sigma ** 2)
    self.inverse = torch.cholesky_inverse(torch.linalg.cholesky(K_noisy))
    self.labels = y
    return self
def HSIC(K, L, M, eps):
    n = K.shape[0]
    # torch.cholesky_inverse expects a Cholesky factor, so factor the
    # regularized matrix before inverting
    M_eps = torch.cholesky_inverse(
        torch.linalg.cholesky(M + n * eps * torch.eye(n, device=K.device)))
    M_eps_2 = torch.matmul(M_eps, M_eps)
    term_1 = torch.matmul(K, L)
    KM = torch.matmul(K, M)
    ML = torch.matmul(M, L)
    MLM = torch.matmul(M, torch.matmul(L, M))
    term_2 = torch.matmul(KM, torch.matmul(M_eps_2, ML))
    term_3 = torch.matmul(
        KM, torch.matmul(M_eps_2, torch.matmul(MLM, torch.matmul(M_eps_2, M))))
    return torch.trace(term_1 - 2 * term_2 + term_3)
def potri(a, upper=True, out=None):
    r"""Computes the inverse of a symmetric positive-definite matrix :math:`A` using
    its Cholesky factor.

    For more information regarding :func:`torch.potri`, please check
    :func:`torch.cholesky_inverse`.

    .. warning::
        :func:`torch.potri` is deprecated in favour of :func:`torch.cholesky_inverse`
        and will be removed in the next release. Please use :func:`torch.cholesky_inverse`
        instead and note that the :attr:`upper` argument in :func:`torch.cholesky_inverse`
        defaults to ``False``.
    """
    warnings.warn("torch.potri is deprecated in favour of torch.cholesky_inverse and will be removed in "
                  "the next release. Please use torch.cholesky_inverse instead and note that the :attr:`upper` "
                  "argument in torch.cholesky_inverse defaults to ``False``.", stacklevel=2)
    return torch.cholesky_inverse(a, upper=upper, out=out)
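# Hedged migration example for the deprecation notice above; note the `upper`
# default flips from True (potri) to False (cholesky_inverse).
import torch

a = torch.randn(3, 3)
a = a @ a.t() + torch.eye(3)                     # symmetric positive definite
u = torch.linalg.cholesky(a)                     # lower triangular factor
ainv = torch.cholesky_inverse(u)                 # upper=False is the default
assert torch.allclose(ainv @ a, torch.eye(3), atol=1e-4)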
def init_weight(self, feats, labels):
    # initial weight_y is obtained by linear regression
    A = torch.mm(feats.t(), feats) + 1e-05 * torch.eye(feats.size(1))  # (feats, feats)
    labels_one_hot = torch.zeros((feats.size(0), self.n_classes))
    for i in range(labels.size(0)):
        l = labels[i]
        labels_one_hot[i, l] = 1
    # torch.cholesky_inverse expects a Cholesky factor, so factor A first
    A_inv = torch.cholesky_inverse(torch.linalg.cholesky(A))
    self.init_weight_y = nn.Parameter(
        torch.mm(torch.mm(A_inv, feats.t()), labels_one_hot),
        requires_grad=False)
    nn.init.constant_(self.global_tau_1, 1 / 2)
    nn.init.constant_(self.global_tau_2, 1 / 2)
    return
def cholesky_inverse(u: torch.Tensor, upper: bool = False) -> torch.Tensor:
    """Alternative to `torch.cholesky_inverse()`, with support for batch dimensions.

    Relevant issue tracker: https://github.com/pytorch/pytorch/issues/7500

    Args:
        u (torch.Tensor): Triangular Cholesky factor. Shape should be `(*, N, N)`.
        upper (bool, optional): Whether to consider the Cholesky factor as a lower
            or upper triangular matrix.

    Returns:
        torch.Tensor:
    """
    if u.dim() == 2 and not u.requires_grad:
        return torch.cholesky_inverse(u, upper=upper)
    # match the factor's device and dtype so the solve also works on GPU inputs
    eye = torch.eye(u.size(-1), device=u.device, dtype=u.dtype)
    return torch.cholesky_solve(eye.expand(u.size()), u, upper=upper)
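# Hedged sanity check (hypothetical, not from the original test suite): the
# cholesky_solve fallback above should match a dense inverse on a batch of
# well-conditioned SPD matrices.
import torch

A = torch.randn(4, 5, 5)
A = A @ A.transpose(-1, -2) + 5.0 * torch.eye(5)    # batch of SPD matrices
u = torch.linalg.cholesky(A)
assert torch.allclose(cholesky_inverse(u), torch.inverse(A), atol=1e-4)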
def inv(a, method='lu', rcond=1e-15, out=None):
    r"""Matrix inversion.

    Parameters
    ----------
    a : (..., m, n) tensor_like
        Input matrix.
    method : {'lu', 'chol', 'svd', 'pinv'}, default='lu'
        Inversion method:
        * 'lu'   : LU decomposition. ``a`` must be invertible.
        * 'chol' : Cholesky decomposition. ``a`` must be positive definite.
        * 'svd'  : Singular Value decomposition.
        * 'pinv' : Moore-Penrose pseudoinverse (by means of svd).
    rcond : float, default=1e-15
        Cutoff for small singular values when ``method == 'pinv'``.
    out : tensor, optional
        Output tensor (only used by methods 'lu' and 'chol').

    .. note:: if ``m != n``, the Moore-Penrose pseudoinverse is always used.

    Returns
    -------
    x : (..., n, m) tensor
        Inverse matrix.
    """
    a = utils.as_tensor(a)
    backend = dict(dtype=a.dtype, device=a.device)
    if a.shape[-1] != a.shape[-2]:
        method = 'pinv'
    if method.lower().startswith('lu'):
        return torch.inverse(a, out=out)
    elif method.lower().startswith('chol'):
        if a.dim() == 2:
            # cholesky_inverse expects the factor, not the matrix itself
            chol = torch.cholesky(a, upper=False)
            return torch.cholesky_inverse(chol, upper=False, out=out)
        else:
            chol = torch.cholesky(a, upper=False)
            eye = torch.eye(a.shape[-2], **backend)
            return torch.cholesky_solve(eye, chol, upper=False, out=out)
    elif method.lower().startswith('svd'):
        u, s, v = torch.svd(a)
        s = s[..., None]
        return v.matmul(u.transpose(-1, -2) / s)
    elif method.lower().startswith('pinv'):
        return torch.pinverse(a, rcond=rcond)
    else:
        raise ValueError('Unknown inversion method {}.'.format(method))
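# Hedged usage sketch for the dispatcher above (the `utils.as_tensor` helper it
# relies on is assumed from its own package): 'chol' requires SPD input, while
# a rectangular input silently falls back to the pseudoinverse.
import torch

a = torch.randn(4, 4)
spd = a @ a.t() + torch.eye(4)
x1 = inv(spd, method='chol')               # Cholesky-based inverse, 4 x 4
x2 = inv(torch.randn(4, 6), method='lu')   # non-square: pinv is used, 6 x 4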
def enkf_cholesky(ens, model_out, obs, gamma, batch_s, ensemble_size):
    mo_mean = model_out.mean(0)
    Cpp = torch.einsum("ijk, ilk -> kjl", model_out - mo_mean,
                       model_out - mo_mean) / ensemble_size
    Cup = torch.einsum("ij, ilk -> kjl", ens - ens.mean(0),
                       model_out - mo_mean) / ensemble_size
    tmp = torch.empty_like(Cpp)
    loss = torch.empty(batch_s, ensemble_size, gamma.shape[0])
    for i in range(batch_s):
        # torch.cholesky_inverse expects a Cholesky factor, so factor first
        tmp[i] = torch.cholesky_inverse(torch.linalg.cholesky(Cpp[i] + gamma))
        loss[i] = obs[i] - model_out[:, :, i]
    # loss = (-1 * model_out + obs.reshape(gamma.shape[0], -1)
    #         ).reshape(-1, ensemble_size, gamma.shape[0])
    mm = torch.matmul(loss, tmp)
    # new_ens = torch.matmul(Cup, mm) + ens
    new_ens = torch.einsum('ijk, ilk -> lj', Cup, mm) + ens
    return new_ens
def init_weight(self, feats, labels):
    # initial weight_y is obtained by linear regression
    A = torch.mm(feats.t(), feats) + 1e-05 * torch.eye(feats.size(1))  # (feats, feats)
    labels_one_hot = torch.zeros((feats.size(0), self.n_classes))
    for i in range(labels.size(0)):
        l = labels[i]
        labels_one_hot[i, l] = 1
    # torch.cholesky_inverse expects a Cholesky factor, so factor A first
    A_inv = torch.cholesky_inverse(torch.linalg.cholesky(A))
    self.init_weight_y = nn.Parameter(
        torch.mm(torch.mm(A_inv, feats.t()), labels_one_hot),
        requires_grad=False)
    for layer in self.layers:
        gain = nn.init.calculate_gain('relu')
        torch.nn.init.xavier_uniform_(layer.weight, gain=gain)
    return
def gaussian_nll(y, mu, covar_matrix, dims=1, jitter=1e-4, verbose=False):
    """Compute -ve log-likelihood of data under a multivariate Gaussian.

    Args:
        y: Observed values.
        mu: Mean vector.
        covar_matrix: Covariance matrix of the Gaussian.
        dims: Dimensionality used in the normalizing constant.
        jitter: Noise added to the covariance diagonal for stability.
        verbose: If True, print the data-fit and complexity terms.
    """
    y, mu = expand_1d([y, mu])
    # avoid mutating the caller's tensor when adding jitter
    covar_matrix = covar_matrix + (torch.tensor(jitter) ** 2) * torch.eye(*covar_matrix.shape)
    L_covar = torch.cholesky(covar_matrix)
    inv_covar = torch.cholesky_inverse(L_covar)
    alpha = torch.mm(inv_covar, y - mu)
    data_fit_term = -0.5 * torch.mm((y - mu).T, alpha)
    # sum of the factor's log-diagonal is half the log-determinant
    complexity_term = -torch.sum(torch.diagonal(L_covar).log())
    if verbose:
        print("Data fit : ", data_fit_term)
        print("Complexity : ", complexity_term)
    return -1 * (data_fit_term + complexity_term - (dims / 2) * np.log(2 * np.pi))
def estimate_gaussian_thompson_sampling_probabilities_rbmc(
    m, L, n_samples, renormalize=True
):
    """Rao-Blackwellized (conditional) Monte Carlo estimates of each variable
    being the maximum value in a multivariate normal distribution."""
    # m is mean and L is lower triangular Cholesky factor of covariance matrix
    M = m.numel()
    dtype = m.dtype
    # samples
    x = L @ torch.randn(M, n_samples, dtype=dtype) + m.unsqueeze(1)
    x_max, inds = torch.max(x, dim=0)
    x[inds, range(n_samples)] = -np.inf
    x_2ndmax, inds_2nd = torch.max(x, dim=0)
    x[inds, range(n_samples)] = x_max
    x_max = x_max.expand_as(x).clone()
    x_max[inds, range(n_samples)] = x_2ndmax
    # conditional distributions
    # Lambda = torch.potri(L, upper=False).contiguous()
    # eta = torch.potrs(m, L, upper=False)
    Lambda = torch.cholesky_inverse(L, upper=False).contiguous()
    eta = torch.cholesky_solve(m.view(-1, 1), L, upper=False)
    # note: this is just Mx1 since variance does not depend on the conditioning
    s2_cond = 1.0 / Lambda.diag()
    # zero out diagonal to compute leave-one-variable-out sample quantities
    Lambda.view(-1)[0:(M * M):(M + 1)] = 0.0
    B = Lambda @ x
    # note: this is M x n_samples
    m_cond = s2_cond.unsqueeze(1) * (eta - B)
    z = (x_max - m_cond) / (2.0 * s2_cond).sqrt().unsqueeze(1)
    probs = 0.5 * (1.0 - torch.erf(z)).mean(dim=1)
    # this doesn't calculate coherent multivariate estimates, so they do not
    # sum to 1
    if renormalize:
        probs = probs / probs.sum()
    return probs
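# Hedged usage sketch: probabilities that each coordinate of a correlated
# Gaussian attains the maximum, estimated from 1000 conditional samples.
import torch

m = torch.tensor([0.0, 0.5, 1.0], dtype=torch.float64)
cov = 0.25 * torch.eye(3, dtype=torch.float64) + 0.05
L = torch.linalg.cholesky(cov)
probs = estimate_gaussian_thompson_sampling_probabilities_rbmc(m, L, n_samples=1000)
# probs sums to 1 after renormalization, with most mass on the last coordinate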
def compute_predictive_means_vars(self, test_data, training_data="stored",
                                  labels="stored", jitter=1e-4, to_np=True):
    """Compute predictive mean and variance over a set of test points.

    Args:
        test_data: Tensor of test points in the time series.
        training_data: Used to condition the GP; uses stored data if "stored".
        labels: Used to condition the GP; uses stored labels if "stored".
        jitter: Noise added to the covariance matrix diagonal for stability.
        to_np: Whether to cast the result to NumPy arrays.
    """
    if training_data == "stored":
        training_data = self.training_data  # use all provided
        labels = self.labels
    if training_data.nelement():
        covar_matrix = self.compute_covariance_matrix(training_data, jitter)
        condition_number = np.linalg.cond(covar_matrix.detach().cpu().numpy())
        if condition_number > 1e10:
            print(f"Condition number : {condition_number}")
        L_covar = torch.cholesky(covar_matrix)
        inv_covar = torch.cholesky_inverse(L_covar)
        KxX = self.covar_kernel(test_data, training_data)
        product1 = torch.mm(KxX, inv_covar)
        mu_array = torch.mv(product1, labels)
        product2 = torch.mm(product1, torch.transpose(KxX, 0, 1))
    else:
        product2 = 0  # no conditioning of the covariance matrix
        mu_array = torch.tensor([0])  # zero-mean prior
    auto_cov = self.covar_kernel(test_data, test_data)
    var_array = auto_cov - product2
    if not to_np:
        return mu_array, var_array
    return mu_array.detach().cpu().numpy(), var_array.detach().cpu().numpy()
def cholesky_demo():
    a = torch.randn(3, 3)
    a = torch.mm(a, a.t())  # make symmetric positive-definite
    l = torch.cholesky(a)
    print(a)
    print(l)
    print(torch.mm(l, l.t()))  # a = l * l^T

    a = torch.randn(3, 3)
    a = torch.mm(a, a.t()) + 1e-05 * torch.eye(3)  # make symmetric positive definite
    u = torch.cholesky(a)
    print(a)
    print(u)
    print(torch.cholesky_inverse(u))
    print(a.inverse())

    b = torch.randn(3, 2)
    print(b)
    print(torch.cholesky_solve(b, u))
    print(torch.mm(a.inverse(), b))
def config_train_data(self, index):
    labels = self.graph.label[index]
    # ==========================================================
    # initial weight_y is obtained by linear regression
    feat = self.cache.feat.to(self.device)
    labels = gf.astensor(labels, device=self.device)
    A = torch.mm(feat.t(), feat) + 1e-05 * torch.eye(feat.size(1), device=feat.device)
    labels_one_hot = feat.new_zeros(feat.size(0), self.graph.num_classes)
    labels_one_hot[torch.arange(labels.size(0)), labels] = 1
    # torch.cholesky_inverse expects a Cholesky factor, so factor A first
    A_inv = torch.cholesky_inverse(torch.linalg.cholesky(A))
    self.model.init_weight_y = torch.mm(torch.mm(A_inv, feat.t()), labels_one_hot)
    # ==========================================================
    sequence = FullBatchSequence([self.cache.feat, self.cache.g],
                                 labels,
                                 out_index=index,
                                 device=self.data_device,
                                 escape=type(self.cache.g))
    return sequence
def compute_pd_inverse(K, jitter=1e-5):
    """Compute the inverse of a positive-(semi)definite matrix K using Cholesky inversion."""
    n = K.shape[0]
    assert isinstance(jitter, float) or jitter.ndim == 0, 'only homoscedastic noise variance is allowed here!'
    is_successful = False
    fail_count = 0
    max_fail = 3
    while fail_count < max_fail and not is_successful:
        try:
            # escalate the jitter by a factor of 10 on each failed factorization
            jitter_diag = jitter * torch.eye(n, device=K.device) * 10 ** fail_count
            K_ = K + jitter_diag
            Kc = torch.cholesky(K_)
            is_successful = True
        except RuntimeError:
            fail_count += 1
    if not is_successful:
        print(K)
        raise RuntimeError("Gram matrix not positive definite despite jitter")
    logDetK = -2 * torch.sum(torch.log(torch.diag(Kc)))
    K_i = torch.cholesky_inverse(Kc)
    return K_i.float(), logDetK.float()
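# Hedged usage sketch: the loop above retries the factorization with the
# diagonal offset scaled by 10 ** fail_count, giving up after three attempts.
import torch

K = torch.randn(6, 6)
K = K @ K.t() + torch.eye(6)                 # well-conditioned SPD Gram matrix
K_i, logDetK = compute_pd_inverse(K, jitter=1e-5)
assert torch.allclose(K_i @ (K + 1e-5 * torch.eye(6)), torch.eye(6), atol=1e-3)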
def predict_decomposition(self, z, x, y, z_star, x_star, which_kernels):
    subset = ~torch.isnan(y).reshape(-1)
    if subset.sum() > 0:
        y = y[subset, :]
        z = z[subset, :]
        x = x[subset, :]
    Nstar = z_star.size()[0]
    y = y - self.intercept
    sigma2 = self.get_noise_var()
    K_all = self.get_K_without_noise(z, z, x, x, jitter=self.jitter) \
        + sigma2 * torch.eye(y.size()[0])
    L_all = torch.cholesky(K_all)
    K_all_inv = torch.cholesky_inverse(L_all)
    K_sf = self.get_K_without_noise(z_star, z, x_star, x, which_kernels=which_kernels)
    K_ss = self.get_K_without_noise(z_star, z_star, x_star, x_star, which_kernels=which_kernels)
    tmp = torch.mm(K_sf, K_all_inv)
    mean = self.intercept + torch.mm(tmp, y)
    var = torch.diag(K_ss - torch.mm(tmp, K_sf.t()))
    return mean.reshape(-1), var