def _compute(self, measure):
    """Compute and cache the quantities of the inducing-point approximation
    under `measure`.

    Populates `self._K_z_store`, `self._A_store`, `self._mu_store`, and
    `self._elbo_store`, all keyed by `id(measure)`.

    Args:
        measure: Measure from which the kernels and means of the processes
            are taken.
    """
    # Extract processes and inputs.
    p_x, x = self.fdd.p, self.fdd.x
    p_z, z = self.u.p, self.u.x

    # Construct the necessary kernel matrices.
    K_zx = measure.kernels[p_z, p_x](z, x)
    K_z = convert(measure.kernels[p_z](z), AbstractMatrix)
    self._K_z_store[id(measure)] = K_z

    # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
    # `MultiInput`, because `x` then still designates the particular components
    # of `f`. Fix that by instead designating the elements of `e`.
    if isinstance(x, MultiInput):
        x_n = MultiInput(*(e(fdd.x) for e, fdd in zip(self.e.kernel.ps, x.get())))
    else:
        x_n = x

    # Construct the noise kernel matrix.
    K_n = self.e.kernel(x_n)

    # The approximation can only handle diagonal noise matrices.
    if not isinstance(K_n, Diagonal):
        raise RuntimeError("Kernel matrix of noise must be diagonal.")

    # And construct the components for the inducing point approximation.
    L_z = B.cholesky(K_z)
    A = B.add(B.eye(K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
    self._A_store[id(measure)] = A
    # `y_bar` is the observations with the noise mean and prior mean removed.
    y_bar = uprank(self.y) - self.e.mean(x_n) - measure.means[p_x](x)
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Compute the optimal mean.
    mu = B.add(
        measure.means[p_z](z),
        B.iqf(A, B.solve(L_z, K_z), prod_y_bar),
    )
    self._mu_store[id(measure)] = mu

    # Compute the ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    # The rest, however, is completely generic.
    trace_part = B.ratio(
        Diagonal(measure.kernels[p_x].elwise(x)[:, 0])
        - Diagonal(B.iqf_diag(K_z, K_zx)),
        K_n,
    )
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
    self._elbo_store[id(measure)] = -0.5 * (trace_part + det_part + iqf_part)
def __init__(self, p: PromisedGP, x, noise):
    """Construct the finite-dimensional distribution of process `p` at
    inputs `x` with observation noise `noise`.

    Args:
        p (:class:`PromisedGP`): Process.
        x (input): Inputs.
        noise: Observation noise; converted to a matrix via
            `_noise_as_matrix`.
    """
    self.p = p
    self.x = x
    self.noise = _noise_as_matrix(noise, B.dtype(x), infer_size(p.kernel, x))

    def _var_diag():
        # Diagonal of the prior variance plus the diagonal of the noise.
        prior_diag = B.squeeze(p.kernel.elwise(x), axis=-1)
        return B.add(prior_diag, B.diag(self.noise))

    def _mean_var():
        # Jointly compute the mean and full variance, then add the noise.
        m, v = mlkernels.mean_var(p.mean, p.kernel, x)
        return m, B.add(v, self.noise)

    def _mean_var_diag():
        # Jointly compute the mean and the diagonal of the variance.
        m, v_diag = mlkernels.mean_var_diag(p.mean, p.kernel, x)
        return m, B.add(B.squeeze(v_diag, axis=-1), B.diag(self.noise))

    # Initialise lazily: the mean and variance are only computed on demand.
    Normal.__init__(
        self,
        lambda: p.mean(x),
        lambda: B.add(p.kernel(x), self.noise),
        var_diag=_var_diag,
        mean_var=_mean_var,
        mean_var_diag=_mean_var_diag,
    )
def _compute(self):
    """Compute and cache the quantities of the inducing-point approximation.

    Sets `self._K_z`, `self._A`, `self._mu`, and `self._elbo`.
    """
    # Extract processes.
    p_x, x = type_parameter(self.x), self.x.get()
    p_z, z = type_parameter(self.z), self.z.get()

    # Construct the necessary kernel matrices.
    K_zx = self.graph.kernels[p_z, p_x](z, x)
    self._K_z = convert(self.graph.kernels[p_z](z), AbstractMatrix)

    # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
    # `MultiInput`, because `x` then still designates the particular
    # components of `f`. Fix that by instead designating the elements of
    # `e`.
    if isinstance(x, MultiInput):
        x_n = MultiInput(*(p(xi.get()) for p, xi in zip(self.e.kernel.ps, x.get())))
    else:
        x_n = x

    # Construct the noise kernel matrix.
    K_n = self.e.kernel(x_n)

    # The approximation can only handle diagonal noise matrices.
    if not isinstance(K_n, Diagonal):
        raise RuntimeError('Kernel matrix of noise must be diagonal.')

    # And construct the components for the inducing point approximation.
    L_z = B.cholesky(self._K_z)
    self._A = B.add(B.eye(self._K_z),
                    B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
    # `y_bar` is the observations with the noise mean and prior mean removed.
    y_bar = uprank(self.y) - self.e.mean(x_n) - self.graph.means[p_x](x)
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Compute the optimal mean.
    self._mu = B.add(self.graph.means[p_z](z),
                     B.iqf(self._A, B.solve(L_z, self._K_z), prod_y_bar))

    # Compute the ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    # The rest, however, is completely generic.
    trace_part = B.ratio(Diagonal(self.graph.kernels[p_x].elwise(x)[:, 0]) -
                         Diagonal(B.iqf_diag(self._K_z, K_zx)),
                         K_n)
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(self._A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(self._A, prod_y_bar)[0, 0]
    self._elbo = -0.5 * (trace_part + det_part + iqf_part)
def _compute(self, measure):
    """Compute and cache the quantities of the inducing-point approximation
    under `measure`.

    Populates `self._K_z_store`, `self._A_store`, `self._mu_store`, and
    `self._elbo_store`, all keyed by `id(measure)`.

    Args:
        measure: Measure from which the kernels and means of the processes
            are taken.
    """
    # Extract processes and inputs.
    p_x, x, noise_x = self.fdd.p, self.fdd.x, self.fdd.noise
    p_z, z, noise_z = self.u.p, self.u.x, self.u.noise

    # Construct the necessary kernel matrices. The noise of the inducing
    # points is absorbed into the kernel matrix of the inducing points.
    K_zx = measure.kernels[p_z, p_x](z, x)
    K_z = B.add(measure.kernels[p_z](z), noise_z)
    self._K_z_store[id(measure)] = K_z

    # Noise kernel matrix:
    K_n = noise_x

    # The approximation can only handle diagonal noise matrices.
    if not isinstance(K_n, Diagonal):
        raise RuntimeError(
            f"Kernel matrix of observation noise must be diagonal, "
            f'not "{type(K_n).__name__}".'
        )

    # And construct the components for the inducing point approximation.
    L_z = B.cholesky(K_z)
    A = B.add(B.eye(K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
    self._A_store[id(measure)] = A
    # `y_bar` is the observations with the prior mean removed.
    y_bar = B.subtract(B.uprank(self.y), measure.means[p_x](x))
    prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

    # Compute the optimal mean.
    mu = B.add(
        measure.means[p_z](z),
        B.iqf(A, B.solve(L_z, K_z), prod_y_bar),
    )
    self._mu_store[id(measure)] = mu

    # Compute the ELBO.
    # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
    # The rest, however, is completely generic.
    trace_part = B.ratio(
        Diagonal(measure.kernels[p_x].elwise(x)[:, 0])
        - Diagonal(B.iqf_diag(K_z, K_zx)),
        K_n,
    )
    det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
    iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
    self._elbo_store[id(measure)] = -0.5 * (trace_part + det_part + iqf_part)
def sample(self, num=1, noise=None):
    """Draw samples from the distribution.

    Args:
        num (int): Number of samples to draw. Defaults to `1`.
        noise (scalar, optional): Variance of noise to add to the
            samples. Must be positive.

    Returns:
        tensor: Samples as rank 2 column vectors.
    """
    variance = self.var

    # Incorporate the optional extra noise on the diagonal of the variance.
    if noise is not None:
        variance = B.add(variance, B.fill_diag(noise, self.dim))

    # Draw from the zero-mean distribution, then shift by the mean unless
    # the mean is known to be zero.
    draws = B.sample(variance, num=num)
    if not self.mean_is_zero:
        draws = B.add(draws, self.mean)

    return B.dense(draws)
def elwise(self, x, y, B):
    """Evaluate the kernel element-wise at inputs `x` and `y`.

    Args:
        x (input): First input.
        y (input): Second input.
        B: Backend providing the array operations.

    Returns:
        tensor: Element-wise kernel values.
    """
    numerator = self._compute_beta_raised(B)
    shifted_sums = B.ew_sums(B.add(x, self.beta), y)
    return B.divide(numerator, B.power(shifted_sums, self.alpha))
def __add__(self, other: "Normal"):
    """Add another normal distribution: the means and variances add.

    Args:
        other (:class:`Normal`): Normal to add.

    Returns:
        :class:`Normal`: Sum of the two normals.
    """
    summed_mean = B.add(self.mean, other.mean)
    summed_var = B.add(self.var, other.var)
    return Normal(summed_mean, summed_var)
def __add__(self, other: B.Numeric):
    """Shift the distribution by a constant: only the mean changes.

    Args:
        other (numeric): Constant to add.

    Returns:
        :class:`Normal`: Shifted normal.
    """
    shifted_mean = B.add(self.mean, other)
    return Normal(shifted_mean, self.var)
def __call__(self, x, y):
    """Evaluate the kernel pairwise at inputs `x` and `y`.

    Args:
        x (input): First input.
        y (input): Second input.

    Returns:
        tensor: Pairwise kernel matrix.
    """
    beta_raised = self._compute_beta_raised()
    shifted_sums = B.pw_sums(B.add(x, self.beta), y)
    return B.divide(beta_raised, B.power(shifted_sums, self.alpha))
def __call__(self, x, y):
    """Evaluate the kernel pairwise at inputs `x` and `y`.

    Args:
        x (input): First input.
        y (input): Second input.

    Returns:
        :class:`Dense`: Pairwise kernel matrix as a dense matrix.
    """
    denominator = B.power(B.pw_sums(B.add(x, self.beta), y), self.alpha)
    result = B.divide(self._compute_beta_raised(), denominator)
    return Dense(result)
def __call__(self, x):
    """Evaluate the posterior mean at inputs `x`.

    Args:
        x (input): Inputs.

    Returns:
        tensor: Prior mean at `x` corrected by the quadratic form of the
            residual at the inducing inputs `z`.
    """
    residual = B.subtract(self.y, self.m_z(self.z))
    correction = B.qf(self.K_z, self.k_zi(self.z, x), residual)
    return B.add(self.m_i(x), correction)
def subtract(a, b):
    """Subtract `b` from `a`, implemented as addition of the negation.

    Args:
        a: Minuend.
        b: Subtrahend.

    Returns:
        `a` minus `b`.
    """
    negated_b = -b
    return B.add(a, negated_b)
def mean_var_diag():
    # Jointly compute the prior mean and the diagonal of the prior
    # variance, then add the diagonal of the noise.
    # NOTE(review): relies on `p`, `x`, and `self` from the enclosing
    # scope — confirm this stays nested in the constructor.
    m, v_diag = mlkernels.mean_var_diag(p.mean, p.kernel, x)
    noisy_diag = B.add(B.squeeze(v_diag, axis=-1), B.diag(self.noise))
    return m, noisy_diag
def mean_var():
    # Jointly compute the prior mean and full prior variance, then add
    # the noise matrix to the variance.
    # NOTE(review): relies on `p`, `x`, and `self` from the enclosing
    # scope — confirm this stays nested in the constructor.
    m, v = mlkernels.mean_var(p.mean, p.kernel, x)
    return m, B.add(v, self.noise)
def var_diag():
    # Diagonal of the prior variance plus the diagonal of the noise.
    # NOTE(review): relies on `p`, `x`, and `self` from the enclosing
    # scope — confirm this stays nested in the constructor.
    prior_diag = B.squeeze(p.kernel.elwise(x), axis=-1)
    return B.add(prior_diag, B.diag(self.noise))
def __call__(self, x, y, B):
    """Evaluate the sum of the two constituents pairwise at `x` and `y`.

    Args:
        x (input): First input.
        y (input): Second input.
        B: Backend providing the array operations.

    Returns:
        tensor: Sum of the pairwise evaluations of both elements.
    """
    left = self[0](x, y, B)
    right = self[1](x, y, B)
    return B.add(left, right)
def elwise(self, x, y, B):
    """Evaluate the sum of the two constituents element-wise at `x` and `y`.

    Args:
        x (input): First input.
        y (input): Second input.
        B: Backend providing the array operations.

    Returns:
        tensor: Sum of the element-wise evaluations of both elements.
    """
    first = self[0].elwise(x, y, B)
    second = self[1].elwise(x, y, B)
    return B.add(first, second)
def matmul(a, b, tr_a=False, tr_b=False):
    """Multiply `a` by a Woodbury matrix `b` by distributing over its
    low-rank and diagonal parts.

    Prioritise expanding out the Woodbury matrix. This method gets even
    higher precedence so that the ambiguity of two Woodbury operands is
    resolved.

    Args:
        a (matrix): Left operand.
        b (matrix): Right operand; a Woodbury matrix with `lr` and `diag`
            components.
        tr_a (bool, optional): Transpose `a`. Defaults to `False`.
        tr_b (bool, optional): Transpose `b`. Defaults to `False`.

    Returns:
        matrix: Product of `a` and `b`.
    """
    lr_part = B.matmul(a, b.lr, tr_a=tr_a, tr_b=tr_b)
    diag_part = B.matmul(a, b.diag, tr_a=tr_a, tr_b=tr_b)
    return B.add(lr_part, diag_part)
def __call__(self, x):
    """Evaluate the sum of the two constituents at `x`.

    Args:
        x (input): Input.

    Returns:
        Sum of the evaluations of both elements at `x`.
    """
    first = self[0](x)
    second = self[1](x)
    return B.add(first, second)
def matmul(a, b, tr_a=False, tr_b=False):
    """Multiply a Woodbury matrix `a` by `b` by distributing over its
    low-rank and diagonal parts.

    Prioritise expanding out the Woodbury matrix.

    Args:
        a (matrix): Left operand; a Woodbury matrix with `lr` and `diag`
            components.
        b (matrix): Right operand.
        tr_a (bool, optional): Transpose `a`. Defaults to `False`.
        tr_b (bool, optional): Transpose `b`. Defaults to `False`.

    Returns:
        matrix: Product of `a` and `b`.
    """
    lr_part = B.matmul(a.lr, b, tr_a=tr_a, tr_b=tr_b)
    diag_part = B.matmul(a.diag, b, tr_a=tr_a, tr_b=tr_b)
    return B.add(lr_part, diag_part)