def forward(self, x, i, xx=None, ii=None):
    """Task-dependent RBF covariance between (x, i) and (xx, ii).

    Pairs whose task indices differ get zero covariance, so the result
    is block-structured by task. When xx/ii are omitted, the
    self-covariance of (x, i) is computed.
    """
    length = positive(self._scale)
    amplitude = positive(self._variance)
    # Default to the self-covariance case.
    if xx is None:
        xx, ii = x, i
    # Pairwise task-index grids for the left and right arguments.
    left, right = torch.broadcast_tensors(i[:, None], ii[None])
    diff = (x[:, None] - xx[None]) / length[left, right]
    k = torch.exp(-(diff**2).sum(dim=-1) / 2) * amplitude[left, right]
    # Zero out cross-task entries.
    k[left != right] = 0.0
    return k
def cov_matrix(self, x, xx, d_dtheta=None, wrt=0, sumd=True):
    """Covariance matrix K(x, xx), optionally contracted with a derivative.

    It returns K if d_dtheta=None.  If d_dtheta is given it means that
    some derivative should be calculated; "theta" could be x, xx, or
    something else.  If d_dtheta = ones_like(x) and wrt=0
    (ones_like(xx) and wrt=1) it returns dK/dx (dK/dxx).  If d_dtheta
    has a dimensionality greater than 2 it holds the derivative of x
    (or xx, depending on wrt) with respect to something else; it then
    returns dK/dtheta.  It will sum over the features dimension if
    sumd=True.
    """
    assert wrt == 0 or wrt == 1
    scale = positive(self._scale)
    # pairwise differences scaled per feature
    # (presumably x: (n, features), xx: (m, features) — TODO confirm)
    r = (x[:, None, ] - xx[None, ]) / scale
    cov = (-(r**2).sum(dim=-1)/2).exp() * self.diag()
    if d_dtheta is not None:
        # the derivative of the RBF carries an extra 1/scale (chain rule)
        r = r / scale
        # append trailing singleton dims so r and cov broadcast against
        # the extra dimensions of d_dtheta
        for _ in range(d_dtheta.dim()-2):
            r = torch.unsqueeze(r, -1)
            cov = torch.unsqueeze(cov, -1)
        # sign depends on which argument is differentiated:
        # -(-1)**wrt is -1 for wrt=0 (wrt x) and +1 for wrt=1 (wrt xx)
        _derivative = (r*torch.unsqueeze(d_dtheta, dim=1-wrt)
                       ) * (-(-1)**(wrt))
        if sumd:
            # contract over the features dimension
            cov = cov * _derivative.sum(dim=2)
        else:
            cov = torch.unsqueeze(cov, dim=2) * _derivative
    return cov
def diag_derivatives(self, d_dtheta):
    """Deprecated: sum of squared, scale-normalized derivatives times diag()."""
    warnings.warn(
        'diag_derivatives in RBF is deprecated! It may be incorrect!')
    ell = positive(self._scale)
    # Add trailing singleton dims so the scale broadcasts with d_dtheta.
    for _ in range(d_dtheta.dim() - 2):
        ell = ell[..., None]
    return ((d_dtheta / ell) ** 2).sum() * self.diag()
def matrices(self, x, xx, d_dx=False, d_dxx=False, d_dxdxx=False):
    """RBF covariance matrix and, optionally, its derivatives.

    Returns ``(cov, dK/dx, dK/dxx, d2K/dxdxx)``; derivative slots stay
    ``None`` unless the corresponding flag is set.
    """
    # covariance matrix
    iscale = 1.0/positive(self._scale)
    r = (x[:, None, ] - xx[None, ]) * iscale
    cov = (-(r**2).sum(dim=-1)/2).exp() * self.diag()
    # derivatives
    _dx = _dxx = _dxdxx = None
    if d_dx or d_dxx or d_dxdxx:
        # extra 1/scale factor from the chain rule
        rr = (r*iscale)
        if d_dx or d_dxx:
            cov_r = cov[..., None] * rr
            if d_dx:
                _dx = -cov_r
            if d_dxx:
                _dxx = cov_r
        if d_dxdxx:
            rirj = -rr[..., None] * rr[..., None, :]
            # BUGFIX: torch.arange already returns an int64 index tensor;
            # wrapping it in the legacy torch.LongTensor(...) constructor is
            # deprecated and warns/fails on modern torch.
            d = torch.arange(iscale.size(0))
            # add the diagonal contribution d^2/dx_i dxx_i
            rirj[..., d, d] = rirj[..., d, d] + iscale**2
            _dxdxx = rirj * cov[..., None, None]
    return cov, _dx, _dxx, _dxdxx
def evaluate(self):
    """Precompute posterior weights (_mu), inducing values (u) and the
    covariance correction (_sig); marks the model ready when done.
    """
    ZZ, ZX, _ = self.covariances()
    XZ = ZX.t()
    noise = positive(self._noise)
    # numerically stable calculation of _mu: solve the augmented
    # least-squares system [XZ; noise*L^T] mu = [Y; 0] via QR instead of
    # forming the normal equations
    L, ridge = jitcholesky(ZZ, jitbase=2)
    A = torch.cat((XZ, noise * L.t()))
    Y = torch.cat((self.Y, torch.zeros(self.Z.size()[0], dtype=self.Y.dtype)))
    Q, R = torch.qr(A)
    self._mu = torch.mv(R.inverse(), torch.mv(Q.t(), Y))
    # inducing function values (Z, u)
    self.u = self.mean + torch.mv(ZZ, self._mu)
    # covariance ------------------------------ TODO: this is slightly ugly!
    # (L L^T)^-1 = ZZ^-1 via the jittered Cholesky factor
    ZZ_i = torch.mm(L.t().inverse(), L.inverse())
    SIGMA = ZZ + torch.mm(XZ.t(), XZ) / noise**2
    # SIGMA_i = SIGMA.inverse()
    # QR-based inverse instead of a direct .inverse() call
    Q, R = torch.qr(SIGMA)
    SIGMA_i = torch.mm(R.inverse(), Q.t())
    self._sig = SIGMA_i - ZZ_i
    # ------------------------------------------------------------------------
    self.ready = 1
def forward(self):
    """Training loss: low-rank Gaussian negative log-likelihood plus the
    variational trace correction term."""
    # covariances
    ZZ, ZX, tr = self.covariances()
    sigma = positive(self._noise)
    # trace term: 0.5 * tr(K_xx - Q_xx) / noise^2
    factor, _, _ridge = low_rank_factor(ZZ, ZX)
    correction = 0.5 * (tr - torch.einsum('ij,ij', factor, factor)) / sigma**2
    # low rank MVN likelihood
    dist = LowRankMultivariateNormal(
        self.zeros, factor.t(), self.ones * sigma**2)
    return correction - dist.log_prob(self.Y)
def forward(self):
    """Training loss: low-rank Gaussian negative log-likelihood plus the
    variational trace correction, built from self.matrices()."""
    # covariances
    ZZ, ZX, diag, Y = self.matrices()
    sigma = positive(self._noise)
    # trace term: 0.5 * tr(K_xx - Q_xx) / noise^2
    factor, _, _ridge = low_rank_factor(ZZ, ZX)
    correction = 0.5 * (diag.sum() -
                        torch.einsum('ij,ij', factor, factor)) / sigma**2
    # low rank MVN likelihood
    dist = LowRankMultivariateNormal(torch.zeros_like(Y), factor.t(),
                                     torch.ones_like(Y) * sigma**2)
    return correction - dist.log_prob(Y)
def evaluate(self):
    """Precompute the posterior weight vector and the inducing values,
    then mark the model ready."""
    ZZ, ZX, _, Y = self.matrices()
    sigma = positive(self._noise)
    # Numerically stable _mu: solve the augmented least-squares system
    # [ZX^T; noise*L^T] mu = [Y; 0] via QR.
    chol, _ridge = jitcholesky(ZZ, jitbase=2)
    design = torch.cat((ZX.t(), sigma * chol.t()))
    padded = torch.cat((Y, torch.zeros(self.Z.size(0), dtype=Y.dtype)))
    q, r = torch.qr(design)
    self._mu = torch.mv(r.inverse(), torch.mv(q.t(), padded))
    # inducing function values (Z, u)
    self.u = torch.mv(ZZ, self._mu)
    # TODO: predicted covariance
    self.ready = 1
def matrices(self, x, xx, d_dx=False, d_dxx=False, d_dxdxx=False):
    """Covariance of the form diag() / (1 + |r|^2) (rational-quadratic
    style) and, on request, its first/second derivatives wrt x and xx.

    Returns ``(cov, dK/dx, dK/dxx, d2K/dxdxx)``; derivative slots stay
    ``None`` unless the corresponding flag is set.
    """
    # covariance matrix
    scale = positive(self._scale)
    # pairwise differences scaled per feature
    r = (x[:, None, ]-xx[None, ])/scale
    cov = 1.0/(1.0+(r**2).sum(dim=-1))
    # derivatives
    _dx = _dxx = _dxdxx = None
    if d_dx or d_dxx:
        # dK/dx = -2 r K^2 / scale; antisymmetric under x <-> xx
        a = (-2*r*cov[..., None]**2/scale) * self.diag()
        if d_dx:
            _dx = a
        if d_dxx:
            _dxx = -a
    if d_dxdxx:
        # mixed second derivative; the eye() term is the diagonal
        # d^2/dx_i dxx_i contribution
        _dxdxx = -(8*r[..., None, :]*r[..., None]*cov[..., None, None]
                   - 2*torch.eye(scale.size(0))
                   )*cov[..., None, None]**2/(scale[None, ]*scale[:, None]) * self.diag()
    # apply the variance amplitude to the covariance last
    cov = self.diag()*cov
    return cov, _dx, _dxx, _dxdxx
def extra_repr(self):
    """Return the RBF hyper-parameters as a string for the module repr.

    BUGFIX: ``extra_repr`` is the hook torch.nn.Module calls to build the
    module's repr and it must *return* a string; the original printed the
    text (a side effect at repr time) and returned None, so nothing showed
    up in the repr. (Assumes this class is an nn.Module — the hook name
    and the ``forward`` method indicate so; confirm against the class
    header.)
    """
    return '\nRBF parameters: \nscale: {}\nvariance: {}\n'.format(
        positive(self._scale).data, positive(self._variance).data)
def diag(self):
    """The kernel's diagonal amplitude: the positive-transformed variance."""
    amplitude = positive(self._variance)
    return amplitude
def noise(self):
    """Positive noise level; 0. when no noise parameter is configured."""
    if self._noise is None:
        return 0.
    return positive(self._noise)
def signal(self):
    """Positive signal amplitude; defaults to 1. when unset."""
    if self._signal is None:
        return 1.
    return positive(self._signal)
def scales(self):
    """Positive length scales derived from the raw parameter."""
    raw = self._scales
    return positive(raw)
def beta(self):
    """Positive beta value derived from the raw parameter."""
    raw = self._beta
    return positive(raw)
def extra_repr(self):
    """Return the SGPR settings as a string for the module repr.

    BUGFIX: ``extra_repr`` is the hook torch.nn.Module calls to build the
    module's repr and it must *return* a string; the original printed two
    lines (a side effect at repr time) and returned None, so nothing
    showed up in the repr. The two printed lines are joined with the
    newline print would have emitted.
    """
    return ('\nSGPR:\nnoise: {}\n'.format(positive(self._noise)) +
            'mean function used: constant {}\n'.format(self.mean))