Example #1
    def predict(self, xc, yc, xt, num_samples=None, return_base=False):
        with torch.no_grad():
            # bootstrap the context by sampling it with replacement
            bxc, byc = SWR(xc, yc, num_samples=num_samples)
            sxc, syc = stack(xc, num_samples), stack(yc, num_samples)

            # decode the bootstrapped context back onto the original context points
            encoded = self.encode(bxc, byc, sxc)
            py_res = self.dec(encoded, sxc)

            # resample the standardized residuals and recenter them
            mu, sigma = py_res.mean, py_res.scale
            res = SWR((syc - mu) / sigma).detach()
            res = res - res.mean(-2, keepdim=True)

            # build the pseudo-context: original inputs with residual-perturbed outputs
            bxc = sxc
            byc = mu + sigma * res

        # base path: encode the original context for the target inputs
        encoded_base = self.encode(xc, yc, xt)

        sxt = stack(xt, num_samples)
        encoded_bs = self.encode(bxc, byc, sxt)

        # bootstrap-aware prediction: base encoding plus the bootstrapped context
        py = self.dec(stack(encoded_base, num_samples),
                      sxt, ctx=encoded_bs)

        if self.training or return_base:
            py_base = self.dec(encoded_base, xt)
            return py_base, py
        else:
            return py
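The helpers SWR and stack are not shown in this example. What follows is a minimal sketch of what they are assumed to do here: stack tiles a tensor along a new leading sample dimension, and SWR bootstrap-resamples the points along dim -2 with replacement, one independent draw per sample. The bodies below are an illustration of that assumption, not the project's actual implementation.

import torch

def stack(x, num_samples=None, dim=0):
    # assumed helper: tile x along a new dimension (identity when num_samples is None)
    return x if num_samples is None else torch.stack([x] * num_samples, dim=dim)

def SWR(*tensors, num_samples=None):
    # assumed helper: resample the points (dim -2) with replacement; when num_samples
    # is given, each of the num_samples copies gets its own independent resampling
    ref = stack(tensors[0], num_samples)
    idx = torch.randint(ref.shape[-2], size=ref.shape[:-1], device=ref.device)
    out = []
    for x in tensors:
        sx = stack(x, num_samples)
        index = idx.unsqueeze(-1).expand(*idx.shape, sx.shape[-1])
        out.append(torch.gather(sx, -2, index))
    return out[0] if len(out) == 1 else tuple(out)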
Example #2
    def predict(self, xc, yc, xt, z=None, num_samples=None):
        # deterministic path: one encoding per target point
        theta = stack(self.denc(xc, yc, xt), num_samples)
        # latent path: sample z from the context-conditioned prior if it is not given
        if z is None:
            pz = self.lenc(xc, yc)
            z = pz.rsample() if num_samples is None \
                    else pz.rsample([num_samples])
        # tile z across the target points and concatenate it with the deterministic path
        z = stack(z, xt.shape[-2], -2)
        encoded = torch.cat([theta, z], -1)
        return self.dec(encoded, stack(xt, num_samples))
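In the example above, stack(z, xt.shape[-2], -2) is assumed to repeat the latent sample once per target point so it can be concatenated with the point-wise deterministic encoding theta. A hypothetical shape walk-through with made-up sizes (B tasks, Nt target points, Dh deterministic width, Dz latent width):

import torch

B, Nt, Dh, Dz = 16, 50, 128, 128           # hypothetical sizes
theta = torch.randn(B, Nt, Dh)             # stand-in for self.denc(xc, yc, xt)
z = torch.randn(B, Dz)                     # one latent sample per task

z_tiled = torch.stack([z] * Nt, dim=-2)    # what stack(z, xt.shape[-2], -2) is assumed to do
encoded = torch.cat([theta, z_tiled], -1)  # [B, Nt, Dh + Dz], fed to self.dec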
Example #3
    def forward(self, batch, num_samples=None, reduce_ll=True):
        outs = AttrDict()
        if self.training:
            pz = self.lenc(batch.xc, batch.yc)
            qz = self.lenc(batch.x, batch.y)
            z = qz.rsample() if num_samples is None else \
                    qz.rsample([num_samples])
            py = self.predict(batch.xc,
                              batch.yc,
                              batch.x,
                              z=z,
                              num_samples=num_samples)

            if num_samples is not None and num_samples > 1:
                # multi-sample importance-weighted bound
                # K * B * N
                recon = py.log_prob(stack(batch.y, num_samples)).sum(-1)
                # K * B
                log_qz = qz.log_prob(z).sum(-1)
                log_pz = pz.log_prob(z).sum(-1)

                # K * B
                log_w = recon.sum(-1) + log_pz - log_qz

                outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2]
            else:
                # single latent sample: standard ELBO with analytic KL
                outs.recon = py.log_prob(batch.y).sum(-1).mean()
                outs.kld = kl_divergence(qz, pz).sum(-1).mean()
                outs.loss = -outs.recon + outs.kld / batch.x.shape[-2]

        else:
            py = self.predict(batch.xc,
                              batch.yc,
                              batch.x,
                              num_samples=num_samples)
            if num_samples is None:
                ll = py.log_prob(batch.y).sum(-1)
            else:
                y = torch.stack([batch.y] * num_samples)
                if reduce_ll:
                    ll = logmeanexp(py.log_prob(y).sum(-1))
                else:
                    ll = py.log_prob(y).sum(-1)
            # batch.x lists the context points first, followed by the targets
            num_ctx = batch.xc.shape[-2]

            if reduce_ll:
                outs.ctx_ll = ll[..., :num_ctx].mean()
                outs.tar_ll = ll[..., num_ctx:].mean()
            else:
                outs.ctx_ll = ll[..., :num_ctx]
                outs.tar_ll = ll[..., num_ctx:]

        return outs
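The multi-sample training branch computes an importance-weighted bound: per-sample log-weights (reconstruction log-likelihood summed over points, plus log p(z), minus log q(z)) are averaged in log-space by logmeanexp. That helper is not shown; a minimal sketch, assuming it reduces over the leading sample dimension:

import math
import torch

def logmeanexp(x, dim=0):
    # assumed helper: log of the mean of exp(x) over the sample dimension,
    # computed stably via logsumexp
    return torch.logsumexp(x, dim=dim) - math.log(x.shape[dim])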
Example #4
    def encode(self, xc, yc, xt, mask=None):
        # concatenate the outputs of the two context encoders and broadcast
        # the result across the target points
        encoded = torch.cat(
            [self.enc1(xc, yc, mask=mask),
             self.enc2(xc, yc, mask=mask)], -1)
        return stack(encoded, xt.shape[-2], -2)
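Here the two context encoders appear to produce a single pooled vector per task; the concatenated result is then broadcast to every target point by stack(encoded, xt.shape[-2], -2). A hedged shape sketch with made-up sizes:

import torch

B, Nt, D1, D2 = 16, 50, 128, 128           # hypothetical sizes
r1 = torch.randn(B, D1)                    # stand-in for self.enc1(xc, yc, mask=mask)
r2 = torch.randn(B, D2)                    # stand-in for self.enc2(xc, yc, mask=mask)
encoded = torch.cat([r1, r2], -1)          # [B, D1 + D2]

# what stack(encoded, xt.shape[-2], -2) is assumed to do: copy the pooled
# representation once per target point so the decoder sees it at every location
out = torch.stack([encoded] * Nt, dim=-2)  # [B, Nt, D1 + D2]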