def U4(Z, small=False):
    """Unnormalized log-density of a 4-component 2D Gaussian mixture.

    The components sit on the axes at distance 2 (small=True, tight
    variance 0.2) or 5 (small=False, variance 1.5) from the origin, with
    mixing weights (0.1, 0.3, 0.4, 0.2).  `Z` is first doubled, so the
    effective support is half the stated radius.  Returns a 1D tensor of
    per-row log-densities shifted by the constant +2.5.
    """
    Z = Z * 2
    if small:
        centers = [[-2., 0.], [2., 0.], [0., 2.], [0., -2.]]
        spread = 0.2
    else:
        centers = [[-5., 0.], [5., 0.], [0., 5.], [0., -5.]]
        spread = 1.5
    mean = varify(np.array(centers, dtype='float32'))
    lv = Variable(np.log(torch.ones(1) * spread))
    if cuda:  # module-level flag -- move everything onto the GPU
        mean = mean.cuda()
        lv = lv.cuda()
    weights = (0.1, 0.3, 0.4, 0.2)
    # Per-component joint log-density plus log mixing weight.
    comps = [log_normal(Z, mean[None, k, :], lv).sum(1) + np.log(w)
             for k, w in enumerate(weights)]
    stacked = torch.cat([c[:, None] for c in comps], 1)
    return logsumexp(stacked, 1) + 2.5
# NOTE(review): this chunk opens mid-statement -- the list below is the tail
# of a transforms pipeline (e.g. transforms.Compose([...])) whose opening
# bracket is above this excerpt.
    [transforms.ToTensor(), transforms_.binarize()])

# Binarized-MNIST training loader; `droot` and `batch_size` are defined
# above this excerpt.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        droot + '/mnist', download=True, train=True,
        transform=ds_transforms),
    batch_size=batch_size, shuffle=True)

# Discrete (Bernoulli-latent) autoencoder parts: 784-pixel images, `zdim`
# binary latents.  Exact semantics of BinaryLinear/BinaryPrior live in the
# `aes` module -- TODO confirm.
enc = aes.BinaryLinear(784, zdim)
dec = aes.BinaryLinear(zdim, 784)
# Input-independent baseline for the REINFORCE estimator, initialized very
# negative so it starts effectively inactive.
prior = aes.BinaryPrior(zdim)
iib = nn.parameter.Parameter(torch.zeros(1) - 200)

# Two optimizers: generative side (decoder + prior + baseline) and inference
# side (encoder).  Learning rates are divided by the Monte-Carlo sample
# count `nmc` so the summed per-sample gradients average out.
optim1 = optim.Adam(chain(dec.parameters(), prior.parameters(), [iib]),
                    lr=lr1 / float(nmc))
optim2 = optim.Adam(chain(enc.parameters()), lr=lr2 / float(nmc))
zero = utils.varify(np.zeros(1).astype('float32'))


def ELBO(x):
    """Single-sample ELBO estimate for a batch `x`.

    Returns (elbo, qz_x) where elbo = log p(x|z) + log p(z) - log q(z|x)
    and qz_x is the (non-detached) encoder log-probability, needed by the
    score-function (REINFORCE) gradient downstream.
    """
    z = enc.sample(x)
    px_z = dec.evaluate(z, x)
    qz_x = enc.evaluate(x, z)
    pz = prior.evaluate(z)
    # q(z|x) is detached here on purpose: its pathwise gradient is not
    # valid for discrete z, so the encoder gradient comes from REINFORCE.
    elbo = px_z + pz - qz_x.detach()
    return elbo, qz_x


def get_grad(x, multiply=1):
    # Replicates the batch `multiply` times for a multi-sample estimate,
    # applies the REINFORCE gradient with the input-independent baseline,
    # and computes the importance-weighted lower bound per datapoint.
    # NOTE(review): this definition is truncated -- it continues past the
    # end of this excerpt.
    n = x.size(0)
    x = x.repeat([multiply, 1])
    elbo, q = ELBO(x)
    reinforce(elbo, q, idb=None, iib=iib)
    iwlb = utils.log_mean_exp(elbo.view(multiply, n).permute(1, 0), 1)
# NOTE(review): chunk opens mid-method -- the epoch/iteration loop headers
# and the loss computation are above this excerpt, so the indentation below
# is reconstructed and may not match the original nesting exactly.
loss.backward()
self.optim.step()
if ((it + 1) % 10) == 0:
    # Python 2 print statement (file dialect).  len(dataset) // 32 is the
    # number of minibatches per epoch (batch size 32 hard-coded here).
    print 'Epoch: [%2d] [%4d/%4d] loss: %.8f' % \
        (epoch+1, it+1, self.data_loader.dataset.__len__() // 32, loss.data[0])
# Presumably re-shuffles the autoregressive input ordering each step
# (order-agnostic training) -- confirm against mdl.randomize().
self.mdl.randomize()

mdl = model()
mdl.train()

# Ancestral sampling from the trained autoregressive model: start from
# noise and resample one input dimension per pass, visiting dimensions in
# the order given by the connectivity ranks `rx`.
spl = utils.varify(np.random.randn(64,784).astype('float32'))
ranks = (mdl.mdl.rx)
ind = np.argsort(ranks)
for i in range(784):
    out = mdl.mdl(spl)
    # Resample the i-th dimension (in rank order) from its Bernoulli
    # conditional; earlier dimensions are already fixed.
    spl[:,ind[i]] = torch.bernoulli(nn_.sigmoid(out[:,ind[i]]))
# Show the mean image (sigmoid of logits) for sample #56 of the batch.
plt.imshow(nn_.sigmoid(out[56]).data.numpy().reshape(28,28), cmap='gray')
out = nn_.sigmoid(self.mdl((x, 0))[0]).permute(0, 3, 1, 2) loss = utils.bceloss(out, x).sum(1).sum(1).sum(1).mean() loss.backward() self.optim.step() if ((it + 1) % 100) == 0: print 'Epoch: [%2d] [%4d/%4d] loss: %.8f' % \ (epoch+1, it+1, self.data_loader.dataset.__len__() // 32, loss.data[0]) mdl = model() mdl.train() n = 16 spl = utils.varify(np.random.randn(n, 1, 28, 28).astype('float32')) spl.volative = True mdl.mdl = mdl.mdl.eval() for i in range(0, 28): for j in range(28): out, _ = mdl.mdl((spl, 0)) out = out.permute(0, 3, 1, 2) proba = nn_.sigmoid(out[:, 0, i, j]) spl.data[:, 0, i, j] = torch.bernoulli(proba).data #unif = torch.zeros_like(proba) #unif.data.uniform_(0,1) #spl[:,0,i,j] = torch.ge(proba,unif).float() #plt.imshow(nn_.sigmoid(out[3,0]).data.numpy().reshape(28,28), cmap='gray')
if __name__ == '__main__':
    # Toy 1D regression for a Bayesian neural network:
    # y = x^3 + Gaussian noise, x uniform on [-4, 4), n = 20 points.
    np.random.seed(30)
    n = 20
    h = 100  # hidden width
    X = np.random.rand(n) * 8 - 4
    Y = X**3 + np.random.randn(n) * 3
    plt.scatter(X,Y)
    # Reshape to (n, 1) float32 column vectors and wrap as Variables.
    X = X.astype('float32').reshape(n,1)
    Y = Y.astype('float32').reshape(n,1)
    X_ = varify(X)
    Y_ = varify(Y)
    # Two-layer ELU net with stochastic weight-normalized linear layers
    # (weights drawn from a learned posterior, presumably -- confirm
    # against bnn.sWNLinear).
    model = nn.Sequential(
        bnn.sWNLinear(1,h),
        nn.ELU(),
        bnn.sWNLinear(h,1))
    # Attach a shared normalizing-flow posterior over all sampled
    # parameters: two IAF-DSF passes with a FlipFlow in between so the
    # second pass sees the variables in reversed order.
    bnn.params.merged_sampler.add_common_flowlayer(
        lambda dim: nn_.SequentialFlow(
            flows.IAF_DSF(dim, 512, 1, 2, num_ds_dim=16),
            flows.FlipFlow(1),
            flows.IAF_DSF(dim, 512, 1, 2, num_ds_dim=16)))
    # NOTE(review): the script continues past this excerpt (training loop,
    # evaluation, plotting) -- not visible here.