def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Uniform prior p(y) = 1 / y_dim over the labels.
    p_y = torch.ones_like(y_prob) / self.y_dim
    kl_y = torch.mean(ut.kl_cat(y_prob, y_logprob, torch.log(p_y)), dim=0)

    batch_size = x.shape[0]

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    qm, qv = self.enc.encode(x, y)
    z = ut.sample_gaussian(qm, qv)
    recon_logits = self.dec.decode(z, y)

    # Reconstruction term: reshape the flat (y_dim * batch,) log-likelihoods
    # to (batch, y_dim) and take the expectation under q(y | x).
    p_x_given_yz = ut.log_bernoulli_with_logits(x, recon_logits)
    p_x_given_yz = p_x_given_yz.reshape(self.y_dim, batch_size).transpose(0, 1)  # (batch, y_dim)
    rec = -torch.mean(torch.sum(p_x_given_yz * y_prob, dim=1), dim=0)

    # KL(q(z | x, y) || p(z)), likewise weighted by q(y | x).
    kl_z_over_xy = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v)
    kl_z_over_xy = kl_z_over_xy.reshape(self.y_dim, batch_size).transpose(0, 1)
    kl_z = torch.mean(torch.sum(kl_z_over_xy * y_prob, dim=1), dim=0)

    nelbo = rec + kl_y + kl_z
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
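# The reshape/transpose steps above rely on how the tiling lays rows out. A
# minimal sketch of what `ut.duplicate` is assumed to do here (the real helper
# lives in the course utils module; `duplicate_sketch` is a hypothetical name):
import torch

def duplicate_sketch(x, rep):
    """Stack `rep` copies of x along dim 0, so rows k*batch..(k+1)*batch - 1
    are copy k. Combined with np.repeat above, row k*batch + i pairs example
    i with label k, which is why a (y_dim * batch,) loss vector reshapes to
    (y_dim, batch) and transposes to (batch, y_dim)."""
    return x.expand(rep, *x.shape).reshape(-1, *x.shape[1:])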
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    # Sample z from q(z | x, y).
    qm, qv = self.enc.encode(x, y)
    z = ut.sample_gaussian(qm, qv)

    # Compute the KL terms and the per-row reconstruction loss.
    x_logits = self.dec.decode(z, y)
    kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim))
    kl_z = ut.kl_normal(qm, qv, self.z_prior[0], self.z_prior[1])
    rec_loss = -ut.log_bernoulli_with_logits(x, x_logits)  # (y_dim * batch,)

    # Take the expected reconstruction and KL under q(y | x): reshape the flat
    # losses to (y_dim, batch), weight by q(y | x), and sum over y.
    rec_loss_y = (y_prob.t() * rec_loss.reshape(self.y_dim, -1)).sum(0)
    kl_z_y = (y_prob.t() * kl_z.reshape(self.y_dim, -1)).sum(0)

    # Reduce to means over the batch.
    kl_y, kl_z, rec = kl_y.mean(), kl_z_y.mean(), rec_loss_y.mean()
    nelbo = rec + kl_z + kl_y
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
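# `ut.kl_cat(q, log_q, log_p)` is assumed to compute the categorical KL
# divergence per batch row, reduced over the label dimension. A minimal sketch
# under that assumption (hypothetical name `kl_cat_sketch`):
import torch

def kl_cat_sketch(q, log_q, log_p):
    """KL(q || p) = sum_y q(y) * (log q(y) - log p(y)) per batch row.
    `log_p` may be a scalar (e.g. np.log(1.0 / y_dim) for a uniform prior, as
    above) or a (batch, y_dim) tensor; broadcasting handles both."""
    return (q * (log_q - log_p)).sum(-1)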
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    # Generate samples.
    qm, qv = self.enc.encode(x, y)
    z_sample = ut.sample_gaussian(qm, qv)
    xprime = self.dec.decode(z_sample, y)

    # Compute losses. The categorical KL uses a uniform log-prior over y.
    y_logprior = torch.log(torch.ones_like(y_logprob) / self.y_dim)
    kl_y = ut.kl_cat(y_prob, y_logprob, y_logprior)

    # Data was duplicated so that y varies along the first axis, which makes
    # the batch dimension second after the reshape.
    kl_z = ut.kl_normal(qm, qv, self.z_prior_m, self.z_prior_v).view(self.y_dim, -1)
    rec = -ut.log_bernoulli_with_logits(x, xprime).view(self.y_dim, -1)

    # Transpose the probabilities to match the (y_dim, batch) layout, take the
    # expectation under q(y | x), and reduce everything to scalars.
    kl_z = (y_prob.t() * kl_z).sum(0).mean()
    rec = (y_prob.t() * rec).sum(0).mean()
    kl_y = kl_y.mean()
    nelbo = kl_y + kl_z + rec
    # Test set classification accuracy: 0.8104000091552734
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
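# `ut.kl_normal(qm, qv, pm, pv)` is assumed to be the analytic KL divergence
# between two diagonal Gaussians, reduced over the latent dimension. A minimal
# sketch under that assumption (hypothetical name `kl_normal_sketch`):
import torch

def kl_normal_sketch(qm, qv, pm, pv):
    """KL(N(qm, diag(qv)) || N(pm, diag(pv))) summed over the last dimension:
    0.5 * sum_d [log(pv/qv) + qv/pv + (qm - pm)^2 / pv - 1]."""
    element_wise = 0.5 * (torch.log(pv) - torch.log(qv) + qv / pv
                          + (qm - pm).pow(2) / pv - 1)
    return element_wise.sum(-1)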
def negative_elbo_bound_gumbel(self, x, tau):
    """
    Gumbel-softmax version. Not slated for release.
    """
    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = F.softmax(y_logits, dim=1)

    # Draw a single relaxed (differentiable) one-hot sample of y instead of
    # marginalizing over all labels.
    y = ut.gumbel_softmax(y_logits, tau)

    m, v = self.enc.encode(x, y)
    z = ut.sample_gaussian(m, v)
    x_logits = self.dec.decode(z, y)

    kl_y = ut.kl_cat(y_prob, y_logprob, np.log(1.0 / self.y_dim)).mean()
    kl_z = ut.kl_normal(m, v, self.z_prior[0], self.z_prior[1]).mean()
    rec = -ut.log_bernoulli_with_logits(x, x_logits).mean()
    nelbo = kl_y + kl_z + rec
    return nelbo, kl_z, kl_y, rec
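# `ut.gumbel_softmax(logits, tau)` is assumed to draw a relaxed one-hot sample
# via the standard Gumbel-softmax trick. A minimal sketch under that
# assumption (hypothetical name `gumbel_softmax_sketch`):
import torch
import torch.nn.functional as F

def gumbel_softmax_sketch(logits, tau, eps=1e-20):
    """Add Gumbel noise g = -log(-log(u)), u ~ Uniform(0, 1), to the logits
    and apply a temperature-tau softmax. As tau -> 0 the sample approaches a
    one-hot vector; larger tau gives a smoother, lower-variance relaxation."""
    u = torch.rand_like(logits)
    g = -torch.log(-torch.log(u + eps) + eps)
    return F.softmax((logits + g) / tau, dim=-1)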
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    N_batches = x.size(0)

    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9).
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])
    x = ut.duplicate(x, self.y_dim)

    q_mu, q_var = self.enc.encode(x, y)
    z_samp = ut.sample_gaussian(q_mu, q_var)
    logits = self.dec.decode(z_samp, y)

    # The flat losses are laid out with y varying slowly, so reshape to
    # (y_dim, batch) and transpose before weighting by q(y | x).
    rec_ls = -ut.log_bernoulli_with_logits(x, logits)
    rec = torch.mean(
        torch.sum(y_prob * rec_ls.reshape(self.y_dim, -1).t(), dim=1))

    kl_y = torch.mean(
        ut.kl_cat(y_prob, y_logprob,
                  torch.log(torch.ones_like(y_prob) / self.y_dim)))

    # Standard normal prior on z.
    kl_z_ls = ut.kl_normal(q_mu, q_var, torch.zeros_like(q_mu),
                           torch.ones_like(q_var))
    kl_z = torch.mean(
        torch.sum(y_prob * kl_z_ls.reshape(self.y_dim, -1).t(), dim=1))

    nelbo = kl_z + kl_y + rec
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
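# `ut.sample_gaussian(m, v)` is assumed to be the reparameterization trick: a
# differentiable sample from N(m, diag(v)). A minimal sketch under that
# assumption (hypothetical name `sample_gaussian_sketch`):
import torch

def sample_gaussian_sketch(m, v):
    """z = m + sqrt(v) * eps with eps ~ N(0, I), so gradients flow through
    m and v while the randomness stays in eps."""
    return m + torch.sqrt(v) * torch.randn_like(m)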
def negative_elbo_bound(self, x):
    """
    Computes the Evidence Lower Bound, KL and Reconstruction costs

    Args:
        x: tensor: (batch, dim): Observations

    Returns:
        nelbo: tensor: (): Negative evidence lower bound
        kl: tensor: (): ELBO KL divergence to prior
        rec: tensor: (): ELBO Reconstruction term
    """
    ################################################################################
    # TODO: Modify/complete the code here
    # Compute negative Evidence Lower Bound and its KL_Z, KL_Y and Rec decomposition
    #
    # To assist you in the vectorization of the summation over y, we have
    # the computation of q(y | x) and some tensor tiling code for you.
    #
    # Note that nelbo = kl_z + kl_y + rec
    #
    # Outputs should all be scalar
    ################################################################################
    batch_size = x.size(0)

    y_logits = self.cls.classify(x)
    y_logprob = F.log_softmax(y_logits, dim=1)
    y_prob = torch.softmax(y_logits, dim=1)  # (batch, y_dim)

    # Duplicate y based on x's batch size. Then duplicate x.
    # This enumerates all possible combinations of x with labels (0, 1, ..., 9),
    # with y varying slowly: rows 0..batch-1 get label 0, the next batch rows
    # get label 1, and so on.
    y = np.repeat(np.arange(self.y_dim), x.size(0))
    y = x.new(np.eye(self.y_dim)[y])  # (y_dim * batch, y_dim)
    x = ut.duplicate(x, self.y_dim)   # (y_dim * batch, dim)

    # KL(q(y | x) || p(y)) against a uniform prior over the y_dim labels.
    y_logprior = torch.log(torch.ones_like(y_prob) / self.y_dim)
    kl_ys = ut.kl_cat(y_prob, y_logprob, y_logprior)  # (batch,)
    kl_y = torch.mean(kl_ys)

    # KL(q(z | x, y) || p(z)): broadcast the prior parameters to the
    # duplicated batch, then weight the per-(x, y) terms by q(y | x).
    zqm, zqv = self.enc.encode(x, y)  # (y_dim * batch, z_dim)
    zpm = self.z_prior_m.expand_as(zqm)
    zpv = self.z_prior_v.expand_as(zqv)
    kl_zs_flat = ut.kl_normal(zqm, zqv, zpm, zpv)  # (y_dim * batch,)
    kl_zs = kl_zs_flat.reshape(self.y_dim, batch_size).t()
    batch_kl_zs = (kl_zs * y_prob).sum(1)
    kl_z = batch_kl_zs.mean()

    # Reconstruction term, likewise weighted by q(y | x).
    z = ut.sample_gaussian(zqm, zqv)  # (y_dim * batch, z_dim)
    x_logits = self.dec.decode(z, y)  # (y_dim * batch, dim)
    recs_flat = -ut.log_bernoulli_with_logits(x, x_logits)  # (y_dim * batch,)
    recs = recs_flat.reshape(self.y_dim, batch_size).t()
    batch_recs = (recs * y_prob).sum(1)
    rec = batch_recs.mean()

    nelbos = kl_ys + batch_kl_zs + batch_recs
    nelbo = torch.mean(nelbos)
    ################################################################################
    # End of code modification
    ################################################################################
    return nelbo, kl_z, kl_y, rec
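# `ut.log_bernoulli_with_logits(x, logits)` is assumed to return the Bernoulli
# log-likelihood of x under the decoder logits, summed over pixels. A minimal
# sketch under that assumption (hypothetical name
# `log_bernoulli_with_logits_sketch`):
import torch
import torch.nn.functional as F

def log_bernoulli_with_logits_sketch(x, logits):
    """log p(x | logits) = sum_d [x_d * log sigmoid(l_d)
    + (1 - x_d) * log(1 - sigmoid(l_d))], i.e. the negative of the summed
    binary cross-entropy, computed stably from the logits."""
    return -F.binary_cross_entropy_with_logits(
        logits, x, reduction='none').sum(-1)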