def messenger_log_prob(self, m0, m1, tp, wtp):
    log_p = 0
    log_p += utils.log_bernoulli(m0, self.message_prob)
    log_p += utils.log_bernoulli(m1, self.message_prob)
    log_p += utils.log_bernoulli(tp, 0.5)
    log_p += utils.log_bernoulli(wtp, 0.5)
    return log_p
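# Several model classes in this collection (SpeakerModel, ThomasModel,
# ExtendedThomasModel) assume a scalar utils.log_bernoulli(x, p) helper whose
# implementation is not shown here. A minimal sketch consistent with the
# doctests further down, assuming x is 0/1 and 0 < p < 1 (a hypothetical
# stand-in, not the repo's actual utils):
import numpy as np

def log_bernoulli_scalar(x, p):
    # Log-pmf of a Bernoulli(p) outcome x: x*log(p) + (1-x)*log(1-p).
    return x * np.log(p) + (1 - x) * np.log(1 - p)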
def intermediate_dist(t, z, mean, logvar, zeros, batch):
    logp1 = lognormal(z, mean, logvar)  # [P,B]
    log_prior = lognormal(z, zeros, zeros)  # [P,B]
    log_likelihood = log_bernoulli(model.decode(z), batch)
    logpT = log_prior + log_likelihood
    log_intermediate_2 = (1 - float(t)) * logp1 + float(t) * logpT
    return log_intermediate_2
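# intermediate_dist is the standard AIS geometric path between the encoder
# distribution (at t = 0) and the unnormalized posterior (at t = 1):
#   log f_t(z) = (1 - t) * log q(z|x) + t * [log p(z) + log p(x|z)],  t in [0, 1]
# so f_0 = q(z|x) and f_1 = p(z) p(x|z), up to normalization.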
def forward(self, x, k, warmup=1.):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    self.logposterior = lambda aa: lognormal(aa, self.zeros, self.zeros) + log_bernoulli(self.decode(aa), x)

    z, logqz = self.q_dist.forward(k, x, self.logposterior)
    logpxz = self.logposterior(z)

    # Compute elbo
    elbo = logpxz - (warmup * logqz)  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]
    logpxz = torch.mean(logpxz)  # [1]
    logqz = torch.mean(logqz)
    return elbo, logpxz, logqz
def predictive_elbo(self, x, k, s):
    # No pW or qW
    self.B = x.size()[0]  # batch size
    # self.k = k  # number of z samples aka particles P
    # self.s = s  # number of W samples

    elbo1s = []
    for i in range(s):
        Ws, logpW, logqW = self.sample_W()  # _, [1], [1]

        mu, logvar = self.encode(x)  # [B,Z]
        z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]
        x_hat = self.decode(Ws, z)  # [P,B,X]
        logpx = log_bernoulli(x_hat, x)  # [P,B]

        elbo = logpx + logpz - logqz  # [P,B]
        if k > 1:
            max_ = torch.max(elbo, 0)[0]  # [B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

        # elbo1 = elbo1  # + (logpW - logqW)*.00000001  # [B], logp(x|W)p(w)/q(w)
        elbo1s.append(elbo)

    elbo1s = torch.stack(elbo1s)  # [S,B]
    if s > 1:
        max_ = torch.max(elbo1s, 0)[0]  # [B]
        elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo1s)  # [1]
    return elbo  # , logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
def forward3_prior(self, x, k):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    self.logposterior = lambda aa: log_bernoulli(self.decode(aa), x)  # + lognormal(aa, self.zeros, self.zeros)

    # z, logqz = self.q_dist.forward(k, x, self.logposterior)
    z = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_().type(self.dtype))  # [P,B,Z]
    logpxz = self.logposterior(z)

    # Compute elbo
    elbo = logpxz  # - logqz  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]
    # logpxz = torch.mean(logpxz)  # [1]
    # logqz = torch.mean(logqz)
    return elbo  # , logpxz, logqz
def forward(self, x, k=1):
    self.k = k
    self.B = x.size()[0]
    mu, logvar = self.encode(x)
    z, logpz, logqz = self.sample(mu, logvar, k=k)
    x_hat, logpW, logqW = self.decode(z)
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    elbo = logpx + logpz - logqz + (logpW - logqW) * .00000001  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]

    # for printing
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)
    self.x_hat_sigmoid = F.sigmoid(x_hat)

    return elbo, logpx, logpz, logqz, logpW, logqW
def forward(self, x, k):
    self.B = x.size()[0]  # batch size

    # Encode
    mu, logvar = self.encode(x)  # [B,Z]
    z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]

    # Decode
    x_hat = self.decode(z)  # [P,B,X]
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    # Compute elbo
    elbo = logpx + logpz - logqz  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)
    return elbo, logpx, logpz, logqz
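# Nearly every forward() in this collection inlines the same numerically
# stable log-mean-exp over the particle dimension to turn k per-sample ELBOs
# into an IWAE-style bound. A minimal standalone sketch of that pattern
# (a hypothetical helper; the snippets here inline it rather than call one):
import torch

def logmeanexp_particles(elbo):
    # elbo: [P,B] per-particle log-weights; returns [B].
    # Subtract the per-column max before exp() to avoid overflow,
    # then add it back after the log.
    max_ = torch.max(elbo, 0)[0]  # [B]
    return torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_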
def forward(self, x, k=1):
    self.B = x.size()[0]
    mu, logvar = self.encode(x)
    z, logpz, logqz = self.sample(mu, logvar, k=k)
    z = z.view(-1, self.z_size)  # [PB,Z]
    x_hat = self.decode(z)
    # print x_hat.size()
    x_hat = x_hat.view(k, self.B, self.x_size)  # [P,B,X]
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    elbo = logpx  # + logpz - logqz  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]

    # for printing
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)
    self.x_hat_sigmoid = F.sigmoid(x_hat)

    return elbo, logpx, logpz, logqz
def return_current_state(self, x, a, k):
    self.B = x.size()[0]
    self.T = x.size()[1]
    self.k = k
    a = a.float()
    x = x.float()

    states = []
    prev_z = Variable(torch.zeros(k, self.B, self.z_size))
    # prev_z = torch.zeros(k, self.B, self.z_size)
    for t in range(self.T):
        current_x = x[:, t]  # [B,X]
        current_a = a[:, t]  # [B,A]

        # Encode
        mu, logvar = self.encode(current_x, current_a, prev_z)  # [P,B,Z]

        # Sample
        z, logqz = self.sample(mu, logvar)  # [P,B,Z], [P,B]

        # Decode
        x_hat = self.decode(z)  # [P,B,X]
        logpx = log_bernoulli(x_hat, current_x)  # [P,B]

        # Transition/Prior prob
        prior_mean, prior_log_var = self.transition_prior(prev_z, current_a)  # [P,B,Z]
        logpz = lognormal(z, prior_mean, prior_log_var)  # [P,B]

        prev_z = z
        states.append(z)

    return states
def forward(self, x, k=1):
    self.B = x.size()[0]
    mu, logvar = self.encode(x)
    z, logpz, logqz = self.sample(mu, logvar, k=k)  # [P,B,Z]
    x_hat = self.decode(z)  # [PB,X]
    x_hat = x_hat.view(k, self.B, -1)
    # print x_hat.size()
    # print x.size()
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    elbo = logpx + logpz - logqz  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]

    # for printing
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)
    self.x_hat_sigmoid = F.sigmoid(x_hat)

    return elbo, logpx, logpz, logqz
def forward(self, x, k):
    self.B = x.size()[0]  # batch size

    # Encode
    mu, logvar = self.encode(x)  # [B,Z]
    z, logpz, logqz = self.sample(mu, logvar, k=k)  # [P,B,Z], [P,B]

    # Decode
    x_hat = self.decode(z)  # [P,B,X]
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    # Compute elbo
    elbo = logpx + logpz - logqz  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)
    return elbo, logpx, logpz, logqz
def logposterior_func(self, x, z):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    # print(x)  # [B,X]
    # print(z)  # [P,Z]
    z = Variable(z).type(self.dtype)
    z = z.view(-1, self.B, self.z_size)
    return lognormal(z, self.zeros, self.zeros) + log_bernoulli(self.decode(z), x)
def evaluate(self, variables, log=False):
    """
    >>> m = SpeakerModel(0.1, 0.25)
    >>> np.abs(m.evaluate((1,0)) - 0.1 * 0.75) < 1e-8
    True
    >>> np.abs(np.exp(m.evaluate((1,1), True)) - 0.1 * 0.25) < 1e-8
    True
    """
    log_p = 0
    log_p += utils.log_bernoulli(variables[0], self.delta)
    log_p += utils.log_bernoulli(variables[1], self.p)
    if log:
        return log_p
    else:
        return np.exp(log_p)
def logposterior_func(self, x, z):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    # print(x)  # [B,X]
    # print(z)  # [P,Z]
    z = Variable(z).type(self.dtype)
    z = z.view(-1, self.B, self.z_size)
    return lognormal(z, self.zeros, self.zeros) + log_bernoulli(self.generator.decode(z), x)
def sample_q(self, x, k):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    self.logposterior = lambda aa: lognormal(aa, self.zeros, self.zeros) + log_bernoulli(self.generator.decode(aa), x)
    z, logqz = self.q_dist.forward(k=k, x=x, logposterior=self.logposterior)
    return z
def sample_q(self, x, k):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    self.logposterior = lambda aa: lognormal(aa, self.zeros, self.zeros) + log_bernoulli(self.decode(aa), x)
    z, logqz = self.q_dist.forward(k=k, x=x, logposterior=self.logposterior)
    return z
def plot_isocontours_expected_W(ax, model, samp, xlimits=[-6, 6], ylimits=[-6, 6],
                                numticks=101, cmap=None, alpha=1., legend=False):
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    aaa = torch.from_numpy(np.concatenate([np.atleast_2d(X.ravel()),
                                           np.atleast_2d(Y.ravel())]).T).type(torch.FloatTensor)

    n_Ws = 10
    for i in range(n_Ws):
        if i % 10 == 0:
            print(i)
        Ws, logpW, logqW = model.sample_W()  # _, [1], [1]
        # Unnormalized log p(z, x | W) evaluated on the grid
        func = lambda zs: (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(zs, 1))),
                                         Variable(torch.unsqueeze(samp, 0)))
                           + Variable(torch.unsqueeze(lognormal4(torch.Tensor(zs),
                                                                 torch.zeros(2),
                                                                 torch.zeros(2)), 1)))
        bbb = func(aaa)
        zs = bbb.data.numpy()

        # Normalize over the grid in log space, then exponentiate
        max_ = np.max(zs)
        zs_sum = np.log(np.sum(np.exp(zs - max_))) + max_
        zs = zs - zs_sum
        zs = np.exp(zs)

        if i == 0:
            sum_of_all = zs
        else:
            sum_of_all = sum_of_all + zs

    avg_of_all = sum_of_all / n_Ws
    Z = avg_of_all.reshape(X.shape)

    cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha)
    if legend:
        nm, lbl = cs.legend_elements()
        plt.legend(nm, lbl, fontsize=4)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
def forward(self, x, k=1, warmup=1.):
    # NOTE: warmup is unused in this variant
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))  # [B,Z]
    self.logposterior = lambda aa: lognormal(aa, self.zeros, self.zeros) + log_bernoulli(self.decode(aa), x)

    z, logqz = self.q_dist.forward(k, x, self.logposterior)  # [PB,Z]
    # z = z.view(-1, self.z_size)
    logpxz = self.logposterior(z)

    # Compute elbo
    elbo = logpxz - logqz  # [P,B]
    if k > 1:
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]
    logpxz = torch.mean(logpxz)  # [1]
    logqz = torch.mean(logqz)

    return elbo, logpxz, logqz
def forward(self, x, k, s):
    self.B = x.size()[0]  # batch size
    # self.k = k  # number of z samples aka particles P
    # self.s = s  # number of W samples

    elbo1s = []
    logprobs = [[] for _ in range(5)]
    for i in range(s):
        Ws, logpW, logqW = self.sample_W()  # _, [1], [1]

        mu, logvar = self.encode(x)  # [B,Z]
        z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]
        x_hat = self.decode(Ws, z)  # [P,B,X]
        logpx = log_bernoulli(x_hat, x)  # [P,B]

        elbo = logpx + logpz - logqz  # [P,B]
        if k > 1:
            max_ = torch.max(elbo, 0)[0]  # [B]
            elbo1 = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

        elbo = elbo + (logpW * .000001) - (logqW * self.qW_weight)  # [B], logp(x|W)p(w)/q(w)
        elbo1s.append(elbo)

        logprobs[0].append(torch.mean(logpx))
        logprobs[1].append(torch.mean(logpz))
        logprobs[2].append(torch.mean(logqz))
        logprobs[3].append(torch.mean(logpW))
        logprobs[4].append(torch.mean(logqW))

    elbo1s = torch.stack(elbo1s)  # [S,B]
    if s > 1:
        max_ = torch.max(elbo1s, 0)[0]  # [B]
        elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo1s)  # [1]

    # for printing
    logprobs2 = [torch.mean(torch.stack(aa)) for aa in logprobs]

    return elbo, logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
def forward(self, x, k=1, warmup_const=1.):
    x = x.repeat(k, 1)
    mu, logvar = self.encode(x)
    z, logpz, logqz = self.sample(mu, logvar)
    x_logits = self.decode(z)

    logpx = utils.log_bernoulli(x_logits, x)
    elbo = logpx + logpz - warmup_const * logqz

    # need correction for Tensor.repeat
    elbo = utils.logmeanexp(elbo.view(k, -1).transpose(0, 1))
    elbo = torch.mean(elbo)

    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)

    return elbo, logpx, logpz, logqz
def objective(self, x, encoder, decoder):
    '''
    elbo: [1]
    '''
    # Encode
    z_mean_logvar = encoder.model(x)  # [B,Z*2]
    z_mean = tf.slice(z_mean_logvar, [0, 0], [self.batch_size, self.z_size])  # [B,Z]
    z_logvar = tf.slice(z_mean_logvar, [0, self.z_size], [self.batch_size, self.z_size])  # [B,Z]

    # # Sample z
    # eps = tf.random_normal((self.batch_size, self.n_z_particles, self.z_size), 0, 1, dtype=tf.float32)  # [B,P,Z]
    # z = tf.add(z_mean, tf.multiply(tf.sqrt(tf.exp(z_logvar)), eps))  # uses broadcasting, [B,P,Z]

    # Sample z [B,Z]
    eps = tf.random_normal((self.batch_size, self.z_size), 0, 1, dtype=tf.float32, seed=self.rs)  # [B,Z]
    z = tf.add(z_mean, tf.multiply(tf.sqrt(tf.exp(z_logvar)), eps))  # [B,Z]

    # [B]
    log_pz = log_normal(z, tf.zeros([self.batch_size, self.z_size]),
                        tf.log(tf.ones([self.batch_size, self.z_size])))
    log_qz = log_normal(z, z_mean, z_logvar)

    # Decode [B,P,X], [P], [P]
    x_mean, log_pW, log_qW = decoder.model(z)

    # Likelihood [B,P]
    log_px = log_bernoulli(x, x_mean)

    # Objective
    self.log_px = tf.reduce_mean(log_px)  # over batch + W_particles
    self.log_pz = tf.reduce_mean(log_pz)  # over batch
    self.log_qz = tf.reduce_mean(log_qz)  # over batch
    self.log_pW = tf.reduce_mean(log_pW)  # W_particles
    self.log_qW = tf.reduce_mean(log_qW)  # W_particles

    elbo = self.log_px + self.log_pz - self.log_qz + self.batch_frac * (self.log_pW - self.log_qW)
    self.z_elbo = self.log_px + self.log_pz - self.log_qz

    return elbo
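# Reading off the last lines of objective(): with N training points split into
# minibatches, batch_frac plays the role of the 1/N (or batch_size/N) scaling
# that keeps the per-batch weight terms consistent with the full-data ELBO
# (assuming that is how batch_frac is set; this snippet does not show it):
#   L = E_q[log p(x|z,W)] + E_q[log p(z) - log q(z|x)]
#       + batch_frac * E_q[log p(W) - log q(W)]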
def evaluate(self, variables, log=False):
    """
    >>> m = ThomasModel(0.1, 0.25)
    >>> np.abs(m.evaluate((1,0,1,0,0)) - 0.1 * 0.75 * 0.25 * 0.5 * 0.5) < 1e-8
    True
    >>> np.abs(np.exp(m.evaluate((1,0,1,0,0), True)) - 0.1 * 0.75 * 0.25 * 0.5 * 0.5) < 1e-8
    True
    >>> m = ThomasModel(0.1, 0.25, 2)
    >>> np.abs(np.exp(m.evaluate((1,0,1,0,0,1,1,0,0), True)) - 0.1 * 0.75 * 0.25 * 0.5 * 0.5 * 0.25 * 0.25 * 0.5 * 0.5) < 1e-8
    True
    """
    log_p = 0
    log_p += utils.log_bernoulli(variables[0], self.dog_day_prob)
    for i in range(self.n_messengers):
        log_p += self.messenger_log_prob(*variables[(1 + i * self.message_length):(1 + (i + 1) * self.message_length)])
    if log:
        return log_p
    else:
        return np.exp(log_p)
def forward2(self, x, k):
    self.B = x.size()[0]  # batch size
    self.zeros = Variable(torch.zeros(self.B, self.z_size).type(self.dtype))
    self.logposterior = lambda aa: lognormal(aa, self.zeros, self.zeros) + log_bernoulli(self.decode(aa), x)

    z, logqz = self.q_dist.forward(k, x, self.logposterior)
    logpxz = self.logposterior(z)

    # Compute elbo
    elbo = logpxz - logqz  # [P,B]
    # if k > 1:
    #     max_ = torch.max(elbo, 0)[0]  # [B]
    #     elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]
    logpxz = torch.mean(logpxz)  # [1]
    logqz = torch.mean(logqz)
    return elbo, logpxz, logqz
def evaluate(self, variables, log=False):
    """
    >>> m = ExtendedThomasModel(0.1, 0.25)
    >>> np.abs(m.evaluate((1,0,1,0,0,0)) - 0.1 * 0.75 * 0.25 * 0.5 * 0.5 * 0.5) < 1e-8
    True
    >>> np.abs(np.exp(m.evaluate((1,0,1,0,0,0), True)) - 0.1 * 0.75 * 0.25 * 0.5 * 0.5 * 0.5) < 1e-8
    True
    >>> m = ExtendedThomasModel(0.1, 0.25, 2)
    >>> np.abs(np.exp(m.evaluate((1,0,1,0,0,0,1,1,0,0,0), True)) - 0.1 * 0.75 * 0.25 * 0.5 * 0.5 * 0.25 * 0.25 * 0.5 * 0.5 * 0.5 * 0.5) < 1e-8
    True
    """
    log_p = 0
    log_p += utils.log_bernoulli(variables[0], self.dog_day_prob)
    for i in range(self.n_messengers):
        log_p += self.messenger_log_prob(*variables[(1 + i * self.message_length):(1 + (i + 1) * self.message_length)])
    if log:
        return log_p
    else:
        return np.exp(log_p)
def plot_isocontours_expected_true_posterior_ind(ax, model, data, xlimits=[-6, 6], ylimits=[-6, 6],
                                                 numticks=101, cmap=None, alpha=1., legend=False,
                                                 n_samps=10, cs_to_use=None):
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    aaa = torch.from_numpy(np.concatenate([np.atleast_2d(X.ravel()),
                                           np.atleast_2d(Y.ravel())]).T).type(torch.FloatTensor)

    if len(data) < n_samps:
        n_samps = len(data)

    for samp_i in range(n_samps):
        samp = data[samp_i]

        n_Ws = 1
        for i in range(n_Ws):
            Ws, logpW, logqW = model.sample_W()  # _, [1], [1]
            # Unnormalized log p(z, x | W) evaluated on the grid
            func = lambda zs: (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(zs, 1))),
                                             Variable(torch.unsqueeze(samp, 0)))
                               + Variable(torch.unsqueeze(lognormal4(torch.Tensor(zs),
                                                                     torch.zeros(2),
                                                                     torch.zeros(2)), 1)))
            bbb = func(aaa)
            zs = bbb.data.numpy()

            # Normalize over the grid in log space, then exponentiate
            max_ = np.max(zs)
            zs_sum = np.log(np.sum(np.exp(zs - max_))) + max_
            zs = zs - zs_sum
            zs = np.exp(zs)

            Z = zs.reshape(X.shape)
            if cs_to_use is not None:
                cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha, levels=cs_to_use.levels)
            else:
                cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
    return Z
# func = lambda zs: lognormal4(torch.Tensor(zs), torch.squeeze(mean.data), torch.squeeze(logvar.data))
# plot_isocontours(ax, func, cmap='Reds')

# Plot prob
col += 1
ax = plt.subplot2grid((rows, cols), (samp_i, col), frameon=False)
Ws, logpW, logqW = model.sample_W()  # _, [1], [1]
func = lambda zs: lognormal4(torch.Tensor(zs), torch.squeeze(mean.data), torch.squeeze(logvar.data))
plot_isocontours(ax, func, cmap='Reds', xlimits=xlimits, ylimits=ylimits)
func = lambda zs: (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(zs, 1))),
                                 Variable(torch.unsqueeze(samp, 0)))
                   + Variable(torch.unsqueeze(lognormal4(torch.Tensor(zs),
                                                         torch.zeros(2),
                                                         torch.zeros(2)), 1)))
plot_isocontours2_exp_norm(ax, func, cmap='Greens', legend=legend, xlimits=xlimits, ylimits=ylimits)
if samp_i == 0:
    ax.annotate('p(z|x,W1)', xytext=(.1, 1.1), xy=(0, 1), textcoords='axes fraction')
# if samp_i == 0: ax.annotate('p(z,x|W1)', xytext=(.1, 1.1), xy=(0, 1), textcoords='axes fraction')
# func = lambda zs: lognormal4(torch.Tensor(zs), torch.zeros(2), torch.zeros(2))
# plot_isocontours(ax, func, cmap='Blues', alpha=.3)

# Plot prob
col += 1
ax = plt.subplot2grid((rows, cols), (samp_i, col), frameon=False)
Ws, logpW, logqW = model.sample_W()  # _, [1], [1]
func = lambda zs: lognormal4(torch.Tensor(zs), torch.squeeze(mean.data), torch.squeeze(logvar.data))
plot_isocontours(ax, func, cmap='Reds', xlimits=xlimits, ylimits=ylimits)
func = lambda zs: (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(zs, 1))),
                                 Variable(torch.unsqueeze(samp, 0)))
                   + Variable(torch.unsqueeze(lognormal4(torch.Tensor(zs),
                                                         torch.zeros(2),
                                                         torch.zeros(2)), 1)))
plot_isocontours2_exp_norm(ax, func, cmap='Greens', legend=legend, xlimits=xlimits, ylimits=ylimits)
if samp_i == 0:
    ax.annotate('p(z|x,W1)', xytext=(.1, 1.1), xy=(0, 1), textcoords='axes fraction')

func = lambda zs: lognormal4(torch.Tensor(zs), torch.zeros(2), torch.zeros(2))
plot_isocontours(ax, func, cmap='Blues', alpha=.3, xlimits=xlimits, ylimits=ylimits)
def test_ais(model, data_x, path_to_load_variables='', batch_size=20, display_epoch=4, k=10):

    def intermediate_dist(t, z, mean, logvar, zeros, batch):
        logp1 = lognormal(z, mean, logvar)  # [P,B]
        log_prior = lognormal(z, zeros, zeros)  # [P,B]
        log_likelihood = log_bernoulli(model.decode(z), batch)
        logpT = log_prior + log_likelihood
        log_intermediate_2 = (1 - float(t)) * logp1 + float(t) * logpT
        return log_intermediate_2

    n_intermediate_dists = 25
    n_HMC_steps = 5
    step_size = .1

    retain_graph = False
    volatile_ = False
    requires_grad = False

    if path_to_load_variables != '':
        # model.load_state_dict(torch.load(path_to_load_variables))
        model.load_state_dict(torch.load(path_to_load_variables,
                                         map_location=lambda storage, loc: storage))
        print('loaded variables ' + path_to_load_variables)

    logws = []
    data_index = 0
    for i in range(len(data_x) // batch_size):
        print(i)

        # AIS
        schedule = np.linspace(0., 1., n_intermediate_dists)
        model.B = batch_size
        batch = data_x[data_index:data_index + batch_size]
        data_index += batch_size

        if torch.cuda.is_available():
            batch = Variable(batch, volatile=volatile_, requires_grad=requires_grad).cuda()
            zeros = Variable(torch.zeros(model.B, model.z_size), volatile=volatile_, requires_grad=requires_grad).cuda()  # [B,Z]
            logw = Variable(torch.zeros(k, model.B), volatile=volatile_, requires_grad=requires_grad).cuda()
            grad_outputs = torch.ones(k, model.B).cuda()
        else:
            batch = Variable(batch)
            zeros = Variable(torch.zeros(model.B, model.z_size))  # [B,Z]
            logw = Variable(torch.zeros(k, model.B))
            grad_outputs = torch.ones(k, model.B)

        # Encode x
        mean, logvar = model.encode(batch)  # [B,Z]

        # Init z
        z, logpz, logqz = model.sample(mean, logvar, k=k)  # [P,B,Z], [P,B], [P,B]

        for (t0, t1) in zip(schedule[:-1], schedule[1:]):
            # gc.collect()
            # memReport()  # debug memory helper from the original script
            # print(t0)

            # Compute intermediate distribution log prob:
            # (1-t)*logp1(z) + t*logpT(z)
            logp1 = lognormal(z, mean, logvar)  # [P,B]
            log_prior = lognormal(z, zeros, zeros)  # [P,B]
            log_likelihood = log_bernoulli(model.decode(z), batch)
            logpT = log_prior + log_likelihood

            # log pt-1(zt-1)
            log_intermediate_1 = (1 - float(t0)) * logp1 + float(t0) * logpT
            # log pt(zt-1)
            log_intermediate_2 = (1 - float(t1)) * logp1 + float(t1) * logpT

            logw += log_intermediate_2 - log_intermediate_1

            # HMC: sample momentum, then leapfrog steps
            if torch.cuda.is_available():
                v = Variable(torch.FloatTensor(z.size()).normal_(), volatile=volatile_, requires_grad=requires_grad).cuda()
            else:
                v = Variable(torch.FloatTensor(z.size()).normal_())
            v0 = v
            z0 = z

            gradients = torch.autograd.grad(outputs=log_intermediate_2, inputs=z,
                                            grad_outputs=grad_outputs, create_graph=True,
                                            retain_graph=retain_graph, only_inputs=True)[0]
            v = v + .5 * step_size * gradients
            z = z + step_size * v

            for LF_step in range(n_HMC_steps):
                log_intermediate_2 = intermediate_dist(t1, z, mean, logvar, zeros, batch)
                gradients = torch.autograd.grad(outputs=log_intermediate_2, inputs=z,
                                                grad_outputs=grad_outputs, create_graph=True,
                                                retain_graph=retain_graph, only_inputs=True)[0]
                v = v + step_size * gradients
                z = z + step_size * v

            log_intermediate_2 = intermediate_dist(t1, z, mean, logvar, zeros, batch)
            gradients = torch.autograd.grad(outputs=log_intermediate_2, inputs=z,
                                            grad_outputs=grad_outputs, create_graph=True,
                                            retain_graph=retain_graph, only_inputs=True)[0]
            v = v + .5 * step_size * gradients

            # MH step: compare Hamiltonians at the start and end of the trajectory
            log_intermediate_2 = intermediate_dist(t1, z0, mean, logvar, zeros, batch)
            logpv0 = lognormal(v0, zeros, zeros)  # [P,B]
            hamil_0 = log_intermediate_2 + logpv0

            log_intermediate_2 = intermediate_dist(t1, z, mean, logvar, zeros, batch)
            logpvT = lognormal(v, zeros, zeros)  # [P,B]
            hamil_T = log_intermediate_2 + logpvT

            accept_prob = torch.exp(hamil_T - hamil_0)

            if torch.cuda.is_available():
                rand_uni = Variable(torch.FloatTensor(accept_prob.size()).uniform_(), volatile=volatile_, requires_grad=requires_grad).cuda()
            else:
                rand_uni = Variable(torch.FloatTensor(accept_prob.size()).uniform_())
            accept = accept_prob > rand_uni

            if torch.cuda.is_available():
                accept = accept.type(torch.FloatTensor).cuda()
            else:
                accept = accept.type(torch.FloatTensor)
            accept = accept.view(k, model.B, 1)

            z = (accept * z) + ((1 - accept) * z0)

            # Adapt step size toward a ~0.7 acceptance rate, with clamping
            avg_acceptance_rate = torch.mean(accept)
            if avg_acceptance_rate.cpu().data.numpy() > .7:
                step_size = 1.02 * step_size
            else:
                step_size = .98 * step_size

            if step_size < 0.0001:
                step_size = 0.0001
            if step_size > 0.5:
                step_size = 0.5

        # log-mean-exp over particles
        max_ = torch.max(logw, 0)[0]  # [B]
        logw = torch.log(torch.mean(torch.exp(logw - max_), 0)) + max_  # [B]
        logws.append(torch.mean(logw.cpu()).data.numpy())

        if i % display_epoch == 0:
            print(i, len(data_x) // batch_size, np.mean(logws))

    return np.mean(logws)
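# For reference, the estimator the loop above implements: each annealing
# transition adds an importance-weight increment, and averaging the
# exponentiated totals over the k chains estimates the marginal likelihood:
#   log w = sum_t [ log f_{t+1}(z_t) - log f_t(z_t) ]
#   p_hat(x) = (1/k) * sum_j exp(log w_j)
# with f_t the unnormalized intermediate density defined in intermediate_dist.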
def plot_isocontours_expected_true_posterior_ind(ax, model, data, xlimits=[-6, 6], ylimits=[-6, 6],
                                                 numticks=101, cmap=None, alpha=1., legend=False,
                                                 n_samps=10, cs_to_use=None):
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    aaa = torch.from_numpy(np.concatenate([np.atleast_2d(X.ravel()),
                                           np.atleast_2d(Y.ravel())]).T).type(torch.FloatTensor)

    if len(data) < n_samps:
        n_samps = len(data)

    for samp_i in range(n_samps):
        if samp_i % 100 == 0:
            print(samp_i)
        samp = data[samp_i]

        n_Ws = 1
        for i in range(n_Ws):
            Ws, logpW, logqW = model.sample_W()  # _, [1], [1]
            # Unnormalized log p(z, x | W) evaluated on the grid
            func = lambda zs: (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(zs, 1))),
                                             Variable(torch.unsqueeze(samp, 0)))
                               + Variable(torch.unsqueeze(lognormal4(torch.Tensor(zs),
                                                                     torch.zeros(2),
                                                                     torch.zeros(2)), 1)))
            bbb = func(aaa)
            zs = bbb.data.numpy()

            # Normalize over the grid in log space, then exponentiate
            max_ = np.max(zs)
            zs_sum = np.log(np.sum(np.exp(zs - max_))) + max_
            zs = zs - zs_sum
            zs = np.exp(zs)

            Z = zs.reshape(X.shape)
            if cs_to_use is not None:
                cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha, levels=cs_to_use.levels)
            else:
                cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
    return Z
def forward(self, x, a, k=1, current_state=None):
    '''
    x: [B,T,X]
    a: [B,T,A]
    output: elbo scalar
    '''
    self.B = x.size()[0]
    self.T = x.size()[1]
    self.k = k
    a = a.float()
    x = x.float()

    logpxs = []
    logpzs = []
    logqzs = []

    # if current_state is None:
    prev_z = Variable(torch.zeros(k, self.B, self.z_size))
    # else:
    #     prev_z = current_state

    for t in range(self.T):
        current_x = x[:, t]  # [B,X]
        current_a = a[:, t]  # [B,A]

        # Encode
        mu, logvar = self.encode(current_x, current_a, prev_z)  # [P,B,Z]

        # Sample
        z, logqz = self.sample(mu, logvar)  # [P,B,Z], [P,B]

        # Decode
        x_hat = self.decode(z)  # [P,B,X]
        logpx = log_bernoulli(x_hat, current_x)  # [P,B]

        # Transition/Prior prob
        prior_mean, prior_log_var = self.transition_prior(prev_z, current_a)  # [P,B,Z]
        logpz = lognormal(z, prior_mean, prior_log_var)  # [P,B]

        logpxs.append(logpx)
        logpzs.append(logpz)
        logqzs.append(logqz)
        prev_z = z

    logpxs = torch.stack(logpxs)
    logpzs = torch.stack(logpzs)
    logqzs = torch.stack(logqzs)  # [T,P,B]

    logws = logpxs + logpzs - logqzs  # [T,P,B]
    logws = torch.mean(logws, 0)  # [P,B]

    if k > 1:
        max_ = torch.max(logws, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(logws - max_), 0)) + max_  # [B]
        elbo = torch.mean(elbo)  # over batch
    else:
        elbo = torch.mean(logws)

    # for printing
    logpx = torch.mean(logpxs)
    logpz = torch.mean(logpzs)
    logqz = torch.mean(logqzs)

    return elbo, logpx, logpz, logqz
# plot_isocontours(ax, func, cmap='Blues')
# if samp_i == 0: ax.annotate('Prior p(z)', xytext=(.3, 1.1), xy=(0, 1), textcoords='axes fraction')

# Plot q
col += 1
val = 3
ax = plt.subplot2grid((rows, cols), (samp_i, col), frameon=False)
mean, logvar = model.encode(Variable(torch.unsqueeze(samp, 0)))
func = lambda zs: lognormal4(torch.Tensor(zs), torch.squeeze(mean.data), torch.squeeze(logvar.data))
plot_isocontours(ax, func, cmap='Reds', xlimits=[-val, val], ylimits=[-val, val])
if samp_i == 0:
    ax.annotate('p(z)\nq(z|x)\np(z|x)', xytext=(.3, 1.1), xy=(0, 1), textcoords='axes fraction')

func = lambda zs: lognormal4(torch.Tensor(zs), torch.zeros(2), torch.zeros(2))
plot_isocontours(ax, func, cmap='Blues', alpha=.3, xlimits=[-val, val], ylimits=[-val, val])

Ws, logpW, logqW = model.sample_W()  # _, [1], [1]
func = lambda zs: (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(zs, 1))),
                                 Variable(torch.unsqueeze(samp, 0)))
                   + Variable(torch.unsqueeze(lognormal4(torch.Tensor(zs),
                                                         torch.zeros(2),
                                                         torch.zeros(2)), 1)))
plot_isocontours2_exp_norm(ax, func, cmap='Greens', legend=legend, xlimits=[-val, val], ylimits=[-val, val])
def forward(self, x, a, k=1, current_state=None):
    '''
    x: [B,T,X]
    a: [B,T,A]
    output: elbo scalar
    '''
    self.B = x.size()[0]
    self.T = x.size()[1]
    self.k = k
    a = a.float()
    x = x.float()

    logpxs = []
    logpzs = []
    logqzs = []

    weights = Variable(torch.ones(k, self.B) / k)
    # if current_state is None:
    prev_z = Variable(torch.zeros(k, self.B, self.z_size))
    # else:
    #     prev_z = current_state

    for t in range(self.T):
        current_x = x[:, t]  # [B,X]
        current_a = a[:, t]  # [B,A]

        # Encode
        mu, logvar = self.encode(current_x, current_a, prev_z)  # [P,B,Z]

        # Sample
        z, logqz = self.sample(mu, logvar)  # [P,B,Z], [P,B]

        # Decode
        x_hat = self.decode(z)  # [P,B,X]
        logpx = log_bernoulli(x_hat, current_x)  # [P,B]

        # Transition/Prior prob
        prior_mean, prior_log_var = self.transition_prior(prev_z, current_a)  # [P,B,Z]
        logpz = lognormal(z, prior_mean, prior_log_var)  # [P,B]

        # Update particle weights
        log_alpha_t = logpx + logpz - logqz  # [P,B]
        log_weights_tmp = torch.log(weights * torch.exp(log_alpha_t))
        max_ = torch.max(log_weights_tmp, 0)[0]  # [B]
        log_p_hat = torch.log(torch.sum(torch.exp(log_weights_tmp - max_), 0)) + max_  # [B]
        normalized_alpha_t = log_weights_tmp - log_p_hat  # [P,B]
        weights = torch.exp(normalized_alpha_t)  # [P,B]

        # Resample every second step
        if t % 2 == 0:
            # [B,P] indices of the particles for each batch
            sampled_indices = torch.multinomial(torch.t(weights), k, replacement=True).detach()
            new_z = []
            for b in range(self.B):
                tmp = z[:, b]  # [P,Z]
                z_b = tmp[sampled_indices[b]]  # [P,Z]
                new_z.append(z_b)
            new_z = torch.stack(new_z, 1)  # [P,B,Z]
            weights = Variable(torch.ones(k, self.B) / k)
            z = new_z

        logpxs.append(logpx)
        logpzs.append(logpz)
        logqzs.append(logqz)
        prev_z = z

    logpxs = torch.stack(logpxs)
    logpzs = torch.stack(logpzs)
    logqzs = torch.stack(logqzs)  # [T,P,B]

    logws = logpxs + logpzs - logqzs  # [T,P,B]
    logws = torch.mean(logws, 0)  # [P,B]

    if k > 1:
        max_ = torch.max(logws, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(logws - max_), 0)) + max_  # [B]
        elbo = torch.mean(elbo)  # over batch
    else:
        elbo = torch.mean(logws)

    # for printing
    logpx = torch.mean(logpxs)
    logpz = torch.mean(logpzs)
    logqz = torch.mean(logqzs)

    return elbo, logpx, logpz, logqz
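# The per-step log_p_hat computed above is the usual SMC marginal-likelihood
# factor. If the bound were accumulated from it directly (rather than from the
# time-averaged logws at the end), the estimator would be
#   p_hat(x_{1:T}) = prod_t sum_i w_{t-1}^i * alpha_t^i,
#   alpha_t^i = p(x_t|z_t^i) p(z_t^i|z_{t-1}^i, a_t) / q(z_t^i|x_t, a_t, z_{t-1}^i)
# i.e. a filtering-style (FIVO-like) bound; this code instead averages
# log-weights over time before the final log-mean-exp over particles.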