def get_loss_func(self, x, C=1.0):
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # Draw several samples from q(z|x) and keep the per-sample losses as the
    # columns of a (batchsize, sampling_number) matrix.
    for i in range(self.sampling_number):
        z = F.gaussian(mu, ln_var)
        rec_loss = F.sum(
            F.bernoulli_nll(x, self.decode(z, sigmoid=False), reduce='no'),
            axis=1) / batchsize
        kl_loss = F.sum(
            C * gaussian_kl_divergence(mu, ln_var, reduce='no'),
            axis=1) / batchsize
        sample_loss = F.reshape(rec_loss + kl_loss, [batchsize, 1])
        if i == 0:
            loss = sample_loss
        else:
            loss = F.concat((loss, sample_loss), axis=1)
    # Softmax over the sample dimension gives an importance weight per draw.
    importance_weight = F.softmax(loss)
    self.total_loss = F.sum(importance_weight * loss)
    return self.total_loss
def loss_gen(self, gen, y_fake, mu, ln_var):
    batchsize = y_fake.shape[0]
    GEN_loss = F.sum(F.softplus(-y_fake)) / batchsize
    KL_loss = gaussian_kl_divergence(mu, ln_var) / batchsize
    loss = GEN_loss + self.KL_COEFF * KL_loss
    chainer.report({'loss': loss, 'kl_loss': KL_loss}, gen)
    return loss
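# For reference: the F.softplus(-y_fake) term in loss_gen above is the standard
# non-saturating GAN generator loss, since softplus(-y) == -log(sigmoid(y)).
# The standalone check below is purely illustrative and not part of the original code.
import numpy as np

y = np.linspace(-5.0, 5.0, 11).astype(np.float32)
softplus_neg_y = np.log1p(np.exp(-y))                   # softplus(-y)
neg_log_sigmoid_y = -np.log(1.0 / (1.0 + np.exp(-y)))   # -log(sigmoid(y))
assert np.allclose(softplus_neg_y, neg_log_sigmoid_y, atol=1e-6)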
def lf(x):
    # x = x.reshape(-1, 3*64*64)
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for _ in range(self.k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (self.k * batchsize)
    self.rec_loss = rec_loss
    # latent loss
    lat_loss = self.beta * gaussian_kl_divergence(mu, ln_var) / batchsize
    self.lat_loss = lat_loss
    self.loss = rec_loss + lat_loss
    chainer.report(
        {"rec_loss": rec_loss, "lat_loss": lat_loss, "loss": self.loss},
        observer=self)
    return self.loss
def lf(xs):
    T, batchsize = xs.shape[:2]
    self.loss = 0.
    self.losses = [0.] * T
    self.l_x = [0.] * T
    self.l_z = [0.] * T
    prev_dist = None
    for t in range(T):
        # reconstruction loss for x_t
        l_x, dist = self.reconstruction_loss(xs[t], k)
        # D_KL[ q || p ]
        if t == 0:
            l_z = vae.gaussian_kl_divergence(dist.mu, dist.ln_var) \
                / batchsize
        else:
            l_z = 0.
            for z in prev_dist.samples:
                mu, ln_var = self.transition(z)
                kl = self.gaussian_kl_divergence(
                    dist.mu, dist.ln_var, mu, ln_var)
                # k == len(prev_dist.samples)
                l_z += kl / (k * batchsize)
        self.l_x[t] = l_x
        self.l_z[t] = l_z
        self.losses[t] = l_x + c * l_z
        self.loss += self.losses[t]
        prev_dist = dist
    return self.loss
def __call__(self, x, t):
    # number of data points in the batch
    num_data = x.shape[0]
    # forward pass and loss computation
    mu, var = self.vae.encoder(x)
    reconst_loss = 0
    for i in range(self.k):
        # re-sample z for each of the k Monte Carlo draws
        z = F.gaussian(mu, var)
        if self.loss_function == 'mse':
            # compute the reconstruction error with MSE
            reconst = self.vae.decoder(z, use_sigmoid=True)
            reconst_loss += F.mean_squared_error(x, reconst) / self.k
        else:
            # otherwise use a Bernoulli observation model;
            # bernoulli_nll applies the sigmoid itself, so use_sigmoid=False during training
            reconst = self.vae.decoder(z, use_sigmoid=False)
            reconst_loss += F.bernoulli_nll(x, reconst) / (self.k * num_data)
    kld = gaussian_kl_divergence(mu, var, reduce='mean')
    loss = reconst_loss + self.beta * kld

    # report
    reporter.report({'loss': loss}, self)
    reporter.report({'reconst_loss': reconst_loss}, self)
    reporter.report({'kld': kld}, self)
    return loss
def train(model, epoch0=0):
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    for epoch in xrange(epoch0, n_epoch):
        logger.print_epoch(epoch)

        # training
        perm = np.random.permutation(N_train)
        for i in xrange(0, N_train, batchsize):
            x = Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
            mu, ln_var = model.encode(x)
            rec_loss = 0
            for l in xrange(1):
                z = F.gaussian(mu, ln_var)
                rec_loss += F.bernoulli_nll(x, model.decode(z, sigmoid=False)) / batchsize
            reg_loss = gaussian_kl_divergence(mu, ln_var) / batchsize
            loss = rec_loss + reg_loss

            optimizer.zero_grads()
            loss.backward()
            loss.unchain_backward()
            optimizer.update()

            logger.save_loss(reg_loss.data, rec_loss.data, train=True)

        # evaluation on the held-out set
        for i in xrange(0, N_test, batchsize):
            x = Variable(xp.asarray(x_test[i:i + batchsize]), volatile='on')
            mu, ln_var = model.encode(x)
            rec_loss = 0
            for l in xrange(1):
                z = F.gaussian(mu, ln_var)
                rec_loss += F.bernoulli_nll(x, model.decode(z, sigmoid=False)) / batchsize
            reg_loss = gaussian_kl_divergence(mu, ln_var) / batchsize
            loss = rec_loss + reg_loss
            logger.save_loss(reg_loss.data, rec_loss.data, train=False)

        logger.epoch_end()

    logger.terminate()
    serializer.save(model, optimizer, epoch + 1)
    return 0
def cost(self, x_var, C=1.0, k=1):
    mu, ln_var = self.encode(x_var)
    batchsize = len(mu.data)
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x_var, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + C * gaussian_kl_divergence(mu, ln_var) / batchsize
    return self.loss
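# For context: a minimal sketch of the encode()/decode() interface that cost()
# and the lf() closures in this collection assume. The class name, layer sizes,
# and activations below are placeholders (assumptions), not the original models.
import chainer
import chainer.functions as F
import chainer.links as L


class SketchVAE(chainer.Chain):
    def __init__(self, n_in=784, n_latent=20, n_h=500):
        super(SketchVAE, self).__init__()
        with self.init_scope():
            self.le1 = L.Linear(n_in, n_h)
            self.le_mu = L.Linear(n_h, n_latent)
            self.le_ln_var = L.Linear(n_h, n_latent)
            self.ld1 = L.Linear(n_latent, n_h)
            self.ld2 = L.Linear(n_h, n_in)

    def encode(self, x):
        h = F.tanh(self.le1(x))
        return self.le_mu(h), self.le_ln_var(h)

    def decode(self, z, sigmoid=True):
        h = self.ld2(F.tanh(self.ld1(z)))
        # bernoulli_nll applies the sigmoid itself, so callers pass sigmoid=False
        # during training and sigmoid=True only when probabilities are needed.
        return F.sigmoid(h) if sigmoid else h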
def update_core(self):
    vae_optimizer = self.get_optimizer('opt_vae')
    xp = self.vae.xp

    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    x = chainer.dataset.concat_examples(batch, device=self.device)

    latent_dist = self.vae.encode(x)

    # reconstruction loss
    rec_loss = 0
    for _ in range(self.vae.k):
        reconstructions = self.vae(x, sigmoid=False, mode="sample")
        rec_loss += F.bernoulli_nll(x, reconstructions) \
            / (self.vae.k * batchsize)

    ### latent loss
    # latent loss for the continuous codes
    cont_capacity_loss = 0
    if self.vae.is_continuous:
        mu, ln_var = latent_dist['cont']
        kl_cont_loss = gaussian_kl_divergence(mu, ln_var) / batchsize

        # annealing of the continuous capacity
        cont_min, cont_max, cont_num_iters, cont_gamma = \
            self.vae.cont_capacity
        cont_cap_now = (cont_max - cont_min) * self.iteration / float(cont_num_iters) + cont_min
        cont_cap_now = min(cont_cap_now, cont_max)
        cont_capacity_loss = cont_gamma * F.absolute(cont_cap_now - kl_cont_loss)

    # latent loss for the discrete codes
    disc_capacity_loss = 0
    if self.vae.is_discrete:
        kl_disc_loss = kl_multiple_discrete_loss(latent_dist['disc'])

        # annealing of the discrete capacity
        disc_min, disc_max, disc_num_iters, disc_gamma = \
            self.vae.disc_capacity
        disc_cap_now = (disc_max - disc_min) * self.iteration / float(disc_num_iters) + disc_min
        disc_cap_now = min(disc_cap_now, disc_max)

        # Require float conversion here to not end up with a numpy float
        disc_theoretical_max = 0
        for disc_dim in self.vae.latent_spec["disc"]:
            disc_theoretical_max += xp.log(disc_dim)
        disc_cap_now = min(disc_cap_now, disc_theoretical_max.astype("float32"))

        disc_capacity_loss = disc_gamma * F.absolute(disc_cap_now - kl_disc_loss)

    joint_vae_loss = rec_loss + cont_capacity_loss + disc_capacity_loss

    self.vae.cleargrads()
    joint_vae_loss.backward()
    vae_optimizer.update()

    chainer.reporter.report({"rec_loss": rec_loss,
                             "cont_loss": cont_capacity_loss,
                             "disc_loss": disc_capacity_loss,
                             "vae_loss": joint_vae_loss})
    return
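# Standalone illustration of the linear capacity annealing used in update_core
# above: the target KL capacity grows linearly with the iteration count and is
# clamped at its maximum. The numbers here are placeholders, not the original
# training settings.
def capacity_at(iteration, cap_min=0.0, cap_max=25.0, num_iters=100000):
    cap = (cap_max - cap_min) * iteration / float(num_iters) + cap_min
    return min(cap, cap_max)


print(capacity_at(0))        # 0.0
print(capacity_at(50000))    # 12.5
print(capacity_at(200000))   # clamped at 25.0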
def update_core(self):
    vae_optimizer = self.get_optimizer('opt_vae')
    dis_optimizer = self.get_optimizer('opt_dis')
    xp = self.vae.xp

    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    x = chainer.dataset.concat_examples(batch, device=self.device)

    mu, ln_var = self.vae.encode(x)
    z_sampled = F.gaussian(mu, ln_var)
    z_shuffled = shuffle_codes(z_sampled)

    logits_z, probs_z = self.dis(z_sampled)
    _, probs_z_shuffle = self.dis(z_shuffled)

    reconstructions = self.vae.decode(z_sampled, sigmoid=False)

    # reconstruction loss
    rec_loss = 0
    for _ in range(self.vae.k):
        rec_loss += F.bernoulli_nll(x, reconstructions) \
            / (self.vae.k * batchsize)

    # latent loss
    lat_loss = self.vae.beta * gaussian_kl_divergence(mu, ln_var) / batchsize

    # total-correlation loss estimated by the discriminator
    tc_loss = F.mean(logits_z[:, 0] - logits_z[:, 1])

    factor_vae_loss = rec_loss + lat_loss + self.vae.gamma * tc_loss
    dis_loss = -(0.5 * F.mean(F.log(probs_z[:, 0]))
                 + 0.5 * F.mean(F.log(probs_z_shuffle[:, 1])))

    self.vae.cleargrads()
    self.dis.cleargrads()
    factor_vae_loss.backward()
    vae_optimizer.update()

    # avoid backpropagating through the VAE graph a second time
    z_sampled.unchain_backward()

    self.dis.cleargrads()
    self.vae.cleargrads()
    dis_loss.backward()
    dis_optimizer.update()

    chainer.reporter.report({
        "rec_loss": rec_loss,
        "lat_loss": lat_loss,
        "tc_loss": tc_loss,
        "vae_loss": factor_vae_loss,
        "dis_loss": dis_loss
    })
    return
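# shuffle_codes() is referenced above but not defined here. A possible
# implementation (an assumption, following the FactorVAE recipe) permutes each
# latent dimension independently across the batch so that the shuffled codes
# approximate samples from the product of the marginals.
import chainer
import chainer.functions as F


def shuffle_codes(z):
    xp = chainer.backend.get_array_module(z.data)
    cols = []
    for i in range(z.shape[1]):
        perm = xp.random.permutation(z.shape[0]).astype(xp.int32)
        col = z[:, i:i + 1]                      # keep shape (batch, 1)
        cols.append(F.permutate(col, perm, axis=0))
    return F.concat(cols, axis=1)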
def lf(x):
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        C * gaussian_kl_divergence(mu, ln_var) / batchsize
    return self.loss
def lf(x):
    mu, log_var = self.encode(x)
    batch_size = len(mu.data)

    self.vae_loss = gaussian_kl_divergence(mu, log_var) / batch_size

    z = chf.gaussian(mu, log_var)
    output_dec = chf.exp(self.decode(z))  # exp(log(power)) = power
    # negative log-likelihood of an exponential observation model with mean output_dec
    self.dec_loss = chf.sum(chf.log(output_dec) + x / output_dec) / batch_size

    self.loss = self.vae_loss + self.dec_loss
    return self.loss
def __call__(self, x, sigmoid=True):
    """AutoEncoder"""
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(self.k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (self.k * batchsize)
    loss = rec_loss + \
        self.C * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report({'loss': loss}, self)
    return loss
def lf(x):
    mu, ln_var = self.encode(x)
    batchsize = len(mu)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        beta * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report(
        {'rec_loss': rec_loss, 'loss': self.loss}, observer=self)
    return self.loss
def lf(x):
    mu, ln_var = self.encode(x)
    batchsize = len(mu)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        beta * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report(
        {'rec_loss': rec_loss, 'loss': self.loss}, observer=self)
    return self.loss
def __call__(self, x, C=1.0, k=1):
    mu, ln_var = self.encode(x)
    mb_size = mu.data.shape[0]
    # reconstruction loss
    rec_loss = 0
    for l in range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False))
    rec_loss /= (k * mb_size)
    kld_loss = gaussian_kl_divergence(mu, ln_var) / mb_size
    loss = rec_loss + C * kld_loss
    return loss, float(rec_loss.data), float(kld_loss.data)
def lf_regress(self, in_img, in_rel_labels, rel_masks, object_labels,
               object_label_masks, eef, k=1):
    batchsize = float(len(in_img))

    x_true = eef.astype(cp.float32)  # eef
    mvae_in = in_img  # images, masks, labels

    mse_loss = 0
    kl = 0

    x_pred, mus, ln_vars = self.encode(mvae_in)
    x_pred = mus

    # KL TERM
    if self.beta != 0:
        kl += gaussian_kl_divergence(mus, ln_vars) / batchsize
    else:
        kl = chainer.Variable(cp.zeros(1).astype(cp.float32))

    # MSE TERM
    if self.alpha != 0:
        mse_loss += F.sum(F.mean_squared_error(x_true, x_pred)) / batchsize
    else:
        mse_loss = chainer.Variable(cp.zeros(1).astype(cp.float32))

    self.mse_loss = self.alpha * mse_loss
    self.kl = self.beta * kl

    self.loss = chainer.Variable(cp.zeros(1).astype(cp.float32))
    if self.alpha:
        self.loss += self.mse_loss
    if self.beta:
        self.loss += self.kl

    chainer.report({'loss': self.loss}, self)
    chainer.report({'mse_l': self.mse_loss}, self)
    chainer.report({'kl': self.kl}, self)
    return self.loss
def lf(x):
    in_img = x[0]
    in_labels = x[1:-1]
    mask = x[-1]

    # avoid dividing by 0 when there are no labelled data points in the batch
    non_masked = sum(mask) + 1
    mask_flipped = 1 - mask

    rec_loss = 0
    label_loss = 0
    label_acc = 0

    mu, ln_var = self.encode(in_img)
    batchsize = len(mu.data)

    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        out_img = self.decode(z, sigmoid=False)
        rec_loss += F.bernoulli_nll(in_img, out_img) / (k * batchsize)

        out_labels = self.predict_label(mu, ln_var, softmax=False)
        for i in range(self.n_latent):
            n = self.groups_len[i] - 1
            # certain labels should not contribute to the label loss
            fixed_labels = (cupy.tile(cupy.array([1] + [-100] * n),
                                      (batchsize, 1))
                            * mask_flipped[:, cupy.newaxis])
            out_labels[i] = out_labels[i] * mask[:, cupy.newaxis] + fixed_labels
            label_acc += F.accuracy(out_labels[i], in_labels[i])
            label_loss += F.softmax_cross_entropy(
                out_labels[i], in_labels[i]) / (k * non_masked)

    self.rec_loss = self.alpha * rec_loss
    self.label_loss = self.gamma * label_loss
    self.label_acc = label_acc

    kl = gaussian_kl_divergence(mu, ln_var) / batchsize
    self.kl = self.beta * kl

    self.loss = self.rec_loss + self.label_loss + self.kl
    return self.loss, self.rec_loss, self.label_loss, self.label_acc, self.kl
def lf(x):
    # get mu and ln_var of the approximate posterior q(z|x) from the encoder
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # sample the latent variable z from the encoder output
    z = F.gaussian(mu, ln_var)
    # reconstruction loss
    self.rec_loss = F.bernoulli_nll(x, self.decode(z, sigmoid=False)) / batchsize
    # self.rec_loss = F.sigmoid_cross_entropy(x, self.decode(z, sigmoid=False))
    # KL divergence
    self.KL_loss = gaussian_kl_divergence(mu, ln_var) / batchsize
    # total loss
    self.loss = self.rec_loss + self.KL_loss
    # return the losses separately
    return [self.rec_loss, self.loss]
def lf(x):
    mu, ln_var = self.encode(x)
    batch_size = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in range(k):
        z = f.gaussian(mu, ln_var)
        z.name = "z"
        # bernoulli_nll applies the sigmoid internally, so decode with sigmoid=False
        rec_loss += f.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (k * batch_size)
    self.rec_loss = rec_loss
    self.rec_loss.name = "reconstruction error"
    self.latent_loss = C * gaussian_kl_divergence(mu, ln_var) / batch_size
    self.latent_loss.name = "latent loss"
    self.loss = self.rec_loss + self.latent_loss
    self.loss.name = "loss"
    return self.loss
def lf(self, x):
    k = 1
    beta = 1.0
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        beta * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report({
        'rec_loss': rec_loss,
        'loss': self.loss
    }, observer=self)
    return self.loss
def lf(x):
    images, labels = zip(*x)
    images = list(images)
    mu, ln_var = self.encode(images)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(images, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        C * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report({
        'rec_loss': rec_loss,
        'loss': self.loss
    }, observer=self)
    return self.loss
def lf(x):
    mu, ln_var = self.encode(x)
    mean_mu, mean_sigma = calculate_means(mu, ln_var)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        mu_, ln_var_ = self.decode(z)
        rec_loss += F.gaussian_nll(x, mu_, ln_var_) / (k * batchsize)
    self.rec_loss = rec_loss
    kl = gaussian_kl_divergence(mu, ln_var) / batchsize
    self.loss = self.rec_loss + C * kl
    chainer.report(
        {
            'rec_loss': rec_loss,
            'loss': self.loss,
            'kl': kl,
            'mu': mean_mu,
            'sigma': mean_sigma,
        },
        observer=self)
    return self.loss
def lf(x):
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for _ in range(self.k):
        z_sampled = F.gaussian(mu, ln_var)
        rec_logits = self.decode(z_sampled, sigmoid=False)
        rec_loss += F.bernoulli_nll(x, rec_logits) / (self.k * batchsize)
    self.rec_loss = rec_loss
    # latent loss
    lat_loss = self.beta * gaussian_kl_divergence(mu, ln_var) / batchsize
    self.lat_loss = lat_loss
    # dip loss
    dip_loss = self.regularizer(mu, ln_var, z_sampled)
    self.loss = rec_loss + lat_loss + dip_loss
    chainer.report({"rec_loss": rec_loss,
                    "lat_loss": lat_loss,
                    "dip_loss": dip_loss,
                    "loss": self.loss},
                   observer=self)
    return self.loss
def lf(*args, **kwargs):
    t0 = args[-1]
    xp = cupy.get_array_module(t0)
    t = xp.expand_dims(t0.astype(xp.float32), axis=1)
    # t = xp.eye(self.n_label, dtype=np.float32)[t0]
    x = args[0]
    mu, ln_var = self.encode(x, t)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, t, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        C * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report({
        'rec_loss': rec_loss,
        'loss': self.loss
    }, observer=self)
    return self.loss
sum_dis_loss = 0.
sum_gan_loss = 0.
sum_like_loss = 0.
sum_prior_loss = 0.
sum_L_base = 0.
sum_L_rec = 0.
sum_L_p = 0.

## mini-batch training
for i in six.moves.range(0, N_train, batchsize):
    # pull out one batch of training data
    x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))

    ##### forward pass and loss computation
    # KL divergence
    mu, ln_var = encode(x, test=False)
    x_rec = decode(mu, sigmoid=True)
    batchsize = len(mu.data)
    kl_loss = gaussian_kl_divergence(mu, ln_var) / reduce(lambda a, b: a * b, mu.data.shape)

    # draw a random z from N(0, 1) and decode it
    z_p = xp.random.standard_normal(mu.data.shape).astype('float32')
    z_p = chainer.Variable(z_p)
    x_p = decode(z_p)

    # discriminator outputs
    d_x_rec, h_out_rec = disc(x_rec)
    d_x_base, h_out_base = disc(x)
    d_x_p, h_out_p = disc(x_p)

    # softmax cross-entropy terms for the discriminator
    L_rec = F.softmax_cross_entropy(d_x_rec, Variable(xp.zeros(batchsize, dtype=np.int32)))
    L_base = F.softmax_cross_entropy(d_x_base, Variable(xp.ones(batchsize, dtype=np.int32)))
    L_p = F.softmax_cross_entropy(d_x_p, Variable(xp.zeros(batchsize, dtype=np.int32)))
def lf(self, in_img, in_rel_labels, rel_masks, object_labels,
       object_label_masks, k=1):
    batchsize = float(len(in_img))
    denom = (k * batchsize * self.objects_n)

    latents = []
    rec_loss = 0
    kl = 0
    label_obj_loss = 0
    label_obj_acc = 0
    label_rel_loss = 0
    label_rel_acc = 0

    latents, mus, ln_vars = self.get_latent_indiv(in_img)
    offset_channels_n = self.rgb_channels_n + self.bg_channel_n + self.depth_channel_n

    for obj_idx in range(self.objects_n):
        rec_mask = in_img[:, obj_idx + offset_channels_n][:, None, :, :]

        # KL TERM
        if self.beta != 0:
            kl += gaussian_kl_divergence(mus[obj_idx], ln_vars[obj_idx]) / denom
        else:
            kl = chainer.Variable(cp.zeros(1).astype(cp.float32))

        # RESAMPLING
        for l in six.moves.range(k):
            # RECONSTRUCTION TERM
            if self.alpha != 0:
                out_img = self.spatial_decode(latents[obj_idx], sigmoid=False)
                x_true = in_img[:, :self.rgb_channels_n + self.depth_channel_n]
                x_pred = out_img[:, :self.rgb_channels_n + self.depth_channel_n]
                rec_loss += F.sum(
                    F.bernoulli_nll(x_true, x_pred, reduce="no") * rec_mask) / denom

                x_true = in_img[:, obj_idx + offset_channels_n]
                x_pred = out_img[:, 4]
                rec_loss += F.bernoulli_nll(x_true, x_pred) / denom
            else:
                rec_loss = chainer.Variable(cp.zeros(1).astype(cp.float32))

            # OBJECT CLASSIFICATION TERM
            if self.gamma_obj != 0:
                out_obj_labels = self.predict_obj_label(latents[obj_idx], softmax=False)
                in_obj_labels = object_labels[:, obj_idx, :self.groups_obj_n].astype(cp.int32)
                masks = object_label_masks[:, obj_idx].astype(cp.float32)
                for i in range(self.groups_obj_n):
                    o_mask = masks[:, i].astype(cp.float32)
                    if F.sum(o_mask).data == 0:
                        label_obj_loss += 0
                        label_obj_acc += 1 / (k * self.objects_n)
                        continue
                    label_obj_loss += F.sum(
                        F.softmax_cross_entropy(out_obj_labels[i],
                                                in_obj_labels[:, i],
                                                reduce='no') * o_mask) \
                        / (k * F.sum(o_mask) * self.objects_n)
                    in_aug_obj_labels = (in_obj_labels[:, i] * o_mask
                                         + (100 * (1 - o_mask))).astype(cp.int32)
                    label_obj_acc += F.accuracy(
                        out_obj_labels[i], in_aug_obj_labels,
                        ignore_label=100) / (k * self.objects_n)
            else:
                label_obj_loss = chainer.Variable(cp.zeros(1).astype(cp.float32))
                label_obj_acc = chainer.Variable(cp.zeros(1).astype(cp.float32))

    #########################################
    ############# RELATIONAL LABELS #########
    #########################################
    latent_concat = F.concat((latents), axis=1)
    mus_rel = []
    ln_vars_rel = []
    for i in range(self.groups_rel_n):
        mu_rel, ln_var_rel = self.operators[i](latent_concat)
        mus_rel.append(mu_rel)
        ln_vars_rel.append(ln_var_rel)
    mus_rel = F.concat((mus_rel), axis=1)
    ln_vars_rel = F.concat((ln_vars_rel), axis=1)
    latent_rel = F.gaussian(mus_rel, ln_vars_rel)
    out_rel_labels = self.predict_rel_label(latent_rel, softmax=False)

    # KL TERM
    if self.beta != 0:
        kl += gaussian_kl_divergence(mus_rel, ln_vars_rel) / batchsize
    else:
        kl = chainer.Variable(cp.zeros(1).astype(cp.float32))

    if self.gamma_rel != 0:
        for i in range(self.groups_rel_n):
            r_mask = rel_masks[:, i].astype(cp.float32)
            if F.sum(r_mask).data == 0:
                label_rel_loss += 0
                label_rel_acc += 1
                continue
            label_rel_loss += F.sum(
                F.softmax_cross_entropy(out_rel_labels[i],
                                        in_rel_labels[:, i],
                                        reduce='no') * r_mask) / (k * F.sum(r_mask))
            in_aug_rel_labels = (in_rel_labels[:, i] * r_mask
                                 + (100 * (1 - r_mask))).astype(cp.int32)
            label_rel_acc += F.accuracy(out_rel_labels[i],
                                        in_aug_rel_labels,
                                        ignore_label=100) / (k)
    else:
        label_rel_loss = chainer.Variable(cp.zeros(1).astype(cp.float32))
        label_rel_acc = chainer.Variable(cp.zeros(1).astype(cp.float32))
    #########################################
    ############# RELATIONAL LABELS #########
    #########################################
    self.total_corr = chainer.Variable(cp.zeros(1).astype(cp.float32))

    self.rec_loss = self.alpha * rec_loss
    self.kl = self.beta * kl
    self.label_obj_loss = self.gamma_obj * label_obj_loss
    self.label_obj_acc = label_obj_acc
    self.label_rel_loss = self.gamma_rel * label_rel_loss
    self.label_rel_acc = label_rel_acc

    self.loss = chainer.Variable(cp.zeros(1).astype(cp.float32))
    if self.alpha:
        self.loss += self.rec_loss
    if self.beta:
        self.loss += self.kl
    if self.gamma_obj:
        self.loss += self.label_obj_loss
    if self.gamma_rel:
        self.loss += self.label_rel_loss

    chainer.report({'loss': self.loss}, self)
    chainer.report({'rec_l': self.rec_loss}, self)
    chainer.report({'kl': self.kl}, self)
    chainer.report({'obj_l': self.label_obj_loss}, self)
    chainer.report({'obj_a': self.label_obj_acc}, self)
    chainer.report({'rel_l': self.label_rel_loss}, self)
    chainer.report({'rel_a': self.label_rel_acc}, self)

    return self.loss