def __prep_loss_optimizer(self, real_x, disc_real, disc_fake,
                          q_c_given_x_dist_info):
    """Build the GAN losses, the mutual-information bonus, and training ops.

    Supports two flavours selected by ``self.mode``: WGAN with gradient
    penalty, or a vanilla / DCGAN sigmoid-cross-entropy game.

    Args:
        real_x: batch of real samples (assumed flat [batch, dim] — confirm
            against caller; the gradient penalty reduces over axis 1).
        disc_real: discriminator outputs on real samples.
        disc_fake: discriminator outputs on generated samples.
        q_c_given_x_dist_info: parameters of the recognition distribution
            Q(c|x) produced by the shared D/Q network.

    Side effects: sets self.gen_cost, self.disc_cost, self.mi_est,
    self.gen_opt, self.disc_opt, and appends to self.log_vars.
    """
    if self.mode == 'wgan-gp':
        # Critic objective: widen the score gap between real and fake.
        self.gen_cost = -tf.reduce_mean(disc_fake)
        self.disc_cost = (tf.reduce_mean(disc_fake)
                          - tf.reduce_mean(disc_real))

        # Gradient penalty on random interpolates between real and fake
        # samples: penalize critic gradient norms away from 1.
        eps = tf.random_uniform(
            shape=[self.batch_size, 1],
            minval=0.,
            maxval=1.,
            seed=SEED,
        )
        mixed = real_x + eps * (self.fake_x - real_x)
        critic_on_mixed, _ = self.__D_Q(mixed)
        grads = tf.gradients(critic_on_mixed, [mixed])[0]
        grad_norms = tf.sqrt(
            tf.reduce_sum(tf.square(grads), reduction_indices=[1]))
        self.disc_cost += self.gp_coeff * tf.reduce_mean(
            (grad_norms - 1.) ** 2)
    else:  # vanilla / DC GAN
        # Generator: make fakes look real (labels = 1 on fake logits).
        self.gen_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.ones_like(disc_fake)))
        # Discriminator: fakes -> 0, reals -> 1, each half-weighted.
        fake_term = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.zeros_like(disc_fake)))
        real_term = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_real, labels=tf.ones_like(disc_real)))
        self.disc_cost = 0.5 * fake_term + 0.5 * real_term

    # Variational mutual-information lower bound (InfoGAN-style):
    # MI ~= H(c) - H(c|x), with H(c|x) estimated through Q(c|x).
    logli_posterior = self.c_dist.logli(self.c, q_c_given_x_dist_info)
    logli_prior = self.c_dist.logli_prior(self.c)
    cond_entropy = tf.reduce_mean(-logli_posterior)
    prior_entropy = tf.reduce_mean(-logli_prior)
    self.mi_est = prior_entropy - cond_entropy
    # Both players are rewarded for keeping the latent code recoverable.
    self.disc_cost -= self.mi_coeff * self.mi_est
    self.gen_cost -= self.mi_coeff * self.mi_est

    # Expose the scalars for logging.
    for tag, tensor in (("Discriminator loss", self.disc_cost),
                        ("Generator loss", self.gen_cost),
                        ("MI", self.mi_est)):
        self.log_vars.append((tag, tensor))

    # Optimizers — hyperparameters follow the respective papers
    # (WGAN-GP: lr 1e-4, betas (0, 0.9); DCGAN: lr 2e-4, beta1 0.5).
    if self.mode == 'wgan-gp':
        def make_adam():
            return tf.train.AdamOptimizer(
                learning_rate=1e-4, beta1=0., beta2=0.9)
    else:  # vanilla / DC GAN
        def make_adam():
            return tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5)
    self.gen_opt = make_adam().minimize(
        self.gen_cost, var_list=params_with_name('Generator'))
    self.disc_opt = make_adam().minimize(
        self.disc_cost, var_list=params_with_name('Discriminator.'))
def __prep_loss_optimizer(self, norm_x1, norm_img_white, r11, c_p0, c_p1,
                          c_p2, disc_real, disc_fake):
    """Build the reconstruction/classification objective and the WGAN-GP
    objectives, together with their Adam training ops.

    Args:
        norm_x1: normalized input image batch; reshaped to
            [-1, self.output_dim].
        norm_img_white: normalized all-white reference image batch;
            reshaped to [-1, self.output_dim].
        r11: target latent representation for the zero-reset branch.
        c_p0, c_p1, c_p2: class-probability outputs of the three
            classifier heads (assumed in (0, 1] — confirm the upstream
            softmax/sigmoid, since they feed tf.log below).
        disc_real: critic scores on real data.
        disc_fake: critic scores on generated data.

    Side effects: sets self.class{0,1,2}_loss, self.rec_loss,
    self.reset0_loss, self.rec_zero_loss, self.class_loss, self.loss,
    self.optimizer, self.gen_cost, self.disc_cost,
    self.gen_train_optimizer, self.disc_train_optimizer.
    """
    norm_x1 = tf.reshape(norm_x1, [-1, self.output_dim])
    norm_img_white = tf.reshape(norm_img_white, [-1, self.output_dim])

    # [Loss 1] image reconstruction (per-sample squared error).
    reconstr_img_loss = tf.reduce_sum(tf.square(norm_x1 - self.x_out1),
                                      axis=1)
    # [Loss 2] latent-representation reconstruction with the rest zeroed.
    # (local renamed from "reconstr__rep_loss" — double-underscore typo)
    reconstr_rep_loss = tf.reduce_sum(tf.square(self.r1 - r11), axis=1)
    # [Loss 3] an all-zero representation should decode to the white image.
    reconstr_img_zero_loss = tf.reduce_sum(
        tf.square(norm_img_white - self.img_out_zero), axis=1)

    # [Loss 4] classification losses.
    # NOTE(review): tf.log has no epsilon guard; if a temp_* term reaches 0
    # this yields -inf/NaN. Consider tf.log(x + 1e-8) — left unchanged here
    # to preserve training behavior.
    temp_1 = self.vec_one - tf.reduce_sum(
        (self.class_gt0 - self.class_gt0 * c_p1), 1) * tf.reduce_sum(
            (self.class_gt1 - self.class_gt1 * c_p1), 1)
    self.class1_loss = -tf.reduce_mean(tf.log(temp_1))
    temp_2 = self.vec_one - tf.reduce_sum(
        (self.class_gt0 - self.class_gt0 * c_p2), 1) * tf.reduce_sum(
            (self.class_gt2 - self.class_gt2 * c_p2), 1)
    self.class2_loss = -tf.reduce_mean(tf.log(temp_2))
    # Zero-input samples must be classified as class 0.
    self.class0_loss = -tf.reduce_mean(self.class_gt0 * tf.log(c_p0)) * 2

    # Average over the batch and apply the per-term weights.
    self.rec_loss = 1.0 * tf.reduce_mean(reconstr_img_loss)
    self.reset0_loss = 1.0 * tf.reduce_mean(reconstr_rep_loss)
    self.rec_zero_loss = 10 * tf.reduce_mean(reconstr_img_zero_loss)
    self.class_loss = 1000 * (self.class1_loss + self.class2_loss +
                              self.class0_loss)
    self.loss = (self.rec_loss + self.reset0_loss + self.rec_zero_loss +
                 self.class_loss)

    lr = self.lr
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                            beta1=0.,
                                            beta2=0.9).minimize(self.loss)
    print('Learning rate=')
    print(lr)

    # ==============GAN LOSS=============================
    # WGAN critic objective plus gradient penalty on random interpolates
    # between real and generated data.
    self.gen_cost = -tf.reduce_mean(disc_fake)
    self.disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)

    alpha = tf.random_uniform(shape=[self.batch_size, 1],
                              minval=0.,
                              maxval=1.)
    differences = self.fake_data - self.real_data
    interpolates = self.real_data + (alpha * differences)
    gradients = tf.gradients(self.__GAN_discriminator(interpolates),
                             [interpolates])[0]
    # axis= replaces the deprecated reduction_indices=, matching the
    # axis=1 usage above in this same method.
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
    LAMBDA = 10  # Gradient penalty lambda hyperparameter
    self.disc_cost += LAMBDA * gradient_penalty
    # (removed the no-op self-assignments of gen_cost / disc_cost)

    gen_params = lib.params_with_name('Decoder')  # Generator is Decoder
    disc_params = lib.params_with_name('Discriminator')
    self.gen_train_optimizer = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5,
        beta2=0.9).minimize(self.gen_cost, var_list=gen_params)
    self.disc_train_optimizer = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5,
        beta2=0.9).minimize(self.disc_cost, var_list=disc_params)