def train_model():
    """Build the generator/discriminator graph and run the training loop.

    The discriminator ``D`` is applied to real and generated images and its
    output is compared to its own input with ``losses.mae`` (``D`` is used as
    a reconstruction model).  The discriminator loss is
    ``loss_real - k_factor * loss_fake`` and ``k_factor`` is updated after
    every batch from ``FLAGS.gamma * loss_real - loss_fake``, which matches
    the BEGAN formulation.

    The learning rate is halved every 200 epochs, with a floor of 1e-6.

    All hyper-parameters are read from the module-level ``FLAGS``.

    Side effects: writes summaries through the writer returned by
    ``tu.manage_summaries``, saves image grids with ``vu.save_image`` and
    saves a checkpoint under ``FLAGS.model_dir`` at the end of every epoch.

    Returns:
        None.
    """
    # Setup session
    sess = tu.setup_training_session()

    ##########
    # Inputs
    ##########

    # Setup async input queue of real images
    X_real = du.read_celebA()

    # Noise: the batch size is read from the real batch at run time and two
    # independent uniform samples in [-1, 1) are drawn, one for the
    # discriminator update and one for the generator update.
    batch_size = tf.shape(X_real)[0]
    z_noise_for_D = tf.random_uniform((batch_size, FLAGS.z_dim,), minval=-1, maxval=1,
                                      name="z_input_D")
    z_noise_for_G = tf.random_uniform((batch_size, FLAGS.z_dim,), minval=-1, maxval=1,
                                      name="z_input_G")

    # k factor: weight of the fake reconstruction loss in the D loss
    k_factor = tf.Variable(initial_value=0., trainable=False, name='anneal_factor')

    # Learning rate, kept in a variable so that update_lr can anneal it
    lr = tf.Variable(initial_value=FLAGS.learning_rate, trainable=False, name='learning_rate')

    ########################
    # Instantiate models
    ########################
    G = models.Generator(nb_filters=FLAGS.nb_filters_G)
    D = models.Discriminator(h_dim=FLAGS.h_dim, nb_filters=FLAGS.nb_filters_D)

    ##########
    # Outputs
    ##########
    X_rec_real = D(X_real, output_name="X_rec_real")

    X_fake_for_D = G(z_noise_for_D, output_name="X_fake_for_D")
    X_rec_fake_for_D = D(X_fake_for_D, reuse=True, output_name="X_rec_fake_for_D")

    X_fake_for_G = G(z_noise_for_G, reuse=True, output_name="X_fake_for_G")
    X_rec_fake_for_G = D(X_fake_for_G, reuse=True, output_name="X_rec_fake_for_G")

    # Output images for plots
    real_toplot = du.unnormalize_image(X_real, name="real_toplot")
    generated_toplot = du.unnormalize_image(X_fake_for_G, name="generated_toplot")
    real_rec_toplot = du.unnormalize_image(X_rec_real, name="rec_toplot")
    generated_rec_toplot = du.unnormalize_image(X_rec_fake_for_G, name="generated_rec_toplot")

    ###########################
    # Instantiate optimizers
    ###########################
    # A single Adam optimizer is shared by the G and D updates.
    opt = tf.train.AdamOptimizer(learning_rate=lr, name='opt')

    ###########################
    # Losses
    ###########################
    loss_real = losses.mae(X_real, X_rec_real)
    loss_fake_for_D = losses.mae(X_fake_for_D, X_rec_fake_for_D)
    loss_fake_for_G = losses.mae(X_fake_for_G, X_rec_fake_for_G)

    L_D = loss_real - k_factor * loss_fake_for_D
    L_G = loss_fake_for_G
    # Scalar tracked in the summaries as a training progress indicator
    Convergence = loss_real + tf.abs(FLAGS.gamma * loss_real - loss_fake_for_G)

    ###########################
    # Compute update ops
    ###########################
    dict_G_vars = G.get_trainable_variables()
    G_vars = list(dict_G_vars.values())

    dict_D_vars = D.get_trainable_variables()
    D_vars = list(dict_D_vars.values())

    G_gradvar = opt.compute_gradients(L_G, var_list=G_vars)
    G_update = opt.apply_gradients(G_gradvar, name='G_loss_minimize')

    D_gradvar = opt.compute_gradients(L_D, var_list=D_vars)
    D_update = opt.apply_gradients(D_gradvar, name='D_loss_minimize')

    # NOTE(review): k_factor is not clipped here; the BEGAN paper keeps k in
    # [0, 1] -- confirm whether clipping is intended.
    update_k_factor = tf.assign(
        k_factor,
        k_factor + FLAGS.lambdak * (FLAGS.gamma * loss_real - loss_fake_for_G))
    # Halve the learning rate, never going below 1e-6
    update_lr = tf.assign(lr, tf.maximum(1E-6, lr / 2))

    ##########################
    # Summary ops
    ##########################

    # Add summary for gradients
    tu.add_gradient_summary(G_gradvar)
    tu.add_gradient_summary(D_gradvar)

    # Add scalar summaries for G
    tf.summary.scalar("G loss", L_G)

    # Add scalar summaries for D
    tf.summary.scalar("D loss", L_D)

    # Add scalar summaries for the training state
    tf.summary.scalar("k_factor", k_factor)
    tf.summary.scalar("Convergence", Convergence)
    tf.summary.scalar("learning rate", lr)

    summary_op = tf.summary.merge_all()

    ############################
    # Start training
    ############################

    # Initialize session
    saver = tu.initialize_session(sess)

    # Start queues
    coord, threads = du.manage_queues(sess)

    # Summaries
    writer = tu.manage_summaries(sess)

    # Tensors whose values are passed to tu.check_data
    list_data = [z_noise_for_D, z_noise_for_G]
    list_data += [X_real, X_rec_real, X_fake_for_G, X_rec_fake_for_G,
                  X_fake_for_D, X_rec_fake_for_D]
    list_data += [generated_toplot, real_toplot]

    # Summaries are written every `summary_every` batches.  For
    # nb_batch_per_epoch >= 2 this equals
    # nb_batch_per_epoch // int(0.5 * nb_batch_per_epoch); the max(1, ...)
    # guards avoid a ZeroDivisionError when nb_batch_per_epoch < 2.
    nb_batch = FLAGS.nb_batch_per_epoch
    summary_every = max(1, nb_batch // max(1, int(0.5 * nb_batch)))

    try:
        # Run checks on data dimensions
        output = sess.run(list_data)
        tu.check_data(output, list_data)

        for e in tqdm(range(FLAGS.nb_epoch), desc="Training progress"):

            # Anneal learning rate every 200 epochs
            if (e + 1) % 200 == 0:
                sess.run([update_lr])

            t = tqdm(range(nb_batch), desc="Epoch %i" % e, mininterval=0.5)
            for batch_counter in t:

                # G step, D step and k update are run in a single call
                sess.run([G_update, D_update, update_k_factor])

                if batch_counter % summary_every == 0:
                    summary = sess.run(summary_op)
                    writer.add_summary(summary, e * nb_batch + batch_counter)

                t.set_description('Epoch %s:' % e)

            # Plot some generated images
            Xf, Xr, Xrrec, Xfrec = sess.run(
                [generated_toplot, real_toplot, real_rec_toplot, generated_rec_toplot])
            vu.save_image(Xf, Xr, title="current_batch", e=e)
            vu.save_image(Xrrec, Xfrec, title="reconstruction", e=e)

            # Save session
            saver.save(sess, os.path.join(FLAGS.model_dir, "model"), global_step=e)

            # Show data statistics
            output = sess.run(list_data)
            tu.check_data(output, list_data)
    finally:
        # Stop the input threads even when training is interrupted or fails
        coord.request_stop()
        coord.join(threads)

    print('Finished training!')
def train_model():
    """Build the generator/discriminator graph and run the training loop.

    The discriminator ``D`` reconstructs its input; real and generated
    reconstructions are scored with ``losses.mae``.  The discriminator
    minimises ``loss_real - k_factor * loss_fake`` and the generator
    minimises the reconstruction loss of its own samples.  ``k_factor`` is
    updated after every batch from ``FLAGS.gamma * loss_real - loss_fake``
    (the BEGAN formulation).

    In this variant the learning rate is halved every 5 epochs.

    All hyper-parameters are read from the module-level ``FLAGS``.

    Side effects: writes summaries, saves image grids through
    ``vu.save_image`` and saves a checkpoint under ``FLAGS.model_dir`` at the
    end of every epoch.

    Returns:
        None.
    """
    # Setup session
    sess = tu.setup_training_session()

    ##########
    # Inputs
    ##########

    # Setup async input queue of real images
    X_real = du.read_celebA()

    # Noise: one uniform sample in [-1, 1) per update (D and G), sized from
    # the real batch at run time.
    batch_size = tf.shape(X_real)[0]
    noise_shape = (batch_size, FLAGS.z_dim,)
    z_noise_for_D = tf.random_uniform(noise_shape, minval=-1, maxval=1, name="z_input_D")
    z_noise_for_G = tf.random_uniform(noise_shape, minval=-1, maxval=1, name="z_input_G")

    # k factor: weight of the fake reconstruction loss in the D loss
    k_factor = tf.Variable(initial_value=0., trainable=False, name='anneal_factor')

    # Learning rate, kept in a variable so that update_lr can anneal it
    lr = tf.Variable(initial_value=FLAGS.learning_rate, trainable=False, name='learning_rate')

    ########################
    # Instantiate models
    ########################
    G = models.Generator(nb_filters=FLAGS.nb_filters_G)
    D = models.Discriminator(h_dim=FLAGS.h_dim, nb_filters=FLAGS.nb_filters_D)

    ##########
    # Outputs
    ##########
    X_rec_real = D(X_real, output_name="X_rec_real")

    X_fake_for_D = G(z_noise_for_D, output_name="X_fake_for_D")
    X_rec_fake_for_D = D(X_fake_for_D, reuse=True, output_name="X_rec_fake_for_D")

    X_fake_for_G = G(z_noise_for_G, reuse=True, output_name="X_fake_for_G")
    X_rec_fake_for_G = D(X_fake_for_G, reuse=True, output_name="X_rec_fake_for_G")

    # Output images for plots
    real_toplot = du.unnormalize_image(X_real, name="real_toplot")
    generated_toplot = du.unnormalize_image(X_fake_for_G, name="generated_toplot")
    real_rec_toplot = du.unnormalize_image(X_rec_real, name="rec_toplot")
    generated_rec_toplot = du.unnormalize_image(X_rec_fake_for_G, name="generated_rec_toplot")

    ###########################
    # Instantiate optimizers
    ###########################
    # A single Adam optimizer is shared by the G and D updates.
    opt = tf.train.AdamOptimizer(learning_rate=lr, name='opt')

    ###########################
    # Losses
    ###########################
    loss_real = losses.mae(X_real, X_rec_real)
    loss_fake_for_D = losses.mae(X_fake_for_D, X_rec_fake_for_D)
    loss_fake_for_G = losses.mae(X_fake_for_G, X_rec_fake_for_G)

    L_D = loss_real - k_factor * loss_fake_for_D
    L_G = loss_fake_for_G
    # Scalar tracked in the summaries as a training progress indicator
    Convergence = loss_real + tf.abs(FLAGS.gamma * loss_real - loss_fake_for_G)

    ###########################
    # Compute update ops
    ###########################
    dict_G_vars = G.get_trainable_variables()
    G_vars = list(dict_G_vars.values())

    dict_D_vars = D.get_trainable_variables()
    D_vars = list(dict_D_vars.values())

    G_gradvar = opt.compute_gradients(L_G, var_list=G_vars)
    G_update = opt.apply_gradients(G_gradvar, name='G_loss_minimize')

    D_gradvar = opt.compute_gradients(L_D, var_list=D_vars)
    D_update = opt.apply_gradients(D_gradvar, name='D_loss_minimize')

    # NOTE(review): k_factor is not clipped here; the BEGAN paper keeps k in
    # [0, 1] -- confirm whether clipping is intended.
    update_k_factor = tf.assign(
        k_factor,
        k_factor + FLAGS.lambdak * (FLAGS.gamma * loss_real - loss_fake_for_G))
    # NOTE(review): the halving has no lower bound, so the learning rate
    # shrinks by ~1000x every 50 epochs -- confirm this is intended.
    update_lr = tf.assign(lr, lr / 2)

    ##########################
    # Summary ops
    ##########################

    # Add summary for gradients
    tu.add_gradient_summary(G_gradvar)
    tu.add_gradient_summary(D_gradvar)

    # Add scalar summaries for G
    tf.summary.scalar("G loss", L_G)

    # Add scalar summaries for D
    tf.summary.scalar("D loss", L_D)

    # Add scalar summaries for the training state
    tf.summary.scalar("k_factor", k_factor)
    tf.summary.scalar("Convergence", Convergence)
    tf.summary.scalar("learning rate", lr)

    summary_op = tf.summary.merge_all()

    ############################
    # Start training
    ############################

    # Initialize session
    saver = tu.initialize_session(sess)

    # Start queues
    coord, threads = du.manage_queues(sess)

    # Summaries
    writer = tu.manage_summaries(sess)

    # Tensors whose values are passed to tu.check_data
    list_data = [z_noise_for_D, z_noise_for_G]
    list_data += [
        X_real, X_rec_real, X_fake_for_G, X_rec_fake_for_G, X_fake_for_D,
        X_rec_fake_for_D
    ]
    list_data += [generated_toplot, real_toplot]

    # Number of batches between two summary writes.  Identical to
    # nb_batch_per_epoch // int(0.5 * nb_batch_per_epoch) whenever
    # nb_batch_per_epoch >= 2; the max(1, ...) guards prevent the
    # ZeroDivisionError that expression raises for nb_batch_per_epoch < 2.
    nb_batch = FLAGS.nb_batch_per_epoch
    summary_every = max(1, nb_batch // max(1, int(0.5 * nb_batch)))

    try:
        # Run checks on data dimensions
        output = sess.run(list_data)
        tu.check_data(output, list_data)

        for e in tqdm(range(FLAGS.nb_epoch), desc="Training progress"):

            # Anneal learning rate every 5 epochs
            if (e + 1) % 5 == 0:
                sess.run([update_lr])

            t = tqdm(range(nb_batch), desc="Epoch %i" % e, mininterval=0.5)
            for batch_counter in t:

                # G step, D step and k update are run in a single call
                sess.run([G_update, D_update, update_k_factor])

                if batch_counter % summary_every == 0:
                    summary = sess.run(summary_op)
                    writer.add_summary(summary, e * nb_batch + batch_counter)

                t.set_description('Epoch %s:' % e)

            # Plot some generated images
            Xf, Xr, Xrrec, Xfrec = sess.run([
                generated_toplot, real_toplot, real_rec_toplot, generated_rec_toplot
            ])
            vu.save_image(Xf, Xr, title="current_batch", e=e)
            vu.save_image(Xrrec, Xfrec, title="reconstruction", e=e)

            # Save session
            saver.save(sess, os.path.join(FLAGS.model_dir, "model"), global_step=e)

            # Show data statistics
            output = sess.run(list_data)
            tu.check_data(output, list_data)
    finally:
        # Stop the input threads even when training is interrupted or fails
        coord.request_stop()
        coord.join(threads)

    print('Finished training!')
def train_model():
    """Build a three-stage GAN graph and run the training loop.

    Three generator/discriminator pairs (``G16``/``D16``, ``G32``/``D32``,
    ``G64``/``D64``) are chained: each later generator takes the previous
    generator's output together with the features returned by the previous
    discriminator in ``mode="G"``.  The suffixes presumably denote image
    resolutions (16, 32, 64) -- confirm against ``du.read_celebA``.

    All generators are trained jointly on the sum of their losses, and all
    discriminators jointly on the sum of their real and fake losses.  Each
    batch runs one discriminator update followed by one generator update.

    Hyper-parameters are read from the module-level ``FLAGS``.

    Side effects: writes summaries, saves image grids through
    ``vu.save_image`` and saves a checkpoint under ``FLAGS.model_dir`` at the
    end of every epoch.

    Returns:
        None.
    """
    # Setup session
    sess = tu.setup_training_session()

    # Setup async input queue of real images
    X_real16, X_real32, X_real64 = du.read_celebA()

    #######################
    # Instantiate generators
    #######################
    G16 = models.G16()
    G32 = models.G32()
    G64 = models.G64()

    ###########################
    # Instantiate discriminators
    ###########################
    D16 = models.D16()
    D32 = models.D32()
    D64 = models.D64()

    ###########################
    # Instantiate optimizers
    ###########################
    G_opt = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate, name='G_opt', beta1=0.5)
    D_opt = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate, name='D_opt', beta1=0.5)

    ###########################
    # Instantiate model outputs
    ###########################

    # Uniform noise in [-1, 1).
    # Alternative kept for reference:
    #   tf.random_normal((FLAGS.batch_size, FLAGS.noise_dim,), stddev=0.1)
    noise_input = tf.random_uniform((FLAGS.batch_size, FLAGS.noise_dim,), minval=-1, maxval=1)

    # First stage: noise -> image
    X_fake16 = G16(noise_input)
    D16_real = D16(X_real16, mode="D")
    # In mode="G" the discriminator also returns features that are fed to
    # the next generator.
    X_feat16, D16_fake = D16(X_fake16, reuse=True, mode="G")

    # Second stage: refines the first stage's output
    X_fake32 = G32(X_fake16, X_feat16)
    D32_real = D32(X_real32, mode="D")
    X_feat32, D32_fake = D32(X_fake32, reuse=True, mode="G")

    # Third stage: refines the second stage's output
    X_fake64 = G64(X_fake32, X_feat32)
    D64_real = D64(X_real64)
    D64_fake = D64(X_fake64, reuse=True)

    # Output images
    X_fake16_output = du.unnormalize_image(X_fake16)
    X_real16_output = du.unnormalize_image(X_real16)

    X_fake32_output = du.unnormalize_image(X_fake32)
    X_real32_output = du.unnormalize_image(X_real32)

    X_fake64_output = du.unnormalize_image(X_fake64)
    X_real64_output = du.unnormalize_image(X_real64)

    ###########################
    # Instantiate losses
    ###########################

    # Generator losses: fake samples should be scored as real (target 1)
    G16_loss = objectives.binary_cross_entropy_with_logits(D16_fake, tf.ones_like(D16_fake))
    G32_loss = objectives.binary_cross_entropy_with_logits(D32_fake, tf.ones_like(D32_fake))
    G64_loss = objectives.binary_cross_entropy_with_logits(D64_fake, tf.ones_like(D64_fake))

    G_loss = G16_loss + G32_loss + G64_loss

    # Fake losses: the discriminators should score fake samples as 0
    D16_loss_fake = objectives.binary_cross_entropy_with_logits(D16_fake, tf.zeros_like(D16_fake))
    D32_loss_fake = objectives.binary_cross_entropy_with_logits(D32_fake, tf.zeros_like(D32_fake))
    D64_loss_fake = objectives.binary_cross_entropy_with_logits(D64_fake, tf.zeros_like(D64_fake))

    # Real losses: the discriminators should score real samples as 1
    D16_loss_real = objectives.binary_cross_entropy_with_logits(D16_real, tf.ones_like(D16_real))
    D32_loss_real = objectives.binary_cross_entropy_with_logits(D32_real, tf.ones_like(D32_real))
    D64_loss_real = objectives.binary_cross_entropy_with_logits(D64_real, tf.ones_like(D64_real))

    D_loss = D16_loss_real + D32_loss_real + D64_loss_real
    D_loss += D16_loss_fake + D32_loss_fake + D64_loss_fake

    ###########################
    # Compute gradient updates
    ###########################

    # Trainable variables of the three generators, in G16, G32, G64 order
    G16_vars = list(G16.get_trainable_variables().values())
    G32_vars = list(G32.get_trainable_variables().values())
    G64_vars = list(G64.get_trainable_variables().values())
    G_vars = G16_vars + G32_vars + G64_vars

    # Trainable variables of the three discriminators, in D16, D32, D64 order
    D16_vars = list(D16.get_trainable_variables().values())
    D32_vars = list(D32.get_trainable_variables().values())
    D64_vars = list(D64.get_trainable_variables().values())
    D_vars = D16_vars + D32_vars + D64_vars

    G_gradvar = G_opt.compute_gradients(G_loss, var_list=G_vars,
                                        colocate_gradients_with_ops=True)
    G_update = G_opt.apply_gradients(G_gradvar, name='G_loss_minimize')

    D_gradvar = D_opt.compute_gradients(D_loss, var_list=D_vars,
                                        colocate_gradients_with_ops=True)
    D_update = D_opt.apply_gradients(D_gradvar, name='D_loss_minimize')

    ##########################
    # Summary ops
    ##########################

    # Add summary for gradients
    tu.add_gradient_summary(G_gradvar)
    tu.add_gradient_summary(D_gradvar)

    # Add scalar summaries
    tf.summary.scalar("G16 loss", G16_loss)
    tf.summary.scalar("G32 loss", G32_loss)
    tf.summary.scalar("G64 loss", G64_loss)

    # Real losses
    tf.summary.scalar("D16 loss real", D16_loss_real)
    tf.summary.scalar("D32 loss real", D32_loss_real)
    tf.summary.scalar("D64 loss real", D64_loss_real)

    # Fake losses
    tf.summary.scalar("D16 loss fake", D16_loss_fake)
    tf.summary.scalar("D32 loss fake", D32_loss_fake)
    tf.summary.scalar("D64 loss fake", D64_loss_fake)

    summary_op = tf.summary.merge_all()

    ############################
    # Start training
    ############################

    # Initialize session
    saver = tu.initialize_session(sess)

    # Start queues
    # NOTE(review): the return value is discarded, so nothing in this
    # function explicitly stops the queue threads -- confirm this is intended.
    du.manage_queues(sess)

    # Summaries
    writer = tu.manage_summaries(sess)

    # Run checks on data dimensions
    list_data = [noise_input]
    list_data += [X_fake16, X_fake32, X_fake64]
    list_data += [X_fake16_output, X_fake32_output, X_fake64_output]

    output = sess.run(list_data)
    tu.check_data(output, list_data)

    # Aim for ~20 summary writes per epoch.  max(1, ...) avoids the
    # ZeroDivisionError that `batch_counter % (nb_batch_per_epoch // 20)`
    # raises when an epoch has fewer than 20 batches.
    nb_batch = FLAGS.nb_batch_per_epoch
    summary_every = max(1, nb_batch // 20)

    for e in tqdm(range(FLAGS.nb_epoch), desc="Training progress"):

        t = tqdm(range(nb_batch), desc="Epoch %i" % e, mininterval=0.5)
        for batch_counter in t:

            # Update D
            sess.run([D_update])

            # Update G
            sess.run([G_update])

            if batch_counter % summary_every == 0:
                summary = sess.run(summary_op)
                writer.add_summary(summary, e * nb_batch + batch_counter)

            t.set_description('Epoch %i:' % e)

        # Plot some generated images: (fake, real) pairs for each stage
        output = sess.run([
            X_fake16_output, X_real16_output,
            X_fake32_output, X_real32_output,
            X_fake64_output, X_real64_output,
        ])
        vu.save_image(output[:2], e=e, title="size_16")
        vu.save_image(output[2:4], e=e, title="size_32")
        vu.save_image(output[4:6], e=e, title="size_64")

        # Save session
        saver.save(sess, os.path.join(FLAGS.model_dir, "model"), global_step=e)

        # Show data statistics
        output = sess.run(list_data)
        tu.check_data(output, list_data)

    print('Finished training!')
def train_model():
    """Build a Wasserstein GAN with gradient penalty and run the training loop.

    The critic ``D`` is evaluated on real images, on generated images and on
    random interpolates ``X_hat = epsilon * X_real + (1 - epsilon) * X_fake``.
    The critic loss is the sum of the real and fake ``losses.wasserstein``
    terms plus ``FLAGS.lbd`` times the gradient penalty
    ``mean((||dD(X_hat)/dX_hat||_2 - 1) ** 2)``.

    Each batch runs ``FLAGS.ncritic`` critic updates followed by one
    generator update.

    Hyper-parameters are read from the module-level ``FLAGS``.

    Side effects: writes summaries, saves an image grid through
    ``vu.save_image`` and saves a checkpoint under ``FLAGS.model_dir`` at the
    end of every epoch.

    Returns:
        None.
    """
    # Setup session
    sess = tu.setup_training_session()

    ##########
    # Inputs
    ##########

    # Setup async input queue of real images
    X_real = du.read_celebA()

    # Noise, sized from the real batch at run time
    batch_size = tf.shape(X_real)[0]
    z_noise = tf.random_uniform((batch_size, FLAGS.z_dim), minval=-1, maxval=1, name="z_input")
    # One interpolation coefficient in [0, 1) per sample, broadcast over the
    # remaining three axes
    epsilon = tf.random_uniform((batch_size, 1, 1, 1), minval=0, maxval=1, name="epsilon")

    # Learning rates.  Both variables keep the name given in the original
    # code so that existing checkpoints stay loadable.
    lr_D = tf.Variable(initial_value=FLAGS.learning_rate, trainable=False, name='learning_rate')
    lr_G = tf.Variable(initial_value=FLAGS.learning_rate, trainable=False, name='learning_rate')

    ########################
    # Instantiate models
    ########################
    G = models.Generator()
    D = models.Discriminator()

    ###########################
    # Instantiate optimizers
    ###########################
    # Each optimizer reads its own learning-rate variable (these were
    # swapped before; harmless while both hold the same value, but wrong as
    # soon as one of them is changed).
    G_opt = tf.train.AdamOptimizer(learning_rate=lr_G, name='G_opt', beta1=0.5)
    D_opt = tf.train.AdamOptimizer(learning_rate=lr_D, name='D_opt', beta1=0.5)

    ##########
    # Outputs
    ##########
    X_fake = G(z_noise)
    # Random points on the segments between real and generated samples
    X_hat = epsilon * X_real + (1 - epsilon) * X_fake

    D_real = D(X_real)
    D_fake = D(X_fake, reuse=True)
    D_X_hat = D(X_hat, reuse=True)

    # Gradient of the critic output with respect to the interpolates
    grad_D_X_hat = tf.gradients(D_X_hat, X_hat)[0]

    # Output images
    generated_toplot = du.unnormalize_image(X_fake, name="generated_toplot")
    real_toplot = du.unnormalize_image(X_real, name="real_toplot")

    ###########################
    # Losses
    ###########################

    G_loss = losses.wasserstein(D_fake, -tf.ones_like(D_fake))

    # Gradient penalty: lbd * mean over the batch of (||grad||_2 - 1) ** 2,
    # with the L2 norm taken per sample over the three non-batch axes
    # (assumes rank-4 image batches, consistent with epsilon's shape).
    # tf.nn.l2_loss is not used because it returns sum(t ** 2) / 2 over the
    # WHOLE tensor, which is neither a norm nor per-sample.
    grad_sq_norm = tf.reduce_sum(tf.square(grad_D_X_hat), axis=[1, 2, 3])
    # The small constant keeps the gradient of sqrt finite at a zero norm
    grad_norm = tf.sqrt(grad_sq_norm + 1E-12)
    D_loss_grad = FLAGS.lbd * tf.reduce_mean(tf.square(grad_norm - 1))

    D_loss_real = losses.wasserstein(D_real, -tf.ones_like(D_real))
    D_loss_fake = losses.wasserstein(D_fake, tf.ones_like(D_fake))

    D_loss = D_loss_grad + D_loss_real + D_loss_fake

    ###########################
    # Compute update ops
    ###########################

    dict_G_vars = G.get_trainable_variables()
    G_vars = list(dict_G_vars.values())

    dict_D_vars = D.get_trainable_variables()
    D_vars = list(dict_D_vars.values())

    G_gradvar = G_opt.compute_gradients(G_loss, var_list=G_vars)
    G_update = G_opt.apply_gradients(G_gradvar, name='G_loss_minimize')

    D_gradvar = D_opt.compute_gradients(D_loss, var_list=D_vars)
    D_update = D_opt.apply_gradients(D_gradvar, name='D_loss_minimize')

    ##########################
    # Summary ops
    ##########################

    # Add summary for gradients
    tu.add_gradient_summary(G_gradvar)
    tu.add_gradient_summary(D_gradvar)

    # Add scalar summaries for G
    tf.summary.scalar("G loss", G_loss)

    # Add scalar summaries for D
    tf.summary.scalar("D loss real", D_loss_real)
    tf.summary.scalar("D loss fake", D_loss_fake)
    tf.summary.scalar("D loss grad", D_loss_grad)

    summary_op = tf.summary.merge_all()

    ############################
    # Start training
    ############################

    # Initialize session
    saver = tu.initialize_session(sess)

    # Start queues
    coord, threads = du.manage_queues(sess)

    # Summaries
    writer = tu.manage_summaries(sess)

    # Tensors whose values are passed to tu.check_data
    list_data = [z_noise]
    list_data += [X_real, X_fake]
    list_data += [generated_toplot, real_toplot]

    # Number of batches between two summary writes.  Identical to
    # nb_batch_per_epoch // int(0.5 * nb_batch_per_epoch) whenever
    # nb_batch_per_epoch >= 2; the max(1, ...) guards prevent the
    # ZeroDivisionError that expression raises for nb_batch_per_epoch < 2.
    nb_batch = FLAGS.nb_batch_per_epoch
    summary_every = max(1, nb_batch // max(1, int(0.5 * nb_batch)))

    try:
        # Run checks on data dimensions
        output = sess.run(list_data)
        tu.check_data(output, list_data)

        for e in tqdm(range(FLAGS.nb_epoch), desc="Training progress"):

            t = tqdm(range(nb_batch), desc="Epoch %i" % e, mininterval=0.5)
            for batch_counter in t:

                # Update the critic ncritic times per generator update
                for _ in range(FLAGS.ncritic):
                    sess.run([D_update])

                # Update generator
                sess.run([G_update])

                if batch_counter % summary_every == 0:
                    summary = sess.run(summary_op)
                    writer.add_summary(summary, e * nb_batch + batch_counter)

                t.set_description('Epoch %s:' % e)

            # Plot some generated images
            Xf, Xr = sess.run([generated_toplot, real_toplot])
            vu.save_image(Xf, Xr, title="current_batch", e=e)

            # Save session
            saver.save(sess, os.path.join(FLAGS.model_dir, "model"), global_step=e)

            # Show data statistics
            output = sess.run(list_data)
            tu.check_data(output, list_data)
    finally:
        # Stop the input threads even when training is interrupted or fails
        coord.request_stop()
        coord.join(threads)

    print('Finished training!')