# Module-level imports required by the training methods below
# (TrainDataProvider is assumed to come from the project's data pipeline).
import time

import tensorflow as tf


def train(self, lr=0.0002, epoch=100, schedule=10, resume=True,
          freeze_encoder=False, sample_steps=50, checkpoint_steps=50):
    g_vars, d_vars = self.retrieve_trainable_vars(freeze_encoder=freeze_encoder)
    input_handle, loss_handle, _, summary_handle = self.retrieve_handles()
    if not self.sess:
        raise Exception("no session registered")
    tf.set_random_seed(1234)
    learning_rate = tf.placeholder(tf.float32, name="learning_rate")
    d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(
        loss_handle.d_loss, var_list=d_vars)
    g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(
        loss_handle.g_loss, var_list=g_vars)
    tf.global_variables_initializer().run()

    real_data = input_handle.real_data
    no_target_data = input_handle.no_target_data

    # filter by one type of labels
    data_provider = TrainDataProvider(self.data_dir)
    total_batches = data_provider.compute_total_batch_num(self.batch_size)
    val_batch_iter = data_provider.get_val(size=self.batch_size)

    saver = tf.train.Saver(max_to_keep=3)
    summary_writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)
    if resume:
        _, model_dir = self.get_model_id_and_dir()
        self.restore_model(saver, model_dir)

    current_lr = lr
    counter = 0
    start_time = time.time()
    for ei in range(epoch):
        train_batch_iter = data_provider.get_train_iter(self.batch_size)
        if (ei + 1) % schedule == 0:
            update_lr = current_lr / 2.0
            # minimum learning rate guarantee
            update_lr = max(update_lr, 0.0002)
            print("decay learning rate from %.5f to %.5f" % (current_lr, update_lr))
            current_lr = update_lr

        for bid, batch in enumerate(train_batch_iter):
            counter += 1
            batch_images = batch
            # Optimize D
            _, batch_d_loss, d_summary = self.sess.run(
                [d_optimizer, loss_handle.d_loss, summary_handle.d_merged],
                feed_dict={
                    real_data: batch_images,
                    learning_rate: current_lr,
                    no_target_data: batch_images
                })
            # Optimize G
            _, batch_g_loss = self.sess.run(
                [g_optimizer, loss_handle.g_loss],
                feed_dict={
                    real_data: batch_images,
                    learning_rate: current_lr,
                    no_target_data: batch_images
                })
            # magic move to Optimize G again
            # according to https://github.com/carpedm20/DCGAN-tensorflow
            # (running the G update twice per D update is a common trick to
            # keep d_loss from collapsing to zero early in training)
            # collect all the losses along the way
            _, batch_g_loss, const_loss, cheat_loss, l1_loss, tv_loss, g_summary = self.sess.run(
                [g_optimizer,
                 loss_handle.g_loss,
                 loss_handle.const_loss,
                 loss_handle.cheat_loss,
                 loss_handle.l1_loss,
                 loss_handle.tv_loss,
                 summary_handle.g_merged],
                feed_dict={
                    real_data: batch_images,
                    learning_rate: current_lr,
                    no_target_data: batch_images
                })
            passed = time.time() - start_time
            log_format = "Epoch: [%2d], [%4d/%4d] time: %4.4f, d_loss: %.5f, g_loss: %.5f, " + \
                         "const_loss: %.5f, cheat_loss: %.5f, l1_loss: %.5f, tv_loss: %.5f"
            print(log_format % (ei, bid, total_batches, passed, batch_d_loss,
                                batch_g_loss, const_loss, cheat_loss, l1_loss, tv_loss))
            summary_writer.add_summary(d_summary, counter)
            summary_writer.add_summary(g_summary, counter)

        # sampling and checkpointing are per-epoch in this variant
        if ei % sample_steps == 0:
            # sample the current model states with val data
            self.validate_model(val_batch_iter, ei, counter)

        if ei % checkpoint_steps == 0:
            print("Checkpoint: save checkpoint step %d" % ei)
            self.checkpoint(saver, ei)

    # save the last checkpoint
    print("Checkpoint: last checkpoint step %d" % ei)
    self.checkpoint(saver, ei)
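
# Usage sketch for the trainer above (hedged: register_session() and the way
# the model object is built are assumed from context and may differ in the
# actual class; argument values here are illustrative, not project defaults):
def _train_usage_example(model):
    with tf.Session() as sess:
        model.register_session(sess)  # assumed registration hook; see "no session registered" check
        # resume=False trains from scratch; sample_steps/checkpoint_steps
        # count epochs in this variant
        model.train(lr=0.001, epoch=40, schedule=10, resume=False,
                    sample_steps=10, checkpoint_steps=10)
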
# WGAN-style variant of train(): RMSProp optimizers plus a hard clamp on the
# discriminator weights (Lipschitz constraint via weight clipping).
def train(self, lr=0.0002, epoch=100, schedule=10, resume=True,
          freeze_encoder=False, sample_steps=50, checkpoint_steps=500,
          clamp=0.001, d_iters=3):
    g_vars, d_vars = self.retrieve_trainable_vars(freeze_encoder=freeze_encoder)
    input_handle, loss_handle, _, summary_handle = self.retrieve_handles()
    if not self.sess:
        raise Exception("no session registered")
    learning_rate = tf.placeholder(tf.float32, name="learning_rate")
    d_optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(
        loss_handle.d_loss, var_list=d_vars)
    g_optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(
        loss_handle.g_loss, var_list=g_vars)
    # clamp every discriminator weight to [-clamp, clamp] before each D step
    cap_d_vars_ops = [val.assign(tf.clip_by_value(val, -clamp, clamp))
                      for val in d_vars]
    # NOTE: d_iters is accepted but never used below; the discriminator is
    # updated once per generator step.
    tf.global_variables_initializer().run()

    real_data = input_handle.real_data

    # filter by one type of labels
    data_provider = TrainDataProvider(self.data_dir)
    total_batches = data_provider.compute_total_batch_num(self.batch_size)
    val_batch_iter = data_provider.get_val(size=self.batch_size)

    saver = tf.train.Saver(max_to_keep=3)
    summary_writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)
    if resume:
        _, model_dir = self.get_model_id_and_dir()
        self.restore_model(saver, model_dir)

    current_lr = lr
    counter = 0
    start_time = time.time()
    for ei in range(epoch):
        train_batch_iter = data_provider.get_train_iter(self.batch_size)
        if (ei + 1) % schedule == 0:
            update_lr = current_lr / 2.0
            # minimum learning rate guarantee
            update_lr = max(update_lr, 0.0002)
            print("decay learning rate from %.5f to %.5f" % (current_lr, update_lr))
            current_lr = update_lr

        for bid, batch in enumerate(train_batch_iter):
            counter += 1
            batch_images = batch
            # Optimize D (weights are clamped first)
            self.sess.run(cap_d_vars_ops)
            _, batch_d_loss, d_loss_real, d_loss_fake, d_summary = self.sess.run(
                [d_optimizer,
                 loss_handle.d_loss,
                 loss_handle.d_loss_real,
                 loss_handle.d_loss_fake,
                 summary_handle.d_merged],
                feed_dict={
                    real_data: batch_images,
                    learning_rate: current_lr
                })
            # Optimize G
            _, batch_g_loss = self.sess.run(
                [g_optimizer, loss_handle.g_loss],
                feed_dict={
                    real_data: batch_images,
                    learning_rate: current_lr
                })
            # magic move to Optimize G again
            # according to https://github.com/carpedm20/DCGAN-tensorflow
            # collect all the losses along the way
            _, batch_g_loss, const_loss, l1_loss, tv_loss, g_summary = self.sess.run(
                [g_optimizer,
                 loss_handle.g_loss,
                 loss_handle.const_loss,
                 loss_handle.l1_loss,
                 loss_handle.tv_loss,
                 summary_handle.g_merged],
                feed_dict={
                    real_data: batch_images,
                    learning_rate: current_lr
                })
            passed = time.time() - start_time
            log_format = "Epoch: [%2d], [%4d/%4d] time: %4.4f, d_loss: %.5f, g_loss: %.5f, " + \
                         "const_loss: %.5f, l1_loss: %.5f, tv_loss: %.5f, " + \
                         "d_loss_real: %.7f, d_loss_fake: %.7f"
            print(log_format % (ei, bid, total_batches, passed, batch_d_loss,
                                batch_g_loss, const_loss, l1_loss, tv_loss,
                                d_loss_real, d_loss_fake))
            summary_writer.add_summary(d_summary, counter)
            summary_writer.add_summary(g_summary, counter)

            # sampling and checkpointing are per-step in this variant
            if counter % sample_steps == 0:
                # sample the current model states with val data
                self.validate_model(val_batch_iter, ei, counter)

            if counter % checkpoint_steps == 0:
                print("Checkpoint: save checkpoint step %d" % counter)
                self.checkpoint(saver, counter)

    # validation of the models
    # print("val.examples len:{}".format(len(data_provider.val.examples)))
    # accuracy = 0.0
    # iters = int(len(data_provider.val.examples) / self.batch_size)
    # for it in range(iters):
    #     val_batch_iter = data_provider.get_val(size=self.batch_size)
    #     accuracy += self.validate_last_model(val_batch_iter)
    #     break
    # accuracy /= iters
    # print("Avg accuracy: %.5f" % accuracy)
    # save the last checkpoint
    print("Checkpoint: last checkpoint step %d" % counter)
    self.checkpoint(saver, counter)
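
# Standalone sketch of the WGAN-style weight clamp applied by cap_d_vars_ops
# above (variable names here are illustrative; the bound mirrors the default
# clamp=0.001):
def _weight_clip_demo():
    w = tf.Variable(tf.random_normal([2, 2]), name="w_demo")
    clip_op = w.assign(tf.clip_by_value(w, -0.001, 0.001))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(clip_op)  # every entry of w now lies in [-0.001, 0.001]
        return sess.run(w)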