def main():
    """Train a semi-supervised adversarial autoencoder (AAE) on MNIST.

    Each iteration runs four phases: reconstruction (encoder + decoder),
    discriminator updates on the y and z codes, a generator (encoder)
    update, and a supervised classification step on the small labeled
    split. After every epoch the classifier is evaluated on a held-out
    validation set and the accuracy is appended to
    ``<model_dir>/result.csv``.

    Relies on module-level objects: ``dataset``, ``aae``, ``sampler``,
    ``args``, ``np``, ``F``, ``cuda``, ``pd`` and ``Progress``.
    """
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100

    # seed RNGs for reproducibility (NumPy and, when a GPU is used, CuPy)
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data,
        num_types_of_label, seed=args.seed)
    # (removed leftover debug print of training_labels_l — it was also a
    # Python-2-only print statement)

    # classification targets for the adversarial game
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l,
                config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(
                training_images_u, batchsize_u, config.ndim_x)

            # reconstruction phase: encoder + decoder reproduce x
            q_y_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            reconstruction_u = aae.decode_yz_x(q_y_x_u, z_u)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase: train both discriminators to separate
            # prior samples (label 0) from encoder outputs (label 1)
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize_u, config.ndim_z, mean=0, var=1)
            y_true_u = sampler.onehot_categorical(batchsize_u, config.ndim_y)
            discrimination_z_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            discrimination_y_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(
                discrimination_z_true, class_true) + F.softmax_cross_entropy(
                    discrimination_z_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(
                discrimination_y_true, class_true) + F.softmax_cross_entropy(
                    discrimination_y_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase: train the encoder to fool both
            # discriminators (fakes labeled as true)
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            # supervised phase: cross-entropy on the labeled minibatch
            # (unnormalized logits — softmax is applied inside the loss)
            unnormalized_q_y_x_l, z_l = aae.encode_x_yz(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                unnormalized_q_y_x_l, aae.to_variable(label_ids_l))
            aae.backprop_generator(loss_supervised)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        # validation phase
        # split validation data to reduce gpu memory consumption
        images_v, _, label_ids_v = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            config.ndim_x, config.ndim_y)
        images_v_segments = np.split(images_v, num_validation_data // 500)
        label_ids_v_segments = np.split(label_ids_v, num_validation_data // 500)
        num_correct = 0
        for images_v, labels_v in zip(images_v_segments, label_ids_v_segments):
            predicted_labels = aae.argmax_x_label(images_v, test=True)
            for i, label in enumerate(predicted_labels):
                if label == labels_v[i]:
                    num_correct += 1
        validation_accuracy = num_correct / float(num_validation_data)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
                "loss_s": sum_loss_supervised / num_trains_per_epoch,
                "loss_d": sum_loss_discriminator / num_trains_per_epoch,
                "loss_g": sum_loss_generator / num_trains_per_epoch,
                "accuracy": validation_accuracy
            })

        # write accuracy to csv (rewritten every epoch so a crash keeps
        # the latest results)
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
# reconstruction phase x = sample_unlabeled_data() aae.update_learning_rate(conf.learning_rate_for_reconstruction_cost) aae.update_momentum(conf.momentum_for_reconstruction_cost) sum_loss_autoencoder += aae.train_autoencoder_unsupervised(x) # regularization phase ## train discriminator aae.update_learning_rate(conf.learning_rate_for_adversarial_cost) aae.update_momentum(conf.momentum_for_adversarial_cost) loss_discriminator = 0 for k in xrange(n_steps_to_optimize_dis): if k > 0: x = sample_unlabeled_data() z_true = sampler.gaussian(batchsize, conf.ndim_z) y_true = sampler.onehot_categorical(batchsize, conf.ndim_y) loss_discriminator += aae.train_discriminator_yz(x, y_true, z_true) loss_discriminator /= n_steps_to_optimize_dis sum_loss_discriminator += loss_discriminator ## train generator sum_loss_generator += aae.train_generator_x_yz(x) # semi-supervised classification phase x_labeled, y_onehot, y_id = sample_labeled_data() aae.update_learning_rate(conf.learning_rate_for_semi_supervised_cost) aae.update_momentum(conf.momentum_for_semi_supervised_cost) sum_loss_classifier += aae.train_classifier(x_labeled, y_id) if t % 10 == 0: sys.stdout.write("\rTraining in progress...({} / {})".format(
def train():
    """Train a conditional GAN on CIFAR-10 with a uniform latent prior.

    Builds the generator/discriminator graph, alternates one
    discriminator step with two generator steps per batch, periodically
    writes a grid of generated samples and a text loss log under the
    save directory, and checkpoints the model at the end.

    Relies on module-level objects: ``tf``, ``tools``, ``spl``,
    ``Generator``, ``Discriminator``, ``Cifar10``, ``ave_loss`` and
    ``datetime``.
    """
    # hyper-parameters
    data_set = 'cifar10'
    prior = 'uniform'
    x_dim = 32           # image height/width
    x_chl = 3            # image channels
    y_dim = 10           # number of classes (one-hot condition)
    z_dim = 64           # latent dimension
    batch_size = 100
    num_epochs = 500 * 50
    step_epochs = 100    # logging interval; int(num_epochs/100)
    learn_rate = 0.0005

    root_path = 'save/{}/{}'.format(data_set, prior)
    save_path = tools.make_save_directory(root_path)

    # graph inputs
    z = tf.placeholder(dtype=tf.float32, shape=[batch_size, z_dim], name='z')
    y = tf.placeholder(dtype=tf.float32, shape=[batch_size, y_dim], name='y')
    x_real = tf.placeholder(dtype=tf.float32,
                            shape=[batch_size, x_dim, x_dim, x_chl], name='x')

    generator = Generator(batch_size=batch_size, z_dim=z_dim, dataset=data_set)
    discriminator = Discriminator(batch_size=batch_size, dataset=data_set)
    x_fake = generator.generate_on_cifar10(z, y, train=True)
    d_out_real = discriminator.discriminator_cifar10(x_real)
    d_out_fake = discriminator.discriminator_cifar10(x_fake)

    # discriminator loss: real -> 1, fake -> 0
    D_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(d_out_real), logits=d_out_real))
    D_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(d_out_fake), logits=d_out_fake))
    with tf.control_dependencies([D_loss_fake, D_loss_real]):
        D_loss = D_loss_fake + D_loss_real

    # generator loss: make the discriminator output 1 on fakes
    G_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(d_out_fake), logits=d_out_fake))

    # optimizers — variables are partitioned by name substring
    all_variables = tf.trainable_variables()
    g_var = [var for var in all_variables if 'generator' in var.name]
    d_var = [var for var in all_variables if 'discriminator' in var.name]
    optimizer = tf.train.AdamOptimizer(learn_rate)
    G_solver = optimizer.minimize(G_loss, var_list=g_var)
    D_solver = optimizer.minimize(D_loss, var_list=d_var)

    # read data
    train_data = Cifar10(train=True)
    # renamed from `file` so the builtin is not shadowed
    log_file = open('{}/train.txt'.format(save_path), 'w')
    sess = tf.Session()

    # train the model
    sess.run(tf.global_variables_initializer())
    ave_loss_list = [0, 0, 0]
    cur_time = datetime.now()

    # fixed noise/condition reused for every saved sample grid
    save_step = int(num_epochs / 100)
    z_sample = spl.uniform(100, z_dim)
    y_sample = spl.onehot_categorical(100, y_dim)

    # training process
    for epochs in range(1, num_epochs + 1):
        batch_x, batch_y = train_data.next_batch(batch_size)
        s_z_real = spl.uniform(batch_size, z_dim)

        # 1 discriminator step, then 2 generator steps per batch
        for _ in range(1):
            sess.run(D_solver,
                     feed_dict={z: s_z_real, x_real: batch_x, y: batch_y})
        for _ in range(2):
            sess.run(G_solver, feed_dict={z: s_z_real, y: batch_y})

        loss_list = sess.run([D_loss_fake, D_loss_real, G_loss],
                             feed_dict={z: s_z_real, x_real: batch_x, y: batch_y})
        ave_loss(ave_loss_list, loss_list, step_epochs)

        # periodically save a grid of generated samples
        if epochs % save_step == 0:
            iter_counter = int(epochs / save_step)
            x_sample = sess.run(x_fake, feed_dict={z: z_sample, y: y_sample})
            tools.save_grid_images(
                x_sample,
                '{}/images/{}.png'.format(save_path, iter_counter),
                size=x_dim, chl=x_chl)

        # record information
        if epochs % step_epochs == 0:
            time_use = (datetime.now() - cur_time).seconds
            liner = "Epoch {:d}/{:d}, loss_dis_faker {:9f}, loss_dis_real {:9f}, loss_encoder {:9f} time_use {:f}" \
                .format(epochs, num_epochs, ave_loss_list[0], ave_loss_list[1],
                        ave_loss_list[2], time_use)
            print(liner)  # fixed: stray trailing comma made this a tuple expression
            log_file.writelines(liner + '\n')
            ave_loss_list = [0, 0, 0]  # reset to 0
            cur_time = datetime.now()

    # save model (renamed from `vars` so the builtin is not shadowed)
    save_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    saver = tf.train.Saver(var_list=save_vars)
    saver.save(sess, save_path='{}/model'.format(save_path))

    # close all
    log_file.close()
    sess.close()
def main(run_load_from_file=False):
    """Train an unsupervised adversarial autoencoder (AAE) on MNIST.

    Per iteration: a reconstruction step, discriminator updates for the
    categorical (y) and gaussian (z) codes, and a generator (encoder)
    update. The checkpoint and the pickled per-epoch loss history are
    written every 10 epochs.

    Args:
        run_load_from_file: when True, resume from the latest checkpoint
            in ``config.ckpt_dir`` (including the pickled loss history).

    Relies on module-level objects: ``dataset``, ``config``, ``sampler``,
    ``build_graph``, ``Operation``, ``Process``, ``tf``, ``np``, ``plt``,
    ``os`` and ``pickle``.
    """
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    # setting
    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_u = 100

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
        Load from checkpoint or start a new session
        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(
                open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []  # Recording loss per epoch

        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator
            '''
            learning_rate = opt.ladder_learning_rate(epoch + len(training_epoch_loss))

            # Recording loss per iteration
            training_loss_set = []
            sum_loss_reconstruction = 0
            sum_loss_discriminator = 0  # fixed: was misspelled "discrminator"
            sum_loss_generator = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)
                # sample from data distribution
                images_u = dataset.sample_unlabeled_data(images, batch_size_u)

                # reconstruction phase
                _, loss_reconstruction = sess.run(
                    [h.opt_r, h.loss_r],
                    feed_dict={h.x: images_u, h.lr: learning_rate})

                z_true_u = sampler.gaussian(batch_size_u, config.ndim_z,
                                            mean=0, var=1)
                y_true_u = sampler.onehot_categorical(batch_size_u, config.ndim_y)

                # adversarial phase for discriminator
                _, loss_discriminator_y = sess.run(
                    [h.opt_dy, h.loss_dy],
                    feed_dict={h.x: images_u, h.y: y_true_u, h.lr: learning_rate})
                _, loss_discriminator_z = sess.run(
                    [h.opt_dz, h.loss_dz],
                    feed_dict={h.x: images_u, h.z: z_true_u, h.lr: learning_rate})
                loss_discriminator = loss_discriminator_y + loss_discriminator_z

                # adversarial phase for generator
                _, loss_generator_y, loss_generator_z = sess.run(
                    [h.opt_e, h.loss_gy, h.loss_gz],
                    feed_dict={h.x: images_u, h.lr: learning_rate})
                loss_generator = loss_generator_y + loss_generator_z

                training_loss_set.append([
                    loss_reconstruction,
                    loss_discriminator,
                    loss_discriminator_y,
                    loss_discriminator_z,
                    loss_generator,
                    loss_generator_z,
                    loss_generator_y,
                ])
                sum_loss_reconstruction += loss_reconstruction
                sum_loss_discriminator += loss_discriminator
                sum_loss_generator += loss_generator

                # NOTE(review): with 500 iterations per epoch this only
                # fires at i == 0 — confirm the intended logging interval
                if i % 1000 == 0:
                    process_iteration.show_table_2d(
                        i, num_trains_per_epoch, {
                            'reconstruction': sum_loss_reconstruction / (i + 1),
                            'discriminator': sum_loss_discriminator / (i + 1),
                            'generator': sum_loss_generator / (i + 1),
                        })

            # In end of epoch, summary the loss
            average_training_loss_per_epoch = np.mean(
                np.array(training_loss_set), axis=0)
            training_epoch_loss.append(average_training_loss_per_epoch)
            loss_name_per_epoch = [
                'reconstruction',
                'discriminator',
                'discriminator_y',
                'discriminator_z',
                'generator',
                'generator_z',
                'generator_y',
            ]

            if epoch % 1 == 0:
                process.show_bar(
                    epoch, max_epoch, {
                        'loss_r': average_training_loss_per_epoch[0],
                        'loss_d': average_training_loss_per_epoch[1],
                        'loss_g': average_training_loss_per_epoch[4],
                    })
                # save a tiled image of reconstructions for this epoch
                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(
                                    str(epoch).zfill(3)))

            if epoch % 10 == 0:
                saver.save(sess,
                           os.path.join(config.ckpt_dir, 'model_ckptpoint'),
                           global_step=epoch)
                pickle.dump((training_epoch_loss, loss_name_per_epoch),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
def main():
    """Train an unsupervised adversarial autoencoder (AAE) on MNIST.

    Per iteration: reconstruction (encoder + decoder), a discriminator
    update on both the y and z codes, and a generator (encoder) update.
    The model is checkpointed once per epoch.

    Relies on module-level objects: ``dataset``, ``aae``, ``sampler``,
    ``args``, ``np``, ``F``, ``cuda`` and ``Progress``.
    """
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize = 100

    # seed RNGs for reproducibility (NumPy and, when a GPU is used, CuPy)
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # classification targets for the adversarial game
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_u = dataset.sample_unlabeled_data(images, batchsize)

            # reconstruction phase
            qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            reconstruction_u = aae.decode_yz_x(qy_x_u, z_u)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase: train both discriminators to separate
            # prior samples (label 0) from encoder outputs (label 1)
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize, config.ndim_z, mean=0, var=1)
            y_true_u = sampler.onehot_categorical(batchsize, config.ndim_y)
            discrimination_z_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            discrimination_y_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(
                discrimination_z_true, class_true) + F.softmax_cross_entropy(
                    discrimination_z_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(
                discrimination_y_true, class_true) + F.softmax_cross_entropy(
                    discrimination_y_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase: train the encoder to fool both
            # discriminators (fakes labeled as true)
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)
        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
            "loss_d": sum_loss_discriminator / num_trains_per_epoch,
            "loss_g": sum_loss_generator / num_trains_per_epoch,
        })
def main():
    """Train a cluster-head adversarial autoencoder (AAE) on MNIST.

    In addition to the reconstruction / adversarial / supervised phases,
    this variant maps (y, z) through learned "cluster heads" to a joint
    representation, and adds a cost that pushes the cluster heads apart.
    Validation accuracy is appended to ``<model_dir>/result.csv`` each
    epoch.

    Relies on module-level objects: ``dataset``, ``aae``, ``sampler``,
    ``args``, ``np``, ``F``, ``cuda``, ``pd`` and ``Progress``.
    """
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100

    # seed RNGs for reproducibility (NumPy and, when a GPU is used, CuPy)
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data,
        num_types_of_label)

    # classification targets for the adversarial game
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0
        sum_loss_cluster_head = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l)
            images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u)

            # reconstruction phase: x -> (y, z) -> representation -> x
            qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            representation = aae.encode_yz_representation(qy_x_u, z_u)
            reconstruction_u = aae.decode_representation_x(representation)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase: train both discriminators to separate
            # prior samples (label 0) from encoder outputs (label 1)
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize_u, config.ndim_z, mean=0, var=1)
            y_true_u = sampler.onehot_categorical(batchsize_u, config.ndim_y)
            dz_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            dy_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            dz_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            dy_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(
                dz_true, class_true) + F.softmax_cross_entropy(
                    dz_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(
                dy_true, class_true) + F.softmax_cross_entropy(
                    dy_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase: train the encoder to fool both
            # discriminators (fakes labeled as true)
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            dz_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            dy_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(dz_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(dy_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            # supervised phase: cross-entropy on the labeled minibatch
            # (unnormalized logits — softmax is applied inside the loss)
            log_qy_x_l, z_l = aae.encode_x_yz(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                log_qy_x_l, aae.to_variable(label_ids_l))
            aae.backprop_generator(loss_supervised)

            # additional cost function that penalizes the euclidean
            # distance between every two of the cluster heads
            distance = aae.compute_distance_of_cluster_heads()
            loss_cluster_head = -F.sum(distance)
            aae.backprop_cluster_head(loss_cluster_head)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            # report the slack to the distance threshold rather than the
            # raw (negative) loss, so the displayed value decreases to 0
            sum_loss_cluster_head += float(
                aae.nCr(config.ndim_y, 2) *
                config.cluster_head_distance_threshold +
                loss_cluster_head.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        # validation phase: evaluate in chunks of 1000 to bound memory use
        images_v_segments = np.split(validation_images,
                                     num_validation_data // 1000)
        labels_v_segments = np.split(validation_labels,
                                     num_validation_data // 1000)
        sum_accuracy = 0
        for images_v, labels_v in zip(images_v_segments, labels_v_segments):
            qy = aae.encode_x_yz(images_v, apply_softmax=True, test=True)[0]
            accuracy = F.accuracy(qy, aae.to_variable(labels_v))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_v_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
                "loss_s": sum_loss_supervised / num_trains_per_epoch,
                "loss_d": sum_loss_discriminator / num_trains_per_epoch,
                "loss_g": sum_loss_generator / num_trains_per_epoch,
                "loss_c": sum_loss_cluster_head / num_trains_per_epoch,
                "accuracy": validation_accuracy
            })

        # write accuracy to csv (rewritten every epoch so a crash keeps
        # the latest results)
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
def main(run_load_from_file=False):
    """Train a semi-supervised cluster-head AAE on MNIST (TensorFlow).

    Per iteration: reconstruction, discriminator updates on the y and z
    codes, a generator (encoder) update, and a supervised step on the
    small labeled split. During the first epoch an extra transform cost
    spreads the cluster heads apart. Validation accuracy is computed
    every epoch; checkpoints, the pickled loss history and a trend plot
    are written every 10 epochs.

    Args:
        run_load_from_file: when True, resume from the latest checkpoint
            in ``config.ckpt_dir`` (including the pickled loss history).

    Relies on module-level objects: ``dataset``, ``config``, ``sampler``,
    ``build_graph``, ``Operation``, ``Process``, ``tf``, ``np``, ``plt``,
    ``os`` and ``pickle``.
    """
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    # setting
    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_l = 100
    batch_size_u = 100

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data,
        num_types_of_label)

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
        Load from checkpoint or start a new session
        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(
                open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []  # Recording loss per epoch

        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator
            '''
            learning_rate = opt.ladder_learning_rate(epoch + len(training_epoch_loss))

            # Recording loss per iteration
            training_loss_set = []
            sum_loss_reconstruction = 0
            sum_loss_supervised = 0
            sum_loss_discriminator = 0  # fixed: was misspelled "discrminator"
            sum_loss_generator = 0
            sum_loss_cluster_head = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)
                # sample from data distribution
                images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
                    training_images_l, training_labels_l, batch_size_l)
                images_u = dataset.sample_unlabeled_data(training_images_u,
                                                         batch_size_u)

                # additional cost that penalizes the euclidean distance
                # between every two cluster heads (run only in epoch 0).
                # NOTE(review): for epoch > 0 `loss_transform` keeps the
                # value of the last epoch-0 step — confirm this is the
                # intended logging behavior.
                if epoch == 0:
                    for j in range(5):
                        starting_labels, ending_labels = dataset.cluster_create_dataset(
                            config.ndim_y)
                        _, loss_transform = sess.run(
                            [h.opt_t, h.loss_t],
                            feed_dict={h.g_s: starting_labels,
                                       h.g_e: ending_labels,
                                       h.lr: learning_rate})

                # reconstruction phase
                _, loss_reconstruction = sess.run(
                    [h.opt_r, h.loss_r],
                    feed_dict={h.x: images_u, h.lr: learning_rate})

                z_true_u = sampler.gaussian(batch_size_u, config.ndim_z,
                                            mean=0, var=1)
                y_true_u = sampler.onehot_categorical(batch_size_u, config.ndim_y)

                # adversarial phase for discriminator
                _, loss_discriminator_y = sess.run(
                    [h.opt_dy, h.loss_dy],
                    feed_dict={h.x: images_u, h.y: y_true_u, h.lr: learning_rate})
                _, loss_discriminator_z = sess.run(
                    [h.opt_dz, h.loss_dz],
                    feed_dict={h.x: images_u, h.z: z_true_u, h.lr: learning_rate})
                loss_discriminator = loss_discriminator_y + loss_discriminator_z

                # adversarial phase for generator
                _, loss_generator_y, loss_generator_z = sess.run(
                    [h.opt_e, h.loss_gy, h.loss_gz],
                    feed_dict={h.x: images_u, h.lr: learning_rate})
                loss_generator = loss_generator_y + loss_generator_z

                # supervised phase
                _, loss_generator_supervised = sess.run(
                    [h.opt_ey, h.loss_ey],
                    feed_dict={h.x: images_l, h.y_s: label_id_l,
                               h.lr: learning_rate})

                training_loss_set.append([
                    loss_reconstruction,
                    loss_discriminator,
                    loss_discriminator_y,
                    loss_discriminator_z,
                    loss_generator,
                    loss_generator_z,
                    loss_generator_y,
                    loss_generator_supervised,
                    loss_transform
                ])
                sum_loss_reconstruction += loss_reconstruction
                sum_loss_discriminator += loss_discriminator
                sum_loss_generator += loss_generator
                sum_loss_supervised += loss_generator_supervised
                sum_loss_cluster_head += loss_transform

                # NOTE(review): with 500 iterations per epoch this only
                # fires at i == 0 — confirm the intended logging interval
                if i % 1000 == 0:
                    process_iteration.show_table_2d(
                        i, num_trains_per_epoch, {
                            'reconstruction': sum_loss_reconstruction / (i + 1),
                            'discriminator': sum_loss_discriminator / (i + 1),
                            'generator': sum_loss_generator / (i + 1),
                            'supervise': sum_loss_supervised / (i + 1),
                            'cluster_head': sum_loss_cluster_head / (i + 1)
                        })

            # In end of epoch, summary the loss
            average_training_loss_per_epoch = np.mean(
                np.array(training_loss_set), axis=0)

            # validation phase: evaluate in chunks of 1000 to bound
            # memory use
            images_v_segments = np.split(validation_images,
                                         num_validation_data // 1000)
            labels_v_segments = np.split(validation_labels,
                                         num_validation_data // 1000)
            sum_accuracy = 0
            for images_v, labels_v in zip(images_v_segments, labels_v_segments):
                y_v = sess.run(h.y_r, feed_dict={h.x: images_v})
                accuracy = opt.compute_accuracy(y_v, labels_v)
                sum_accuracy += accuracy
            validation_accuracy = sum_accuracy / len(images_v_segments)

            # append validation accuracy to the training loss
            average_loss_per_epoch = np.append(average_training_loss_per_epoch,
                                               validation_accuracy)
            training_epoch_loss.append(average_loss_per_epoch)
            loss_name_per_epoch = [
                'reconstruction',
                'discriminator',
                'discriminator_y',
                'discriminator_z',
                'generator',
                'generator_z',
                'generator_y',
                'supervised',
                'transform',
                'validation_accuracy'
            ]

            if epoch % 1 == 0:
                process.show_bar(
                    epoch, max_epoch, {
                        'loss_r': average_loss_per_epoch[0],
                        'loss_d': average_loss_per_epoch[1],
                        'loss_g': average_loss_per_epoch[4],
                        'loss_v': average_loss_per_epoch[9],
                    })
                # save a tiled image of reconstructions for this epoch
                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(
                                    str(epoch).zfill(3)))

            if epoch % 10 == 0:
                saver.save(sess,
                           os.path.join(config.ckpt_dir, 'model_ckptpoint'),
                           global_step=epoch)
                pickle.dump((training_epoch_loss, loss_name_per_epoch),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
                plt.plot_double_scale_trend(config.ckpt_dir)