def main():
    # load MNIST images
    images, labels = dataset.load_test_images()

    # settings
    num_analogies = 10
    pylab.gray()

    # generate style vector z
    x = dataset.sample_unlabeled_data(images, num_analogies)
    x = (x + 1) / 2

    with tf.device(config.device):
        x_input, img_y, img_z, reconstruction = build_graph(is_test=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))

        z = sess.run(img_z, feed_dict={x_input: x})

        # plot the original image in the left-most column
        for m in range(num_analogies):
            pylab.subplot(num_analogies, config.ndim_y + 2, m * (config.ndim_y + 2) + 1)
            pylab.imshow(x[m].reshape((28, 28)), interpolation='none')
            pylab.axis('off')

        all_y = np.identity(config.ndim_y, dtype=np.float32)
        for m in range(num_analogies):
            # copy the style vector z once per class
            fixed_z = np.repeat(z[m].reshape(1, -1), config.ndim_y, axis=0)
            gen_x = sess.run(reconstruction, feed_dict={img_z: fixed_z, img_y: all_y})
            gen_x = (gen_x + 1) / 2

            # plot the images generated from each label
            for n in range(config.ndim_y):
                pylab.subplot(num_analogies, config.ndim_y + 2, m * (config.ndim_y + 2) + 3 + n)
                pylab.imshow(gen_x[n].reshape((28, 28)), interpolation='none')
                pylab.axis('off')

        fig = pylab.gcf()
        fig.set_size_inches(num_analogies, config.ndim_y)
        pylab.savefig('{}/analogy.png'.format(config.ckpt_dir))

        hist_value, hist_head = plot.load_pickle_to_data(config.ckpt_dir)
        for loss_name in ['reconstruction', 'validation_accuracy', 'supervised']:
            plot.plot_loss_trace(hist_value[loss_name], loss_name, config.ckpt_dir)

        plot.plot_adversarial_trace(hist_value['discriminator_y'],
                                    hist_value['generator_y'], 'y', config.ckpt_dir)
        plot.plot_adversarial_trace(hist_value['discriminator_z'],
                                    hist_value['generator_z'], 'z', config.ckpt_dir)
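# --- Illustrative sketch (not part of the original scripts) ---
# plot.plot_adversarial_trace above is an external helper whose implementation
# is not shown here. A minimal version, assuming it simply overlays the
# discriminator and generator loss histories for one latent variable, could
# look like the following; the function name and signature are hypothetical.
import pylab

def plot_adversarial_trace_sketch(loss_d, loss_g, name, out_dir):
    # loss_d / loss_g: per-epoch loss histories (lists of floats)
    pylab.figure()
    pylab.plot(loss_d, label='discriminator_{}'.format(name))
    pylab.plot(loss_g, label='generator_{}'.format(name))
    pylab.xlabel('epoch')
    pylab.ylabel('loss')
    pylab.legend()
    pylab.savefig('{}/adversarial_trace_{}.png'.format(out_dir, name))
    pylab.close()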
def main():
    # load MNIST images
    images, labels = dataset.load_test_images()

    # config
    config = aae.config

    # settings
    num_analogies = 10
    pylab.gray()

    # generate style vector z
    x = dataset.sample_unlabeled_data(images, num_analogies, config.ndim_x, binarize=False)
    _, z = aae.encode_x_yz(x, apply_softmax=True)
    z = aae.to_numpy(z)

    # plot the original image in the left-most column
    for m in xrange(num_analogies):
        pylab.subplot(num_analogies, config.ndim_y + 2, m * (config.ndim_y + 2) + 1)
        pylab.imshow(x[m].reshape((28, 28)), interpolation="none")
        pylab.axis("off")

    all_y = np.identity(config.ndim_y, dtype=np.float32)
    for m in xrange(num_analogies):
        # copy z as many times as the number of classes
        fixed_z = np.repeat(z[m].reshape(1, -1), config.ndim_y, axis=0)
        gen_x = aae.to_numpy(aae.decode_yz_x(all_y, fixed_z))

        # plot the images generated from each label
        for n in xrange(config.ndim_y):
            pylab.subplot(num_analogies, config.ndim_y + 2, m * (config.ndim_y + 2) + 3 + n)
            pylab.imshow(gen_x[n].reshape((28, 28)), interpolation="none")
            pylab.axis("off")

    fig = pylab.gcf()
    fig.set_size_inches(num_analogies, config.ndim_y)
    pylab.savefig("{}/analogy.png".format(args.plot_dir))
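# --- Illustrative sketch (not part of the original scripts) ---
# The analogy grid pairs one style vector z with every class label:
# np.repeat tiles the style row ndim_y times and np.identity supplies the
# one-hot labels, so decode_yz_x draws the same "style" as each digit.
# A minimal numpy check of that pairing (shapes only, no model involved):
import numpy as np

ndim_y, ndim_z = 10, 2
z_single = np.random.randn(1, ndim_z).astype(np.float32)
fixed_z = np.repeat(z_single, ndim_y, axis=0)    # (10, ndim_z), identical rows
all_y = np.identity(ndim_y, dtype=np.float32)    # (10, 10), one row per class
assert fixed_z.shape == (ndim_y, ndim_z) and np.allclose(fixed_z, z_single)
assert all_y.sum(axis=1).tolist() == [1.0] * ndim_y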
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data, num_types_of_label, seed=args.seed)
    print training_labels_l

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)

            # reconstruction phase
            q_y_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            reconstruction_u = aae.decode_yz_x(q_y_x_u, z_u)
            loss_reconstruction = F.mean_squared_error(aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize_u, config.ndim_z, mean=0, var=1)
            y_true_u = sampler.onehot_categorical(batchsize_u, config.ndim_y)
            discrimination_z_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            discrimination_y_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(discrimination_z_true, class_true) \
                + F.softmax_cross_entropy(discrimination_z_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(discrimination_y_true, class_true) \
                + F.softmax_cross_entropy(discrimination_y_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            # supervised phase
            unnormalized_q_y_x_l, z_l = aae.encode_x_yz(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(unnormalized_q_y_x_l, aae.to_variable(label_ids_l))
            aae.backprop_generator(loss_supervised)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        # validation phase
        # split validation data to reduce gpu memory consumption
        images_v, _, label_ids_v = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data, config.ndim_x, config.ndim_y)
        images_v_segments = np.split(images_v, num_validation_data // 500)
        label_ids_v_segments = np.split(label_ids_v, num_validation_data // 500)
        num_correct = 0
        for images_v, labels_v in zip(images_v_segments, label_ids_v_segments):
            predicted_labels = aae.argmax_x_label(images_v, test=True)
            for i, label in enumerate(predicted_labels):
                if label == labels_v[i]:
                    num_correct += 1
        validation_accuracy = num_correct / float(num_validation_data)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
            "loss_s": sum_loss_supervised / num_trains_per_epoch,
            "loss_d": sum_loss_discriminator / num_trains_per_epoch,
            "loss_g": sum_loss_generator / num_trains_per_epoch,
            "accuracy": validation_accuracy
        })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
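# --- Illustrative sketch (not part of the original scripts) ---
# sampler.gaussian and sampler.onehot_categorical are external helpers that
# draw the target samples fed to the z- and y-discriminators. Minimal
# versions, assuming z ~ N(mean, var) i.i.d. per dimension and y drawn
# uniformly over the classes as a one-hot vector, might look like this:
import numpy as np

def gaussian(batchsize, ndim, mean=0, var=1):
    return np.random.normal(mean, np.sqrt(var), (batchsize, ndim)).astype(np.float32)

def onehot_categorical(batchsize, ndim):
    y = np.zeros((batchsize, ndim), dtype=np.float32)
    y[np.arange(batchsize), np.random.randint(0, ndim, batchsize)] = 1
    return y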
    try:
        os.mkdir(args.plot_dir)
    except:
        pass

    # load test images
    images, labels = dataset.load_test_images()

    # config
    config = adgm.config
    num_analogies = 10

    xp = np
    if args.gpu_device != -1:
        xp = cuda.cupy

    # sample data
    x = dataset.sample_unlabeled_data(images, num_analogies, config.ndim_x, binarize=False)
    z = adgm.encode_x_z(x, argmax_y=True, test=True)

    # plot
    fig = pylab.gcf()
    fig.set_size_inches(16.0, 16.0)
    pylab.clf()
    pylab.gray()

    for m in xrange(num_analogies):
        pylab.subplot(num_analogies, config.ndim_y + 2, m * (config.ndim_y + 2) + 1)
        pylab.imshow(x[m].reshape((28, 28)), interpolation="none")
        pylab.axis("off")

    all_y = xp.identity(config.ndim_y, dtype=xp.float32)
    for m in xrange(num_analogies):
        fixed_z_repeat = xp.repeat(z.data[m].reshape((1, -1)), config.ndim_y, axis=0)
        _x = adgm.decode_yz_x(all_y, fixed_z_repeat, test=True)
def main(run_load_from_file=False):
    # load MNIST images
    images, labels = dataset.load_test_images()

    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_u = 100

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
        Load from checkpoint or start a new session
        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []

        # Recording loss per epoch
        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator
            '''
            learning_rate = 0.0001

            # Recording loss per iteration
            sum_loss_reconstruction = 0
            sum_loss_discriminator_z = 0
            sum_loss_discriminator_img = 0
            sum_loss_generator_z = 0
            sum_loss_generator_img = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)

                # Inputs
                '''
                _l -> labeled
                _u -> unlabeled
                '''
                images_u = dataset.sample_unlabeled_data(images, batch_size_u)
                if config.distribution_sampler == 'swiss_roll':
                    z_true_u = sampler.swiss_roll(batch_size_u, config.ndim_z, config.num_types_of_label)
                elif config.distribution_sampler == 'gaussian_mixture':
                    z_true_u = sampler.gaussian_mixture(batch_size_u, config.ndim_z, config.num_types_of_label)
                elif config.distribution_sampler == 'uniform_desk':
                    z_true_u = sampler.uniform_desk(batch_size_u, config.ndim_z, radius=2)
                elif config.distribution_sampler == 'gaussian':
                    z_true_u = sampler.gaussian(batch_size_u, config.ndim_z, var=1)
                elif config.distribution_sampler == 'uniform':
                    z_true_u = sampler.uniform(batch_size_u, config.ndim_z, minv=-1, maxv=1)

                # reconstruction phase
                _, loss_reconstruction = sess.run([h.opt_r, h.loss_r],
                                                  feed_dict={h.x: images_u, h.lr: learning_rate})

                # adversarial phase for discriminator_z
                images_u_s = dataset.sample_unlabeled_data(images, batch_size_u)
                _, loss_discriminator_z = sess.run([h.opt_dz, h.loss_dz],
                                                   feed_dict={h.x: images_u, h.z: z_true_u, h.lr: learning_rate})
                _, loss_discriminator_img = sess.run([h.opt_dimg, h.loss_dimg],
                                                     feed_dict={h.x: images_u, h.x_s: images_u_s, h.lr: learning_rate})

                # adversarial phase for generator
                _, loss_generator_z = sess.run([h.opt_e, h.loss_e],
                                               feed_dict={h.x: images_u, h.lr: learning_rate})
                _, loss_generator_img = sess.run([h.opt_d, h.loss_d],
                                                 feed_dict={h.x: images_u, h.lr: learning_rate})

                sum_loss_reconstruction += loss_reconstruction
                sum_loss_discriminator_z += loss_discriminator_z
                sum_loss_discriminator_img += loss_discriminator_img
                sum_loss_generator_z += loss_generator_z
                sum_loss_generator_img += loss_generator_img

                if i % 1000 == 0:
                    process_iteration.show_table_2d(i, num_trains_per_epoch, {
                        'reconstruction': sum_loss_reconstruction / (i + 1),
                        'discriminator_z': sum_loss_discriminator_z / (i + 1),
                        'discriminator_img': sum_loss_discriminator_img / (i + 1),
                        'generator_z': sum_loss_generator_z / (i + 1),
                        'generator_img': sum_loss_generator_img / (i + 1),
                    })

            average_loss_per_epoch = [
                sum_loss_reconstruction / num_trains_per_epoch,
                sum_loss_discriminator_z / num_trains_per_epoch,
                sum_loss_discriminator_img / num_trains_per_epoch,
                sum_loss_generator_z / num_trains_per_epoch,
                sum_loss_generator_img / num_trains_per_epoch,
                (sum_loss_discriminator_z + sum_loss_discriminator_img) / num_trains_per_epoch,
                (sum_loss_generator_z + sum_loss_generator_img) / num_trains_per_epoch
            ]
            training_epoch_loss.append(average_loss_per_epoch)
            training_loss_name = [
                'reconstruction', 'discriminator_z', 'discriminator_img',
                'generator_z', 'generator_img', 'discriminator', 'generator'
            ]

            if epoch % 1 == 0:
                process.show_bar(epoch, max_epoch, {
                    'loss_r': average_loss_per_epoch[0],
                    'loss_d': average_loss_per_epoch[5],
                    'loss_g': average_loss_per_epoch[6]
                })
                plt.scatter_labeled_z(sess.run(h.z_r, feed_dict={h.x: images[:1000]}),
                                      [int(var) for var in labels[:1000]],
                                      dir=config.log_dir,
                                      filename='z_representation-{}'.format(epoch))

            if epoch % 10 == 0:
                saver.save(sess, os.path.join(config.ckpt_dir, 'model_ckptpoint'), global_step=epoch)
                pickle.dump((training_epoch_loss, training_loss_name),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
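# --- Illustrative sketch (not part of the original scripts) ---
# sampler.gaussian_mixture is an external helper; in the adversarial-autoencoder
# setting it typically means a 2D mixture of num_labels Gaussians whose means
# are arranged on a circle. A minimal version under that assumption (the
# radius and std values here are illustrative, not taken from the scripts):
import numpy as np

def gaussian_mixture(batchsize, ndim, num_labels, radius=2.0, std=0.3):
    # pick a mixture component per sample and place its mean on a circle
    labels = np.random.randint(0, num_labels, batchsize)
    angles = 2.0 * np.pi * labels / num_labels
    means = np.stack([radius * np.cos(angles), radius * np.sin(angles)], axis=1)
    z = means + np.random.normal(0, std, (batchsize, 2))
    if ndim > 2:
        # pad any extra latent dimensions with plain Gaussian noise
        z = np.concatenate([z, np.random.normal(0, std, (batchsize, ndim - 2))], axis=1)
    return z.astype(np.float32)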
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # settings
    # _l -> labeled
    # _u -> unlabeled
    # _g -> generated
    max_epoch = 1000
    num_trains_per_epoch = 500
    plot_interval = 5
    batchsize_l = 100
    batchsize_u = 100
    batchsize_g = batchsize_u

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = args.num_labeled
    if batchsize_l > num_labeled_data:
        batchsize_l = num_labeled_data
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data, discriminator_config.ndim_output, seed=args.seed)
    print training_labels_l

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_supervised = 0
        sum_loss_unsupervised = 0
        sum_loss_adversarial = 0
        sum_dx_labeled = 0
        sum_dx_unlabeled = 0
        sum_dx_generated = 0

        gan.update_learning_rate(get_learning_rate_for_epoch(epoch))

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l,
                discriminator_config.ndim_input, discriminator_config.ndim_output, binarize=False)
            images_u = dataset.sample_unlabeled_data(
                training_images_u, batchsize_u, discriminator_config.ndim_input, binarize=False)
            images_g = gan.generate_x(batchsize_g)
            images_g.unchain_backward()

            # supervised loss
            py_x_l, activations_l = gan.discriminate(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(py_x_l, gan.to_variable(label_ids_l))
            log_zx_l = F.logsumexp(py_x_l, axis=1)
            log_dx_l = log_zx_l - F.softplus(log_zx_l)
            dx_l = F.sum(F.exp(log_dx_l)) / batchsize_l

            # unsupervised loss
            # D(x) = Z(x) / {Z(x) + 1}, where Z(x) = \sum_{k=1}^K exp(l_k(x))
            # softplus(x) := log(1 + exp(x))
            # logD(x) = logZ(x) - log(Z(x) + 1)
            #         = logZ(x) - log(exp(log(Z(x))) + 1)
            #         = logZ(x) - softplus(logZ(x))
            # 1 - D(x) = 1 / {Z(x) + 1}
            # log{1 - D(x)} = log1 - log(Z(x) + 1)
            #               = -log(exp(log(Z(x))) + 1)
            #               = -softplus(logZ(x))
            py_x_u, _ = gan.discriminate(images_u, apply_softmax=False)
            log_zx_u = F.logsumexp(py_x_u, axis=1)
            log_dx_u = log_zx_u - F.softplus(log_zx_u)
            dx_u = F.sum(F.exp(log_dx_u)) / batchsize_u
            loss_unsupervised = -F.sum(log_dx_u) / batchsize_u  # minimize negative logD(x)
            py_x_g, _ = gan.discriminate(images_g, apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            loss_unsupervised += F.sum(F.softplus(log_zx_g)) / batchsize_u  # minimize negative log{1 - D(x)}

            # update discriminator
            gan.backprop_discriminator(loss_supervised + loss_unsupervised)

            # adversarial loss
            images_g = gan.generate_x(batchsize_g)
            py_x_g, activations_g = gan.discriminate(images_g, apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            log_dx_g = log_zx_g - F.softplus(log_zx_g)
            dx_g = F.sum(F.exp(log_dx_g)) / batchsize_g
            loss_adversarial = -F.sum(log_dx_g) / batchsize_u  # minimize negative logD(x)

            # feature matching
            if discriminator_config.use_feature_matching:
                features_true = activations_l[-1]
                features_true.unchain_backward()
                if batchsize_l != batchsize_g:
                    images_g = gan.generate_x(batchsize_l)
                    _, activations_g = gan.discriminate(images_g, apply_softmax=False)
                features_fake = activations_g[-1]
                loss_adversarial += F.mean_squared_error(features_true, features_fake)

            # update generator
            gan.backprop_generator(loss_adversarial)

            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_unsupervised += float(loss_unsupervised.data)
            sum_loss_adversarial += float(loss_adversarial.data)
            sum_dx_labeled += float(dx_l.data)
            sum_dx_unlabeled += float(dx_u.data)
            sum_dx_generated += float(dx_g.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        gan.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            discriminator_config.ndim_input, discriminator_config.ndim_output, binarize=False)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l, num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments, label_ids_l_segments):
            y_distribution, _ = gan.discriminate(images_l, apply_softmax=True, test=True)
            accuracy = F.accuracy(y_distribution, gan.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_l": sum_loss_supervised / num_trains_per_epoch,
            "loss_u": sum_loss_unsupervised / num_trains_per_epoch,
            "loss_g": sum_loss_adversarial / num_trains_per_epoch,
            "dx_l": sum_dx_labeled / num_trains_per_epoch,
            "dx_u": sum_dx_unlabeled / num_trains_per_epoch,
            "dx_g": sum_dx_generated / num_trains_per_epoch,
            "accuracy": validation_accuracy,
        })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy, progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(epoch, progress.get_total_time()))
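# --- Illustrative sketch (not part of the original scripts) ---
# Numerical sanity check of the identities used in the unsupervised loss above:
# with Z(x) = sum_k exp(l_k(x)) and D(x) = Z(x) / (Z(x) + 1),
#   log D(x)      = logZ(x) - softplus(logZ(x))
#   log(1 - D(x)) = -softplus(logZ(x))
import numpy as np

def softplus(a):
    return np.log1p(np.exp(a))

logits = np.random.randn(4, 10)                  # l_k(x) for a toy batch
log_zx = np.log(np.exp(logits).sum(axis=1))      # logZ(x), i.e. logsumexp
dx = np.exp(log_zx) / (np.exp(log_zx) + 1)       # D(x) computed directly
assert np.allclose(np.log(dx), log_zx - softplus(log_zx))
assert np.allclose(np.log(1 - dx), -softplus(log_zx))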
def main(run_load_from_file=False):
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    # settings
    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_u = 100

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
        Load from checkpoint or start a new session
        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []

        # Recording loss per epoch
        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator
            '''
            learning_rate = opt.ladder_learning_rate(epoch + len(training_epoch_loss))

            # Recording loss per iteration
            training_loss_set = []
            sum_loss_reconstruction = 0
            sum_loss_supervised = 0
            sum_loss_discriminator = 0
            sum_loss_generator = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)

                # sample from data distribution
                images_u = dataset.sample_unlabeled_data(images, batch_size_u)

                # reconstruction phase
                _, loss_reconstruction = sess.run([h.opt_r, h.loss_r],
                                                  feed_dict={h.x: images_u, h.lr: learning_rate})

                z_true_u = sampler.gaussian(batch_size_u, config.ndim_z, mean=0, var=1)
                y_true_u = sampler.onehot_categorical(batch_size_u, config.ndim_y)

                # adversarial phase for discriminator
                _, loss_discriminator_y = sess.run([h.opt_dy, h.loss_dy],
                                                   feed_dict={h.x: images_u, h.y: y_true_u, h.lr: learning_rate})
                _, loss_discriminator_z = sess.run([h.opt_dz, h.loss_dz],
                                                   feed_dict={h.x: images_u, h.z: z_true_u, h.lr: learning_rate})
                loss_discriminator = loss_discriminator_y + loss_discriminator_z

                # adversarial phase for generator
                _, loss_generator_y, loss_generator_z = sess.run([h.opt_e, h.loss_gy, h.loss_gz],
                                                                 feed_dict={h.x: images_u, h.lr: learning_rate})
                loss_generator = loss_generator_y + loss_generator_z

                training_loss_set.append([
                    loss_reconstruction,
                    loss_discriminator,
                    loss_discriminator_y,
                    loss_discriminator_z,
                    loss_generator,
                    loss_generator_z,
                    loss_generator_y,
                ])
                sum_loss_reconstruction += loss_reconstruction
                sum_loss_discriminator += loss_discriminator
                sum_loss_generator += loss_generator

                if i % 1000 == 0:
                    process_iteration.show_table_2d(i, num_trains_per_epoch, {
                        'reconstruction': sum_loss_reconstruction / (i + 1),
                        'discriminator': sum_loss_discriminator / (i + 1),
                        'generator': sum_loss_generator / (i + 1),
                    })

            # at the end of each epoch, summarize the loss
            average_training_loss_per_epoch = np.mean(np.array(training_loss_set), axis=0)

            # append to the per-epoch loss history
            training_epoch_loss.append(average_training_loss_per_epoch)
            loss_name_per_epoch = [
                'reconstruction',
                'discriminator',
                'discriminator_y',
                'discriminator_z',
                'generator',
                'generator_z',
                'generator_y',
            ]

            if epoch % 1 == 0:
                process.show_bar(epoch, max_epoch, {
                    'loss_r': average_training_loss_per_epoch[0],
                    'loss_d': average_training_loss_per_epoch[1],
                    'loss_g': average_training_loss_per_epoch[4],
                })
                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(str(epoch).zfill(3)))

            if epoch % 10 == 0:
                saver.save(sess, os.path.join(config.ckpt_dir, 'model_ckptpoint'), global_step=epoch)
                pickle.dump((training_epoch_loss, loss_name_per_epoch),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
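# --- Illustrative sketch (not part of the original scripts) ---
# opt.ladder_learning_rate is an external helper whose exact schedule is not
# shown here. Purely for illustration, a hypothetical step-wise decay keyed
# on the (restart-aware) epoch index could look like this; the constants are
# invented, not taken from the original configuration.
def ladder_learning_rate_sketch(epoch, base_lr=1e-4, decay=0.9, step=50):
    # decay the learning rate every `step` epochs (illustrative only)
    return base_lr * (decay ** (epoch // step))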
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = adgm.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data, num_types_of_label, seed=args.seed)
    print training_labels_l

    # init weightnorm layers
    if config.use_weightnorm:
        print "initializing weight normalization layers ..."
        images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
            training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
        images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)
        adgm.compute_lower_bound(images_l, label_onehot_l, images_u)

    # training
    temperature = 1
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_lower_bound_l = 0
        sum_lower_bound_u = 0
        sum_loss_classifier = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)

            # lower bound loss using gumbel-softmax
            lower_bound, lb_labeled, lb_unlabeled = adgm.compute_lower_bound_gumbel(
                images_l, label_onehot_l, images_u, temperature)
            loss_lower_bound = -lower_bound

            # classification loss
            a_l = adgm.encode_x_a(images_l, False)
            unnormalized_y_distribution = adgm.encode_ax_y_distribution(a_l, images_l, softmax=False)
            loss_classifier = alpha * F.softmax_cross_entropy(
                unnormalized_y_distribution, adgm.to_variable(label_ids_l))

            # backprop
            adgm.backprop(loss_classifier + loss_lower_bound)

            sum_lower_bound_l += float(lb_labeled.data)
            sum_lower_bound_u += float(lb_unlabeled.data)
            sum_loss_classifier += float(loss_classifier.data)
            progress.show(t, num_trains_per_epoch, {})

        adgm.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data, config.ndim_x, config.ndim_y)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l, num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments, label_ids_l_segments):
            y_distribution = adgm.encode_x_y_distribution(images_l, softmax=True, test=True)
            accuracy = F.accuracy(y_distribution, adgm.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "lb_u": sum_lower_bound_u / num_trains_per_epoch,
            "lb_l": sum_lower_bound_l / num_trains_per_epoch,
            "loss_spv": sum_loss_classifier / num_trains_per_epoch,
            "accuracy": validation_accuracy,
            "tmp": temperature,
        })

        # anneal the temperature
        temperature = max(0.5, temperature * 0.999)

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
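# --- Illustrative sketch (not part of the original scripts) ---
# compute_lower_bound_gumbel relaxes the discrete label y with Gumbel-softmax
# samples; `temperature` above is the relaxation temperature, annealed toward
# 0.5 at the end of every epoch. A minimal numpy version of the sampling step:
import numpy as np

def gumbel_softmax_sample(log_probs, temperature):
    # log_probs: (batchsize, ndim_y) unnormalized log class probabilities
    gumbel_noise = -np.log(-np.log(np.random.uniform(1e-10, 1.0, log_probs.shape)))
    logits = (log_probs + gumbel_noise) / temperature
    logits -= logits.max(axis=1, keepdims=True)  # for numerical stability
    e = np.exp(logits)
    return e / e.sum(axis=1, keepdims=True)      # soft one-hot sample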
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_u = dataset.sample_unlabeled_data(images, batchsize)

            # reconstruction phase
            qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            reconstruction_u = aae.decode_yz_x(qy_x_u, z_u)
            loss_reconstruction = F.mean_squared_error(aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize, config.ndim_z, mean=0, var=1)
            y_true_u = sampler.onehot_categorical(batchsize, config.ndim_y)
            discrimination_z_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            discrimination_y_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(discrimination_z_true, class_true) \
                + F.softmax_cross_entropy(discrimination_z_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(discrimination_y_true, class_true) \
                + F.softmax_cross_entropy(discrimination_y_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
            "loss_d": sum_loss_discriminator / num_trains_per_epoch,
            "loss_g": sum_loss_generator / num_trains_per_epoch,
        })
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # create semi-supervised split
    num_labeled_data = 10000
    num_types_of_label = 11  # additional label corresponds to unlabeled data
    training_images_l, training_labels_l, training_images_u, _, _ = dataset.create_semisupervised(
        images, labels, 0, num_labeled_data, num_types_of_label)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l, ndim_y=num_types_of_label)
            images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u)

            # reconstruction phase
            z_u = aae.encode_x_z(images_u)
            reconstruction_u = aae.decode_z_x(z_u)
            loss_reconstruction = F.mean_squared_error(aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            z_fake_u = aae.encode_x_z(images_u)
            z_fake_l = aae.encode_x_z(images_l)

            onehot = np.zeros((1, num_types_of_label), dtype=np.float32)
            onehot[0, -1] = 1  # turn on the extra class
            label_onehot_u = np.repeat(onehot, batchsize_u, axis=0)

            z_true_l = sampler.supervised_gaussian_mixture(
                batchsize_l, config.ndim_z, label_ids_l, num_types_of_label - 1)
            z_true_u = sampler.gaussian_mixture(batchsize_u, config.ndim_z, num_types_of_label - 1)

            dz_true_l = aae.discriminate_z(label_onehot_l, z_true_l, apply_softmax=False)
            dz_true_u = aae.discriminate_z(label_onehot_u, z_true_u, apply_softmax=False)
            dz_fake_l = aae.discriminate_z(label_onehot_l, z_fake_l, apply_softmax=False)
            dz_fake_u = aae.discriminate_z(label_onehot_u, z_fake_u, apply_softmax=False)
            loss_discriminator = F.softmax_cross_entropy(dz_true_l, class_true) \
                + F.softmax_cross_entropy(dz_true_u, class_true) \
                + F.softmax_cross_entropy(dz_fake_l, class_fake) \
                + F.softmax_cross_entropy(dz_fake_u, class_fake)
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            z_fake_u = aae.encode_x_z(images_u)
            z_fake_l = aae.encode_x_z(images_l)
            dz_fake_l = aae.discriminate_z(label_onehot_l, z_fake_l, apply_softmax=False)
            dz_fake_u = aae.discriminate_z(label_onehot_u, z_fake_u, apply_softmax=False)
            loss_generator = F.softmax_cross_entropy(dz_fake_l, class_true) \
                + F.softmax_cross_entropy(dz_fake_u, class_true)
            aae.backprop_generator(loss_generator)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
            "loss_d": sum_loss_discriminator / num_trains_per_epoch,
            "loss_g": sum_loss_generator / num_trains_per_epoch,
        })
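# --- Illustrative sketch (not part of the original scripts) ---
# sampler.supervised_gaussian_mixture is an external helper: for labeled data
# the mixture component is chosen by the label rather than at random, so each
# class is pushed toward its own region of the prior. A minimal version under
# the same circle-of-Gaussians assumption as the unsupervised sampler sketch:
import numpy as np

def supervised_gaussian_mixture(batchsize, ndim, label_ids, num_labels,
                                radius=2.0, std=0.3):
    angles = 2.0 * np.pi * np.asarray(label_ids) / num_labels
    means = np.stack([radius * np.cos(angles), radius * np.sin(angles)], axis=1)
    z = means + np.random.normal(0, std, (batchsize, 2))
    if ndim > 2:
        z = np.concatenate([z, np.random.normal(0, std, (batchsize, ndim - 2))], axis=1)
    return z.astype(np.float32)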
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = vat.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 200

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data, config.ndim_y, seed=args.seed)
    print training_labels_l

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_supervised = 0
        sum_loss_lds = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y, binarize=False)
            images_u = dataset.sample_unlabeled_data(
                training_images_u, batchsize_u, config.ndim_x, binarize=False)

            # supervised loss
            unnormalized_y_distribution = vat.encode_x_y(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                unnormalized_y_distribution, vat.to_variable(label_ids_l))

            # virtual adversarial training
            lds_l = -F.sum(vat.compute_lds(images_l)) / batchsize_l
            lds_u = -F.sum(vat.compute_lds(images_u)) / batchsize_u
            loss_lds = lds_l + lds_u

            # backprop
            vat.backprop(loss_supervised + config.lambda_ * loss_lds)

            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_lds += float(loss_lds.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        vat.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            config.ndim_x, config.ndim_y, binarize=False)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l, num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments, label_ids_l_segments):
            y_distribution = vat.encode_x_y(images_l, apply_softmax=True, test=True)
            accuracy = F.accuracy(y_distribution, vat.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_spv": sum_loss_supervised / num_trains_per_epoch,
            "loss_lds": sum_loss_lds / num_trains_per_epoch,
            "accuracy": validation_accuracy,
        })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy, progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))
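# --- Illustrative sketch (not part of the original scripts) ---
# vat.compute_lds returns the local distributional smoothness
# LDS(x) = -KL( p(y|x) || p(y|x + r_adv) ), where r_adv is the perturbation
# that most increases the KL term, found by power iteration. The sketch below
# only illustrates the KL quantity with a random perturbation and a toy linear
# classifier standing in for the model; the power-iteration search is omitted.
import numpy as np

def softmax(a):
    e = np.exp(a - a.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def kl_divergence(p, q, eps=1e-8):
    return np.sum(p * (np.log(p + eps) - np.log(q + eps)), axis=1)

W = np.random.randn(784, 10).astype(np.float32) * 0.01   # toy classifier weights
x = np.random.rand(8, 784).astype(np.float32)            # toy batch
r = np.random.randn(*x.shape).astype(np.float32)
r = 2.0 * r / np.linalg.norm(r, axis=1, keepdims=True)    # perturbation of norm 2
lds = -kl_divergence(softmax(x.dot(W)), softmax((x + r).dot(W)))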
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data, num_types_of_label)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0
        sum_loss_cluster_head = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l)
            images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u)

            # reconstruction phase
            qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            representation = aae.encode_yz_representation(qy_x_u, z_u)
            reconstruction_u = aae.decode_representation_x(representation)
            loss_reconstruction = F.mean_squared_error(aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize_u, config.ndim_z, mean=0, var=1)
            y_true_u = sampler.onehot_categorical(batchsize_u, config.ndim_y)
            dz_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            dy_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            dz_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            dy_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(dz_true, class_true) \
                + F.softmax_cross_entropy(dz_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(dy_true, class_true) \
                + F.softmax_cross_entropy(dy_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            dz_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            dy_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(dz_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(dy_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            # supervised phase
            log_qy_x_l, z_l = aae.encode_x_yz(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(log_qy_x_l, aae.to_variable(label_ids_l))
            aae.backprop_generator(loss_supervised)

            # additional cost that pushes every pair of cluster heads apart
            # (minimizing the negative sum of pairwise euclidean distances)
            distance = aae.compute_distance_of_cluster_heads()
            loss_cluster_head = -F.sum(distance)
            aae.backprop_cluster_head(loss_cluster_head)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            sum_loss_cluster_head += float(
                aae.nCr(config.ndim_y, 2) * config.cluster_head_distance_threshold
                + loss_cluster_head.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        # validation phase
        images_v_segments = np.split(validation_images, num_validation_data // 1000)
        labels_v_segments = np.split(validation_labels, num_validation_data // 1000)
        sum_accuracy = 0
        for images_v, labels_v in zip(images_v_segments, labels_v_segments):
            qy = aae.encode_x_yz(images_v, apply_softmax=True, test=True)[0]
            accuracy = F.accuracy(qy, aae.to_variable(labels_v))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_v_segments)

        progress.show(num_trains_per_epoch, num_trains_per_epoch, {
            "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
            "loss_s": sum_loss_supervised / num_trains_per_epoch,
            "loss_d": sum_loss_discriminator / num_trains_per_epoch,
            "loss_g": sum_loss_generator / num_trains_per_epoch,
            "loss_c": sum_loss_cluster_head / num_trains_per_epoch,
            "accuracy": validation_accuracy
        })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
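# --- Illustrative sketch (not part of the original scripts) ---
# compute_distance_of_cluster_heads is an external helper. The logging term
# nCr(ndim_y, 2) * cluster_head_distance_threshold + loss_cluster_head above
# suggests it returns pairwise cluster-head distances capped at a threshold,
# summed over all C(ndim_y, 2) pairs; this is only an assumption. A minimal
# numpy version under that assumption:
import numpy as np

def distance_of_cluster_heads(heads, threshold):
    # heads: (ndim_y, ndim_z) cluster-head vectors; distances above the
    # threshold contribute the threshold itself, so far-apart heads stop
    # being pushed further apart
    total = 0.0
    n = len(heads)
    for i in range(n):
        for j in range(i + 1, n):
            total += min(threshold, np.linalg.norm(heads[i] - heads[j]))
    return total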
def main(run_load_from_file=False):
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    # settings
    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_l = 100
    batch_size_u = 100

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data, num_types_of_label)

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
        Load from checkpoint or start a new session
        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []

        # Recording loss per epoch
        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator
            '''
            learning_rate = opt.ladder_learning_rate(epoch + len(training_epoch_loss))

            # Recording loss per iteration
            training_loss_set = []
            sum_loss_reconstruction = 0
            sum_loss_supervised = 0
            sum_loss_discriminator = 0
            sum_loss_generator = 0
            sum_loss_cluster_head = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)

                # sample from data distribution
                images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
                    training_images_l, training_labels_l, batch_size_l)
                images_u = dataset.sample_unlabeled_data(training_images_u, batch_size_u)

                # additional cost that pushes every pair of cluster heads apart
                if epoch == 0:
                    for j in range(5):
                        starting_labels, ending_labels = dataset.cluster_create_dataset(config.ndim_y)
                        _, loss_transform = sess.run([h.opt_t, h.loss_t],
                                                     feed_dict={h.g_s: starting_labels,
                                                                h.g_e: ending_labels,
                                                                h.lr: learning_rate})

                # reconstruction phase
                _, loss_reconstruction = sess.run([h.opt_r, h.loss_r],
                                                  feed_dict={h.x: images_u, h.lr: learning_rate})

                z_true_u = sampler.gaussian(batch_size_u, config.ndim_z, mean=0, var=1)
                y_true_u = sampler.onehot_categorical(batch_size_u, config.ndim_y)

                # adversarial phase for discriminator
                _, loss_discriminator_y = sess.run([h.opt_dy, h.loss_dy],
                                                   feed_dict={h.x: images_u, h.y: y_true_u, h.lr: learning_rate})
                _, loss_discriminator_z = sess.run([h.opt_dz, h.loss_dz],
                                                   feed_dict={h.x: images_u, h.z: z_true_u, h.lr: learning_rate})
                loss_discriminator = loss_discriminator_y + loss_discriminator_z

                # adversarial phase for generator
                _, loss_generator_y, loss_generator_z = sess.run([h.opt_e, h.loss_gy, h.loss_gz],
                                                                 feed_dict={h.x: images_u, h.lr: learning_rate})
                loss_generator = loss_generator_y + loss_generator_z

                # supervised phase
                _, loss_generator_supervised = sess.run([h.opt_ey, h.loss_ey],
                                                        feed_dict={h.x: images_l,
                                                                   h.y_s: label_id_l,
                                                                   h.lr: learning_rate})

                training_loss_set.append([
                    loss_reconstruction,
                    loss_discriminator,
                    loss_discriminator_y,
                    loss_discriminator_z,
                    loss_generator,
                    loss_generator_z,
                    loss_generator_y,
                    loss_generator_supervised,
                    loss_transform
                ])
                sum_loss_reconstruction += loss_reconstruction
                sum_loss_discriminator += loss_discriminator
                sum_loss_generator += loss_generator
                sum_loss_supervised += loss_generator_supervised
                sum_loss_cluster_head += loss_transform

                if i % 1000 == 0:
                    process_iteration.show_table_2d(i, num_trains_per_epoch, {
                        'reconstruction': sum_loss_reconstruction / (i + 1),
                        'discriminator': sum_loss_discriminator / (i + 1),
                        'generator': sum_loss_generator / (i + 1),
                        'supervise': sum_loss_supervised / (i + 1),
                        'cluster_head': sum_loss_cluster_head / (i + 1)
                    })

            # at the end of each epoch, summarize the loss
            average_training_loss_per_epoch = np.mean(np.array(training_loss_set), axis=0)

            # validation phase
            images_v_segments = np.split(validation_images, num_validation_data // 1000)
            labels_v_segments = np.split(validation_labels, num_validation_data // 1000)
            sum_accuracy = 0
            for images_v, labels_v in zip(images_v_segments, labels_v_segments):
                y_v = sess.run(h.y_r, feed_dict={h.x: images_v})
                accuracy = opt.compute_accuracy(y_v, labels_v)
                sum_accuracy += accuracy
            validation_accuracy = sum_accuracy / len(images_v_segments)

            # append validation accuracy to the training loss
            average_loss_per_epoch = np.append(average_training_loss_per_epoch, validation_accuracy)
            training_epoch_loss.append(average_loss_per_epoch)
            loss_name_per_epoch = [
                'reconstruction',
                'discriminator',
                'discriminator_y',
                'discriminator_z',
                'generator',
                'generator_z',
                'generator_y',
                'supervised',
                'transform',
                'validation_accuracy'
            ]

            if epoch % 1 == 0:
                process.show_bar(epoch, max_epoch, {
                    'loss_r': average_loss_per_epoch[0],
                    'loss_d': average_loss_per_epoch[1],
                    'loss_g': average_loss_per_epoch[4],
                    'loss_v': average_loss_per_epoch[9],
                })
                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(str(epoch).zfill(3)))

            if epoch % 10 == 0:
                saver.save(sess, os.path.join(config.ckpt_dir, 'model_ckptpoint'), global_step=epoch)
                pickle.dump((training_epoch_loss, loss_name_per_epoch),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
                plt.plot_double_scale_trend(config.ckpt_dir)
def main(run_load_from_file=False):
    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_l = 100
    batch_size_u = 100

    # create semi-supervised split
    # load MNIST images
    images, labels = dataset.load_train_images()
    num_labeled_data = 10000
    num_types_of_label = 11  # additional label corresponds to unlabeled data
    training_images_l, training_labels_l, training_images_u, _, _ = dataset.create_semisupervised(
        images, labels, 0, num_labeled_data, num_types_of_label)

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
        Load from checkpoint or start a new session
        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []

        # Recording loss per epoch
        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator
            '''
            learning_rate = opt.ladder_learning_rate(epoch + len(training_epoch_loss))

            # Recording loss per iteration
            sum_loss_reconstruction = 0
            sum_loss_discriminator = 0
            sum_loss_generator = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)

                # Inputs
                '''
                _l -> labeled
                _u -> unlabeled
                '''
                images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
                    training_images_l, training_labels_l, batch_size_l, ndim_y=num_types_of_label)
                images_u = dataset.sample_unlabeled_data(training_images_u, batch_size_u)

                onehot = np.zeros((1, num_types_of_label), dtype=np.float32)
                onehot[0, -1] = 1  # turn on the extra "unlabeled" class
                label_onehot_u = np.repeat(onehot, batch_size_u, axis=0)

                z_true_l = sampler.supervised_swiss_roll(batch_size_l, config.ndim_z, label_id_l, num_types_of_label - 1)
                z_true_u = sampler.swiss_roll(batch_size_u, config.ndim_z, num_types_of_label - 1)
                # z_true_l = sampler.supervised_gaussian_mixture(batch_size_l, config.ndim_z, label_id_l, num_types_of_label - 1)
                # z_true_u = sampler.gaussian_mixture(batch_size_u, config.ndim_z, num_types_of_label - 1)

                # reconstruction phase
                _, loss_reconstruction = sess.run([h.opt_r, h.loss_r],
                                                  feed_dict={h.x: images_u, h.lr: learning_rate})

                # adversarial phase for discriminator
                _, loss_discriminator_l = sess.run([h.opt_d, h.loss_d],
                                                   feed_dict={h.x: images_l,
                                                              h.label: label_onehot_l,
                                                              h.z: z_true_l,
                                                              h.lr: learning_rate})
                _, loss_discriminator_u = sess.run([h.opt_d, h.loss_d],
                                                   feed_dict={h.x: images_u,
                                                              h.label: label_onehot_u,
                                                              h.z: z_true_u,
                                                              h.lr: learning_rate})
                loss_discriminator = loss_discriminator_l + loss_discriminator_u

                # adversarial phase for generator
                _, loss_generator_l = sess.run([h.opt_e, h.loss_e],
                                               feed_dict={h.x: images_l,
                                                          h.label: label_onehot_l,
                                                          h.lr: learning_rate})
                _, loss_generator_u = sess.run([h.opt_e, h.loss_e],
                                               feed_dict={h.x: images_u,
                                                          h.label: label_onehot_u,
                                                          h.lr: learning_rate})
                loss_generator = loss_generator_l + loss_generator_u

                sum_loss_reconstruction += loss_reconstruction / batch_size_u
                sum_loss_discriminator += loss_discriminator
                sum_loss_generator += loss_generator

                if i % 1000 == 0:
                    process_iteration.show_table_2d(i, num_trains_per_epoch, {
                        'reconstruction': sum_loss_reconstruction / (i + 1),
                        'discriminator': sum_loss_discriminator / (i + 1),
                        'generator': sum_loss_generator / (i + 1),
                    })

            average_loss_per_epoch = [
                sum_loss_reconstruction / num_trains_per_epoch,
                sum_loss_discriminator / num_trains_per_epoch,
                sum_loss_generator / num_trains_per_epoch,
            ]
            training_epoch_loss.append(average_loss_per_epoch)
            training_loss_name = ['reconstruction', 'discriminator', 'generator']

            if epoch % 1 == 0:
                process.show_bar(epoch, max_epoch, {
                    'loss_r': average_loss_per_epoch[0],
                    'loss_d': average_loss_per_epoch[1],
                    'loss_g': average_loss_per_epoch[2]
                })
                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(str(epoch).zfill(3)))
                plt.scatter_labeled_z(sess.run(h.z_r, feed_dict={h.x: images[:1000]}),
                                      [int(var) for var in labels[:1000]],
                                      dir=config.log_dir,
                                      filename='z_representation-{}'.format(epoch))

            if epoch % 10 == 0:
                saver.save(sess, os.path.join(config.ckpt_dir, 'model_ckptpoint'), global_step=epoch)
                pickle.dump((training_epoch_loss, training_loss_name),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
                plt.plot_double_scale_trend(config.ckpt_dir)
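# --- Illustrative sketch (not part of the original scripts) ---
# sampler.swiss_roll / sampler.supervised_swiss_roll are external helpers that
# draw the 2D swiss-roll prior (one arm segment per label in the supervised
# case). A minimal unsupervised version, assuming the usual parametrization;
# the scale factors and noise level here are illustrative assumptions.
import numpy as np

def swiss_roll(batchsize, ndim, num_labels, noise=0.05):
    labels = np.random.randint(0, num_labels, batchsize)
    # uniform position along the arm segment assigned to each label
    uni = (np.random.uniform(0.0, 1.0, batchsize) + labels) / num_labels
    r = np.sqrt(uni) * 3.0
    angle = np.pi * 4.0 * np.sqrt(uni)
    z = np.stack([r * np.cos(angle), r * np.sin(angle)], axis=1)
    z += np.random.normal(0, noise, z.shape)
    if ndim > 2:
        z = np.concatenate([z, np.random.normal(0, noise, (batchsize, ndim - 2))], axis=1)
    return z.astype(np.float32)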