# NOTE: these training scripts assume the usual imports at the top of each file,
# e.g. `import argparse, math, time`, `import numpy as np`, `import chainer`,
# `import chainer.functions as F`, `from chainer import cuda`, and
# `from chainer.optimizer import GradientClipping`, plus the project's own
# Model, Dataset, Optimizer, sampler, printr and clear_console helpers.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=5000)
    parser.add_argument("--num-labeled-data", "-nl", type=int, default=100)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    np.random.seed(args.seed)

    model = Model()
    model.load(args.model)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize to [-1, 1]
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train),
                      test=(images_test, labels_test),
                      num_labeled_data=args.num_labeled_data,
                      num_classes=model.ndim_y)
    print("#labeled: {}".format(dataset.get_num_labeled_data()))
    print("#unlabeled: {}".format(dataset.get_num_unlabeled_data()))
    _, labels = dataset.get_labeled_data()
    print("labeled data:", labels)

    total_iterations_train = len(images_train) // args.batchsize

    # optimizers
    optimizer_encoder = Optimizer("msgd", 0.01, 0.9)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_semi_supervised = Optimizer("msgd", 0.1, 0.9)
    optimizer_semi_supervised.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_semi_supervised.add_hook(GradientClipping(args.grad_clip))

    optimizer_generator = Optimizer("msgd", 0.1, 0.1)
    optimizer_generator.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_generator.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer("msgd", 0.01, 0.9)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_z = Optimizer("msgd", 0.1, 0.1)
    optimizer_discriminator_z.setup(model.discriminator_z)
    if args.grad_clip > 0:
        optimizer_discriminator_z.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_y = Optimizer("msgd", 0.1, 0.1)
    optimizer_discriminator_y.setup(model.discriminator_y)
    if args.grad_clip > 0:
        optimizer_discriminator_y.add_hook(GradientClipping(args.grad_clip))

    optimizer_cluster_head = Optimizer("msgd", 0.01, 0.9)
    optimizer_cluster_head.setup(model.cluster_head)
    if args.grad_clip > 0:
        optimizer_cluster_head.add_hook(GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # discriminator targets
    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    training_start_time = time.time()
    for epoch in range(args.total_epochs):
        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_loss_supervised = 0
        sum_loss_cluster_head = 0
        sum_discriminator_z_confidence_true = 0
        sum_discriminator_z_confidence_fake = 0
        sum_discriminator_y_confidence_true = 0
        sum_discriminator_y_confidence_fake = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                # sample minibatch
                x_u = dataset.sample_unlabeled_minibatch(args.batchsize, gpu=using_gpu)
                x_l, y_l, _ = dataset.sample_labeled_minibatch(args.batchsize, gpu=using_gpu)

                ### reconstruction phase ###
                if True:
                    y_onehot_u, z_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    repr_u = model.encode_yz_representation(y_onehot_u, z_u)
                    x_reconstruction_u = model.decode_representation_x(repr_u)
                    loss_reconstruction_u = F.mean_squared_error(x_u, x_reconstruction_u)

                    y_onehot_l, z_l = model.encode_x_yz(x_l, apply_softmax_y=True)
                    repr_l = model.encode_yz_representation(y_onehot_l, z_l)
                    x_reconstruction_l = model.decode_representation_x(repr_l)
                    loss_reconstruction_l = F.mean_squared_error(x_l, x_reconstruction_l)

                    loss_reconstruction = loss_reconstruction_u + loss_reconstruction_l

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    # optimizer_cluster_head.update()
                    optimizer_decoder.update()

                ### adversarial phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    z_true = sampler.gaussian(args.batchsize, model.ndim_z, mean=0, var=1)
                    y_onehot_true = sampler.onehot_categorical(args.batchsize, model.ndim_y)
                    if using_gpu:
                        z_true = cuda.to_gpu(z_true)
                        y_onehot_true = cuda.to_gpu(y_onehot_true)

                    dz_true = model.discriminate_z(z_true, apply_softmax=False)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_true = model.discriminate_y(y_onehot_true, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)

                    discriminator_z_confidence_true = float(xp.mean(F.softmax(dz_true).data[:, 0]))
                    discriminator_z_confidence_fake = float(xp.mean(F.softmax(dz_fake).data[:, 1]))
                    discriminator_y_confidence_true = float(xp.mean(F.softmax(dy_true).data[:, 0]))
                    discriminator_y_confidence_fake = float(xp.mean(F.softmax(dy_fake).data[:, 1]))

                    loss_discriminator_z = F.softmax_cross_entropy(dz_true, class_true) + F.softmax_cross_entropy(dz_fake, class_fake)
                    loss_discriminator_y = F.softmax_cross_entropy(dy_true, class_true) + F.softmax_cross_entropy(dy_fake, class_fake)
                    loss_discriminator = loss_discriminator_z + loss_discriminator_y

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator_z.update()
                    optimizer_discriminator_y.update()

                ### generator phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)
                    loss_generator = F.softmax_cross_entropy(dz_fake, class_true) + F.softmax_cross_entropy(dy_fake, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_generator.update()

                ### supervised phase ###
                if True:
                    logit_l, _ = model.encode_x_yz(x_l, apply_softmax_y=False)
                    loss_supervised = F.softmax_cross_entropy(logit_l, y_l)

                    model.cleargrads()
                    loss_supervised.backward()
                    optimizer_semi_supervised.update()

                ### additional cost ###
                if True:
                    distance = model.compute_distance_of_cluster_heads()
                    loss_cluster_head = -F.sum(distance)

                    model.cleargrads()
                    loss_cluster_head.backward()
                    optimizer_cluster_head.update()

                sum_loss_discriminator += float(loss_discriminator.data)
                sum_loss_supervised += float(loss_supervised.data)
                sum_loss_generator += float(loss_generator.data)
                sum_loss_autoencoder += float(loss_reconstruction.data)
                sum_loss_cluster_head += float(model.nCr(model.ndim_y, 2) * model.cluster_head_distance_threshold + loss_cluster_head.data)
                sum_discriminator_z_confidence_true += discriminator_z_confidence_true
                sum_discriminator_z_confidence_fake += discriminator_z_confidence_fake
                sum_discriminator_y_confidence_true += discriminator_y_confidence_true
                sum_discriminator_y_confidence_fake += discriminator_y_confidence_fake

            printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

        model.save(args.model)

        # evaluate classification accuracy on the training set
        labeled_iter_train = dataset.get_iterator(args.batchsize * 20, train=True, labeled=True, gpu=using_gpu)
        unlabeled_iter_train = dataset.get_iterator(args.batchsize * 20, train=True, unlabeled=True, gpu=using_gpu)
        average_accuracy_l = 0
        average_accuracy_u = 0
        for x_l, true_label in labeled_iter_train:
            with chainer.no_backprop_mode(), chainer.using_config("train", False):
                y_onehot_l, _ = model.encode_x_yz(x_l, apply_softmax_y=True)
                accuracy = F.accuracy(y_onehot_l, true_label)
                average_accuracy_l += float(accuracy.data)

        for x_u, true_label in unlabeled_iter_train:
            with chainer.no_backprop_mode(), chainer.using_config("train", False):
                y_onehot_u, _ = model.encode_x_yz(x_u, apply_softmax_y=True)
                accuracy = F.accuracy(y_onehot_u, true_label)
                average_accuracy_u += float(accuracy.data)

        average_accuracy_l /= labeled_iter_train.get_total_iterations()
        average_accuracy_u /= unlabeled_iter_train.get_total_iterations()

        clear_console()
        print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g}, s={:.5g}, c={:.5g} - disc_z: true={:.1f}%, fake={:.1f}% - disc_y: true={:.1f}%, fake={:.1f}% - acc: l={:.2f}%, u={:.2f}% - total {} min".format(
            epoch + 1, int(time.time() - epoch_start_time),
            sum_loss_generator / total_iterations_train,
            sum_loss_discriminator / total_iterations_train,
            sum_loss_autoencoder / total_iterations_train,
            sum_loss_supervised / total_iterations_train,
            sum_loss_cluster_head / total_iterations_train,
            sum_discriminator_z_confidence_true / total_iterations_train * 100,
            sum_discriminator_z_confidence_fake / total_iterations_train * 100,
            sum_discriminator_y_confidence_true / total_iterations_train * 100,
            sum_discriminator_y_confidence_fake / total_iterations_train * 100,
            average_accuracy_l * 100,
            average_accuracy_u * 100,
            int((time.time() - training_start_time) // 60)))

        # learning rate schedule
        if epoch == 50:
            optimizer_encoder.set_learning_rate(0.001)
            optimizer_decoder.set_learning_rate(0.001)
            optimizer_semi_supervised.set_learning_rate(0.01)
            optimizer_generator.set_learning_rate(0.01)
            optimizer_discriminator_y.set_learning_rate(0.01)
            optimizer_discriminator_z.set_learning_rate(0.01)

        if epoch == 1000:
            optimizer_encoder.set_learning_rate(0.0001)
            optimizer_decoder.set_learning_rate(0.0001)
            optimizer_semi_supervised.set_learning_rate(0.001)
            optimizer_generator.set_learning_rate(0.001)
            optimizer_discriminator_y.set_learning_rate(0.001)
            optimizer_discriminator_z.set_learning_rate(0.001)
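# Optimizer and GradientClipping above come from this project, not from Chainer
# directly.  A rough sketch of what Optimizer might wrap (an assumption, not the
# actual implementation): a small factory around Chainer's built-in optimizers
# that also exposes the set_learning_rate() used by the schedules at the end of
# each epoch loop.
class Optimizer(object):
    def __init__(self, name, lr, momentum=0.9):
        if name.lower() == "msgd":
            self.optimizer = chainer.optimizers.MomentumSGD(lr=lr, momentum=momentum)
        elif name.lower() == "adam":
            self.optimizer = chainer.optimizers.Adam(alpha=lr, beta1=momentum)
        else:
            self.optimizer = chainer.optimizers.SGD(lr=lr)

    def setup(self, link):
        self.optimizer.setup(link)

    def add_hook(self, hook):
        # e.g. chainer.optimizer.GradientClipping(threshold)
        self.optimizer.add_hook(hook)

    def update(self):
        # gradients have already been accumulated by loss.backward()
        self.optimizer.update()

    def set_learning_rate(self, lr):
        if isinstance(self.optimizer, chainer.optimizers.Adam):
            self.optimizer.alpha = lr
        else:
            self.optimizer.lr = lr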
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=5000)
    parser.add_argument("--num-labeled-data", "-nl", type=int, default=100)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    np.random.seed(args.seed)

    model = Model()
    model.load(args.model)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train),
                      test=(images_test, labels_test),
                      num_labeled_data=args.num_labeled_data,
                      num_classes=model.ndim_y)
    print("#labeled: {}".format(dataset.get_num_labeled_data()))
    print("#unlabeled: {}".format(dataset.get_num_unlabeled_data()))
    _, labels = dataset.get_labeled_data()
    print("labeled data:", labels)

    total_iterations_train = len(images_train) // args.batchsize

    # optimizers
    optimizer_encoder = Optimizer("msgd", 0.01, 0.9)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_semi_supervised = Optimizer("msgd", 0.1, 0.9)
    optimizer_semi_supervised.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_semi_supervised.add_hook(GradientClipping(args.grad_clip))

    optimizer_generator = Optimizer("msgd", 0.1, 0.1)
    optimizer_generator.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_generator.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer("msgd", 0.01, 0.9)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_z = Optimizer("msgd", 0.1, 0.1)
    optimizer_discriminator_z.setup(model.discriminator_z)
    if args.grad_clip > 0:
        optimizer_discriminator_z.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_y = Optimizer("msgd", 0.1, 0.1)
    optimizer_discriminator_y.setup(model.discriminator_y)
    if args.grad_clip > 0:
        optimizer_discriminator_y.add_hook(GradientClipping(args.grad_clip))

    optimizer_linear_transformation = Optimizer("msgd", 0.01, 0.9)
    optimizer_linear_transformation.setup(model.linear_transformation)
    if args.grad_clip > 0:
        optimizer_linear_transformation.add_hook(GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    # 2D circle
    # we use a linear transformation to map the 10-dimensional representation to a
    # 2D space such that the cluster heads are mapped to points placed uniformly
    # on a 2D circle.
    rad = math.radians(360 / model.ndim_y)
    radius = 5
    mapped_cluster_head_2d_target = np.zeros((model.ndim_y, 2), dtype=np.float32)
    for n in range(model.ndim_y):
        x = math.cos(rad * n) * radius
        y = math.sin(rad * n) * radius
        mapped_cluster_head_2d_target[n] = (x, y)
    if using_gpu:
        mapped_cluster_head_2d_target = cuda.to_gpu(mapped_cluster_head_2d_target)

    # training loop
    training_start_time = time.time()
    for epoch in range(args.total_epochs):
        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_loss_supervised = 0
        sum_loss_linear_transformation = 0
        sum_discriminator_z_confidence_true = 0
        sum_discriminator_z_confidence_fake = 0
        sum_discriminator_y_confidence_true = 0
        sum_discriminator_y_confidence_fake = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                # sample minibatch
                x_u = dataset.sample_unlabeled_minibatch(args.batchsize, gpu=using_gpu)
                x_l, y_l, _ = dataset.sample_labeled_minibatch(args.batchsize, gpu=using_gpu)

                ### reconstruction phase ###
                if True:
                    y_onehot_u, z_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    repr_u = model.encode_yz_representation(y_onehot_u, z_u)
                    x_reconstruction_u = model.decode_representation_x(repr_u)
                    loss_reconstruction_u = F.mean_squared_error(x_u, x_reconstruction_u)

                    y_onehot_l, z_l = model.encode_x_yz(x_l, apply_softmax_y=True)
                    repr_l = model.encode_yz_representation(y_onehot_l, z_l)
                    x_reconstruction_l = model.decode_representation_x(repr_l)
                    loss_reconstruction_l = F.mean_squared_error(x_l, x_reconstruction_l)

                    loss_reconstruction = loss_reconstruction_u + loss_reconstruction_l

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    optimizer_decoder.update()

                    sum_loss_autoencoder += float(loss_reconstruction.data)

                ### adversarial phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    z_true = sampler.gaussian(args.batchsize, model.ndim_z, mean=0, var=1)
                    y_onehot_true = sampler.onehot_categorical(args.batchsize, model.ndim_y)
                    if using_gpu:
                        z_true = cuda.to_gpu(z_true)
                        y_onehot_true = cuda.to_gpu(y_onehot_true)

                    dz_true = model.discriminate_z(z_true, apply_softmax=False)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_true = model.discriminate_y(y_onehot_true, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)

                    discriminator_z_confidence_true = float(xp.mean(F.softmax(dz_true).data[:, 0]))
                    discriminator_z_confidence_fake = float(xp.mean(F.softmax(dz_fake).data[:, 1]))
                    discriminator_y_confidence_true = float(xp.mean(F.softmax(dy_true).data[:, 0]))
                    discriminator_y_confidence_fake = float(xp.mean(F.softmax(dy_fake).data[:, 1]))

                    loss_discriminator_z = F.softmax_cross_entropy(dz_true, class_true) + F.softmax_cross_entropy(dz_fake, class_fake)
                    loss_discriminator_y = F.softmax_cross_entropy(dy_true, class_true) + F.softmax_cross_entropy(dy_fake, class_fake)
                    loss_discriminator = loss_discriminator_z + loss_discriminator_y

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator_z.update()
                    optimizer_discriminator_y.update()

                    sum_loss_discriminator += float(loss_discriminator.data)
                    sum_discriminator_z_confidence_true += discriminator_z_confidence_true
                    sum_discriminator_z_confidence_fake += discriminator_z_confidence_fake
                    sum_discriminator_y_confidence_true += discriminator_y_confidence_true
                    sum_discriminator_y_confidence_fake += discriminator_y_confidence_fake

                ### generator phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)
                    loss_generator = F.softmax_cross_entropy(dz_fake, class_true) + F.softmax_cross_entropy(dy_fake, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_generator.update()

                    sum_loss_generator += float(loss_generator.data)

                ### supervised phase ###
                if True:
                    logit_l, _ = model.encode_x_yz(x_l, apply_softmax_y=False)
                    loss_supervised = F.softmax_cross_entropy(logit_l, y_l)

                    model.cleargrads()
                    loss_supervised.backward()
                    optimizer_semi_supervised.update()

                    sum_loss_supervised += float(loss_supervised.data)

                ### additional cost ###
                if True:
                    identity = np.identity(model.ndim_y, dtype=np.float32)
                    if using_gpu:
                        identity = cuda.to_gpu(identity)
                    mapped_head = model.linear_transformation(identity)
                    loss_linear_transformation = F.mean_squared_error(mapped_cluster_head_2d_target, mapped_head)

                    model.cleargrads()
                    loss_linear_transformation.backward()
                    optimizer_linear_transformation.update()

                    sum_loss_linear_transformation += float(loss_linear_transformation.data)

            printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

        model.save(args.model)

        # evaluate classification accuracy on the training set
        labeled_iter_train = dataset.get_iterator(args.batchsize * 20, train=True, labeled=True, gpu=using_gpu)
        unlabeled_iter_train = dataset.get_iterator(args.batchsize * 20, train=True, unlabeled=True, gpu=using_gpu)
        average_accuracy_l = 0
        average_accuracy_u = 0
        for x_l, true_label in labeled_iter_train:
            with chainer.no_backprop_mode(), chainer.using_config("train", False):
                y_onehot_l, _ = model.encode_x_yz(x_l, apply_softmax_y=True)
                accuracy = F.accuracy(y_onehot_l, true_label)
                average_accuracy_l += float(accuracy.data)

        for x_u, true_label in unlabeled_iter_train:
            with chainer.no_backprop_mode(), chainer.using_config("train", False):
                y_onehot_u, _ = model.encode_x_yz(x_u, apply_softmax_y=True)
                accuracy = F.accuracy(y_onehot_u, true_label)
                average_accuracy_u += float(accuracy.data)

        average_accuracy_l /= labeled_iter_train.get_total_iterations()
        average_accuracy_u /= unlabeled_iter_train.get_total_iterations()

        clear_console()
        print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g}, s={:.5g}, l={:.5g} - disc_z: true={:.1f}%, fake={:.1f}% - disc_y: true={:.1f}%, fake={:.1f}% - acc: l={:.2f}%, u={:.2f}% - total {} min".format(
            epoch + 1, int(time.time() - epoch_start_time),
            sum_loss_generator / total_iterations_train,
            sum_loss_discriminator / total_iterations_train,
            sum_loss_autoencoder / total_iterations_train,
            sum_loss_supervised / total_iterations_train,
            sum_loss_linear_transformation / total_iterations_train,
            sum_discriminator_z_confidence_true / total_iterations_train * 100,
            sum_discriminator_z_confidence_fake / total_iterations_train * 100,
            sum_discriminator_y_confidence_true / total_iterations_train * 100,
            sum_discriminator_y_confidence_fake / total_iterations_train * 100,
            average_accuracy_l * 100,
            average_accuracy_u * 100,
            int((time.time() - training_start_time) // 60)))

        # learning rate schedule
        if epoch == 50:
            optimizer_encoder.set_learning_rate(0.001)
            optimizer_decoder.set_learning_rate(0.001)
            optimizer_semi_supervised.set_learning_rate(0.01)
            optimizer_generator.set_learning_rate(0.01)
            optimizer_discriminator_y.set_learning_rate(0.01)
            optimizer_discriminator_z.set_learning_rate(0.01)

        if epoch == 1000:
            optimizer_encoder.set_learning_rate(0.0001)
            optimizer_decoder.set_learning_rate(0.0001)
            optimizer_semi_supervised.set_learning_rate(0.001)
            optimizer_generator.set_learning_rate(0.001)
            optimizer_discriminator_y.set_learning_rate(0.001)
            optimizer_discriminator_z.set_learning_rate(0.001)
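# The linear_transformation link is only probed through the identity matrix in
# the additional-cost phase above.  A quick post-training sanity check (same
# calls as in the training loop; the helper name check_cluster_head_mapping is
# ours, purely for illustration) compares the learned mapping of the cluster
# heads against their circle targets:
def check_cluster_head_mapping(model, mapped_cluster_head_2d_target, using_gpu):
    identity = np.identity(model.ndim_y, dtype=np.float32)
    if using_gpu:
        identity = cuda.to_gpu(identity)
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        mapped_head = model.linear_transformation(identity)
    print("mapped cluster heads:\n{}".format(mapped_head.data))
    print("targets:\n{}".format(mapped_cluster_head_2d_target))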
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=5000)
    parser.add_argument("--num-labeled-data", "-nl", type=int, default=100)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--learning-rate", "-lr", type=float, default=0.0001)
    parser.add_argument("--momentum", "-mo", type=float, default=0.1)
    parser.add_argument("--optimizer", "-opt", type=str, default="adam")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    np.random.seed(args.seed)

    model = Model()
    model.load(args.model)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train), test=(images_test, labels_test))
    total_iterations_train = len(images_train) // args.batchsize

    # optimizers
    optimizer_encoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_z = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_discriminator_z.setup(model.discriminator_z)
    if args.grad_clip > 0:
        optimizer_discriminator_z.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_y = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_discriminator_y.setup(model.discriminator_y)
    if args.grad_clip > 0:
        optimizer_discriminator_y.add_hook(GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    training_start_time = time.time()
    for epoch in range(args.total_epochs):
        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_discriminator_z_confidence_true = 0
        sum_discriminator_z_confidence_fake = 0
        sum_discriminator_y_confidence_true = 0
        sum_discriminator_y_confidence_fake = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                # sample minibatch
                x_u, _, _ = dataset.sample_minibatch(args.batchsize, gpu=using_gpu)

                ### reconstruction phase ###
                if True:
                    y_onehot_u, z_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    x_reconstruction_u = model.decode_yz_x(y_onehot_u, z_u)
                    loss_reconstruction = F.mean_squared_error(x_u, x_reconstruction_u)

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    optimizer_decoder.update()

                ### adversarial phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    z_true = sampler.gaussian(args.batchsize, model.ndim_z, mean=0, var=1)
                    y_onehot_true = sampler.onehot_categorical(args.batchsize, model.ndim_y)
                    if using_gpu:
                        z_true = cuda.to_gpu(z_true)
                        y_onehot_true = cuda.to_gpu(y_onehot_true)

                    dz_true = model.discriminate_z(z_true, apply_softmax=False)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_true = model.discriminate_y(y_onehot_true, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)

                    discriminator_z_confidence_true = float(xp.mean(F.softmax(dz_true).data[:, 0]))
                    discriminator_z_confidence_fake = float(xp.mean(F.softmax(dz_fake).data[:, 1]))
                    discriminator_y_confidence_true = float(xp.mean(F.softmax(dy_true).data[:, 0]))
                    discriminator_y_confidence_fake = float(xp.mean(F.softmax(dy_fake).data[:, 1]))

                    loss_discriminator_z = F.softmax_cross_entropy(dz_true, class_true) + F.softmax_cross_entropy(dz_fake, class_fake)
                    loss_discriminator_y = F.softmax_cross_entropy(dy_true, class_true) + F.softmax_cross_entropy(dy_fake, class_fake)
                    loss_discriminator = loss_discriminator_z + loss_discriminator_y

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator_z.update()
                    optimizer_discriminator_y.update()

                ### generator phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)
                    loss_generator = F.softmax_cross_entropy(dz_fake, class_true) + F.softmax_cross_entropy(dy_fake, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_encoder.update()

                sum_loss_discriminator += float(loss_discriminator.data)
                sum_loss_generator += float(loss_generator.data)
                sum_loss_autoencoder += float(loss_reconstruction.data)
                sum_discriminator_z_confidence_true += discriminator_z_confidence_true
                sum_discriminator_z_confidence_fake += discriminator_z_confidence_fake
                sum_discriminator_y_confidence_true += discriminator_y_confidence_true
                sum_discriminator_y_confidence_fake += discriminator_y_confidence_fake

            printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

        model.save(args.model)

        clear_console()
        print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g} - disc_z: true={:.1f}%, fake={:.1f}% - disc_y: true={:.1f}%, fake={:.1f}% - total {} min".format(
            epoch + 1, int(time.time() - epoch_start_time),
            sum_loss_generator / total_iterations_train,
            sum_loss_discriminator / total_iterations_train,
            sum_loss_autoencoder / total_iterations_train,
            sum_discriminator_z_confidence_true / total_iterations_train * 100,
            sum_discriminator_z_confidence_fake / total_iterations_train * 100,
            sum_discriminator_y_confidence_true / total_iterations_train * 100,
            sum_discriminator_y_confidence_fake / total_iterations_train * 100,
            int((time.time() - training_start_time) // 60)))
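# sampler.gaussian and sampler.onehot_categorical are project helpers; as a
# rough sketch (an assumption about their behaviour, not the actual module),
# they can be written with plain NumPy as follows:
def gaussian(batchsize, ndim, mean=0, var=1):
    # one ndim-dimensional Gaussian sample per row
    return np.random.normal(mean, math.sqrt(var), (batchsize, ndim)).astype(np.float32)

def onehot_categorical(batchsize, num_labels):
    # uniformly drawn labels, one-hot encoded
    y = np.zeros((batchsize, num_labels), dtype=np.float32)
    indices = np.random.randint(0, num_labels, batchsize)
    y[np.arange(batchsize), indices] = 1
    return y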
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=5000)
    parser.add_argument("--num-labeled-data", "-nl", type=int, default=10000)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--learning-rate", "-lr", type=float, default=0.0001)
    parser.add_argument("--momentum", "-mo", type=float, default=0.5)
    parser.add_argument("--optimizer", "-opt", type=str, default="adam")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    np.random.seed(args.seed)

    model = Model()
    model.load(args.model)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train),
                      test=(images_test, labels_test),
                      num_labeled_data=args.num_labeled_data,
                      num_classes=model.ndim_y - 1,
                      num_extra_classes=1)
    print("#labeled: {}".format(dataset.get_num_labeled_data()))
    print("#unlabeled: {}".format(dataset.get_num_unlabeled_data()))
    _, labels = dataset.get_labeled_data()

    total_iterations_train = len(images_train) // args.batchsize

    # optimizers
    optimizer_encoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_discriminator.setup(model.discriminator)
    if args.grad_clip > 0:
        optimizer_discriminator.add_hook(GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    y_onehot_u = xp.zeros((1, model.ndim_y), dtype=xp.float32)
    y_onehot_u[0, -1] = 1  # turn on the extra class
    y_onehot_u = xp.repeat(y_onehot_u, args.batchsize, axis=0)

    training_start_time = time.time()
    for epoch in range(args.total_epochs):
        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_discriminator_confidence_true_l = 0
        sum_discriminator_confidence_fake_l = 0
        sum_discriminator_confidence_true_u = 0
        sum_discriminator_confidence_fake_u = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                # sample minibatch
                x_u = dataset.sample_unlabeled_minibatch(args.batchsize, gpu=using_gpu)
                x_l, y_l, y_onehot_l = dataset.sample_labeled_minibatch(args.batchsize, gpu=using_gpu)

                ### reconstruction phase ###
                if True:
                    z_u = model.encode_x_z(x_u)
                    x_reconstruction_u = model.decode_z_x(z_u)
                    loss_reconstruction_u = F.mean_squared_error(x_u, x_reconstruction_u)

                    z_l = model.encode_x_z(x_l)
                    x_reconstruction_l = model.decode_z_x(z_l)
                    loss_reconstruction_l = F.mean_squared_error(x_l, x_reconstruction_l)

                    loss_reconstruction = loss_reconstruction_u + loss_reconstruction_l

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    optimizer_decoder.update()

                ### adversarial phase ###
                if True:
                    z_fake_u = model.encode_x_z(x_u)
                    z_fake_l = model.encode_x_z(x_l)

                    if False:
                        z_true_l = sampler.supervised_swiss_roll(args.batchsize, model.ndim_z, y_l, model.ndim_y - 1)
                        z_true_u = sampler.swiss_roll(args.batchsize, model.ndim_z, model.ndim_y - 1)
                    else:
                        z_true_l = sampler.supervised_gaussian_mixture(args.batchsize, model.ndim_z, y_l, model.ndim_y - 1)
                        z_true_u = sampler.gaussian_mixture(args.batchsize, model.ndim_z, model.ndim_y - 1)

                    if using_gpu:
                        z_true_u = cuda.to_gpu(z_true_u)
                        z_true_l = cuda.to_gpu(z_true_l)

                    dz_true_u = model.discriminate(y_onehot_u, z_true_u, apply_softmax=False)
                    dz_fake_u = model.discriminate(y_onehot_u, z_fake_u, apply_softmax=False)
                    dz_true_l = model.discriminate(y_onehot_l, z_true_l, apply_softmax=False)
                    dz_fake_l = model.discriminate(y_onehot_l, z_fake_l, apply_softmax=False)

                    discriminator_confidence_true_u = float(xp.mean(F.softmax(dz_true_u).data[:, 0]))
                    discriminator_confidence_fake_u = float(xp.mean(F.softmax(dz_fake_u).data[:, 1]))
                    discriminator_confidence_true_l = float(xp.mean(F.softmax(dz_true_l).data[:, 0]))
                    discriminator_confidence_fake_l = float(xp.mean(F.softmax(dz_fake_l).data[:, 1]))

                    loss_discriminator = (F.softmax_cross_entropy(dz_true_u, class_true) +
                                          F.softmax_cross_entropy(dz_fake_u, class_fake) +
                                          F.softmax_cross_entropy(dz_true_l, class_true) +
                                          F.softmax_cross_entropy(dz_fake_l, class_fake))

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator.update()

                ### generator phase ###
                if True:
                    z_fake_u = model.encode_x_z(x_u)
                    z_fake_l = model.encode_x_z(x_l)
                    dz_fake_u = model.discriminate(y_onehot_u, z_fake_u, apply_softmax=False)
                    dz_fake_l = model.discriminate(y_onehot_l, z_fake_l, apply_softmax=False)
                    loss_generator = F.softmax_cross_entropy(dz_fake_u, class_true) + F.softmax_cross_entropy(dz_fake_l, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_encoder.update()

                sum_loss_discriminator += float(loss_discriminator.data)
                sum_loss_generator += float(loss_generator.data)
                sum_loss_autoencoder += float(loss_reconstruction.data)
                sum_discriminator_confidence_true_u += discriminator_confidence_true_u
                sum_discriminator_confidence_fake_u += discriminator_confidence_fake_u
                sum_discriminator_confidence_true_l += discriminator_confidence_true_l
                sum_discriminator_confidence_fake_l += discriminator_confidence_fake_l

            printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

        model.save(args.model)

        clear_console()
        print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g} - disc_u: true={:.1f}%, fake={:.1f}% - disc_l: true={:.1f}%, fake={:.1f}% - total {} min".format(
            epoch + 1, int(time.time() - epoch_start_time),
            sum_loss_generator / total_iterations_train,
            sum_loss_discriminator / total_iterations_train,
            sum_loss_autoencoder / total_iterations_train,
            sum_discriminator_confidence_true_u / total_iterations_train * 100,
            sum_discriminator_confidence_fake_u / total_iterations_train * 100,
            sum_discriminator_confidence_true_l / total_iterations_train * 100,
            sum_discriminator_confidence_fake_l / total_iterations_train * 100,
            int((time.time() - training_start_time) // 60)))
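# sampler.gaussian_mixture / sampler.supervised_gaussian_mixture are not shown
# in this listing.  A minimal 2D sketch of the usual AAE-style prior (an
# assumption about the project's sampler module; x_var, y_var and shift are
# illustrative parameters) places one elongated Gaussian per class, rotated
# around the origin, so each of the ndim_y - 1 real classes owns one mode:
def supervised_gaussian_mixture(batchsize, ndim, labels, num_labels, x_var=0.5, y_var=0.1, shift=3.0):
    assert ndim == 2  # the sketch only covers the 2D case
    x = np.random.normal(0, x_var, batchsize)
    y = np.random.normal(0, y_var, batchsize)
    angles = 2.0 * np.pi * np.asarray(labels) / num_labels
    z = np.empty((batchsize, 2), dtype=np.float32)
    z[:, 0] = (x + shift) * np.cos(angles) - y * np.sin(angles)
    z[:, 1] = (x + shift) * np.sin(angles) + y * np.cos(angles)
    return z

def gaussian_mixture(batchsize, ndim, num_labels):
    # unlabeled samples are assigned a random mode
    labels = np.random.randint(0, num_labels, batchsize)
    return supervised_gaussian_mixture(batchsize, ndim, labels, num_labels)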
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=300)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--learning-rate", "-lr", type=float, default=0.0001)
    parser.add_argument("--momentum", "-mo", type=float, default=0.5)
    parser.add_argument("--optimizer", "-opt", type=str, default="adam")
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train), test=(images_test, labels_test))
    total_iterations_train = len(images_train) // args.batchsize

    model = Model()
    model.load(args.model)

    # optimizers
    optimizer_encoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_discriminator.setup(model.discriminator)
    if args.grad_clip > 0:
        optimizer_discriminator.add_hook(GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    training_start_time = time.time()
    for epoch in range(args.total_epochs):
        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_discriminator_confidence_true = 0
        sum_discriminator_confidence_fake = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                x_l, y_l, y_onehot_l = dataset.sample_minibatch(args.batchsize, gpu=using_gpu)

                ### reconstruction phase ###
                if True:
                    z_fake_l = model.encode_x_z(x_l)
                    x_reconstruction_l = model.decode_yz_x(y_onehot_l, z_fake_l)
                    loss_reconstruction = F.mean_squared_error(x_l, x_reconstruction_l)

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    optimizer_decoder.update()

                ### adversarial phase ###
                if True:
                    z_fake_l = model.encode_x_z(x_l)
                    z_true_batch = sampler.gaussian(args.batchsize, model.ndim_z, mean=0, var=1)
                    if using_gpu:
                        z_true_batch = cuda.to_gpu(z_true_batch)

                    dz_true = model.discriminate_z(z_true_batch, apply_softmax=False)
                    dz_fake = model.discriminate_z(z_fake_l, apply_softmax=False)

                    discriminator_confidence_true = float(xp.mean(F.softmax(dz_true).data[:, 0]))
                    discriminator_confidence_fake = float(xp.mean(F.softmax(dz_fake).data[:, 1]))

                    loss_discriminator = F.softmax_cross_entropy(dz_true, class_true) + F.softmax_cross_entropy(dz_fake, class_fake)

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator.update()

                ### generator phase ###
                if True:
                    z_fake_l = model.encode_x_z(x_l)
                    dz_fake = model.discriminate_z(z_fake_l, apply_softmax=False)
                    loss_generator = F.softmax_cross_entropy(dz_fake, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_encoder.update()

                sum_loss_discriminator += float(loss_discriminator.data)
                sum_loss_generator += float(loss_generator.data)
                sum_loss_autoencoder += float(loss_reconstruction.data)
                sum_discriminator_confidence_true += discriminator_confidence_true
                sum_discriminator_confidence_fake += discriminator_confidence_fake

            printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

        model.save(args.model)

        clear_console()
        print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g} - discriminator: true={:.1f}%, fake={:.1f}% - total {} min".format(
            epoch + 1, int(time.time() - epoch_start_time),
            sum_loss_generator / total_iterations_train,
            sum_loss_discriminator / total_iterations_train,
            sum_loss_autoencoder / total_iterations_train,
            sum_discriminator_confidence_true / total_iterations_train * 100,
            sum_discriminator_confidence_fake / total_iterations_train * 100,
            int((time.time() - training_start_time) // 60)))
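# With the label fed to the decoder, z is free to model only the style of the
# digit.  A small post-training check (a sketch; it reuses encode_x_z and
# decode_yz_x exactly as above, and assumes model.ndim_y as in the other
# scripts; the helper name is ours) decodes one encoded style vector under
# every class label:
def decode_style_under_all_labels(model, x):
    xp = model.xp
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_z(x[:1])                      # style of a single image
        z = xp.repeat(z.data, model.ndim_y, axis=0)      # reuse it for every class
        y = xp.identity(model.ndim_y, dtype=xp.float32)  # one one-hot row per class
        return model.decode_yz_x(y, z)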
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=5000)
    parser.add_argument("--num-labeled-data", "-nl", type=int, default=100)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--learning-rate", "-lr", type=float, default=0.0001)
    parser.add_argument("--momentum", "-mo", type=float, default=0.1)
    parser.add_argument("--optimizer", "-opt", type=str, default="adam")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    np.random.seed(args.seed)

    model = Model()
    model.load(args.model)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train), test=(images_test, labels_test))
    total_iterations_train = len(images_train) // args.batchsize

    # optimizers
    optimizer_encoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_z = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_discriminator_z.setup(model.discriminator_z)
    if args.grad_clip > 0:
        optimizer_discriminator_z.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_y = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_discriminator_y.setup(model.discriminator_y)
    if args.grad_clip > 0:
        optimizer_discriminator_y.add_hook(GradientClipping(args.grad_clip))

    optimizer_cluster_head = Optimizer(args.optimizer, args.learning_rate, args.momentum)
    optimizer_cluster_head.setup(model.cluster_head)
    if args.grad_clip > 0:
        optimizer_cluster_head.add_hook(GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    training_start_time = time.time()
    for epoch in range(args.total_epochs):
        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_loss_cluster_head = 0
        sum_discriminator_z_confidence_true = 0
        sum_discriminator_z_confidence_fake = 0
        sum_discriminator_y_confidence_true = 0
        sum_discriminator_y_confidence_fake = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                # sample minibatch
                x_u, _, _ = dataset.sample_minibatch(args.batchsize, gpu=using_gpu)

                ### reconstruction phase ###
                if True:
                    y_onehot_u, z_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    repr_u = model.encode_yz_representation(y_onehot_u, z_u)
                    x_reconstruction_u = model.decode_representation_x(repr_u)
                    loss_reconstruction = F.mean_squared_error(x_u, x_reconstruction_u)

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    optimizer_cluster_head.update()
                    optimizer_decoder.update()

                ### adversarial phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    z_true = sampler.gaussian(args.batchsize, model.ndim_z, mean=0, var=1)
                    y_onehot_true = sampler.onehot_categorical(args.batchsize, model.ndim_y)
                    if using_gpu:
                        z_true = cuda.to_gpu(z_true)
                        y_onehot_true = cuda.to_gpu(y_onehot_true)

                    dz_true = model.discriminate_z(z_true, apply_softmax=False)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_true = model.discriminate_y(y_onehot_true, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)

                    discriminator_z_confidence_true = float(xp.mean(F.softmax(dz_true).data[:, 0]))
                    discriminator_z_confidence_fake = float(xp.mean(F.softmax(dz_fake).data[:, 1]))
                    discriminator_y_confidence_true = float(xp.mean(F.softmax(dy_true).data[:, 0]))
                    discriminator_y_confidence_fake = float(xp.mean(F.softmax(dy_fake).data[:, 1]))

                    loss_discriminator_z = F.softmax_cross_entropy(dz_true, class_true) + F.softmax_cross_entropy(dz_fake, class_fake)
                    loss_discriminator_y = F.softmax_cross_entropy(dy_true, class_true) + F.softmax_cross_entropy(dy_fake, class_fake)
                    loss_discriminator = loss_discriminator_z + loss_discriminator_y

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator_z.update()
                    optimizer_discriminator_y.update()

                ### generator phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(x_u, apply_softmax_y=True)
                    dz_fake = model.discriminate_z(z_fake_u, apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u, apply_softmax=False)
                    loss_generator = F.softmax_cross_entropy(dz_fake, class_true) + F.softmax_cross_entropy(dy_fake, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_encoder.update()

                ### additional cost ###
                if True:
                    distance = model.compute_distance_of_cluster_heads()
                    loss_cluster_head = -F.sum(distance)

                    model.cleargrads()
                    loss_cluster_head.backward()
                    optimizer_cluster_head.update()

                sum_loss_discriminator += float(loss_discriminator.data)
                sum_loss_generator += float(loss_generator.data)
                sum_loss_autoencoder += float(loss_reconstruction.data)
                sum_loss_cluster_head += float(model.nCr(model.ndim_y, 2) * model.cluster_head_distance_threshold + loss_cluster_head.data)
                sum_discriminator_z_confidence_true += discriminator_z_confidence_true
                sum_discriminator_z_confidence_fake += discriminator_z_confidence_fake
                sum_discriminator_y_confidence_true += discriminator_y_confidence_true
                sum_discriminator_y_confidence_fake += discriminator_y_confidence_fake

            printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

        model.save(args.model)

        clear_console()
        print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g}, c={:.5g} - disc_z: true={:.1f}%, fake={:.1f}% - disc_y: true={:.1f}%, fake={:.1f}% - total {} min".format(
            epoch + 1, int(time.time() - epoch_start_time),
            sum_loss_generator / total_iterations_train,
            sum_loss_discriminator / total_iterations_train,
            sum_loss_autoencoder / total_iterations_train,
            sum_loss_cluster_head / total_iterations_train,
            sum_discriminator_z_confidence_true / total_iterations_train * 100,
            sum_discriminator_z_confidence_fake / total_iterations_train * 100,
            sum_discriminator_y_confidence_true / total_iterations_train * 100,
            sum_discriminator_y_confidence_fake / total_iterations_train * 100,
            int((time.time() - training_start_time) // 60)))
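# compute_distance_of_cluster_heads and cluster_head_distance_threshold belong
# to the Model class.  As a sketch of what the additional cost is doing (an
# assumption about the implementation, not the actual code): pairwise distances
# between the cluster-head vectors are clipped at the threshold, so heads stop
# repelling each other once they are far enough apart; minimizing -sum(distance)
# is then the same as minimizing nCr(ndim_y, 2) * threshold - sum(distance),
# the non-negative value accumulated in sum_loss_cluster_head above.
def compute_distance_of_cluster_heads(weight, threshold):
    # weight: (num_heads, ndim) matrix, one cluster-head vector per row
    distances = []
    for i in range(weight.shape[0] - 1):
        diff = weight[i + 1:] - F.broadcast_to(weight[i], weight[i + 1:].shape)
        d = F.sqrt(F.sum(diff * diff, axis=1))
        distances.append(F.clip(d, 0.0, float(threshold)))
    return F.concat(distances, axis=0)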