示例#1
0
def main():
    images, labels = dataset.load_test_images()
    num_scatter = len(images)
    _images, _, label_id = dataset.sample_labeled_data(images, labels,
                                                       num_scatter)
    with tf.device(config.device):
        t = build_graph(is_test=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
        z, _x = sess.run([t.z_r, t.x_r], feed_dict={t.x: _images})

        plot.scatter_labeled_z(z, label_id, dir=config.ckpt_dir)
        plot.tile_images(_x[:100], dir=config.ckpt_dir)
        plot.plot_loss_tendency(config.ckpt_dir)

    hist_value, hist_head = plot.load_pickle_to_data(config.ckpt_dir)
    for loss_name in ['reconstruction']:
        plot.plot_loss_trace(hist_value[loss_name], loss_name, config.ckpt_dir)

    plot.plot_adversarial_trace(hist_value['discriminator'],
                                hist_value['generator'], 'z', config.ckpt_dir)
    plot.plot_adversarial_trace(hist_value['discriminator_z'],
                                hist_value['generator_z'], 'z',
                                config.ckpt_dir)
    plot.plot_adversarial_trace(hist_value['discriminator_img'],
                                hist_value['generator_img'], 'z',
                                config.ckpt_dir)
示例#2
0
def main():
    # load MNIST images
    images, labels = dataset.load_test_images()

    # Settings
    num_scatter = len(images)
    _images, _, label_id = dataset.sample_labeled_data(images, labels,
                                                       num_scatter)

    with tf.device(config.device):
        t = build_graph(is_test=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
        representation, x_reconstruction = sess.run([t.yz, t.x_r],
                                                    feed_dict={t.x: _images})
        plot.scatter_labeled_z(representation, label_id, dir=config.ckpt_dir)
        plot.tile_images(x_reconstruction[:100], dir=config.ckpt_dir)

        # z distributed plot
        num_segments = 20
        limit = (-5, 5)
        x_values = np.linspace(limit[0], limit[1], num_segments)
        y_values = np.linspace(limit[0], limit[1], num_segments)
        vacant = np.zeros((28 * num_segments, 28 * num_segments))
        for i, x_element in enumerate(x_values):
            for j, y_element in enumerate(y_values):
                x_reconstruction = sess.run(
                    t.x_r,
                    feed_dict={
                        t.yz: np.reshape([x_element, y_element], [1, 2])
                    })
                vacant[(num_segments - 1 - i) * 28:(num_segments - i) * 28,
                       j * 28:(j + 1) * 28] = x_reconstruction.reshape(28, 28)

        vacant = (vacant + 1) / 2
        pylab.figure(figsize=(10, 10), dpi=400, facecolor='white')
        pylab.imshow(vacant, cmap='gray', origin='upper')
        pylab.tight_layout()
        pylab.axis('off')
        pylab.savefig("{}/clusters.png".format(config.ckpt_dir))

        # loss part
        hist_value, hist_head = plot.load_pickle_to_data(config.ckpt_dir)
        for loss_name in ['reconstruction', 'supervised']:
            plot.plot_loss_trace(hist_value[loss_name], loss_name,
                                 config.ckpt_dir)

        plot.plot_adversarial_trace(hist_value['discriminator_y'],
                                    hist_value['generator_y'], 'y',
                                    config.ckpt_dir)
        plot.plot_adversarial_trace(hist_value['discriminator_z'],
                                    hist_value['generator_z'], 'z',
                                    config.ckpt_dir)
        plot.plot_adversarial_trace(hist_value['validation_accuracy'],
                                    hist_value['transform'],
                                    'validation_accuracy', config.ckpt_dir)
def main():
	# load MNIST images
	images, labels = dataset.load_test_images()
	num_scatter = len(images)
	x, _, labels = dataset.sample_labeled_data(images, labels, num_scatter)
	y_distribution, z = aae.encode_x_yz(x, apply_softmax=False, test=True)
	y = aae.argmax_onehot_from_unnormalized_distribution(y_distribution)
	representation = aae.to_numpy(aae.encode_yz_representation(y, z, test=True))
	plot.scatter_labeled_z(representation, labels, dir=args.plot_dir)
示例#4
0
def main():
	# load MNIST images
	images, labels = dataset.load_test_images()

	# config
	config = aae.config
	num_scatter = len(images)

	x, _, label_ids = dataset.sample_labeled_data(images, labels, num_scatter, config.ndim_x, config.ndim_y)
	z = aae.to_numpy(aae.encode_x_z(x, test=True))
	visualizer.plot_labeled_z(z, label_ids, dir=args.plot_dir)
示例#5
0
def main():
    images, labels = dataset.load_test_images()
    num_scatter = len(images)
    _images, _, label_id = dataset.sample_labeled_data(images, labels,
                                                       num_scatter)
    with tf.device(config.device):
        x, z_respresentation, x_construction = build_graph(is_test=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
        z, _x = sess.run([z_respresentation, x_construction],
                         feed_dict={x: _images})

        scatter_labeled_z(z, label_id, dir=config.ckpt_dir)
        tile_images(_x[:100], dir=config.ckpt_dir)
        plot_loss_tendency(config.ckpt_dir)
def main():
    # load MNIST images
    images, labels = dataset.load_test_images()

    # Settings
    num_scatter = len(images)
    _images, _, label_id = dataset.sample_labeled_data(images, labels,
                                                       num_scatter)

    with tf.device(config.device):
        t = build_graph(is_test=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
        # z distributed plot
        num_segments = 20
        limit = (-5, 5)
        x_values = np.linspace(limit[0], limit[1], num_segments)
        y_values = np.linspace(limit[0], limit[1], num_segments)
        vacant = np.zeros((28 * num_segments, 28 * num_segments))
        for i, x_element in enumerate(x_values):
            for j, y_element in enumerate(y_values):
                x_reconstruction = sess.run(
                    t.x_r,
                    feed_dict={
                        t.yz: np.reshape([x_element, y_element], [1, 2])
                    })
                vacant[(num_segments - 1 - i) * 28:(num_segments - i) * 28,
                       j * 28:(j + 1) * 28] = x_reconstruction.reshape(28, 28)

        vacant = (vacant + 1) / 2
        pylab.figure(figsize=(10, 10), dpi=400, facecolor='white')
        pylab.imshow(vacant, cmap='gray', origin='upper')
        pylab.tight_layout()
        pylab.axis('off')
        pylab.savefig("{}/clusters.png".format(config.ckpt_dir))
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        num_types_of_label,
        seed=args.seed)
    print training_labels_l

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l,
                config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u,
                                                     config.ndim_x)

            # reconstruction phase
            q_y_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            reconstruction_u = aae.decode_yz_x(q_y_x_u, z_u)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize_u,
                                        config.ndim_z,
                                        mean=0,
                                        var=1)
            y_true_u = sampler.onehot_categorical(batchsize_u, config.ndim_y)
            discrimination_z_true = aae.discriminate_z(z_true_u,
                                                       apply_softmax=False)
            discrimination_y_true = aae.discriminate_y(y_true_u,
                                                       apply_softmax=False)
            discrimination_z_fake = aae.discriminate_z(z_fake_u,
                                                       apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u,
                                                       apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(
                discrimination_z_true, class_true) + F.softmax_cross_entropy(
                    discrimination_z_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(
                discrimination_y_true, class_true) + F.softmax_cross_entropy(
                    discrimination_y_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            discrimination_z_fake = aae.discriminate_z(z_fake_u,
                                                       apply_softmax=False)
            discrimination_y_fake = aae.discriminate_y(y_fake_u,
                                                       apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake,
                                                       class_true)
            loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake,
                                                       class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            # supervised phase
            unnormalized_q_y_x_l, z_l = aae.encode_x_yz(images_l,
                                                        apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                unnormalized_q_y_x_l, aae.to_variable(label_ids_l))
            aae.backprop_generator(loss_supervised)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)

            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        # validation phase
        # split validation data to reduce gpu memory consumption
        images_v, _, label_ids_v = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            config.ndim_x, config.ndim_y)
        images_v_segments = np.split(images_v, num_validation_data // 500)
        label_ids_v_segments = np.split(label_ids_v,
                                        num_validation_data // 500)
        num_correct = 0
        for images_v, labels_v in zip(images_v_segments, label_ids_v_segments):
            predicted_labels = aae.argmax_x_label(images_v, test=True)
            for i, label in enumerate(predicted_labels):
                if label == labels_v[i]:
                    num_correct += 1
        validation_accuracy = num_correct / float(num_validation_data)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
                "loss_s": sum_loss_supervised / num_trains_per_epoch,
                "loss_d": sum_loss_discriminator / num_trains_per_epoch,
                "loss_g": sum_loss_generator / num_trains_per_epoch,
                "accuracy": validation_accuracy
            })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
示例#8
0
def main():
    # settings
    max_epoch = 1000
    num_updates_per_epoch = 500
    batchsize_u = 256
    batchsize_l = min(100, args.num_labeled_data)

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # training
    progress = Progress()
    for epoch in range(1, max_epoch + 1):
        progress.start_epoch(epoch, max_epoch)
        sum_loss = 0
        sum_entropy = 0
        sum_conditional_entropy = 0
        sum_rsat = 0

        for t in range(num_updates_per_epoch):
            x_u = dataset.sample_data(train_images_u, batchsize_u)
            p = imsat.classify(x_u, apply_softmax=True)
            hy = imsat.compute_marginal_entropy(p)
            hy_x = F.sum(imsat.compute_entropy(p)) / batchsize_u
            Rsat = -F.sum(imsat.compute_lds(x_u)) / batchsize_u

            # semi-supervised
            loss_semisupervised = 0
            if args.num_labeled_data > 0:
                x_l, t_l = dataset.sample_labeled_data(train_images_l,
                                                       train_labels_l,
                                                       batchsize_l)
                log_p = imsat.classify(x_l, apply_softmax=False)
                loss_semisupervised = F.softmax_cross_entropy(
                    log_p, imsat.to_variable(t_l))

            loss = Rsat - config.lam * (
                config.mu * hy - hy_x) + config.sigma * loss_semisupervised
            imsat.backprop(loss)

            sum_loss += float(loss.data)
            sum_entropy += float(hy.data)
            sum_conditional_entropy += float(hy_x.data)
            sum_rsat += float(Rsat.data)

            if t % 10 == 0:
                progress.show(t, num_updates_per_epoch, {})

        imsat.save(args.model_dir)

        counts_train, accuracy_train = compute_accuracy(
            train_images, train_labels)
        counts_test, accuracy_test = compute_accuracy(test_images, test_labels)
        progress.show(
            num_updates_per_epoch, num_updates_per_epoch, {
                "loss": sum_loss / num_updates_per_epoch,
                "hy": sum_entropy / num_updates_per_epoch,
                "hy_x": sum_conditional_entropy / num_updates_per_epoch,
                "Rsat": sum_rsat / num_updates_per_epoch,
                "acc_test": accuracy_test,
                "acc_train": accuracy_test,
            })
        print(counts_train)
        print(counts_test)
        plot(counts_train, "train")
        plot(counts_test, "test")
示例#9
0
文件: train.py 项目: svoss/ru-ccn
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    discriminator_config = gan.config_discriminator
    generator_config = gan.config_generator

    # settings
    # _l -> labeled
    # _u -> unlabeled
    # _g -> generated
    max_epoch = 1000
    num_trains_per_epoch = 500
    plot_interval = 5
    batchsize_l = 100
    batchsize_u = 100
    batchsize_g = batchsize_u

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = args.num_labeled
    if batchsize_l > num_labeled_data:
        batchsize_l = num_labeled_data

    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        discriminator_config.ndim_output,
        seed=args.seed)
    print training_labels_l

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_supervised = 0
        sum_loss_unsupervised = 0
        sum_loss_adversarial = 0
        sum_dx_labeled = 0
        sum_dx_unlabeled = 0
        sum_dx_generated = 0

        gan.update_learning_rate(get_learning_rate_for_epoch(epoch))

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l,
                training_labels_l,
                batchsize_l,
                discriminator_config.ndim_input,
                discriminator_config.ndim_output,
                binarize=False)
            images_u = dataset.sample_unlabeled_data(
                training_images_u,
                batchsize_u,
                discriminator_config.ndim_input,
                binarize=False)
            images_g = gan.generate_x(batchsize_g)
            images_g.unchain_backward()

            # supervised loss
            py_x_l, activations_l = gan.discriminate(images_l,
                                                     apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                py_x_l, gan.to_variable(label_ids_l))

            log_zx_l = F.logsumexp(py_x_l, axis=1)
            log_dx_l = log_zx_l - F.softplus(log_zx_l)
            dx_l = F.sum(F.exp(log_dx_l)) / batchsize_l

            # unsupervised loss
            # D(x) = Z(x) / {Z(x) + 1}, where Z(x) = \sum_{k=1}^K exp(l_k(x))
            # softplus(x) := log(1 + exp(x))
            # logD(x) = logZ(x) - log(Z(x) + 1)
            # 		  = logZ(x) - log(exp(log(Z(x))) + 1)
            # 		  = logZ(x) - softplus(logZ(x))
            # 1 - D(x) = 1 / {Z(x) + 1}
            # log{1 - D(x)} = log1 - log(Z(x) + 1)
            # 				= -log(exp(log(Z(x))) + 1)
            # 				= -softplus(logZ(x))
            py_x_u, _ = gan.discriminate(images_u, apply_softmax=False)
            log_zx_u = F.logsumexp(py_x_u, axis=1)
            log_dx_u = log_zx_u - F.softplus(log_zx_u)
            dx_u = F.sum(F.exp(log_dx_u)) / batchsize_u
            loss_unsupervised = -F.sum(
                log_dx_u) / batchsize_u  # minimize negative logD(x)
            py_x_g, _ = gan.discriminate(images_g, apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            loss_unsupervised += F.sum(F.softplus(
                log_zx_g)) / batchsize_u  # minimize negative log{1 - D(x)}

            # update discriminator
            gan.backprop_discriminator(loss_supervised + loss_unsupervised)

            # adversarial loss
            images_g = gan.generate_x(batchsize_g)
            py_x_g, activations_g = gan.discriminate(images_g,
                                                     apply_softmax=False)
            log_zx_g = F.logsumexp(py_x_g, axis=1)
            log_dx_g = log_zx_g - F.softplus(log_zx_g)
            dx_g = F.sum(F.exp(log_dx_g)) / batchsize_g
            loss_adversarial = -F.sum(
                log_dx_g) / batchsize_u  # minimize negative logD(x)

            # feature matching
            if discriminator_config.use_feature_matching:
                features_true = activations_l[-1]
                features_true.unchain_backward()
                if batchsize_l != batchsize_g:
                    images_g = gan.generate_x(batchsize_l)
                    _, activations_g = gan.discriminate(images_g,
                                                        apply_softmax=False)
                features_fake = activations_g[-1]
                loss_adversarial += F.mean_squared_error(
                    features_true, features_fake)

            # update generator
            gan.backprop_generator(loss_adversarial)

            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_unsupervised += float(loss_unsupervised.data)
            sum_loss_adversarial += float(loss_adversarial.data)
            sum_dx_labeled += float(dx_l.data)
            sum_dx_unlabeled += float(dx_u.data)
            sum_dx_generated += float(dx_g.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        gan.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images,
            validation_labels,
            num_validation_data,
            discriminator_config.ndim_input,
            discriminator_config.ndim_output,
            binarize=False)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution, _ = gan.discriminate(images_l,
                                                 apply_softmax=True,
                                                 test=True)
            accuracy = F.accuracy(y_distribution, gan.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_l": sum_loss_supervised / num_trains_per_epoch,
                "loss_u": sum_loss_unsupervised / num_trains_per_epoch,
                "loss_g": sum_loss_adversarial / num_trains_per_epoch,
                "dx_l": sum_dx_labeled / num_trains_per_epoch,
                "dx_u": sum_dx_unlabeled / num_trains_per_epoch,
                "dx_g": sum_dx_generated / num_trains_per_epoch,
                "accuracy": validation_accuracy,
            })

        # write accuracy to csv
        csv_results.append(
            [epoch, validation_accuracy,
             progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))

        if epoch % plot_interval == 0 or epoch == 1:
            plot(filename="epoch_{}_time_{}min".format(
                epoch, progress.get_total_time()))
示例#10
0
文件: train.py 项目: LynnHongLiu/adgm
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = adgm.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        num_types_of_label,
        seed=args.seed)
    print training_labels_l

    # init weightnorm layers
    if config.use_weightnorm:
        print "initializing weight normalization layers ..."
        images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
            training_images_l, training_labels_l, batchsize_l, config.ndim_x,
            config.ndim_y)
        images_u = dataset.sample_unlabeled_data(training_images_u,
                                                 batchsize_u, config.ndim_x)
        adgm.compute_lower_bound(images_l, label_onehot_l, images_u)

    # training
    temperature = 1
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_lower_bound_l = 0
        sum_lower_bound_u = 0
        sum_loss_classifier = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l,
                config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u,
                                                     config.ndim_x)

            # lower bound loss using gumbel-softmax
            lower_bound, lb_labeled, lb_unlabeled = adgm.compute_lower_bound_gumbel(
                images_l, label_onehot_l, images_u, temperature)
            loss_lower_bound = -lower_bound

            # classification loss
            a_l = adgm.encode_x_a(images_l, False)
            unnormalized_y_distribution = adgm.encode_ax_y_distribution(
                a_l, images_l, softmax=False)
            loss_classifier = alpha * F.softmax_cross_entropy(
                unnormalized_y_distribution, adgm.to_variable(label_ids_l))

            # backprop
            adgm.backprop(loss_classifier + loss_lower_bound)

            sum_lower_bound_l += float(lb_labeled.data)
            sum_lower_bound_u += float(lb_unlabeled.data)
            sum_loss_classifier += float(loss_classifier.data)
            progress.show(t, num_trains_per_epoch, {})

        adgm.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            config.ndim_x, config.ndim_y)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution = adgm.encode_x_y_distribution(images_l,
                                                          softmax=True,
                                                          test=True)
            accuracy = F.accuracy(y_distribution,
                                  adgm.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "lb_u": sum_lower_bound_u / num_trains_per_epoch,
                "lb_l": sum_lower_bound_l / num_trains_per_epoch,
                "loss_spv": sum_loss_classifier / num_trains_per_epoch,
                "accuracy": validation_accuracy,
                "tmp": temperature,
            })

        # anneal the temperature
        temperature = max(0.5, temperature * 0.999)

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
示例#11
0
def main(run_load_from_file=False):
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    opt = Operation()
    config = Config()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    # setting
    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_l = 100
    batch_size_u = 100

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data,
        num_types_of_label)

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
         Load from checkpoint or start a new session

        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(
                open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []

        # Recording loss per epoch
        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)
            '''
            Learning rate generator

            '''
            learning_rate = opt.ladder_learning_rate(epoch +
                                                     len(training_epoch_loss))

            # Recording loss per iteration
            training_loss_set = []
            sum_loss_reconstruction = 0
            sum_loss_supervised = 0
            sum_loss_discrminator = 0
            sum_loss_generator = 0

            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)

                # sample from data distribution
                images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
                    training_images_l, training_labels_l, batch_size_l)
                images_u = dataset.sample_unlabeled_data(
                    training_images_u, batch_size_u)

                # reconstruction_phase
                _, loss_reconstruction = sess.run([h.opt_r, h.loss_r],
                                                  feed_dict={
                                                      h.x: images_u,
                                                      h.lr: learning_rate
                                                  })

                z_true_u = sampler.gaussian(batch_size_u,
                                            config.ndim_z,
                                            mean=0,
                                            var=1)
                y_true_u = sampler.onehot_categorical(batch_size_u,
                                                      config.ndim_y)
                # adversarial phase for discriminator
                _, loss_discriminator_y = sess.run([h.opt_dy, h.loss_dy],
                                                   feed_dict={
                                                       h.x: images_u,
                                                       h.y: y_true_u,
                                                       h.lr: learning_rate
                                                   })

                _, loss_discriminator_z = sess.run([h.opt_dz, h.loss_dz],
                                                   feed_dict={
                                                       h.x: images_u,
                                                       h.z: z_true_u,
                                                       h.lr: learning_rate
                                                   })

                loss_discriminator = loss_discriminator_y + loss_discriminator_z

                # adversarial phase for generator
                _, loss_generator_y, loss_generator_z = sess.run(
                    [h.opt_e, h.loss_gy, h.loss_gz],
                    feed_dict={
                        h.x: images_u,
                        h.lr: learning_rate
                    })

                loss_generator = loss_generator_y + loss_generator_z

                # supervised phase
                _, loss_generator_supervised = sess.run([h.opt_ey, h.loss_ey],
                                                        feed_dict={
                                                            h.x: images_l,
                                                            h.y_s: label_id_l,
                                                            h.lr: learning_rate
                                                        })

                training_loss_set.append([
                    loss_reconstruction,
                    loss_discriminator,
                    loss_discriminator_y,
                    loss_discriminator_z,
                    loss_generator,
                    loss_generator_z,
                    loss_generator_y,
                    loss_generator_supervised,
                ])

                sum_loss_reconstruction += loss_reconstruction
                sum_loss_discrminator += loss_discriminator
                sum_loss_generator += loss_generator
                sum_loss_supervised += loss_generator_supervised

                if i % 1000 == 0:
                    process_iteration.show_table_2d(
                        i, num_trains_per_epoch, {
                            'reconstruction': sum_loss_reconstruction /
                            (i + 1),
                            'discriminator': sum_loss_discrminator / (i + 1),
                            'generator': sum_loss_generator / (i + 1),
                            'supervise': sum_loss_supervised / (i + 1),
                        })
            # In end of epoch, summary the loss
            average_training_loss_per_epoch = np.mean(
                np.array(training_loss_set), axis=0)

            # validation phase
            images_v_segments = np.split(validation_images,
                                         num_validation_data // 1000)
            labels_v_segments = np.split(validation_labels,
                                         num_validation_data // 1000)
            sum_accuracy = 0
            for images_v, labels_v in zip(images_v_segments,
                                          labels_v_segments):
                y_v = sess.run(h.y_r, feed_dict={h.x: images_v})
                accuracy = opt.compute_accuracy(y_v, labels_v)
                sum_accuracy += accuracy
            validation_accuracy = sum_accuracy / len(images_v_segments)

            # append validation accuracy to the training loss
            average_loss_per_epoch = np.append(average_training_loss_per_epoch,
                                               validation_accuracy)
            training_epoch_loss.append(average_loss_per_epoch)
            loss_name_per_epoch = [
                'reconstruction', 'discriminator', 'discriminator_y',
                'discriminator_z', 'generator', 'generator_z', 'generator_y',
                'supervised', 'validation_accuracy'
            ]

            if epoch % 1 == 0:
                process.show_bar(
                    epoch, max_epoch, {
                        'loss_r': average_loss_per_epoch[0],
                        'loss_d': average_loss_per_epoch[1],
                        'loss_g': average_loss_per_epoch[4],
                        'loss_v': average_loss_per_epoch[8],
                    })

                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(
                                    str(epoch).zfill(3)))

            if epoch % 10 == 0:
                saver.save(sess,
                           os.path.join(config.ckpt_dir, 'model_ckptpoint'),
                           global_step=epoch)
                pickle.dump((training_epoch_loss, loss_name_per_epoch),
                            open(config.ckpt_dir + '/pickle.pkl', 'wb'))
                plt.plot_double_scale_trend(config.ckpt_dir)
示例#12
0
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # create semi-supervised split
    num_labeled_data = 10000
    num_types_of_label = 11  # additional label corresponds to unlabeled data
    training_images_l, training_labels_l, training_images_u, _, _ = dataset.create_semisupervised(
        images, labels, 0, num_labeled_data, num_types_of_label)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l,
                training_labels_l,
                batchsize_l,
                ndim_y=num_types_of_label)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u)

            # reconstruction phase
            z_u = aae.encode_x_z(images_u)
            reconstruction_u = aae.decode_z_x(z_u)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            z_fake_u = aae.encode_x_z(images_u)
            z_fake_l = aae.encode_x_z(images_l)
            onehot = np.zeros((1, num_types_of_label), dtype=np.float32)
            onehot[0, -1] = 1  # turn on the extra class
            label_onehot_u = np.repeat(onehot, batchsize_u, axis=0)
            z_true_l = sampler.supervised_gaussian_mixture(
                batchsize_l, config.ndim_z, label_ids_l,
                num_types_of_label - 1)
            z_true_u = sampler.gaussian_mixture(batchsize_u, config.ndim_z,
                                                num_types_of_label - 1)
            dz_true_l = aae.discriminate_z(label_onehot_l,
                                           z_true_l,
                                           apply_softmax=False)
            dz_true_u = aae.discriminate_z(label_onehot_u,
                                           z_true_u,
                                           apply_softmax=False)
            dz_fake_l = aae.discriminate_z(label_onehot_l,
                                           z_fake_l,
                                           apply_softmax=False)
            dz_fake_u = aae.discriminate_z(label_onehot_u,
                                           z_fake_u,
                                           apply_softmax=False)
            loss_discriminator = F.softmax_cross_entropy(
                dz_true_l, class_true) + F.softmax_cross_entropy(
                    dz_true_u, class_true) + F.softmax_cross_entropy(
                        dz_fake_l, class_fake) + F.softmax_cross_entropy(
                            dz_fake_u, class_fake)
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            z_fake_u = aae.encode_x_z(images_u)
            z_fake_l = aae.encode_x_z(images_l)
            dz_fake_l = aae.discriminate_z(label_onehot_l,
                                           z_fake_l,
                                           apply_softmax=False)
            dz_fake_u = aae.discriminate_z(label_onehot_u,
                                           z_fake_u,
                                           apply_softmax=False)
            loss_generator = F.softmax_cross_entropy(
                dz_fake_l, class_true) + F.softmax_cross_entropy(
                    dz_fake_u, class_true)
            aae.backprop_generator(loss_generator)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)

            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
                "loss_d": sum_loss_discriminator / num_trains_per_epoch,
                "loss_g": sum_loss_generator / num_trains_per_epoch,
            })
示例#13
0
def main():
	images, labels = dataset.load_test_images()
	num_scatter = len(images)
	x, _, label_ids = dataset.sample_labeled_data(images, labels, num_scatter)
	z = aae.to_numpy(aae.encode_x_z(x, test=True))
	plot.scatter_labeled_z(z, label_ids, dir=args.plot_dir)
示例#14
0
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images, labels, num_validation_data, num_labeled_data,
        num_types_of_label)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize_u, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize_u, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_supervised = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0
        sum_loss_cluster_head = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u)

            # reconstruction phase
            qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
            representation = aae.encode_yz_representation(qy_x_u, z_u)
            reconstruction_u = aae.decode_representation_x(representation)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_u), reconstruction_u)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            z_true_u = sampler.gaussian(batchsize_u,
                                        config.ndim_z,
                                        mean=0,
                                        var=1)
            y_true_u = sampler.onehot_categorical(batchsize_u, config.ndim_y)
            dz_true = aae.discriminate_z(z_true_u, apply_softmax=False)
            dy_true = aae.discriminate_y(y_true_u, apply_softmax=False)
            dz_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            dy_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_discriminator_z = F.softmax_cross_entropy(
                dz_true, class_true) + F.softmax_cross_entropy(
                    dz_fake, class_fake)
            loss_discriminator_y = F.softmax_cross_entropy(
                dy_true, class_true) + F.softmax_cross_entropy(
                    dy_fake, class_fake)
            loss_discriminator = loss_discriminator_z + loss_discriminator_y
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
            dz_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
            dy_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
            loss_generator_z = F.softmax_cross_entropy(dz_fake, class_true)
            loss_generator_y = F.softmax_cross_entropy(dy_fake, class_true)
            loss_generator = loss_generator_z + loss_generator_y
            aae.backprop_generator(loss_generator)

            # supervised phase
            log_qy_x_l, z_l = aae.encode_x_yz(images_l, apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                log_qy_x_l, aae.to_variable(label_ids_l))
            aae.backprop_generator(loss_supervised)

            # additional cost function that penalizes the euclidean distance between of every two of cluster
            distance = aae.compute_distance_of_cluster_heads()
            loss_cluster_head = -F.sum(distance)
            aae.backprop_cluster_head(loss_cluster_head)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)
            sum_loss_cluster_head += float(
                aae.nCr(config.ndim_y, 2) *
                config.cluster_head_distance_threshold +
                loss_cluster_head.data)

            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        # validation phase
        images_v_segments = np.split(validation_images,
                                     num_validation_data // 1000)
        labels_v_segments = np.split(validation_labels,
                                     num_validation_data // 1000)
        sum_accuracy = 0
        for images_v, labels_v in zip(images_v_segments, labels_v_segments):
            qy = aae.encode_x_yz(images_v, apply_softmax=True, test=True)[0]
            accuracy = F.accuracy(qy, aae.to_variable(labels_v))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_v_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
                "loss_s": sum_loss_supervised / num_trains_per_epoch,
                "loss_d": sum_loss_discriminator / num_trains_per_epoch,
                "loss_g": sum_loss_generator / num_trains_per_epoch,
                "loss_c": sum_loss_cluster_head / num_trains_per_epoch,
                "accuracy": validation_accuracy
            })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
示例#15
0
文件: train.py 项目: musyoku/adgm
def main():
	# load MNIST images
	images, labels = dataset.load_train_images()

	# config
	config = adgm.config

	# settings
	max_epoch = 1000
	num_trains_per_epoch = 500
	batchsize_l = 100
	batchsize_u = 100
	alpha = 1

	# seed
	np.random.seed(args.seed)
	if args.gpu_device != -1:
		cuda.cupy.random.seed(args.seed)

	# save validation accuracy per epoch
	csv_results = []

	# create semi-supervised split
	num_validation_data = 10000
	num_labeled_data = 100
	num_types_of_label = 10
	training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(images, labels, num_validation_data, num_labeled_data, num_types_of_label, seed=args.seed)
	print training_labels_l

	# init weightnorm layers
	if config.use_weightnorm:
		print "initializing weight normalization layers ..."
		images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
		images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)
		adgm.compute_lower_bound(images_l, label_onehot_l, images_u)

	# training
	temperature = 1
	progress = Progress()
	for epoch in xrange(1, max_epoch):
		progress.start_epoch(epoch, max_epoch)
		sum_lower_bound_l = 0
		sum_lower_bound_u = 0
		sum_loss_classifier = 0

		for t in xrange(num_trains_per_epoch):
			# sample from data distribution
			images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y)
			images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x)

			# lower bound loss using gumbel-softmax
			lower_bound, lb_labeled, lb_unlabeled = adgm.compute_lower_bound_gumbel(images_l, label_onehot_l, images_u, temperature)
			loss_lower_bound = -lower_bound

			# classification loss
			a_l = adgm.encode_x_a(images_l, False)
			unnormalized_y_distribution = adgm.encode_ax_y_distribution(a_l, images_l, softmax=False)
			loss_classifier = alpha * F.softmax_cross_entropy(unnormalized_y_distribution, adgm.to_variable(label_ids_l))

			# backprop
			adgm.backprop(loss_classifier + loss_lower_bound)

			sum_lower_bound_l += float(lb_labeled.data)
			sum_lower_bound_u += float(lb_unlabeled.data)
			sum_loss_classifier += float(loss_classifier.data)
			progress.show(t, num_trains_per_epoch, {})

		adgm.save(args.model_dir)

		# validation
		images_l, _, label_ids_l = dataset.sample_labeled_data(validation_images, validation_labels, num_validation_data, config.ndim_x, config.ndim_y)
		images_l_segments = np.split(images_l, num_validation_data // 500)
		label_ids_l_segments = np.split(label_ids_l, num_validation_data // 500)
		sum_accuracy = 0
		for images_l, label_ids_l in zip(images_l_segments, label_ids_l_segments):
			y_distribution = adgm.encode_x_y_distribution(images_l, softmax=True, test=True)
			accuracy = F.accuracy(y_distribution, adgm.to_variable(label_ids_l))
			sum_accuracy += float(accuracy.data)
		validation_accuracy = sum_accuracy / len(images_l_segments)
		
		progress.show(num_trains_per_epoch, num_trains_per_epoch, {
			"lb_u": sum_lower_bound_u / num_trains_per_epoch,
			"lb_l": sum_lower_bound_l / num_trains_per_epoch,
			"loss_spv": sum_loss_classifier / num_trains_per_epoch,
			"accuracy": validation_accuracy,
			"tmp": temperature,
		})

		# anneal the temperature
		temperature = max(0.5, temperature * 0.999)

		# write accuracy to csv
		csv_results.append([epoch, validation_accuracy])
		data = pd.DataFrame(csv_results)
		data.columns = ["epoch", "accuracy"]
		data.to_csv("{}/result.csv".format(args.model_dir))
示例#16
0
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = vat.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 200

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        config.ndim_y,
        seed=args.seed)
    print training_labels_l

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_supervised = 0
        sum_loss_lds = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l,
                training_labels_l,
                batchsize_l,
                config.ndim_x,
                config.ndim_y,
                binarize=False)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u,
                                                     config.ndim_x,
                                                     binarize=False)

            # supervised loss
            unnormalized_y_distribution = vat.encode_x_y(images_l,
                                                         apply_softmax=False)
            loss_supervised = F.softmax_cross_entropy(
                unnormalized_y_distribution, vat.to_variable(label_ids_l))

            # virtual adversarial training
            lds_l = -F.sum(vat.compute_lds(images_l)) / batchsize_l
            lds_u = -F.sum(vat.compute_lds(images_u)) / batchsize_u
            loss_lsd = lds_l + lds_u

            # backprop
            vat.backprop(loss_supervised + config.lambda_ * loss_lsd)

            sum_loss_supervised += float(loss_supervised.data)
            sum_loss_lds += float(loss_lsd.data)
            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        vat.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images,
            validation_labels,
            num_validation_data,
            config.ndim_x,
            config.ndim_y,
            binarize=False)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution = vat.encode_x_y(images_l,
                                            apply_softmax=True,
                                            test=True)
            accuracy = F.accuracy(y_distribution, vat.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_spv": sum_loss_supervised / num_trains_per_epoch,
                "loss_lds": sum_loss_lds / num_trains_per_epoch,
                "accuracy": validation_accuracy,
            })

        # write accuracy to csv
        csv_results.append(
            [epoch, validation_accuracy,
             progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))
示例#17
0
def main():
    # load MNIST images
    train_images, train_labels = dataset.load_train_images()

    # config
    config = aae.config

    # settings
    # _l -> labeled
    # _u -> unlabeled
    max_epoch = 1000
    num_trains_per_epoch = 5000
    batchsize = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # classification
    # 0 -> true sample
    # 1 -> generated sample
    class_true = aae.to_variable(np.zeros(batchsize, dtype=np.int32))
    class_fake = aae.to_variable(np.ones(batchsize, dtype=np.int32))

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss_reconstruction = 0
        sum_loss_discriminator = 0
        sum_loss_generator = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                train_images, train_labels, batchsize)

            # reconstruction phase
            z_l = aae.encode_x_z(images_l)
            reconstruction_l = aae.decode_yz_x(label_onehot_l, z_l)
            loss_reconstruction = F.mean_squared_error(
                aae.to_variable(images_l), reconstruction_l)
            aae.backprop_generator(loss_reconstruction)
            aae.backprop_decoder(loss_reconstruction)

            # adversarial phase
            images_l = dataset.sample_labeled_data(train_images, train_labels,
                                                   batchsize)[0]
            z_fake_l = aae.encode_x_z(images_l)
            z_true_l = sampler.gaussian(batchsize,
                                        config.ndim_z,
                                        mean=0,
                                        var=1)
            dz_true = aae.discriminate_z(z_true_l, apply_softmax=False)
            dz_fake = aae.discriminate_z(z_fake_l, apply_softmax=False)
            loss_discriminator = F.softmax_cross_entropy(
                dz_true, class_true) + F.softmax_cross_entropy(
                    dz_fake, class_fake)
            aae.backprop_discriminator(loss_discriminator)

            # adversarial phase
            images_l = dataset.sample_labeled_data(train_images, train_labels,
                                                   batchsize)[0]
            z_fake_l = aae.encode_x_z(images_l)
            dz_fake = aae.discriminate_z(z_fake_l, apply_softmax=False)
            loss_generator = F.softmax_cross_entropy(dz_fake, class_true)
            aae.backprop_generator(loss_generator)

            sum_loss_reconstruction += float(loss_reconstruction.data)
            sum_loss_discriminator += float(loss_discriminator.data)
            sum_loss_generator += float(loss_generator.data)

            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        aae.save(args.model_dir)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss_r": sum_loss_reconstruction / num_trains_per_epoch,
                "loss_d": sum_loss_discriminator / num_trains_per_epoch,
                "loss_g": sum_loss_generator / num_trains_per_epoch,
            })
示例#18
0
def main(run_load_from_file=False):
    # config
    opt = Operation()
    opt.check_dir(config.ckpt_dir, is_restart=False)
    opt.check_dir(config.log_dir, is_restart=True)

    max_epoch = 510
    num_trains_per_epoch = 500
    batch_size_l = 100
    batch_size_u = 100

    # create semi-supervised split
    # Load minist images
    images, labels = dataset.load_train_images()
    num_labeled_data = 10000
    num_types_of_label = 11  # additional label corresponds to unlabeled data
    training_images_l, training_labels_l, training_images_u, _, _ = dataset.create_semisupervised(images, labels, 0, num_labeled_data, num_types_of_label)

    # training
    with tf.device(config.device):
        h = build_graph()

    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=sess_config) as sess:
        '''
         Load from checkpoint or start a new session

        '''
        if run_load_from_file:
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
            training_epoch_loss, _ = pickle.load(open(config.ckpt_dir + '/pickle.pkl', 'rb'))
        else:
            sess.run(tf.global_variables_initializer())
            training_epoch_loss = []

        # Recording loss per epoch
        process = Process()
        for epoch in range(max_epoch):
            process.start_epoch(epoch, max_epoch)

            '''
            Learning rate generator

            '''
            learning_rate = opt.ladder_learning_rate(epoch + len(training_epoch_loss))

            # Recording loss per iteration
            sum_loss_reconstruction = 0
            sum_loss_discrminator = 0
            sum_loss_generator = 0
            process_iteration = Process()
            for i in range(num_trains_per_epoch):
                process_iteration.start_epoch(i, num_trains_per_epoch)
                # Inputs
                '''
                _l -> labeled
                _u -> unlabeled

                '''
                images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(training_images_l, training_labels_l, batch_size_l, ndim_y=num_types_of_label)
                images_u = dataset.sample_unlabeled_data(training_images_u, batch_size_u)
                onehot = np.zeros((1, num_types_of_label), dtype=np.float32)
                onehot[-1] = 1
                label_onehot_u = np.repeat(onehot, batch_size_u, axis=0)
                z_true_l = sampler.supervised_swiss_roll(batch_size_l, config.ndim_z, label_id_l, num_types_of_label - 1)
                z_true_u = sampler.swiss_roll(batch_size_u, config.ndim_z, num_types_of_label - 1)
                # z_true_l = sampler.supervised_gaussian_mixture(batch_size_l, config.ndim_z, label_id_l, num_types_of_label - 1)
                # z_true_u = sampler.gaussian_mixture(batch_size_u, config.ndim_z, num_types_of_label - 1)

                # reconstruction_phase
                _, loss_reconstruction = sess.run([h.opt_r, h.loss_r], feed_dict={
                    h.x: images_u,
                    h.lr: learning_rate
                })

                # adversarial phase for discriminator
                _, loss_discriminator_l = sess.run([h.opt_d, h.loss_d], feed_dict={
                    h.x: images_l,
                    h.label: label_onehot_l,
                    h.z: z_true_l,
                    h.lr: learning_rate
                })

                _, loss_discriminator_u = sess.run([h.opt_d, h.loss_d], feed_dict={
                    h.x: images_u,
                    h.label: label_onehot_u,
                    h.z: z_true_u,
                    h.lr: learning_rate
                })

                loss_discriminator = loss_discriminator_l + loss_discriminator_u

                # adversarial phase for generator
                _, loss_generator_l= sess.run([h.opt_e, h.loss_e,], feed_dict={
                    h.x: images_l,
                    h.label: label_onehot_l,
                    h.lr: learning_rate
                })

                _, loss_generator_u = sess.run([h.opt_e, h.loss_e], feed_dict={
                    h.x: images_u,
                    h.label: label_onehot_u,
                    h.lr: learning_rate
                })
                loss_generator = loss_generator_l + loss_generator_u

                sum_loss_reconstruction += loss_reconstruction / batch_size_u
                sum_loss_discrminator += loss_discriminator
                sum_loss_generator += loss_generator

                if i % 1000 == 0:
                    process_iteration.show_table_2d(i, num_trains_per_epoch, {
                        'reconstruction': sum_loss_reconstruction / (i + 1),
                        'discriminator': sum_loss_discrminator / (i + 1),
                        'generator': sum_loss_generator / (i + 1),
                    })

            average_loss_per_epoch = [
                sum_loss_reconstruction / num_trains_per_epoch,
                sum_loss_discrminator / num_trains_per_epoch,
                sum_loss_generator / num_trains_per_epoch,
            ]
            training_epoch_loss.append(average_loss_per_epoch)
            training_loss_name = [
                'reconstruction',
                'discriminator',
                'generator'
            ]

            if epoch % 1 == 0:
                process.show_bar(epoch, max_epoch, {
                    'loss_r': average_loss_per_epoch[0],
                    'loss_d': average_loss_per_epoch[1],
                    'loss_g': average_loss_per_epoch[2]
                })

                plt.tile_images(sess.run(h.x_, feed_dict={h.x: images_u}),
                                dir=config.log_dir,
                                filename='x_rec_epoch_{}'.format(str(epoch).zfill(3)))

                plt.scatter_labeled_z(sess.run(h.z_r, feed_dict={h.x: images[:1000]}), [int(var) for var in labels[:1000]],
                                      dir=config.log_dir,
                                      filename='z_representation-{}'.format(epoch))

            if epoch % 10 == 0:
                saver.save(sess, os.path.join(config.ckpt_dir, 'model_ckptpoint'), global_step=epoch)
                pickle.dump((training_epoch_loss, training_loss_name), open(config.ckpt_dir + '/pickle.pkl', 'wb'))
                plt.plot_double_scale_trend(config.ckpt_dir)