Example #1
    def encode(self):
        # load dataset
        images, attrs = load_pickle(self.data_path, split='test')

        # build model
        model = self.model
        model.build_model()

        with tf.Session(config=self.config) as sess:
            # load trained parameters
            print('loading testing model..')
            saver = tf.train.Saver()
            self.load_latest(saver, sess)

            print('start encoding..!')
            batch_size = min(self.batch_size, 32)  # cap batch size at 32
            sample_iter = 30

            tsne_encoded = np.empty((batch_size * sample_iter, 64))
            tsne_attrs = attrs[:sample_iter * batch_size]
            for i in range(sample_iter):
                print("\r{}/{}".format(i + 1, sample_iter), end='')
                batch_images = images[i * batch_size:(i + 1) * batch_size]

                feed_dict = {model.images: batch_images}
                batch_encoded = sess.run(model.mean, feed_dict)
                tsne_encoded[i * batch_size:(i + 1) *
                             batch_size, :] = batch_encoded
            plot_tsne(tsne_encoded, tsne_attrs)
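Every example on this page calls a plot_tsne helper imported from a project-local utils module that is not shown, and its signature varies between projects. A minimal sketch of such a helper, assuming array inputs and a 2-D scatter output (the name, parameters, and defaults here are illustrative, not any project's actual API):

import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE


def plot_tsne(encodings, labels, save_path=None, perplexity=30):
    # Project high-dimensional encodings down to 2-D with t-SNE.
    embedded = TSNE(n_components=2,
                    perplexity=perplexity).fit_transform(encodings)
    # Scatter the points, colored by label.
    labels = np.asarray(labels)
    for label in np.unique(labels):
        mask = labels == label
        plt.scatter(embedded[mask, 0], embedded[mask, 1], s=8, label=str(label))
    plt.legend()
    if save_path is not None:
        plt.savefig(save_path)
    else:
        plt.show()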
Example #2
def tsne_on_ids():

    mlp_dump_file = "exports/mlp_ids.joblib"
    p = Path(mlp_dump_file)

    X, Y, names = load_ids_csv(normalize=True)

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=1201)

    if p.is_file():
        clf = joblib.load(mlp_dump_file)
    else:
        clf = MLPClassifier(solver='adam',
                            alpha=1e-2,
                            hidden_layer_sizes=(20, 5),
                            random_state=1)
        clf.fit(X_train, Y_train)
        joblib.dump(clf, mlp_dump_file)

    choose = np.random.randint(X.shape[0], size=50)
    data_subset = X[choose, :]
    label_subset = Y[choose]
    explainer = DefaultExplainer(clf, X, None)
    counterfacts = explainer.get_counterfactuals(
        data_subset[label_subset == 0])

    distances = abs(counterfacts - data_subset[label_subset == 0])
    print(np.average(distances, axis=0))

    utils.plot_tsne(data_subset, label_subset, counterfacts)

    return
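As a small follow-up that could go just before the return above, the averaged counterfactual distances can be paired with their feature names for a more readable report. This assumes the names returned by load_ids_csv are per-feature names, which is not verified here:

    # Assumes `names` holds one name per feature column of X
    # (an assumption, not confirmed by this snippet).
    for name, dist in zip(names, np.average(distances, axis=0)):
        print("{}: {:.4f}".format(name, dist))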
Example #3
File: train.py  Project: redhat12345/ADDA-1
def step3(source,
          target,
          batch_size=64,
          logdir="./Log/ADDA/advermodel/best/MNIST2USPS/NOBN",
          classes_num=10,
          strn=None,
          sten=None,
          ttrn=None,
          tten=None):
    # prepare data
    data_func = dataset.get_dataset(source, target)
    print(data_func)

    s_x_tr, s_y_tr, s_x_te, s_y_te, s_tr_size, s_te_size, s_init = data_func[
        0](batch_size, strn, sten)
    t_x_tr, t_y_tr, t_x_te, t_y_te, t_tr_size, t_te_size, t_init = data_func[
        1](batch_size, ttrn, tten)
    print(
        "dataset information:\n source: %s train_size: %d, test_size: %d \n target: %s train_size: %d, test_size: %d"
        % (source, s_tr_size, s_te_size, target, t_tr_size, t_te_size))

    # create graph
    nn = adda.ADDA(classes_num)
    # for source domain
    feat_s = nn.s_encoder(s_x_te, reuse=False, trainable=False)
    logits_s = nn.classifier(feat_s, reuse=False, trainable=False)
    disc_s = nn.discriminator(feat_s, reuse=False, trainable=False)

    # for target domain
    feat_t = nn.t_encoder(t_x_te, reuse=False, trainable=False)
    logits_t = nn.classifier(feat_t, reuse=True, trainable=False)
    disc_t = nn.discriminator(feat_t, reuse=True, trainable=False)

    source_accuracy = nn.eval(logits_s, s_y_te)
    target_accuracy = nn.eval(logits_t, t_y_te)

    path = tf.train.latest_checkpoint(logdir)
    saver = tf.train.Saver(max_to_keep=3)

    if path is None:
        raise ValueError("No checkpoint found in directory: %s" % logdir)
    with tf.Session() as sess:
        saver.restore(sess, path)
        sess.run([s_init, t_init])
        s_acc, t_acc, sx, sfe, sl, tx, tfe, tl = sess.run([
            source_accuracy, target_accuracy, s_x_te, logits_s, s_y_te, t_x_te,
            logits_t, t_y_te
        ])
        print(s_acc, t_acc)
        utils.plot_tsne(sfe, sl, tfe, tl, 200)
        utils.plot_tsne_orign(sx, sl, tx, tl, 200)
    plt.show()
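The nn.eval accuracy op used in this example is defined elsewhere in the project; a minimal TF1-style stand-in, assuming one-hot labels, could look like:

import tensorflow as tf


def eval_accuracy(logits, labels):
    # Mean top-1 accuracy: compare argmax predictions against the
    # argmax of the labels (one-hot labels are an assumption here).
    correct = tf.equal(tf.argmax(logits, axis=1), tf.argmax(labels, axis=1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))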
Example #4
def main(args):
    # Get data
    data, dataset_name = get_dataset(args.dataset)
    dataset_name = f"{dataset_name}-{args.latent_dim}"

    output_dir = os.path.join(f"outputs-{args.mode}", dataset_name)
    os.makedirs(output_dir, exist_ok=True)

    # set logger
    logger = simple_logger(os.path.join(output_dir, "results.csv"))

    autoencoders = get_autoencoders(data[0].shape[1], args.latent_dim,
                                    args.mode)
    evaluation_methods = get_evaluation_methods(args.mode, logger)

    train_data, train_labels, test_data, test_labels = data
    print(
        f"run_analysis on {len(train_data)} train and {len(test_data)} test samples"
    )

    for ae in autoencoders:
        logger.log(f"{ae}", end="")
        print(ae)

        # Learn the encoding on train data; downstream evaluators train on
        # the train encodings and test on the test encodings
        ae.learn_encoder_decoder(
            train_data, os.path.join(output_dir, "Training-autoencoder"))

        start = time()
        print("\tProjecting Data... ", end="")
        projected_train_data = ae.encode(train_data)
        projected_test_data = ae.encode(test_data)
        print(f"Finished in {time() - start:.2f} sec")

        if args.plot_latent_interpolation:
            start = time()
            print("\tVisualizing latent interpolation... ", end="")
            plot_latent_interpolation(ae,
                                      train_data,
                                      plot_path=os.path.join(
                                          output_dir, "Latent-interpolation",
                                          f"{ae}-Train.png"))
            plot_latent_interpolation(ae,
                                      test_data,
                                      plot_path=os.path.join(
                                          output_dir, "Latent-interpolation",
                                          f"{ae}-Test.png"))
            print(f"Finished in {time() - start:.2f} sec")

        if args.plot_tsne:
            # Run T-SNE
            start = time()
            print("\tRunning T-SNE... ", end="")
            plot_tsne(projected_train_data, train_labels,
                      os.path.join(output_dir, "T-SNE", f"{ae}-Train.png"))
            plot_tsne(projected_test_data, test_labels,
                      os.path.join(output_dir, "T-SNE", f"{ae}-Test.png"))
            print(f"Finished in {time() - start:.2f} sec")

        projected_data = (projected_train_data, projected_test_data)
        for evaluator in evaluation_methods:
            result_str = evaluator.evaluate(ae,
                                            data,
                                            projected_data,
                                            plot_path=os.path.join(
                                                output_dir, "Evaluation",
                                                f"{evaluator}_{ae}.png"))
            logger.log(f",{result_str}", end="")
        logger.log("")

    plot_examples(autoencoders,
                  test_data,
                  plot_path=os.path.join(output_dir,
                                         "Test-reconstruction.png"))
Example #5
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from utils import read_word_vectors, isplural, plot_with_labels, plot_tsne

# Read vectors from a text file
w = read_word_vectors('../datasets/glove.6B/glove.6B.50d.txt')

# Plot comparative, superlative words to see if an order is preserved.
# Words are not always in order.
plot_tsne(['small', 'smaller', 'smallest'], w, ndim=1)
plot_tsne(['large', 'larger', 'largest'], w, ndim=1)
plot_tsne(['happy', 'happier', 'happiest'], w, ndim=1)

# x = (w.loc[:, 'larger'] - w.loc[:, 'large']).values
# y = (w.loc[:, 'largest'] - w.loc[:, 'large']).values
# df = pd.DataFrame({'larger': x, 'largest': y})
# print np.linalg.norm(df.iloc[:, 0].values)
# print np.linalg.norm(df.iloc[:, 1].values)

# x = (w.loc[:, 'smaller'] - w.loc[:, 'small']).values
# y = (w.loc[:, 'smallest'] - w.loc[:, 'small']).values
# df = pd.DataFrame({'smaller': x, 'smallest': y})
# print np.linalg.norm(df.iloc[:, 0].values)
# print np.linalg.norm(df.iloc[:, 1].values)

# x = (w.loc[:, 'happier'] - w.loc[:, 'happy']).values
# y = (w.loc[:, 'happiest'] - w.loc[:, 'happy']).values
# df = pd.DataFrame({'happier': x, 'happiest': y})
# print np.linalg.norm(df.iloc[:, 0].values)
# print np.linalg.norm(df.iloc[:, 1].values)
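The commented-out analysis above uses Python 2 print statements; a runnable Python 3 version of the same offset-magnitude check for one triple, reusing w from above, would be:

import numpy as np

# Compare the magnitudes of the comparative and superlative offsets
# for 'large'; similar norms suggest a consistent scaling direction.
x = (w.loc[:, 'larger'] - w.loc[:, 'large']).values
y = (w.loc[:, 'largest'] - w.loc[:, 'large']).values
print(np.linalg.norm(x))
print(np.linalg.norm(y))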
Example #6

def tsne(embedded_X_train, y, f_name, perplexities=(7, 20, 50, 100)):
    # Project the embeddings to 2-D with t-SNE at several perplexity
    # values and save one plot per setting. (The original signature took
    # an unused perplexity=10 default, shadowed by a hard-coded list.)
    for perplexity in perplexities:
        X_embedded = TSNE(n_components=2,
                          perplexity=perplexity).fit_transform(embedded_X_train)
        plot_tsne(X_embedded, y, f_name + '_' + str(perplexity))
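A hypothetical call, where embedded_X_train and y_train are placeholder names for encoded vectors and their labels:

# Writes one plot per perplexity value, e.g. mnist_tsne_7, mnist_tsne_20, ...
tsne(embedded_X_train, y_train, 'mnist_tsne')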
Example #7
    X_, Y_ = X[:-config.NUM_TEST], Y[:-config.NUM_TEST]
    X_test, Y_test = X[config.NUM_TRAIN +
                       config.NUM_VAL:], Y[config.NUM_TRAIN + config.NUM_VAL:]

    config.CLASS_LABELS = np.unique(Y_)
    config.CLASSES = len(config.CLASS_LABELS)

    network = neural_network.Network(config.NUM_TRAIN,
                                     config.IMAGE_SIZE * config.IMAGE_SIZE,
                                     len(config.CLASS_LABELS))

    network.train(X_, Y_)

    print("Accuracy Plot for Train")
    network.plot_accuracy('Train')
    print("Error Plot for Train")
    network.plot_cost('Train')
    print("Accuracy Plot for Validation")
    network.plot_accuracy('Validation')
    print("Error Plot for Validation")
    network.plot_cost('Validation')
    print("Plot T-SNE")
    utils.plot_tsne()

    print("Read Test Data")
    network.test(X_test.T, Y_test.T)

    #network.sklearn_train(X_, Y_)
    #network.sklearn_test(X_test, Y_test)