def encode(self):
    """Encode the test-split images into the latent space and plot them
    with t-SNE, colored by their attributes.

    Restores the latest trained checkpoint, runs a fixed number of
    batches through the encoder, and hands the collected latent means to
    ``plot_tsne``.
    """
    # Load the held-out test split.
    images, attrs = load_pickle(self.data_path, split='test')

    # Build the inference graph.
    model = self.model
    model.build_model()

    with tf.Session(config=self.config) as sess:
        # Restore the most recent set of trained parameters.
        print('loading testing model..')
        saver = tf.train.Saver()
        self.load_latest(saver, sess)

        print('start encoding..!')
        # Cap the encoding batch size at 32 regardless of the training setting.
        batch_size = min(self.batch_size, 32)
        sample_iter = 30
        total = batch_size * sample_iter

        # NOTE(review): assumes the latent mean has dimension 64 — confirm
        # against model.mean's shape.
        tsne_encoded = np.empty((total, 64))
        tsne_attrs = attrs[:total]

        for step in range(sample_iter):
            print("\r{}/{}".format(step + 1, sample_iter), end='')
            lo = step * batch_size
            hi = lo + batch_size
            feed_dict = {model.images: images[lo:hi]}
            tsne_encoded[lo:hi, :] = sess.run(model.mean, feed_dict)

        plot_tsne(tsne_encoded, tsne_attrs)
def tsne_on_ids():
    """Train (or reload) an MLP on the IDS dataset, compute counterfactuals
    for a random subset of class-0 samples, and visualize both with t-SNE.
    """
    mlp_dump_file = "exports/mlp_ids.joblib"
    dump_path = Path(mlp_dump_file)

    X, Y, names = load_ids_csv(normalize=True)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=1201)

    # Reuse a previously dumped classifier when available; otherwise fit
    # a fresh one and persist it for the next run.
    if dump_path.is_file():
        clf = joblib.load(mlp_dump_file)
    else:
        clf = MLPClassifier(solver='adam', alpha=1e-2,
                            hidden_layer_sizes=(20, 5), random_state=1)
        clf.fit(X_train, Y_train)
        joblib.dump(clf, mlp_dump_file)

    # Pick 50 random samples (indices may repeat, by design of randint).
    choose = np.random.randint(X.shape[0], size=50)
    data_subset = X[choose, :]
    label_subset = Y[choose]

    explainer = DefaultExplainer(clf, X, None)
    # Counterfactuals are computed only for the class-0 samples of the subset.
    counterfacts = explainer.get_counterfactuals(
        data_subset[label_subset == 0])

    # Per-feature mean absolute distance sample -> counterfactual.
    distances = abs(counterfacts - data_subset[label_subset == 0])
    print(np.average(distances, axis=0))

    utils.plot_tsne(data_subset, label_subset, counterfacts)
    return
def step3(source, target, batch_size=64,
          logdir="./Log/ADDA/advermodel/best/MNIST2USPS/NOBN",
          classes_num=10, strn=None, sten=None, ttrn=None, tten=None):
    """Evaluate a trained ADDA model on the source and target test sets
    and visualize the encoded features with t-SNE.

    Args:
        source, target: dataset names resolved via ``dataset.get_dataset``.
        batch_size: evaluation batch size.
        logdir: directory containing the checkpoint to restore.
        classes_num: number of output classes for the classifier head.
        strn, sten, ttrn, tten: per-domain train/test size overrides passed
            through to the dataset loaders.

    Raises:
        ValueError: if ``logdir`` contains no checkpoint.
    """
    # Prepare the data pipelines for both domains.
    data_func = dataset.get_dataset(source, target)
    print(data_func)
    s_x_tr, s_y_tr, s_x_te, s_y_te, s_tr_size, s_te_size, s_init = data_func[
        0](batch_size, strn, sten)
    t_x_tr, t_y_tr, t_x_te, t_y_te, t_tr_size, t_te_size, t_init = data_func[
        1](batch_size, ttrn, tten)
    print(
        "dataset information:\n source: %s train_size: %d, test_size: %d \n target: %s train_size: %d, test_size: %d"
        % (source, s_tr_size, s_te_size, target, t_tr_size, t_te_size))

    # Create the evaluation graph.
    nn = adda.ADDA(classes_num)

    # Source domain: its own encoder, shared classifier/discriminator.
    feat_s = nn.s_encoder(s_x_te, reuse=False, trainable=False)
    logits_s = nn.classifier(feat_s, reuse=False, trainable=False)
    disc_s = nn.discriminator(feat_s, reuse=False, trainable=False)

    # Target domain: target encoder, reusing the shared heads.
    feat_t = nn.t_encoder(t_x_te, reuse=False, trainable=False)
    logits_t = nn.classifier(feat_t, reuse=True, trainable=False)
    disc_t = nn.discriminator(feat_t, reuse=True, trainable=False)

    source_accuracy = nn.eval(logits_s, s_y_te)
    target_accuracy = nn.eval(logits_t, t_y_te)

    path = tf.train.latest_checkpoint(logdir)
    saver = tf.train.Saver(max_to_keep=3)
    if path is None:
        # BUG FIX: the original formatted `path` (always None here) into the
        # message; report the directory that was actually searched instead.
        raise ValueError("No checkpoint exists in this dir: %s" % logdir)

    with tf.Session() as sess:
        saver.restore(sess, path)
        sess.run([s_init, t_init])
        s_acc, t_acc, sx, sfe, sl, tx, tfe, tl = sess.run([
            source_accuracy, target_accuracy, s_x_te, logits_s, s_y_te,
            t_x_te, logits_t, t_y_te
        ])
        print(s_acc, t_acc)
        # Visualize both the encoded features and the raw inputs.
        utils.plot_tsne(sfe, sl, tfe, tl, 200)
        utils.plot_tsne_orign(sx, sl, tx, tl, 200)
        plt.show()
def main(args):
    # Run the full autoencoder analysis pipeline: train each autoencoder on
    # the chosen dataset, project the data, optionally visualize latent
    # interpolations and t-SNE embeddings, then score every autoencoder with
    # each evaluation method and log the results as CSV rows.

    # Get data; the output directory is keyed by dataset name and latent dim
    # so runs with different settings don't overwrite each other.
    data, dataset_name = get_dataset(args.dataset)
    dataset_name = f"{dataset_name}-{args.latent_dim}"
    output_dir = os.path.join(f"outputs-{args.mode}", dataset_name)
    os.makedirs(output_dir, exist_ok=True)

    # set logger (one CSV row per autoencoder, built incrementally below)
    logger = simple_logger(os.path.join(output_dir, "results.csv"))

    # data[0].shape[1] is the flattened input dimension fed to the encoders.
    autoencoders = get_autoencoders(data[0].shape[1], args.latent_dim,
                                    args.mode)
    evaluation_methods = get_evaluation_methods(args.mode, logger)
    train_data, train_labels, test_data, test_labels = data
    print(
        f"run_analysis on {len(train_data)} train and {len(test_data)} test samples"
    )
    for ae in autoencoders:
        # Start the CSV row with the autoencoder's name (no newline yet).
        logger.log(f"{ae}", end="")
        print(ae)

        # Learn encoding on train data train on it and test on test encodings
        ae.learn_encoder_decoder(
            train_data, os.path.join(output_dir, "Training-autoencoder"))
        start = time()
        print("\tProjecting Data... ", end="")
        projected_train_data = ae.encode(train_data)
        projected_test_data = ae.encode(test_data)
        print(f"Finished in {time() - start:.2f} sec")

        if args.plot_latent_interpolation:
            start = time()
            print("\tVisualizing latent interpolation... ", end="")
            plot_latent_interpolation(ae,
                                      train_data,
                                      plot_path=os.path.join(
                                          output_dir, "Latent-interpollation",
                                          f"{ae}-Train.png"))
            plot_latent_interpolation(ae,
                                      test_data,
                                      plot_path=os.path.join(
                                          output_dir, "Latent-interpollation",
                                          f"{ae}-Test.png"))
            print(f"Finished in {time() - start:.2f} sec")

        if args.plot_tsne:
            # Run T-SNE on the projected (latent) data, not the raw inputs.
            start = time()
            print("\tRunning T-SNE... ", end="")
            plot_tsne(projected_train_data, train_labels,
                      os.path.join(output_dir, "T-SNE", f"{ae}-Train.png"))
            plot_tsne(projected_test_data, test_labels,
                      os.path.join(output_dir, "T-SNE", f"{ae}-Test.png"))
            print(f"Finished in {time() - start:.2f} sec")

        projected_data = (projected_train_data, projected_test_data)
        # Each evaluator appends one comma-separated cell to the CSV row.
        for evaluator in evaluation_methods:
            result_str = evaluator.evaluate(ae,
                                            data,
                                            projected_data,
                                            plot_path=os.path.join(
                                                output_dir, "Evaluation",
                                                f"{evaluator}_{ae}.png"))
            logger.log(f",{result_str}", end="")
        # Terminate this autoencoder's CSV row.
        logger.log("")

    # Side-by-side reconstructions of test samples for all autoencoders.
    plot_examples(autoencoders,
                  test_data,
                  plot_path=os.path.join(output_dir,
                                         "Test-reconstruction.png"))
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from utils import read_word_vectors, isplural, plot_with_labels, plot_tsne

# Load pre-trained 50-d GloVe vectors from a text file.
w = read_word_vectors('../datasets/glove.6B/glove.6B.50d.txt')

# Project comparative/superlative triples to 1-D with t-SNE to see whether
# the degree ordering is preserved. (The words are not always in order.)
for triple in (['small', 'smaller', 'smallest'],
               ['large', 'larger', 'largest'],
               ['happy', 'happier', 'happiest']):
    plot_tsne(triple, w, ndim=1)

# Earlier exploration: comparing norms of comparative/superlative offset
# vectors (kept for reference).
# x = (w.loc[:, 'larger'] - w.loc[:, 'large']).values
# y = (w.loc[:, 'largest'] - w.loc[:, 'large']).values
# df = pd.DataFrame({'larger': x, 'largest': y})
# print np.linalg.norm(df.iloc[:, 0].values)
# print np.linalg.norm(df.iloc[:, 1].values)
# x = (w.loc[:, 'smaller'] - w.loc[:, 'small']).values
# y = (w.loc[:, 'smallest'] - w.loc[:, 'small']).values
# df = pd.DataFrame({'smaller': x, 'smallest': y})
# print np.linalg.norm(df.iloc[:, 0].values)
# print np.linalg.norm(df.iloc[:, 1].values)
# x = (w.loc[:, 'happier'] - w.loc[:, 'happy']).values
# y = (w.loc[:, 'happiest'] - w.loc[:, 'happy']).values
# df = pd.DataFrame({'happier': x, 'happiest': y})
# print np.linalg.norm(df.iloc[:, 0].values)
def tsne(embedded_X_train, y, f_name, perplexity=10,
         perplexities=(7, 20, 50, 100)):
    """Run 2-D t-SNE over a sweep of perplexities and plot each embedding.

    Args:
        embedded_X_train: array-like of samples to embed.
        y: labels used to color the t-SNE plot.
        f_name: base filename; each plot gets '_<perplexity>' appended.
        perplexity: unused. Kept only for backward compatibility — the
            original implementation accepted it but never read it.
        perplexities: iterable of perplexity values to sweep (new parameter;
            defaults to the previously hard-coded constants).
    """
    for perpex in perplexities:
        X_embedded = TSNE(n_components=2,
                          perplexity=perpex).fit_transform(embedded_X_train)
        plot_tsne(X_embedded, y, f_name + '_' + str(perpex))
# Split the full dataset (X, Y defined earlier in the file) into train and
# test using config constants: everything except the last NUM_TEST samples
# is used for training, the tail after NUM_TRAIN + NUM_VAL for testing.
# NOTE(review): the two slices line up only if
# NUM_TRAIN + NUM_VAL + NUM_TEST == len(X) — confirm the config adds up.
X_, Y_ = X[:-config.NUM_TEST], Y[:-config.NUM_TEST]
X_test, Y_test = X[config.NUM_TRAIN + config.NUM_VAL:], Y[config.NUM_TRAIN + config.NUM_VAL:]

# Derive the label set and class count from the training targets.
config.CLASS_LABELS = np.unique(Y_)
config.CLASSES = len(config.CLASS_LABELS)

# Network sized for flattened square images (IMAGE_SIZE x IMAGE_SIZE inputs).
network = neural_network.Network(config.NUM_TRAIN, config.IMAGE_SIZE * config.IMAGE_SIZE, len(config.CLASS_LABELS))
network.train(X_, Y_)

# Training / validation diagnostics.
print("Accuracy Plot for Train")
network.plot_accuracy('Train')
print("Error Plot for Train")
network.plot_cost('Train')
print("Accuracy Plot for Validation")
network.plot_accuracy('Validation')
print("Error Plot for Validation")
network.plot_cost('Validation')
print("Plot T-SNE")
utils.plot_tsne()
print("Read Test Data")
# Note the transpose — presumably network.test expects (features, samples)
# layout; verify against the Network implementation.
network.test(X_test.T, Y_test.T)
#network.sklearn_train(X_, Y_)
#network.sklearn_test(X_test, Y_test)