print("\n\nTRAINING CLASSIFIER") s = time.time() lr = LogisticRegression() lr.fit(X_train, y_train) print("train acc: ", lr.score(X_train, y_train)) y_proj_pred = lr.predict(X_proj) pred_path = base_dir + "y_pred_clf.npy" np.save(pred_path, y_proj_pred) clf_sklearn_path = base_dir + "lr.joblib" joblib.dump(lr, clf_sklearn_path) clf = CLF(clf=lr, clf_type='sklearn', clf_path=clf_sklearn_path) clf_path = base_dir + 'lr.json' clf.save_json(clf_path) print("\ttime: ", time.time() - s) N = 1 R = 100 print("\n\nGRID ILAMP tSNE") grid_ilamp_tsne_path = base_dir + 'grid_ilamp_tsne.joblib' save_grid(grid_ilamp_tsne_path, R, N, clf, ilamp_tsne, X_train, tsne_proj, clf_path, ilamp_tsne_path) ui_grid_ilamp_tsne_path = base_dir + 'laps_features_ilamp_tsne.json' save_json_ui(ui_grid_ilamp_tsne_path, grid_ilamp_tsne_path, clf_path, "ilamp", ilamp_tsne_path, train_y_path, pred_path) print("\n\nGRID ILAMP UMAP") grid_ilamp_umap_path = base_dir + 'grid_ilamp_umap.joblib'
def _fit_inverse_projection(title, model_cls, model_kwargs, X, proj, save_path,
                            *extra_save_args):
    """Build, fit and save one model, printing a header and the elapsed time.

    Args:
        title: Header printed before the work starts.
        model_cls: Class to instantiate (ILAMP, RBFInv or NNInv in ``main``).
        model_kwargs: Keyword arguments forwarded to ``model_cls``.
        X: Samples passed as the first argument of ``fit``.
        proj: 2D projection of ``X``, passed as the second argument of ``fit``.
        save_path: First argument of the model's ``save`` method.
        *extra_save_args: Extra positional arguments for ``save`` (``main``
            passes a second ``.hdf5`` path for NNInv).

    Returns:
        The fitted model instance.
    """
    print(title)
    start = time.time()
    model = model_cls(**model_kwargs)
    model.fit(X, proj)
    model.save(save_path, *extra_save_args)
    print("\ttime: ", time.time() - start)
    return model


def main():
    """Run the whole pipeline for the dataset found under ``sys.argv[1]``.

    Steps, in order:
      1. Load the train/test splits from ``<base_dir>orig/`` and save the
         training labels to ``y_train.npy``.
      2. Compute t-SNE and UMAP 2D projections of the flattened training
         images and rescale each of them to [0, 1].
      3. Fit and save ILAMP, RBFInv (control points and cluster modes) and
         NNInv models for both projections, using the first ``subset_size``
         samples.
      4. Train, evaluate and save a CNN classifier and its predictions.
      5. For every (model, projection) pair, call ``save_grid`` and
         ``save_json_ui`` to write the grid and the UI description file.

    ``base_dir`` is concatenated with file names as a plain string, so it is
    expected to end with a path separator.
    """
    if len(sys.argv) < 2:
        print("Usage: ./fashion_mnist.py <base_dir>")
        sys.exit(0)
    base_dir = sys.argv[1]

    # ------------------------------------------------------------------ data
    print("Load dataset\n\n")
    s = time.time()
    X_train, y_train = data.LoadMNISTData('train', base_dir + 'orig/')
    # Labels are saved before they are one-hot encoded further below.
    train_y_path = base_dir + "y_train.npy"
    np.save(train_y_path, y_train)
    X_test, y_test = data.LoadMNISTData('test', base_dir + 'orig/')

    # Flattened copy of the full training set, handed to save_grid later.
    X_nd = np.copy(X_train)
    X_nd = X_nd.reshape((X_nd.shape[0], X_nd.shape[1] * X_nd.shape[2]))

    # Flattened copy of the samples that get projected to 2D.
    projection_size = 60000
    X_proj = np.copy(X_train[:projection_size])
    new_shape = (X_proj.shape[0], X_proj.shape[1] * X_proj.shape[2])
    X_proj = np.reshape(X_proj, new_shape)
    print("\ttime: ", time.time() - s)

    # ----------------------------------------------------------- projections
    # The same scaler object is refit for each projection via fit_transform.
    scaler = MinMaxScaler(feature_range=(0, 1))

    print("TSNE Projection")
    s = time.time()
    tsne = TSNE(n_components=2, random_state=420, perplexity=25.0,
                n_iter=3000, n_iter_without_progress=300, n_jobs=4)
    tsne_proj = tsne.fit_transform(X_proj)
    tsne_proj = scaler.fit_transform(tsne_proj)
    print("\ttime: ", time.time() - s)

    print("UMAP Projection")
    s = time.time()
    umap_proj = UMAP(n_components=2, random_state=420, n_neighbors=5,
                     min_dist=0.3).fit_transform(X_proj)
    umap_proj = scaler.fit_transform(umap_proj)
    print("\ttime: ", time.time() - s)

    # --------------------------------------------------- inverse projections
    # Every model below is fitted on the first `subset_size` samples only.
    subset_size = 15000
    X_sub = X_proj[:subset_size]
    tsne_sub = tsne_proj[:subset_size]
    umap_sub = umap_proj[:subset_size]

    k_ilamp = 20
    ilamp_kwargs = {'n_neighbors': k_ilamp}

    EPS = 50000
    rbf_ctrl_kwargs = dict(num_ctrl=400, mode='rols', kernel='gaussian',
                           eps=EPS, normalize_c=True, normalize_d=True)
    rbf_cluster_kwargs = dict(num_ctrl=50, mode='cluster', kernel='gaussian',
                              eps=EPS, normalize_c=True, normalize_d=True)

    ilamp_tsne_path = base_dir + "ilamp_tsne.joblib"
    ilamp_tsne = _fit_inverse_projection(
        "\n\nILAMP tSNE", ILAMP, ilamp_kwargs,
        X_sub, tsne_sub, ilamp_tsne_path)

    ilamp_umap_path = base_dir + "ilamp_umap.joblib"
    ilamp_umap = _fit_inverse_projection(
        "\n\nILAMP UMAP", ILAMP, ilamp_kwargs,
        X_sub, umap_sub, ilamp_umap_path)

    irbfcp_tsne_path = base_dir + "irbfcp_tsne.joblib"
    irbfcp_tsne = _fit_inverse_projection(
        "\n\nRBFInv CTRL PTS TSNE", RBFInv, rbf_ctrl_kwargs,
        X_sub, tsne_sub, irbfcp_tsne_path)

    irbfcp_umap_path = base_dir + "irbfcp_umap.joblib"
    irbfcp_umap = _fit_inverse_projection(
        "\n\nRBFInv CTRL PTS UMAP", RBFInv, rbf_ctrl_kwargs,
        X_sub, umap_sub, irbfcp_umap_path)

    irbfc_tsne_path = base_dir + "irbfc_tsne.joblib"
    irbfc_tsne = _fit_inverse_projection(
        "\n\nRBFInv CLUSTER TSNE", RBFInv, rbf_cluster_kwargs,
        X_sub, tsne_sub, irbfc_tsne_path)

    # Fix: this model was previously fitted on the t-SNE projection even
    # though it is saved and later used as the UMAP inverse projection.
    irbfc_umap_path = base_dir + "irbfc_umap.joblib"
    irbfc_umap = _fit_inverse_projection(
        "\n\nRBFInv CLUSTER UMAP", RBFInv, rbf_cluster_kwargs,
        X_sub, umap_sub, irbfc_umap_path)

    # NNInv.save takes a second path for the Keras weights file.
    nninv_tsne_path = base_dir + "nninv_tsne.joblib"
    nninv_tsne = _fit_inverse_projection(
        "\n\nNNInv TSNE", NNInv, {},
        X_sub, tsne_sub, nninv_tsne_path,
        base_dir + 'nninv_tsne_keras.hdf5')

    nninv_umap_path = base_dir + "nninv_umap.joblib"
    nninv_umap = _fit_inverse_projection(
        "\n\nNNInv UMAP", NNInv, {},
        X_sub, umap_sub, nninv_umap_path,
        base_dir + 'nninv_umap_keras.hdf5')

    # ------------------------------------------------------------ classifier
    # Add a trailing channel axis for the CNN and one-hot encode the labels.
    input_shape = (X_train.shape[1], X_train.shape[2], 1)
    X_train = X_train.reshape((X_train.shape[0], ) + input_shape)
    X_test = X_test.reshape((X_test.shape[0], ) + input_shape)
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)
    X_proj = X_proj.reshape((X_proj.shape[0], ) + input_shape)

    print("\n\nTraining classifier")
    s = time.time()
    clf_keras = CNNModel(input_shape, 10)
    clf_keras.fit(X_train, y_train, batch_size=128, epochs=14, verbose=1,
                  validation_data=(X_test, y_test))
    print("\tAccuracy on test data: ",
          clf_keras.evaluate(X_test, y_test, verbose=0))
    clf_keras_path = base_dir + "mnist_cnn.hdf5"
    clf_keras.save(clf_keras_path)

    # Predicted class index for every projected sample.
    y_proj_pred = np.argmax(clf_keras.predict(X_proj), axis=1)
    pred_path = base_dir + "y_pred_clf.npy"
    np.save(pred_path, y_proj_pred)

    clf = CLF(clf=clf_keras, clf_type='keras_cnn', clf_path=clf_keras_path,
              shape=input_shape)
    clf_path = base_dir + 'mnist_cnn.json'
    clf.save_json(clf_path)
    print("\ttime: ", time.time() - s)

    # ----------------------------------------------------------------- grids
    # N and R are forwarded unchanged to save_grid.
    N = 1
    R = 500

    # (header, grid file, UI file, type tag for save_json_ui,
    #  fitted model, path of the saved model, matching 2D projection)
    grid_jobs = [
        ("\n\nGRID ILAMP tSNE", 'grid_ilamp_tsne.joblib',
         'mnist_500_' + 'ui_ilamp_tsne.json', "ilamp",
         ilamp_tsne, ilamp_tsne_path, tsne_proj),
        ("\n\nGRID ILAMP UMAP", 'grid_ilamp_umap.joblib',
         'mnist_500_' + 'ui_ilamp_umap.json', "ilamp",
         ilamp_umap, ilamp_umap_path, umap_proj),
        # NOTE(review): this UI file name has a doubled underscore
        # ("mnist_500__ui_...") unlike the others; it is kept as-is in case
        # downstream consumers rely on it - confirm and normalise.
        ("\n\nGRID NNInv tSNE", 'grid_nninv_tsne.joblib',
         'mnist_500_' + '_ui_nninv_tsne.json', "nninv",
         nninv_tsne, nninv_tsne_path, tsne_proj),
        ("\n\nGRID NNInv UMAP", 'grid_nninv_umap.joblib',
         'mnist_500_' + 'ui_nninv_umap.json', "nninv",
         nninv_umap, nninv_umap_path, umap_proj),
        ("\n\nGRID RBFInv CTRL PTS tSNE", 'grid_irbfcp_tsne.joblib',
         'mnist_500_' + 'ui_irbfcp_tsne.json', "rbf",
         irbfcp_tsne, irbfcp_tsne_path, tsne_proj),
        ("\n\nGRID RBFInv CTRL PTS UMAP", 'grid_irbfcp_umap.joblib',
         'mnist_500_' + 'ui_irbfcp_umap.json', "rbf",
         irbfcp_umap, irbfcp_umap_path, umap_proj),
        ("\n\nGRID RBFInv CLUSTER tSNE", 'grid_irbfc_tsne.joblib',
         'mnist_500_' + 'ui_irbfc_tsne.json', "rbf",
         irbfc_tsne, irbfc_tsne_path, tsne_proj),
        ("\n\nGRID RBFInv CLUSTER UMAP", 'grid_irbfc_umap.joblib',
         'mnist_500_' + 'ui_irbfc_umap.json', "rbf",
         irbfc_umap, irbfc_umap_path, umap_proj),
    ]

    for (title, grid_name, ui_name, inv_type,
         inv_model, inv_model_path, proj) in grid_jobs:
        print(title)
        s = time.time()
        grid_path = base_dir + grid_name
        save_grid(grid_path, R, N, clf, inv_model, X_nd, proj, clf_path,
                  inv_model_path)
        ui_path = base_dir + ui_name
        save_json_ui(ui_path, grid_path, clf_path, inv_type, inv_model_path,
                     train_y_path, pred_path)
        print("\ttime: ", time.time() - s)