# Standardise the test features with sklearn.preprocessing.scale.
test_X = sklearn.preprocessing.scale(test_X)

# Disabled alternative: divide every row by its own sum.
# row_sums = train_X.sum(axis=1).astype(float)
# train_X = np.true_divide(train_X, row_sums[:, np.newaxis])
#
# row_sums = test_X.sum(axis=1).astype(float)
# test_X = np.true_divide(test_X, row_sums[:, np.newaxis])

###############################################################################
# Dictionary Learning
n_components = 500
n_samples_training = 1500

print("\nSparse Coding Dictionary Learning")
# pca = RandomizedPCA(n_components=n_dcomponents).fit(train_X)
dl = KSVDSparseCoding(
    n_components,
    n_nonzero_coefs=70,
    preserve_dc=False,
    approx=False,
    max_iter=5,
    verbose=1
)
# Only the first n_samples_training rows of train_X are used for fitting.
dl.fit(train_X[:n_samples_training])

# Query atom_bin_count() once and reuse the result for all three printed
# fields (shape, average, full list).
# NOTE(review): presumably per-atom usage counts -- confirm against
# KSVDSparseCoding.
atom_counts = dl.atom_bin_count()
# Single-argument print(...) with %-formatting prints the same text on
# Python 2 and Python 3; tuples are wrapped so "%s" receives them whole.
print("dl.atom_bin_count %s %s %s"
      % (atom_counts.shape, np.average(atom_counts), atom_counts.tolist()))

# Plot the values stored in dl.errors after fitting.
plt.plot(dl.errors)
plt.show()

print("X_train.shape %s" % (train_X.shape,))
print("Components shape %s" % (dl.dictionary.shape,))

# components = dl.components().reshape((n_components, n_features))
components = dl.dictionary

# Visualizing the components as images
print "y_train", y_train print "X_test.shape", X_test.shape print "y_test", y_test ############################################################################### # Write data to matlab matrix # scipy.io.savemat('/home/jonny2/PycharmProjects/ML-algorithms/Projects/GWAS-SparseCoding/psychiatric.mat', # mdict={'tr_dat': X_train, 'tt_dat': X_test, 'trls': y_train, 'ttls': y_test}) ############################################################################### # Sparse Representation n_components = 25 # dl = DictionaryLearning(n_components, max_iter=15, n_jobs=4, verbose=2) dl = KSVDSparseCoding(n_components, max_iter=5, verbose=1, approx=True) dl.fit(X_s) eigenfaces = dl.components_.T print("Projecting the input data on the learned dictionary bases") X_train_pca = sparse_encode(X_train, eigenfaces, algorithm='lasso_lars') X_test_pca = sparse_encode(X_test, eigenfaces, algorithm='lasso_lars') print "X_train_pca.shape", X_train_pca.shape print "X_test_pca.shape", X_test_pca.shape ############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
# Split into a training set and a test set: half of the samples are held out
# (test_size=0.50) and random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.50, random_state=42)

###############################################################################
# Learn a K-SVD sparse-coding dictionary on the training data (treated as an
# unlabeled dataset): unsupervised feature extraction. This replaces the
# RandomizedPCA step that is left commented out below, which is why the
# variable names and log messages still say "eigenfaces" / "pca".
n_components = 10

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X_train.shape[0]))
t0 = time()
# pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
dl = KSVDSparseCoding(
    n_components,
    preserve_dc=True,
    approx=False,
    max_iter=15,
    verbose=1
)
dl.fit(X_train)
print("done in %0.3fs" % (time() - t0))

# The learned dictionary stands in for the PCA components.
eigenfaces = dl.dictionary

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
# Training codes are read from the fitted model (dl.code); the test set is
# encoded against the learned dictionary. Both are transposed.
# NOTE(review): presumably the transpose yields (n_samples, n_components) --
# confirm against KSVDSparseCoding.
X_train_pca = dl.code.T
X_test_pca = dl.sparse_encode(X_test, eigenfaces).T
print("done in %0.3fs" % (time() - t0))

###############################################################################
# Train a SVM classification model