def test_dict_learning_numerical_consistency(method):
    # verify numerical consistency between np.float32 and np.float64
    rtol = 1e-6
    n_components = 4
    alpha = 2

    U_64, V_64, _ = dict_learning(
        X.astype(np.float64),
        n_components=n_components,
        alpha=alpha,
        random_state=0,
        method=method,
    )
    U_32, V_32, _ = dict_learning(
        X.astype(np.float32),
        n_components=n_components,
        alpha=alpha,
        random_state=0,
        method=method,
    )

    # The optimal solution (U*, V*) is not unique.
    # If (U*, V*) is an optimal solution, (-U*, -V*) is also optimal,
    # and (column-permuted U*, row-permuted V*) is optimal too,
    # as long as the product UV is unchanged.
    # So UV, ||U||_1,1 and sum(||V_k||_2^2) are verified here
    # instead of comparing U and V directly.
    assert_allclose(np.matmul(U_64, V_64), np.matmul(U_32, V_32), rtol=rtol)
    assert_allclose(np.sum(np.abs(U_64)), np.sum(np.abs(U_32)), rtol=rtol)
    assert_allclose(np.sum(V_64**2), np.sum(V_32**2), rtol=rtol)
    # verify that the obtained solution is not degenerate
    assert np.mean(U_64 != 0.0) > 0.05
    assert np.count_nonzero(U_64 != 0.0) == np.count_nonzero(U_32 != 0.0)
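# The test above assumes a module-level data matrix X and a pytest
# parametrization over the coding method. A minimal sketch of those assumed
# fixtures (names, shapes and the parametrized values are assumptions, not
# taken from the original test module):
import numpy as np
import pytest
from sklearn.decomposition import dict_learning

rng_global = np.random.RandomState(0)
X = rng_global.randn(10, 8)  # small random design matrix used by the tests


@pytest.mark.parametrize("method", ("lars", "cd"))
def test_dict_learning_shapes_sketch(method):
    # Sanity check that the assumed fixtures work with dict_learning.
    U, V, _ = dict_learning(X, n_components=4, alpha=2, random_state=0, method=method)
    assert U.shape == (10, 4)
    assert V.shape == (4, 8)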
def Learn_Dict(device, length, overlap):
    # Slice the signal into overlapping windows of the given length.
    X_device = []
    for i1 in range(0, len(device) - length, overlap):
        X_device.append(device[i1:i1 + length, 1])
    X_device = np.array(X_device, np.float32)

    # Drop low-energy windows and keep at most 6000 randomly chosen ones.
    sum_ele = np.sum(X_device, axis=1)
    X_device = X_device[sum_ele >= 5000]
    np.random.shuffle(X_device)
    X_device = X_device[:6000, :]
    # X_device = X_device - np.reshape(np.mean(X_device, axis=1), (-1, 1))

    # With return_n_iter=True this is a (code, dictionary, errors, n_iter) tuple.
    D_device = decomposition.dict_learning(X_device, n_components=500, alpha=0.7,
                                           max_iter=20, return_n_iter=True,
                                           n_jobs=1, method='lars', tol=1e-8)
    return D_device
def __init__(self, X, n_components, alpha, save=None):
    code, d, errors = decomp.dict_learning(X, n_components, alpha,
                                           n_jobs=-1, max_iter=50, tol=1e-04)
    self.code = code
    self.d = d
    if save:
        np.save(save, self.d)
def test_sparse_encode(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    _, dictionary, _ = decomposition.dict_learning(iris.data, 2, 1,
                                                   random_state=self.random_state)

    result = df.decomposition.sparse_encode(dictionary)
    expected = decomposition.sparse_encode(iris.data, dictionary)

    self.assertIsInstance(result, pdml.ModelFrame)
    self.assert_index_equal(result.index, df.data.index)
    self.assert_numpy_array_almost_equal(result.values, expected)
def getDict(self, num_init, alpha, num_sample=None):
    # assert(num_init <= np.min(self.raw_image_shape))
    if num_sample is None:
        num_sample = 5000
    data = self.getNormSample(num_sample)
    print("Running sklearn dict_learn for initial dictionary")
    print(type(data), data.shape)
    # dict_learning returns (code, dictionary, errors); keep only the dictionary.
    dictionary = dict_learning(data, num_init, alpha, verbose=2,
                               n_jobs=-1, max_iter=50)[1]
    print("Done")
    return dictionary
def test_sparse_encode(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    _, dictionary, _ = decomposition.dict_learning(iris.data, 2, 1,
                                                   random_state=self.random_state)

    result = df.decomposition.sparse_encode(dictionary)
    expected = decomposition.sparse_encode(iris.data, dictionary)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_index_equal(result.index, df.data.index)
    self.assert_numpy_array_almost_equal(result.values, expected)
def test_dict_learning_dtype_match(data_type, expected_type, method):
    # Verify output matrix dtype
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary, _ = dict_learning(
        X.astype(data_type),
        n_components=n_components,
        alpha=1,
        random_state=rng,
        method=method,
    )
    assert code.dtype == expected_type
    assert dictionary.dtype == expected_type
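# A hedged sketch of the parametrization that presumably drives the dtype test
# above (the exact dtype pairs are an assumption): float inputs keep their
# precision, while integer inputs are promoted to float64.
import numpy as np
import pytest
from sklearn.decomposition import dict_learning

X_demo = np.random.RandomState(0).randn(12, 10) * 10  # hypothetical demo data


@pytest.mark.parametrize("method", ("lars", "cd"))
@pytest.mark.parametrize(
    "data_type, expected_type",
    [
        (np.float32, np.float32),
        (np.float64, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
    ],
)
def test_dict_learning_dtype_match_sketch(data_type, expected_type, method):
    code, dictionary, _ = dict_learning(
        X_demo.astype(data_type), n_components=8, alpha=1,
        random_state=0, method=method)
    assert code.dtype == expected_type
    assert dictionary.dtype == expected_type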
def test_dict_learning(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    result = df.decomposition.dict_learning(2, 1, random_state=self.random_state)
    expected = decomposition.dict_learning(iris.data, 2, 1,
                                           random_state=self.random_state)

    self.assertEqual(len(result), 3)

    self.assertIsInstance(result[0], pdml.ModelFrame)
    tm.assert_index_equal(result[0].index, df.data.index)
    self.assert_numpy_array_almost_equal(result[0].values, expected[0])

    self.assertIsInstance(result[1], pdml.ModelFrame)
    tm.assert_index_equal(result[1].columns, df.data.columns)
    self.assert_numpy_array_almost_equal(result[1].values, expected[1])

    self.assert_numpy_array_almost_equal(result[2], expected[2])
def test_dictionary_learning(self):
    """Test that admm dictionary learning behaves like sklearn's dict_learning."""
    from sklearn.decomposition import dict_learning

    rng_global = np.random.RandomState(0)
    n_samples, n_features = 10, 8
    # X = rng_global.randn(n_features, n_samples)
    X = rng_global.randn(n_samples, n_features)

    rng = np.random.RandomState(0)
    n_components = 6
    code, dictionary, errors = dict_learning(X, n_components=n_components,
                                             alpha=1, random_state=rng)
    np.testing.assert_almost_equal(code.shape, (n_samples, n_components))
    np.testing.assert_almost_equal(dictionary.shape, (n_components, n_features))
    np.testing.assert_almost_equal(np.dot(code, dictionary).shape, X.shape)

    rng = np.random.RandomState(0)
    from lasso import dict_learning as admm_dict_learning
    code2, dictionary2, errors2 = admm_dict_learning(
        X, method='admm', n_components=n_components, alpha=1, random_state=rng)
    np.testing.assert_almost_equal(code2.shape, (n_samples, n_components))
    np.testing.assert_almost_equal(dictionary2.shape, (n_components, n_features))
    np.testing.assert_almost_equal(np.dot(code2, dictionary2).shape, X.shape)

    # Compare the ADMM solution with the one obtained from sklearn.
    np.testing.assert_array_almost_equal(code, code2)
    np.testing.assert_array_almost_equal(dictionary, dictionary2)
def test_dict_learning_lars_positive_parameter():
    n_components = 5
    alpha = 1
    err_msg = "Positive constraint not supported for 'lars' coding method."
    with pytest.raises(ValueError, match=err_msg):
        dict_learning(X, n_components, alpha=alpha, positive_code=True)
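# For contrast with the error above, a minimal sketch (data and parameter
# values are arbitrary, not from the original test) of requesting a
# non-negative code with the coordinate-descent solver, which does accept
# positive_code:
import numpy as np
from sklearn.decomposition import dict_learning

X_demo = np.random.RandomState(0).randn(10, 8)
code, dictionary, errors = dict_learning(
    X_demo, 5, alpha=1, method="cd", positive_code=True, random_state=0)
assert np.all(code >= 0)  # the sparse codes are constrained to be non-negative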
scaler = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(N)
X_train = scaler.transform(N)

# Get the dependency (correlation) matrix for the group.
Sigma = np.corrcoef(np.transpose(X_train))

for i in tqdm(range(total_sparsity)):
    scaler = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(N)
    X_train = scaler.transform(N)
    temp_scalar = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(T)
    X_test = temp_scalar.transform(T)

    x = K
    print("Sparsity quotient", x)
    P = dict_learning(Sigma, n_components=n_comp, alpha=x, max_iter=100, tol=1e-08)
    # Make sure the sparse components are laid out as (n_comp, n_features).
    if P[1].shape[0] == n_comp:
        Trans = P[1]
    else:
        Trans = np.transpose(P[1])

    # Project the train and test samples onto each sparse component
    # (row-by-row equivalent of X_train @ Trans.T).
    Temp_proj = []
    Temp_proj_test = []
    for pc in Trans:
        Temp_proj.append(np.array(
            [np.dot(X_train[p, :], pc) for p in range(X_train.shape[0])]))
        Temp_proj_test.append(np.array(
            [np.dot(X_test[p, :], pc) for p in range(X_test.shape[0])]))
tr_images, tr_labels = mndata.load_training()
te_images, te_labels = mndata.load_testing()

n = 1000
train_images = np.array(tr_images)
train_labels = np.array(tr_labels)
test_images = np.array(te_images)
test_labels = np.array(te_labels)
# print(test_images.shape)

# svm = SVC()
# svm.fit(train_images[:n, :], train_labels[:n])
# pred_labels = svm.predict(test_images)
# print("SVM Accuracy:", sum(test_labels == pred_labels) / len(pred_labels))

# knn = KNeighborsClassifier()
# knn.fit(train_images[:n, :], train_labels[:n])
# pred_labels = knn.predict(test_images)
# print("KNN Accuracy:", sum(test_labels == pred_labels) / len(pred_labels))

# dict_learning returns (code, dictionary, errors); keep the code and dictionary.
U, W, _ = dict_learning(train_images[:n, :], 1024, 0.1)
print(U.shape)

# knn = KNeighborsClassifier()
# knn.fit(train_images[:n, :], train_labels[:n])
# pred_labels = knn.predict(test_images)
# print("KNN Accuracy:", sum(test_labels == pred_labels) / len(pred_labels))
# svm.fit(train_images, )
def optimize(self, max_iter=10):
    print('doing dictionary learning')
    # U, V = dict_learning_online(self.H, self.k, self.alpha, verbose=2, method='lars')
    U, V, _ = dict_learning(self.H, self.k, self.alpha, verbose=2,
                            method='lars', max_iter=max_iter)
    self.X = U
    self.W = V.T