import time

import numpy as np
import scipy.sparse as sp
from numpy.testing import assert_array_almost_equal
from sklearn.utils import check_random_state

# DictMF is assumed to live in modl.dict_fact; adjust the import to the local
# package layout if needed. generate_synthetic, generate_sparse_synthetic,
# compute_code and Callback are module-level test helpers; minimal sketches of
# plausible implementations are given below.
from modl.dict_fact import DictMF

# Shared module-level RNG used as `random_state` throughout (assumed seed).
rng_global = check_random_state(0)


def test_dict_mf_reconstruction(backend):
    X, Q = generate_synthetic()
    dict_mf = DictMF(n_components=4, alpha=1e-4, max_n_iter=300, l1_ratio=0,
                     backend=backend, random_state=rng_global, reduction=1)
    dict_mf.fit(X)
    P = dict_mf.transform(X)
    Y = P.T.dot(dict_mf.components_)
    assert_array_almost_equal(X, Y, decimal=1)
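# The `backend` argument used by these tests is supplied by a pytest fixture
# that is not part of this excerpt. A minimal sketch, assuming the two
# backends exercised elsewhere in this file ('python' and 'c'):
import pytest


@pytest.fixture(params=['python', 'c'])
def backend(request):
    return request.param

# A similar fixture is assumed to supply `var_red`; its parameter values
# depend on the variance-reduction schemes DictMF supports and are not shown
# here.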
def test_dict_mf_reconstruction_reduction(backend):
    X, Q = generate_synthetic(n_features=20, n_samples=400,
                              dictionary_rank=5)
    dict_mf = DictMF(n_components=4, alpha=1e-6, max_n_iter=800, l1_ratio=0,
                     backend=backend, random_state=rng_global, reduction=2)
    dict_mf.fit(X)
    P = dict_mf.transform(X)
    Y = P.T.dot(dict_mf.components_)
    rel_error = np.sum((X - Y) ** 2) / np.sum(X ** 2)
    assert rel_error < 0.06
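# generate_synthetic is not defined in this excerpt. A minimal sketch,
# assuming it draws a random low-rank factorization and returns the data
# together with the ground-truth dictionary (default sizes are assumptions):
def generate_synthetic(n_features=50, n_samples=100, dictionary_rank=4):
    rng = check_random_state(0)
    Q = rng.randn(dictionary_rank, n_features)   # ground-truth dictionary
    P = rng.randn(dictionary_rank, n_samples)    # ground-truth code
    X = P.T.dot(Q)                               # noiseless low-rank data
    return X, Q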
def test_dict_mf_reconstruction_reduction_batch(backend):
    X, Q = generate_synthetic(n_features=20, n_samples=400,
                              dictionary_rank=5)
    dict_mf = DictMF(n_components=4, alpha=1e-6, max_n_iter=800, l1_ratio=0,
                     backend=backend, random_state=rng_global,
                     batch_size=2, reduction=2)
    dict_mf.fit(X)
    P = dict_mf.transform(X)
    Y = P.T.dot(dict_mf.components_)
    rel_error = np.sum((X - Y) ** 2) / np.sum(X ** 2)
    assert rel_error < 0.02
def test_dict_mf_reconstruction_sparse_dict(backend, var_red):
    X, Q = generate_sparse_synthetic(300, 4)
    rng = check_random_state(0)
    dict_init = Q + rng.randn(*Q.shape) * 0.01
    dict_mf = DictMF(n_components=4, alpha=1e-4, max_n_iter=400, l1_ratio=1,
                     dict_init=dict_init, backend=backend, var_red=var_red,
                     random_state=rng_global)
    dict_mf.fit(X)
    # Normalize both dictionaries and count atoms recovered up to sign.
    Q_rec = dict_mf.components_
    Q_rec /= np.sqrt(np.sum(Q_rec ** 2, axis=1))[:, np.newaxis]
    Q /= np.sqrt(np.sum(Q ** 2, axis=1))[:, np.newaxis]
    G = np.abs(Q_rec.dot(Q.T))
    recovered_maps = min(np.sum(np.any(G > 0.95, axis=1)),
                         np.sum(np.any(G > 0.95, axis=0)))
    assert recovered_maps >= 4
    # Smoke-test the transform path on the recovered dictionary.
    P = dict_mf.transform(X)
    Y = P.T.dot(dict_mf.components_)
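# generate_sparse_synthetic is not defined in this excerpt. A minimal sketch,
# assuming it builds a dictionary of atoms with disjoint supports (the
# argument names, code distribution and sample count are assumptions):
def generate_sparse_synthetic(n_features=300, n_components=4):
    rng = check_random_state(0)
    Q = np.zeros((n_components, n_features))
    block = n_features // n_components
    for k in range(n_components):
        Q[k, k * block:(k + 1) * block] = 1      # disjoint constant supports
    P = rng.rand(n_components, 200)              # nonnegative random codes
    X = P.T.dot(Q)
    return X, Q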
def single_run(n_components, var_red, projection, offset, learning_rate,
               reduction, alpha, data):
    cb = Callback(data)
    estimator = DictMF(n_components=n_components, batch_size=10,
                       reduction=reduction, l1_ratio=1, alpha=alpha,
                       max_n_iter=20000, projection=projection,
                       var_red=var_red, backend='python', verbose=3,
                       learning_rate=learning_rate, offset=offset,
                       random_state=0, callback=cb)
    estimator.fit(data)
    return cb, estimator
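# Callback is not defined in this excerpt. A minimal sketch, assuming the
# estimator calls it with itself after each dictionary update; it records the
# `iter` and `obj` series that the plotting code below reads:
class Callback(object):
    def __init__(self, X):
        self.X = X
        self.iter = []
        self.obj = []

    def __call__(self, mf):
        # `n_iter_` (number of processed samples) is an assumed estimator
        # attribute; fall back to a simple call counter.
        self.iter.append(getattr(mf, 'n_iter_', len(self.iter)))
        P = mf.transform(self.X)
        self.obj.append(np.sum((self.X - P.T.dot(mf.components_)) ** 2))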
def test_dict_mf_reconstruction_sparse(backend):
    X, Q = generate_synthetic(n_features=20, n_samples=200,
                              dictionary_rank=5)
    # Generate a simple sparse problem: split each dense row into two
    # half-observed rows with complementary random supports.
    sp_X = np.zeros((X.shape[0] * 2, X.shape[1]))
    for i in range(X.shape[0]):
        perm = rng_global.permutation(X.shape[1])
        even_range = perm[::2]
        odd_range = perm[1::2]
        sp_X[2 * i, even_range] = X[i, even_range]
        sp_X[2 * i + 1, odd_range] = X[i, odd_range]
    sp_X = sp.csr_matrix(sp_X)
    dict_mf = DictMF(n_components=4, alpha=1e-6, max_n_iter=500, l1_ratio=0,
                     backend=backend, random_state=rng_global)
    dict_mf.fit(sp_X)
    P = dict_mf.transform(X)
    Y = P.T.dot(dict_mf.components_)
    rel_error = np.sum((X - Y) ** 2) / np.sum(X ** 2)
    assert rel_error < 0.02
def test_dict_mf_exact_reconstruction_sparse_dict(backend):
    X, Q = generate_sparse_synthetic(300, 4)
    dict_init = Q + rng_global.randn(*Q.shape) * 0.01
    dict_mf = DictMF(n_components=4, alpha=1e-2, max_n_iter=300, l1_ratio=1,
                     dict_init=dict_init, backend=backend,
                     random_state=rng_global)
    dict_mf.fit(X)
    Q_rec = dict_mf.components_
    Q_rec /= np.sqrt(np.sum(Q_rec ** 2, axis=1))[:, np.newaxis]
    Q /= np.sqrt(np.sum(Q ** 2, axis=1))[:, np.newaxis]
    G = np.abs(Q_rec.dot(Q.T))
    recovered_maps = min(np.sum(np.any(G > 0.95, axis=1)),
                         np.sum(np.any(G > 0.95, axis=0)))
    assert recovered_maps >= 4
    # Much stronger check: the code computed on the recovered dictionary
    # should reconstruct the data almost exactly.
    P = compute_code(X, dict_mf.components_, alpha=1e-3)
    Y = P.T.dot(dict_mf.components_)
    assert_array_almost_equal(X, Y, decimal=2)
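# compute_code is not defined in this excerpt. A minimal sketch, assuming it
# solves the ridge-regularized coding problem
#     min_P ||X - P.T Q||_F^2 + alpha * ||P||_F^2
# for a fixed dictionary Q, which has the closed form
#     P = (Q Q.T + alpha * I)^-1 Q X.T:
def compute_code(X, Q, alpha):
    n_components = Q.shape[0]
    G = Q.dot(Q.T) + alpha * np.eye(n_components)   # regularized Gram matrix
    return np.linalg.solve(G, Q.dot(X.T))           # (n_components, n_samples)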
# Example-script fragment: faces_centered, n_components, name and
# plot_gallery are defined earlier in the decomposition example.
data = faces_centered
cb = Callback(data)
t0 = time.time()
estimator = DictMF(n_components=n_components, batch_size=10,
                   reduction=10, l1_ratio=1, alpha=0.001,
                   max_n_iter=10000, projection='partial',
                   backend='c', verbose=3, learning_rate=.8,
                   offset=0, random_state=2, callback=cb)
estimator.fit(data)
train_time = time.time() - t0
print("done in %0.3fs" % train_time)

import matplotlib.pyplot as plt

components_ = estimator.components_
plot_gallery('%s - Train time %.1fs' % (name, train_time),
             components_[:n_components])
P = estimator.transform(data)
# plot_gallery('Original faces', data[:n_components])
plot_gallery('Residual',
             data[:n_components]
             - P.T.dot(estimator.components_)[:n_components])
fig, ax = plt.subplots(1, 1, sharex=True)
ax.plot(cb.iter, cb.obj, label='P')