def test_dict_mf_reconstruction(backend):
    """Check that DictMF codes rebuild the synthetic data it was fitted on.

    The estimator is fitted with ``l1_ratio=0`` and ``reduction=1``; the
    codes returned by ``transform`` are multiplied back with the learned
    components and compared to the input at one-decimal precision.
    """
    # Only the data matrix is needed here; the generating dictionary is unused.
    X, _ = generate_synthetic()

    estimator = DictMF(
        n_components=4,
        alpha=1e-4,
        max_n_iter=300,
        l1_ratio=0,
        backend=backend,
        random_state=rng_global,
        reduction=1,
    )
    estimator.fit(X)

    code = estimator.transform(X)
    reconstruction = code.T.dot(estimator.components_)
    assert_array_almost_equal(X, reconstruction, decimal=1)
def test_dict_mf_reconstruction_reduction(backend):
    """Check the relative reconstruction error of DictMF with ``reduction=2``.

    The squared residual between the data and its reconstruction, divided
    by the squared norm of the data, must stay below 0.06.
    """
    X, _ = generate_synthetic(n_features=20, n_samples=400, dictionary_rank=5)

    estimator = DictMF(
        n_components=4,
        alpha=1e-6,
        max_n_iter=800,
        l1_ratio=0,
        backend=backend,
        random_state=rng_global,
        reduction=2,
    )
    estimator.fit(X)

    code = estimator.transform(X)
    reconstruction = code.T.dot(estimator.components_)

    residual_energy = np.sum((X - reconstruction) ** 2)
    signal_energy = np.sum(X ** 2)
    rel_error = residual_energy / signal_energy
    assert rel_error < 0.06
def test_dict_mf_reconstruction(backend):
    """Fit DictMF on synthetic data and verify the data can be rebuilt.

    Uses ``reduction=1`` and ``l1_ratio=0``. The reconstruction obtained
    from the transformed codes and the learned components has to agree
    with the input up to one decimal.
    """
    X, _ = generate_synthetic()

    # Keyword arguments are gathered first so the configuration reads as data.
    settings = dict(
        n_components=4,
        alpha=1e-4,
        max_n_iter=300,
        l1_ratio=0,
        backend=backend,
        random_state=rng_global,
        reduction=1,
    )
    model = DictMF(**settings)
    model.fit(X)

    codes = model.transform(X)
    rebuilt = codes.T.dot(model.components_)
    assert_array_almost_equal(X, rebuilt, decimal=1)
def test_dict_mf_reconstruction_reduction(backend):
    """Fit DictMF with ``reduction=2`` and bound the reconstruction error.

    The test passes when the ratio between the squared reconstruction
    residual and the squared data norm is strictly below 0.06.
    """
    X, _ = generate_synthetic(n_features=20, n_samples=400, dictionary_rank=5)

    settings = dict(
        n_components=4,
        alpha=1e-6,
        max_n_iter=800,
        l1_ratio=0,
        backend=backend,
        random_state=rng_global,
        reduction=2,
    )
    model = DictMF(**settings)
    model.fit(X)

    codes = model.transform(X)
    rebuilt = codes.T.dot(model.components_)

    squared_residual = np.sum((X - rebuilt) ** 2)
    squared_norm = np.sum(X ** 2)
    assert squared_residual / squared_norm < 0.06
def test_dict_mf_reconstruction_sparse_dict(backend, var_red):
    """Check that DictMF recovers the maps of a sparse synthetic dictionary.

    The estimator starts from the true dictionary plus small Gaussian
    noise. After fitting, learned and true maps are scaled to unit row norm
    and compared through the absolute value of their dot products: at least
    four maps must match with a value above 0.95, in both directions.
    """
    X, Q = generate_sparse_synthetic(300, 4)

    noise_rng = check_random_state(0)
    noisy_init = Q + noise_rng.randn(*Q.shape) * 0.01

    estimator = DictMF(
        n_components=4,
        alpha=1e-4,
        max_n_iter=400,
        l1_ratio=1,
        dict_init=noisy_init,
        backend=backend,
        var_red=var_red,
        random_state=rng_global,
    )
    estimator.fit(X)

    # Both normalisations are in place: `Q` is modified, and so is whatever
    # array `components_` hands back.
    learned = estimator.components_
    learned_norms = np.sqrt(np.sum(learned ** 2, axis=1))
    learned /= learned_norms[:, np.newaxis]
    true_norms = np.sqrt(np.sum(Q ** 2, axis=1))
    Q /= true_norms[:, np.newaxis]

    similarity = np.abs(learned.dot(Q.T))
    matched = similarity > 0.95
    learned_with_match = np.sum(np.any(matched, axis=1))
    true_with_match = np.sum(np.any(matched, axis=0))
    recovered_maps = min(learned_with_match, true_with_match)
    assert recovered_maps >= 4

    # Nothing is asserted on the reconstruction; the calls still exercise
    # `transform` on the fitted estimator.
    code = estimator.transform(X)
    reconstruction = code.T.dot(estimator.components_)
def single_run(n_components, var_red, projection, offset, learning_rate,
               reduction, alpha, data):
    """Fit one DictMF model on ``data`` and return it with its callback.

    A ``Callback`` is built from ``data`` and passed to the estimator. The
    remaining settings (batch size 10, ``l1_ratio=1``, 20000 iterations at
    most, python backend, verbosity 3, random state 0) are fixed.

    Returns
    -------
    (callback, estimator): the callback object and the fitted estimator.
    """
    monitor = Callback(data)

    model = DictMF(
        n_components=n_components,
        batch_size=10,
        reduction=reduction,
        l1_ratio=1,
        alpha=alpha,
        max_n_iter=20000,
        projection=projection,
        var_red=var_red,
        backend='python',
        verbose=3,
        learning_rate=learning_rate,
        offset=offset,
        random_state=0,
        callback=monitor,
    )
    model.fit(data)

    return monitor, model
def test_dict_mf_reconstruction_sparse_dict(backend):
    """Check that DictMF recovers the maps of a sparse synthetic dictionary.

    Fitting starts from the true dictionary perturbed by small Gaussian
    noise. Learned and true maps are then scaled to unit row norm, and at
    least four of them must pair up with an absolute dot product above
    0.95, counted both per learned map and per true map.
    """
    X, Q = generate_sparse_synthetic(300, 4)

    noise_rng = check_random_state(0)
    perturbed = Q + noise_rng.randn(*Q.shape) * 0.01

    model = DictMF(
        n_components=4,
        alpha=1e-4,
        max_n_iter=400,
        l1_ratio=1,
        dict_init=perturbed,
        backend=backend,
        random_state=rng_global,
    )
    model.fit(X)

    # In-place scaling: `Q` and the array returned by `components_` change.
    maps = model.components_
    maps /= np.sqrt(np.sum(maps ** 2, axis=1))[:, np.newaxis]
    Q /= np.sqrt(np.sum(Q ** 2, axis=1))[:, np.newaxis]

    overlap = np.abs(maps.dot(Q.T))
    strong = overlap > 0.95
    per_learned = np.sum(np.any(strong, axis=1))
    per_true = np.sum(np.any(strong, axis=0))
    recovered_maps = min(per_learned, per_true)
    assert recovered_maps >= 4

    # No assertion follows; these calls only run `transform` and the
    # reconstruction product on the fitted model.
    codes = model.transform(X)
    rebuilt = codes.T.dot(model.components_)
def test_dict_mf_reconstruction_sparse(backend):
    """Fit DictMF on a sparse matrix and check it reconstructs the dense data.

    Every row ``i`` of the synthetic matrix is split into two complementary
    rows of a matrix twice as tall: a random half of the features is copied
    to row ``2 * i`` and the other half to row ``2 * i + 1``. The estimator
    is fitted on the CSR version of that matrix, and the relative squared
    reconstruction error on the original dense data must be below 0.02.
    """
    X, _ = generate_synthetic(n_features=20, n_samples=200, dictionary_rank=5)
    sp_X = np.zeros((X.shape[0] * 2, X.shape[1]))
    # Generate a sparse simple problem
    for i in range(X.shape[0]):
        perm = rng_global.permutation(X.shape[1])
        even_range = perm[::2]
        odd_range = perm[1::2]
        sp_X[2 * i, even_range] = X[i, even_range]
        # The complementary half goes to the next row. Writing it to row
        # 2 * i as well would rebuild the dense row and leave every odd row
        # of sp_X filled with zeros.
        sp_X[2 * i + 1, odd_range] = X[i, odd_range]
    sp_X = sp.csr_matrix(sp_X)

    dict_mf = DictMF(n_components=4, alpha=1e-6, max_n_iter=500, l1_ratio=0,
                     backend=backend, random_state=rng_global)
    dict_mf.fit(sp_X)

    P = dict_mf.transform(X)
    Y = P.T.dot(dict_mf.Q_)
    rel_error = np.sum((X - Y) ** 2) / np.sum(X ** 2)
    # NOTE(review): the 0.02 bound was written while the training matrix was
    # effectively dense; confirm it still holds on the truly sparse input.
    assert rel_error < 0.02
def test_dict_mf_reconstruction_sparse_dict(backend):
    """Check map recovery and data reconstruction on a sparse dictionary.

    The estimator is initialised with the true dictionary plus small
    Gaussian noise. After fitting, learned and true maps are scaled to unit
    row norm; at least four maps must pair up with an absolute dot product
    above 0.95. The data is then re-encoded with ``compute_code`` and must
    be reconstructed to two decimals.
    """
    X, Q = generate_sparse_synthetic(300, 4)
    dict_init = Q + rng_global.randn(*Q.shape) * 0.01
    dict_mf = DictMF(n_components=4, alpha=1e-2, max_n_iter=300, l1_ratio=1,
                     dict_init=dict_init, backend=backend,
                     random_state=rng_global)
    dict_mf.fit(X)

    # In-place normalisation: `Q` and the array returned by `Q_` are modified.
    Q_rec = dict_mf.Q_
    Q_rec /= np.sqrt(np.sum(Q_rec ** 2, axis=1))[:, np.newaxis]
    Q /= np.sqrt(np.sum(Q ** 2, axis=1))[:, np.newaxis]

    G = np.abs(Q_rec.dot(Q.T))
    recovered_maps = min(np.sum(np.any(G > 0.95, axis=1)),
                         np.sum(np.any(G > 0.95, axis=0)))
    assert recovered_maps >= 4

    P = compute_code(X, dict_mf.Q_, alpha=1e-3)
    Y = P.T.dot(dict_mf.Q_)
    # Much stronger check: the data itself must be rebuilt to two decimals.
    # It is asserted once; the identical assertion used to appear twice in a
    # row.
    assert_array_almost_equal(X, Y, decimal=2)
###############################################################################
# Do the estimation and plot it

name = 'MODL'
print("Extracting the top %d %s..." % (n_components, name))

# The timer starts before the callback is built, so its construction is part
# of the reported training time.
t0 = time.time()
data = faces_centered
cb = Callback(data)

estimator = DictMF(
    n_components=n_components,
    batch_size=10,
    reduction=10,
    l1_ratio=1,
    alpha=0.001,
    max_n_iter=10000,
    projection='partial',
    backend='c',
    verbose=3,
    learning_rate=0.8,
    offset=0,
    random_state=2,
    callback=cb,
)
estimator.fit(data)

train_time = time.time() - t0
print("done in %0.3fs" % train_time)

# matplotlib is imported only once the timed section is over.
import matplotlib.pyplot as plt

components_ = estimator.components_
plot_gallery(
    '%s - Train time %.1fs' % (name, train_time),
    components_[:n_components],
)

# Codes of the training data under the fitted estimator.
P = estimator.transform(data)
def fit(self, imgs, y=None, confounds=None):
    """Compute the mask and learn the dictionary maps across subjects

    The masked records are streamed one by one, in a random order that is
    redrawn for every epoch, into ``DictMF.partial_fit``. The learned
    dictionary is then normalised row by row and stored in
    ``self.components_``.

    Parameters
    ----------
    imgs: list of Niimg-like objects
        See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
        Data on which the decomposition must be calculated. If this is a
        list, the affine is considered the same for all.

    y: ignored
        Not used by this method.

    confounds: CSV file path or 2D matrix
        This parameter is passed to nilearn.signal.clean. Please see the
        related documentation for details

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.n_epochs`` is smaller than one.
    """
    # Base logic for decomposition estimators
    BaseDecomposition.fit(self, imgs)

    random_state = check_random_state(self.random_state)

    n_epochs = int(self.n_epochs)
    if self.n_epochs < 1:
        # `{!r}` formats the positional argument; the former `{r}` was a
        # named field and made `format` raise KeyError instead of reaching
        # this ValueError.
        raise ValueError('Number of n_epochs should be at least one,'
                         ' got {!r}'.format(self.n_epochs))

    if confounds is None:
        confounds = itertools.repeat(None)

    dict_mf = DictMF(n_components=self.n_components,
                     alpha=self.alpha,
                     reduction=self.reduction,
                     batch_size=self.batch_size,
                     random_state=random_state,
                     l1_ratio=1,
                     backend=self.backend,
                     verbose=max(0, self.verbose - 1))

    data_list = mask_and_reduce(self.masker_, imgs, confounds,
                                n_components=self.n_components,
                                reduction_method=None,
                                random_state=self.random_state,
                                memory=self.memory,
                                memory_level=max(0, self.memory_level - 1),
                                as_shelved_list=True,
                                verbose=max(0, self.verbose - 1),
                                n_jobs=self.n_jobs)

    # One permutation of the records per epoch. The list is built eagerly so
    # that every permutation is drawn before the first `partial_fit` call:
    # the same RandomState instance is also handed to DictMF.
    data_list = itertools.chain(*[random_state.permutation(data_list)
                                  for _ in range(n_epochs)])
    for record, data in enumerate(data_list):
        if self.verbose:
            print('Streaming record %s' % record)
        # Records are shelved (`as_shelved_list=True`); `get` loads the data.
        data = data.get()
        dict_mf.partial_fit(data)

    self.components_ = dict_mf.Q_

    # Post processing normalization: unit l2 norm per component, leaving
    # all-zero components untouched.
    S = np.sqrt(np.sum(self.components_ ** 2, axis=1))
    S[S == 0] = 1
    self.components_ /= S[:, np.newaxis]

    # Flip the sign of every component that has fewer positive than negative
    # entries (the comparison counts entries, not their magnitudes).
    for component in self.components_:
        if np.sum(component > 0) < np.sum(component < 0):
            component *= -1

    return self
def fit(self, imgs, y=None, confounds=None):
    """Compute the mask and learn the dictionary maps across subjects

    Masked records are fed one at a time to ``DictMF.partial_fit``, in a
    freshly shuffled order for each epoch. The resulting dictionary is
    normalised per component and exposed as ``self.components_``.

    Parameters
    ----------
    imgs: list of Niimg-like objects
        See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
        Data on which the decomposition must be calculated. If this is a
        list, the affine is considered the same for all.

    y: ignored
        Not used by this method.

    confounds: CSV file path or 2D matrix
        This parameter is passed to nilearn.signal.clean. Please see the
        related documentation for details

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.n_epochs`` is smaller than one.
    """
    # Base logic for decomposition estimators
    BaseDecomposition.fit(self, imgs)

    random_state = check_random_state(self.random_state)

    n_epochs = int(self.n_epochs)
    if self.n_epochs < 1:
        # The message previously used the named field `{r}` with a
        # positional argument, so `format` raised KeyError before the
        # ValueError could be built; `{!r}` is the positional repr form.
        raise ValueError('Number of n_epochs should be at least one,'
                         ' got {!r}'.format(self.n_epochs))

    if confounds is None:
        confounds = itertools.repeat(None)

    dict_mf = DictMF(n_components=self.n_components,
                     alpha=self.alpha,
                     reduction=self.reduction,
                     batch_size=self.batch_size,
                     random_state=random_state,
                     l1_ratio=1,
                     backend=self.backend,
                     verbose=max(0, self.verbose - 1))

    data_list = mask_and_reduce(self.masker_, imgs, confounds,
                                n_components=self.n_components,
                                reduction_method=None,
                                random_state=self.random_state,
                                memory=self.memory,
                                memory_level=max(0, self.memory_level - 1),
                                as_shelved_list=True,
                                verbose=max(0, self.verbose - 1),
                                n_jobs=self.n_jobs)

    # All per-epoch permutations are materialised up front: `random_state`
    # is shared with DictMF, so drawing them lazily would interleave the
    # shuffling with whatever draws `partial_fit` makes.
    data_list = itertools.chain(
        *[random_state.permutation(data_list) for _ in range(n_epochs)])
    for record, data in enumerate(data_list):
        if self.verbose:
            print('Streaming record %s' % record)
        # `as_shelved_list=True` yields shelved records; `get` loads one.
        data = data.get()
        dict_mf.partial_fit(data)

    self.components_ = dict_mf.Q_

    # Post processing normalization: divide each component by its l2 norm,
    # using 1 for all-zero components to avoid a division by zero.
    S = np.sqrt(np.sum(self.components_ ** 2, axis=1))
    S[S == 0] = 1
    self.components_ /= S[:, np.newaxis]

    # Flip signs so that no component has more negative than positive
    # entries (entries are counted; their magnitudes are not summed).
    for component in self.components_:
        if np.sum(component > 0) < np.sum(component < 0):
            component *= -1

    return self
###############################################################################
# Do the estimation and plot it

name = 'MODL'
print("Extracting the top %d %s..." % (n_components, name))

# Timing covers the callback construction as well as the fit.
t0 = time.time()
data = faces_centered
cb = Callback(data)

estimator = DictMF(
    n_components=n_components,
    batch_size=10,
    reduction=10,
    l1_ratio=1,
    alpha=0.001,
    max_n_iter=10000,
    projection='partial',
    backend='c',
    verbose=3,
    learning_rate=0.8,
    offset=0,
    random_state=2,
    callback=cb,
)
estimator.fit(data)

train_time = time.time() - t0
print("done in %0.3fs" % train_time)

# Imported after the timed section, as in the original layout.
import matplotlib.pyplot as plt

components_ = estimator.components_
plot_gallery(
    '%s - Train time %.1fs' % (name, train_time),
    components_[:n_components],
)