def test_tol_iter(self, power, maxiter, tol):
    """Run GoDec with the given power/maxiter/tol and check the X output.

    The first value returned by ``rpca_godec`` is compared against the
    reference matrix ``self.A`` through ``compare_norms``.
    """
    godec_options = dict(power=power, maxiter=maxiter, tol=tol)
    low_rank, _E, _U, _S, _V = rpca_godec(
        self.X, rank=self.rank, **godec_options)
    compare_norms(low_rank, self.A)
def test_regularization(self):
    """Run GoDec with the fixture's ``lambda1`` and check the X output.

    The norm of ``X - self.A`` divided by ``self.m * self.n`` must be
    below ``self.tol``.
    """
    low_rank, _E, _G, _U, _S, _V = rpca_godec(
        self.X,
        rank=self.rank,
        lambda1=self.lambda1,
    )

    # Error of the low-rank component, scaled by m * n
    residual_norm = np.linalg.norm(low_rank - self.A)
    scaled_error = residual_norm / (self.m * self.n)
    nt.assert_true(scaled_error < self.tol)
def test_regularization(self):
    """Check GoDec's X output when ``lambda1`` comes from the fixture.

    Passes when ``norm(X - self.A) / (self.m * self.n)`` is smaller than
    ``self.tol``.
    """
    outputs = rpca_godec(self.X, rank=self.rank, lambda1=self.lambda1)
    # Six outputs are expected; only the first one is inspected here
    recovered, _E, _G, _U, _S, _V = outputs

    # Error of the low-rank component, scaled by m * n
    n_scale = self.m * self.n
    error = np.linalg.norm(recovered - self.A) / n_scale
    nt.assert_true(error < self.tol)
def test_regularization(self):
    """Run GoDec with ``lambda1=0.01`` and check the X output.

    The first value returned by ``rpca_godec`` is compared against
    ``self.A`` through ``compare_norms``.
    """
    low_rank, _E, _U, _S, _V = rpca_godec(
        self.X,
        rank=self.rank,
        lambda1=0.01,
    )
    compare_norms(low_rank, self.A)
def test_default(self):
    """Run GoDec with only the rank specified and check the X output.

    The first value returned by ``rpca_godec`` is compared against
    ``self.A`` through ``compare_norms``.
    """
    outputs = rpca_godec(self.X, rank=self.rank)
    # Five outputs are expected; only the first one is inspected here
    low_rank, _E, _U, _S, _V = outputs
    compare_norms(low_rank, self.A)
def test_tol(self):
    """Run GoDec with ``tol=1e-4`` and check the X output.

    Passes when ``norm(X - self.A) / (self.m * self.n)`` is smaller than
    ``self.tol``.
    """
    low_rank, _E, _G, _U, _S, _V = rpca_godec(
        self.X, rank=self.rank, tol=1e-4)

    # Error of the low-rank component, scaled by m * n
    difference = low_rank - self.A
    scaled_error = np.linalg.norm(difference) / (self.m * self.n)
    nt.assert_true(scaled_error < self.tol)
def decomposition(self,
                  normalize_poissonian_noise=False,
                  algorithm='svd',
                  output_dimension=None,
                  centre=None,
                  auto_transpose=True,
                  navigation_mask=None,
                  signal_mask=None,
                  var_array=None,
                  var_func=None,
                  polyfit=None,
                  reproject=None,
                  return_info=False,
                  **kwargs):
    """Decomposition with a choice of algorithms

    The results are stored in self.learning_results

    Parameters
    ----------
    normalize_poissonian_noise : bool
        If True, scale the SI to normalize Poissonian noise
    algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
        'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' |
        'RPCA_GoDec' | 'ORPCA'
    output_dimension : None or int
        Number of components to keep/calculate. It is mandatory for the
        'mlpca', 'fast_mlpca', 'RPCA_GoDec' and 'ORPCA' algorithms.
    centre : None | 'variables' | 'trials'
        If None no centring is applied. If 'variable' the centring will
        be performed in the variable axis. If 'trials', the centring will
        be performed in the 'trials' axis. It only has effect when using
        the svd or fast_svd algorithms. With 'sklearn_pca' the stored
        value is always 'trials'.
    auto_transpose : bool
        If True, automatically transposes the data to boost performance.
        Only has effect when using the svd or fast_svd algorithms.
    navigation_mask : boolean numpy array
        The navigation locations marked as True are not used in the
        decomposition.
    signal_mask : boolean numpy array
        The signal locations marked as True are not used in the
        decomposition.
    var_array : numpy array
        Array of variance for the maximum likelihood PCA algorithm.
        Mutually exclusive with `var_func`.
    var_func : function or numpy array
        If function, it is applied to the (masked) dataset to obtain the
        var_array. Alternatively, it can be an array with the
        coefficients of a polynomial that is evaluated on the (masked)
        dataset with numpy.polyval.
    polyfit : None or numpy array
        Kept for backward compatibility. If it is not None and `var_func`
        is given but is not callable, these polynomial coefficients are
        used instead of `var_func`.
    reproject : None | signal | navigation | both
        If not None, the results of the decomposition will be projected
        in the selected masked area.
    return_info: bool, default False
        The result of the decomposition is stored internally. However,
        some algorithms generate some extra information that is not
        stored. If True (the default is False) return any extra
        information if available
    **kwargs
        Passed to the scikit-learn estimator constructor for the
        'sklearn_pca', 'nmf', 'sparse_pca' and 'mini_batch_sparse_pca'
        algorithms and to the solver for 'RPCA_GoDec' and 'ORPCA'.

    Returns
    -------
    None, (X, E) or estimator
        None unless `return_info` is True. If 'algorithm' ==
        'RPCA_GoDec' or 'ORPCA' and 'return_info' is True, returns the
        low-rank (X) and sparse (E) matrices from robust PCA. For the
        scikit-learn based algorithms the fitted estimator object is
        returned. None is also returned, after logging a warning, when
        the data is not of a float type.

    Raises
    ------
    AttributeError
        If the navigation size is smaller than 2.
    ValueError
        If `output_dimension` is missing for an algorithm that requires
        it, if both `var_array` and `var_func` are given, if `var_func`
        is neither a function nor usable polynomial coefficients or if
        the algorithm is not recognised.
    ImportError
        If a scikit-learn based algorithm is requested but scikit-learn
        is not installed.

    See also
    --------
    plot_decomposition_factors, plot_decomposition_loadings, plot_lev

    """
    to_return = None
    # Check if it is the wrong data type
    if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
        _logger.warning(
            'To perform a decomposition the data must be of the float '
            'type. You can change the type using the change_dtype method'
            ' e.g. s.change_dtype(\'float64\')\n'
            'Nothing done.')
        return

    if self.axes_manager.navigation_size < 2:
        raise AttributeError("It is not possible to decompose a dataset "
                             "with navigation_size < 2")

    # Validate the arguments before anything is stored on self, so that
    # an invalid call does not leave a full copy of the data behind.
    if algorithm == 'mlpca':
        if normalize_poissonian_noise is True:
            _logger.warning(
                "It makes no sense to do normalize_poissonian_noise with "
                "the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
            normalize_poissonian_noise = False
        if output_dimension is None:
            raise ValueError("With the MLPCA algorithm the "
                             "output_dimension must be specified")
    if algorithm == 'RPCA_GoDec' or algorithm == 'ORPCA':
        if output_dimension is None:
            raise ValueError("With the robust PCA algorithms ('RPCA_GoDec' "
                             "and 'ORPCA'), the output_dimension "
                             "must be specified")

    # backup the original data
    self._data_before_treatments = self.data.copy()
    # set the output target (peak results or not?)
    target = LearningResults()

    # Apply pre-treatments
    # Transform the data in a line spectrum
    self._unfolded4decomposition = self.unfold()
    try:
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()

        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask,)
        _logger.info('Performing decomposition analysis')
        # The rest of the code assumes that the first data axis
        # is the navigation axis. We transpose the data if that is not the
        # case.
        dc = (self.data if self.axes_manager[0].index_in_array == 0
              else self.data.T)

        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask

        # WARNING: signal_mask and navigation_mask values are now their
        # negatives i.e. True -> False and vice versa. However, the
        # stored value (at the end of the method) coincides with the
        # input masks

        # Reset the explained_variance which is not set by all the
        # algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None

        if algorithm == 'svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'fast_svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :],
                fast=True,
                output_dimension=output_dimension,
                centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'sklearn_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError(
                    'sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'
            if return_info:
                to_return = sk

        elif algorithm == 'nmf':
            if import_sklearn.sklearn_installed is False:
                raise ImportError(
                    'sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            if return_info:
                to_return = sk

        elif algorithm == 'sparse_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError(
                    'sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.SparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            if return_info:
                to_return = sk

        elif algorithm == 'mini_batch_sparse_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError(
                    'sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            if return_info:
                to_return = sk

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            _logger.info("Performing the MLPCA training")
            if output_dimension is None:
                raise ValueError(
                    "For MLPCA it is mandatory to define the "
                    "output_dimension")
            if var_array is None and var_func is None:
                _logger.info('No variance array provided.'
                             'Assuming poissonian data')
                var_array = dc[:, signal_mask][navigation_mask, :]

            if var_array is not None and var_func is not None:
                raise ValueError(
                    "You have defined both the var_func and var_array "
                    "keywords."
                    "Please, define just one of them")
            if var_func is not None:
                # The variance is evaluated on the same selection that is
                # handed to mlpca below so that both arrays match.
                if callable(var_func):
                    var_array = var_func(
                        dc[:, signal_mask][navigation_mask, :])
                else:
                    # var_func holds the polynomial coefficients; an
                    # explicit polyfit still takes precedence so that
                    # existing callers keep their behaviour.
                    coefficients = (var_func if polyfit is None
                                    else polyfit)
                    try:
                        var_array = np.polyval(
                            coefficients,
                            dc[:, signal_mask][navigation_mask, :])
                    except Exception:
                        raise ValueError(
                            'var_func must be either a function or an '
                            'array defining the coefficients of a polynom')
            fast = algorithm != 'mlpca'
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array, output_dimension, fast=fast)
            loadings = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)

        elif algorithm == 'RPCA_GoDec':
            _logger.info("Performing Robust PCA with GoDec")

            X, E, G, U, S, V = rpca_godec(
                dc[:, signal_mask][navigation_mask, :],
                rank=output_dimension, fast=True, **kwargs)

            loadings = U * S
            factors = V
            explained_variance = S ** 2 / len(factors)

            if return_info:
                to_return = (X, E)

        elif algorithm == 'ORPCA':
            _logger.info("Performing Online Robust PCA")

            X, E, U, S, V = orpca(
                dc[:, signal_mask][navigation_mask, :],
                rank=output_dimension, fast=True, **kwargs)

            loadings = U * S
            factors = V
            explained_variance = S ** 2 / len(factors)

            if return_info:
                to_return = (X, E)

        else:
            raise ValueError('Algorithm not recognised. '
                             'Nothing done')

        # We must calculate the ratio here because otherwise the sum
        # information can be lost if the user call
        # crop_decomposition_dimension
        if explained_variance is not None and \
                explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()

        # Store the results in learning_results
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)

        # Delete the unmixing information, because it'll refer to a
        # previous decomposition
        target.unmixing_matrix = None
        target.bss_algorithm = None

        if self._unfolded4decomposition is True:
            folding = \
                self.metadata._HyperSpy.Folding
            target.original_shape = folding.original_shape

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
            else:
                loadings_ = sk.transform(dc[:, signal_mask])
            target.loadings = loadings_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(loadings),
                                 dc[navigation_mask, :] - mean).T
                target.factors = factors
            else:
                _logger.info("Reprojecting the signal is not yet "
                             "supported for this algorithm")
                # NOTE(review): for reproject='both' the fallback is
                # 'signal' although the signal reprojection is the part
                # that was skipped; 'navigation' may have been intended
                # -- confirm.
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.loadings[:] *= self._root_aG

        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            # Store the (inverted, as inputted) signal mask
            target.signal_mask = ~signal_mask.reshape(
                self.axes_manager._signal_shape_in_array)
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                factors[signal_mask, :] = target.factors
                factors[~signal_mask, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            # Store the (inverted, as inputted) navigation mask
            target.navigation_mask = ~navigation_mask.reshape(
                self.axes_manager._navigation_shape_in_array)
            if reproject not in ('both', 'navigation'):
                loadings = np.zeros(
                    (dc.shape[0], target.loadings.shape[1]))
                loadings[navigation_mask, :] = target.loadings
                loadings[~navigation_mask, :] = np.nan
                target.loadings = loadings
    finally:
        if self._unfolded4decomposition is True:
            self.fold()
            # Assignment (the original used a no-op ``is`` comparison):
            # the data has been folded back, so clear the flag.
            self._unfolded4decomposition = False
        self.learning_results.__dict__.update(target.__dict__)
        # undo any pre-treatments
        self.undo_treatments()

    return to_return
def test_iter(self):
    """Run GoDec with ``maxiter=1e4`` and check the X output.

    Passes when ``norm(X - self.A) / (self.m * self.n)`` is smaller than
    ``self.tol``.
    """
    low_rank, _E, _G, _U, _S, _V = rpca_godec(
        self.X,
        rank=self.rank,
        maxiter=1e4,
    )

    # Error of the low-rank component, scaled by m * n
    scale = self.m * self.n
    scaled_error = np.linalg.norm(low_rank - self.A) / scale
    assert scaled_error < self.tol
def decomposition(self,
                  normalize_poissonian_noise=False,
                  algorithm='svd',
                  output_dimension=None,
                  centre=None,
                  auto_transpose=True,
                  navigation_mask=None,
                  signal_mask=None,
                  var_array=None,
                  var_func=None,
                  polyfit=None,
                  reproject=None,
                  return_info=False,
                  **kwargs):
    """Decomposition with a choice of algorithms

    The results are stored in self.learning_results

    Parameters
    ----------
    normalize_poissonian_noise : bool
        If True, scale the SI to normalize Poissonian noise
    algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
        'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' |
        'RPCA_GoDec' | 'ORPCA'
    output_dimension : None or int
        Number of components to keep/calculate. It is mandatory for the
        'mlpca', 'fast_mlpca', 'RPCA_GoDec' and 'ORPCA' algorithms.
    centre : None | 'variables' | 'trials'
        If None no centring is applied. If 'variable' the centring will
        be performed in the variable axis. If 'trials', the centring will
        be performed in the 'trials' axis. It only has effect when using
        the svd or fast_svd algorithms. With 'sklearn_pca' the stored
        value is always 'trials'.
    auto_transpose : bool
        If True, automatically transposes the data to boost performance.
        Only has effect when using the svd or fast_svd algorithms.
    navigation_mask : boolean numpy array
        The navigation locations marked as True are not used in the
        decomposition.
    signal_mask : boolean numpy array
        The signal locations marked as True are not used in the
        decomposition.
    var_array : numpy array
        Array of variance for the maximum likelihood PCA algorithm.
        Mutually exclusive with `var_func`.
    var_func : function or numpy array
        If function, it is applied to the (masked) dataset to obtain the
        var_array. Alternatively, it can be an array with the
        coefficients of a polynomial that is evaluated on the (masked)
        dataset with numpy.polyval.
    polyfit : None or numpy array
        Kept for backward compatibility. If it is not None and `var_func`
        is given but is not callable, these polynomial coefficients are
        used instead of `var_func`.
    reproject : None | signal | navigation | both
        If not None, the results of the decomposition will be projected
        in the selected masked area.
    return_info: bool, default False
        The result of the decomposition is stored internally. However,
        some algorithms generate some extra information that is not
        stored. If True (the default is False) return any extra
        information if available
    **kwargs
        Passed to the scikit-learn estimator constructor for the
        'sklearn_pca', 'nmf', 'sparse_pca' and 'mini_batch_sparse_pca'
        algorithms and to the solver for 'RPCA_GoDec' and 'ORPCA'.

    Returns
    -------
    None, (X, E) or estimator
        None unless `return_info` is True. If 'algorithm' ==
        'RPCA_GoDec' or 'ORPCA' and 'return_info' is True, returns the
        low-rank (X) and sparse (E) matrices from robust PCA. For the
        scikit-learn based algorithms the fitted estimator object is
        returned. None is also returned, after logging a warning, when
        the data is not of a float type.

    Raises
    ------
    AttributeError
        If the navigation size is smaller than 2.
    ValueError
        If `output_dimension` is missing for an algorithm that requires
        it, if both `var_array` and `var_func` are given, if `var_func`
        is neither a function nor usable polynomial coefficients or if
        the algorithm is not recognised.
    ImportError
        If a scikit-learn based algorithm is requested but scikit-learn
        is not installed.

    See also
    --------
    plot_decomposition_factors, plot_decomposition_loadings, plot_lev

    """
    to_return = None
    # Check if it is the wrong data type
    if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
        _logger.warning(
            'To perform a decomposition the data must be of the float '
            'type. You can change the type using the change_dtype method'
            ' e.g. s.change_dtype(\'float64\')\n'
            'Nothing done.')
        return

    if self.axes_manager.navigation_size < 2:
        raise AttributeError("It is not possible to decompose a dataset "
                             "with navigation_size < 2")

    # Validate the arguments before anything is stored on self, so that
    # an invalid call does not leave a full copy of the data behind.
    if algorithm == 'mlpca':
        if normalize_poissonian_noise is True:
            _logger.warning(
                "It makes no sense to do normalize_poissonian_noise with "
                "the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
            normalize_poissonian_noise = False
        if output_dimension is None:
            raise ValueError("With the MLPCA algorithm the "
                             "output_dimension must be specified")
    if algorithm == 'RPCA_GoDec' or algorithm == 'ORPCA':
        if output_dimension is None:
            raise ValueError(
                "With the robust PCA algorithms ('RPCA_GoDec' "
                "and 'ORPCA'), the output_dimension "
                "must be specified")

    # backup the original data
    self._data_before_treatments = self.data.copy()
    # set the output target (peak results or not?)
    target = LearningResults()

    # Apply pre-treatments
    # Transform the data in a line spectrum
    self._unfolded4decomposition = self.unfold()
    try:
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()

        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask, )
        _logger.info('Performing decomposition analysis')
        # The rest of the code assumes that the first data axis
        # is the navigation axis. We transpose the data if that is not the
        # case.
        dc = (self.data if self.axes_manager[0].index_in_array == 0 else
              self.data.T)

        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask

        # WARNING: signal_mask and navigation_mask values are now their
        # negatives i.e. True -> False and vice versa. However, the
        # stored value (at the end of the method) coincides with the
        # input masks

        # Reset the explained_variance which is not set by all the
        # algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None

        if algorithm == 'svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :],
                centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'fast_svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :],
                fast=True,
                output_dimension=output_dimension,
                centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'sklearn_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'
            if return_info:
                to_return = sk

        elif algorithm == 'nmf':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            if return_info:
                to_return = sk

        elif algorithm == 'sparse_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.SparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            if return_info:
                to_return = sk

        elif algorithm == 'mini_batch_sparse_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. Nothing done')
            sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            if return_info:
                to_return = sk

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            _logger.info("Performing the MLPCA training")
            if output_dimension is None:
                raise ValueError("For MLPCA it is mandatory to define the "
                                 "output_dimension")
            if var_array is None and var_func is None:
                _logger.info('No variance array provided.'
                             'Assuming poissonian data')
                var_array = dc[:, signal_mask][navigation_mask, :]

            if var_array is not None and var_func is not None:
                raise ValueError(
                    "You have defined both the var_func and var_array "
                    "keywords."
                    "Please, define just one of them")
            if var_func is not None:
                # The variance is evaluated on the same selection that is
                # handed to mlpca below so that both arrays match.
                if callable(var_func):
                    var_array = var_func(
                        dc[:, signal_mask][navigation_mask, :])
                else:
                    # var_func holds the polynomial coefficients; an
                    # explicit polyfit still takes precedence so that
                    # existing callers keep their behaviour.
                    coefficients = (var_func if polyfit is None
                                    else polyfit)
                    try:
                        var_array = np.polyval(
                            coefficients,
                            dc[:, signal_mask][navigation_mask, :])
                    except Exception:
                        raise ValueError(
                            'var_func must be either a function or an '
                            'array defining the coefficients of a polynom')
            fast = algorithm != 'mlpca'
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array,
                output_dimension,
                fast=fast)
            loadings = U * S
            factors = V
            explained_variance_ratio = S**2 / Sobj
            explained_variance = S**2 / len(factors)

        elif algorithm == 'RPCA_GoDec':
            _logger.info("Performing Robust PCA with GoDec")

            X, E, G, U, S, V = rpca_godec(
                dc[:, signal_mask][navigation_mask, :],
                rank=output_dimension,
                fast=True,
                **kwargs)

            loadings = U * S
            factors = V
            explained_variance = S**2 / len(factors)

            if return_info:
                to_return = (X, E)

        elif algorithm == 'ORPCA':
            _logger.info("Performing Online Robust PCA")

            X, E, U, S, V = orpca(dc[:, signal_mask][navigation_mask, :],
                                  rank=output_dimension,
                                  fast=True,
                                  **kwargs)

            loadings = U * S
            factors = V
            explained_variance = S**2 / len(factors)

            if return_info:
                to_return = (X, E)

        else:
            raise ValueError('Algorithm not recognised. '
                             'Nothing done')

        # We must calculate the ratio here because otherwise the sum
        # information can be lost if the user call
        # crop_decomposition_dimension
        if explained_variance is not None and \
                explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()

        # Store the results in learning_results
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)

        # Delete the unmixing information, because it'll refer to a
        # previous decomposition
        target.unmixing_matrix = None
        target.bss_algorithm = None

        if self._unfolded4decomposition is True:
            folding = \
                self.metadata._HyperSpy.Folding
            target.original_shape = folding.original_shape

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
            else:
                loadings_ = sk.transform(dc[:, signal_mask])
            target.loadings = loadings_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                factors = np.dot(
                    np.linalg.pinv(loadings),
                    dc[navigation_mask, :] - mean).T
                target.factors = factors
            else:
                _logger.info("Reprojecting the signal is not yet "
                             "supported for this algorithm")
                # NOTE(review): for reproject='both' the fallback is
                # 'signal' although the signal reprojection is the part
                # that was skipped; 'navigation' may have been intended
                # -- confirm.
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.loadings[:] *= self._root_aG

        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            # Store the (inverted, as inputted) signal mask
            target.signal_mask = ~signal_mask.reshape(
                self.axes_manager._signal_shape_in_array)
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                factors[signal_mask, :] = target.factors
                factors[~signal_mask, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            # Store the (inverted, as inputted) navigation mask
            target.navigation_mask = ~navigation_mask.reshape(
                self.axes_manager._navigation_shape_in_array)
            if reproject not in ('both', 'navigation'):
                loadings = np.zeros(
                    (dc.shape[0], target.loadings.shape[1]))
                loadings[navigation_mask, :] = target.loadings
                loadings[~navigation_mask, :] = np.nan
                target.loadings = loadings
    finally:
        if self._unfolded4decomposition is True:
            self.fold()
            # Assignment (the original used a no-op ``is`` comparison):
            # the data has been folded back, so clear the flag.
            self._unfolded4decomposition = False
        self.learning_results.__dict__.update(target.__dict__)
        # undo any pre-treatments
        self.undo_treatments()

    return to_return