def test_rqb_block_float64():
    """Blocked randomized QB of a symmetric float64 matrix reconstructs it to tolerance."""
    rows, rank = 100, 10
    mat = np.random.randn(rows, rank).astype(np.float64)
    # Symmetrize so the matrix has exact rank `rank`.
    mat = mat.dot(mat.T)
    Q, B = rqb_block(mat, rank, oversample=5, n_subspace=2, n_blocks=4)
    approx = Q.dot(B)
    assert relative_error(mat, approx) < atol_float64
def test_rqb_block_wide_complex128():
    """Blocked randomized QB of a wide complex128 matrix reconstructs it to tolerance."""
    rows, rank = 100, 10
    real_part = np.random.randn(rows, rank).astype(np.float64)
    imag_part = np.random.randn(rows, rank).astype(np.float64)
    mat = real_part + 1j * imag_part
    # Hermitian product has exact rank `rank`; keep the first 40 rows to make it wide.
    mat = mat.dot(mat.conj().T)
    mat = mat[0:40, :]
    Q, B = rqb_block(mat, rank, oversample=5, n_subspace=2, n_blocks=4)
    approx = Q.dot(B)
    assert relative_error(mat, approx) < atol_float64
def rspca(X, n_components, alpha=0.1, beta=0.1, max_iter=1000,
          regularizer='l1', tol=1e-5, verbose=0,
          oversample=50, n_subspace=2, n_blocks=1, random_state=None):
    r"""Randomized Sparse Principal Component Analysis (rSPCA).

    Given a mean centered rectangular matrix `A` with shape `(m, n)`, SPCA
    computes a set of sparse components that can optimally reconstruct the
    input data. The amount of sparseness is controllable by the coefficient
    of the L1 penalty, given by the parameter alpha. In addition, some ridge
    shrinkage can be applied in order to improve conditioning.

    This algorithm uses randomized methods for linear algebra to accelerate
    the computations. The quality of the approximation can be controlled via
    the oversampling parameter `oversample` and `n_subspace` which specifies
    the number of subspace iterations.

    Parameters
    ----------
    X : array_like, shape `(m, n)`.
        Mean centered real input matrix.

    n_components : integer, `n_components << min{m,n}`.
        Target rank, i.e., number of sparse components to be computed.

    alpha : float, (default ``alpha = 0.1``).
        Sparsity controlling parameter.
        Higher values lead to sparser components.

    beta : float, (default ``beta = 0.1``).
        Amount of ridge shrinkage to apply in order to improve conditioning.

    regularizer : string {'l0', 'l1'}.
        Type of sparsity-inducing regularizer. The l1 norm (also known as
        LASSO) leads to a soft-threshold operator (default). The l0 norm is
        implemented via a hard-threshold operator.

    max_iter : integer, (default ``max_iter = 1000``).
        Maximum number of iterations to perform before exiting.

    tol : float, (default ``tol = 1e-5``).
        Stopping tolerance for reconstruction error.

    verbose : bool ``{'True', 'False'}``, optional (default ``verbose = 0``).
        Display progress.

    oversample : integer, optional (default: 50)
        Controls the oversampling of column space. Increasing this parameter
        may improve numerical accuracy.

    n_subspace : integer, default: 2.
        Parameter to control number of subspace iterations.
        Increasing this parameter may improve numerical accuracy.

    n_blocks : integer, default: 1.
        Parameter to control in how many blocks of columns the input matrix
        should be split. A larger number requires less fast memory, while it
        leads to a higher computational time.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    Returns
    -------
    B : array_like, `(n, n_components)`.
        Sparse components extracted from the data.

    A : array_like, `(n, n_components)`.
        Orthogonal components extracted from the data.

    eigvals : array_like, `(n_components)`.
        Eigenvalues corresponding to the extracted components.

    obj : array_like, `(n_iter)`.
        Objective value at the i-th iteration.

    Notes
    -----
    Variable Projection for SPCA solves the following optimization problem:
    minimize :math:`1/2 \| X - X B A^T \|^2 + \alpha \|B\|_1 + 1/2 \beta \|B\|^2`
    """
    # Shape of data matrix
    m = X.shape[0]

    # Compute QB decomposition: Q spans an approximate column space of X,
    # and Xcompressed = B is the small compressed matrix SPCA operates on.
    Q, Xcompressed = rqb_block(X, rank=n_components, oversample=oversample,
                               n_subspace=n_subspace, n_blocks=n_blocks,
                               random_state=random_state)

    # Compute Sparse PCA on the compressed matrix.
    B, A, eigvals, obj = spca(Xcompressed, n_components=n_components,
                              alpha=alpha, beta=beta, regularizer=regularizer,
                              max_iter=max_iter, tol=tol, verbose=verbose)

    # Rescale eigenvalues to account for the compression: the compressed
    # matrix has (n_components + oversample) rows rather than m.
    eigvals = eigvals * (n_components + oversample - 1) / (m - 1)

    return B, A, eigvals, obj