def add_data(self, g, h, trn_graph, trn_x_index, trn_y_index,
             tst_graph, tst_x_index, tst_y_index, k=500, pos_up_ratio=5.0):
    """Attach the side graphs and the train/test bipartite data to the model.

    Besides storing its inputs, this symmetrises both side graphs with
    ``self.gen_sym_graph`` and factorises ``trn_graph`` with a rank-``k``
    PROPACK SVD. The left and right factors, each scaled column-wise by
    ``sqrt(s)``, are kept as ``self.gX`` and ``self.hX``.

    Parameters
    ----------
    g : ng x ng graph; ``ng`` is taken from ``g.shape[0]``.
    h : nh x nh graph; ``nh`` is taken from ``h.shape[0]``.
    trn_graph : ng x nh training graph (entries are the corresponding
        instances). Must provide ``.nonzero()`` and ``.shape``.
    trn_x_index, trn_y_index : index sequences of the training pairs.
    tst_graph : ng x nh test graph (entries are the corresponding instances).
    tst_x_index, tst_y_index : index sequences of the test pairs.
    k : number of singular triplets requested from ``svdp``.
    pos_up_ratio : stored as ``self.pos_up_ratio``; not used in this method.
    """
    self.g = g  # ng * ng
    self.h = h  # nh * nh
    self.trn_graph = trn_graph  # ng * nh
    self.tst_graph = tst_graph  # ng * nh
    self.ng = g.shape[0]
    self.nh = h.shape[0]

    self.sym_g = self.gen_sym_graph(self.g)
    self.sym_h = self.gen_sym_graph(self.h)

    # Split the singular values evenly between the two factors so that
    # gX.dot(hX.T) reproduces the rank-k approximation of trn_graph.
    U, s, Vh = svdp(self.trn_graph, k=k)
    root_s = np.sqrt(s)
    self.gX = U * root_s
    self.hX = Vh.T * root_s

    # The stored (non-zero) entries of the training graph are the positives.
    self.pos_trn_x_index, self.pos_trn_y_index = self.trn_graph.nonzero()
    self.trn_x_index, self.trn_y_index = trn_x_index, trn_y_index
    self.tst_x_index, self.tst_y_index = tst_x_index, tst_y_index
    self.pos_up_ratio = pos_up_ratio

    # Single-argument print calls give the same output on Python 2 and 3.
    print('bipartite shape: %s' % (trn_graph.shape,))
    print('pos_num: %s' % (len(self.pos_trn_x_index),))
    print('total training: %s' % (len(self.trn_x_index),))
    print('pos_up_ratio: %s' % (self.pos_up_ratio,))
def check_svdp(n, m, constructor, dtype, k, irl_mode, which, f=0.8):
    """Check ``svdp`` against ``np.linalg.svd`` on a generated n-by-m matrix.

    Parameters
    ----------
    n, m : matrix dimensions handed to ``generate_matrix``.
    constructor : key into ``CONSTRUCTORS`` selecting how the dense matrix is
        wrapped before it is given to ``svdp``.
    dtype : key into ``TOLS`` (tolerance) and dtype passed to
        ``generate_matrix``.
    k : number of singular triplets requested from ``svdp``.
    irl_mode : forwarded to ``svdp``.
    which : ``'L'`` (largest) or ``'S'`` (smallest), case-insensitive.
    f : forwarded to ``generate_matrix``; its meaning is defined there.

    Raises
    ------
    ValueError
        If ``which`` is neither ``'L'`` nor ``'S'``.
    AssertionError
        If singular values or vectors disagree beyond the tolerance.
    """
    tol = TOLS[dtype]

    M = generate_matrix(np.asarray, n, m, f, dtype)
    Msp = CONSTRUCTORS[constructor](M)

    u1, sigma1, vt1 = np.linalg.svd(M, full_matrices=False)
    u2, sigma2, vt2 = svdp(Msp, k=k, which=which, irl_mode=irl_mode, tol=tol)

    # check the which
    mode = which.upper()
    if mode == 'S':
        # np.linalg.svd sorts singular values in descending order; rotate the
        # trailing k (smallest) triplets to the front so [:k] selects them.
        u1 = np.roll(u1, k, 1)
        vt1 = np.roll(vt1, k, 0)
        sigma1 = np.roll(sigma1, k)
    elif mode != 'L':
        # The original message was never formatted and printed a literal %s.
        raise ValueError("which = '%s' not recognized" % (which,))

    # check that singular values agree
    assert_allclose(sigma1[:k], sigma2, rtol=tol, atol=tol)

    # check that singular vectors are orthogonal
    assert_orthogonal(u1, u2, rtol=tol, atol=tol)
    assert_orthogonal(vt1.T, vt2.T, rtol=tol, atol=tol)
def svdPropack(X, k, kmax=None):
    """
    Perform the SVD of a sparse matrix X using PROPACK for the largest k
    singular values.

    :param X: The input matrix as a scipy.sparse matrix, an sppy.csarray, or
        an object svdp accepts directly (e.g. a LinearOperator).

    :param k: The number of singular vectors/values, or None for all
        (min(X.shape)).

    :param kmax: The maximal number of iterations / maximal dimension of the
        Krylov subspace. Defaults to SparseUtils.kmaxMultiplier * k.

    :return: The tuple (U, s, V). Note that V is returned, i.e. the transpose
        of the VT produced by svdp.
    """
    from pypropack import svdp
    import sppy

    # Identity comparison: `== None` may be overloaded by array-like objects.
    if k is None:
        k = min(X.shape[0], X.shape[1])
    if kmax is None:
        kmax = SparseUtils.kmaxMultiplier * k

    if scipy.sparse.isspmatrix(X):
        L = scipy.sparse.linalg.aslinearoperator(X)
    elif isinstance(X, sppy.csarray):
        L = sppy.linalg.GeneralLinearOperator.asLinearOperator(X)
    else:
        # Anything else is handed to svdp unchanged.
        L = X

    U, s, VT, info, sigma_bound = svdp(L, k, kmax=kmax, full_output=True)

    if info > 0:
        logging.debug(
            "An invariant subspace of dimension %s was found.", info)
    elif info == -1:
        logging.warning(
            "%s singular triplets did not converge within %s iterations",
            k, kmax)

    return U, s, VT.T
def update_X(self, X, mu, k=20):
    """Shrink the top-k singular values of X under a log penalty and rebuild.

    For every singular value ``S[t]`` of the rank-``k`` PROPACK SVD of ``X``
    the shrunk value is picked from the stationary points of

        f(sigma) = log(1 + sigma) + mu * (sigma - S[t]) ** 2

    which are the roots of
    ``sigma**2 + (1 - S[t]) * sigma + (1 / (2 * mu) - S[t]) = 0``.
    The root of larger magnitude is kept unless the rules below set the
    value to zero.

    Parameters
    ----------
    X : matrix accepted by ``svdp``.
    mu : weight of the quadratic term.
    k : number of singular triplets to compute.

    Returns
    -------
    ndarray
        ``U.dot(diag(sigma_star)).dot(VT)`` with the shrunk singular values.
    """
    U, S, VT = svdp(X, k=k)

    # One quadratic per singular value, one row of coefficients each.
    P = np.c_[np.ones((k, 1)), 1 - S, 1. / 2. / mu - S]

    sigma_star = np.zeros(k)
    for t in range(k):
        p = P[t, :]
        delta = p[1] ** 2 - 4 * p[0] * p[2]
        if delta <= 0:
            # No two distinct real stationary points: leave the value at 0.
            continue

        # Order the roots by magnitude; `large` is the candidate.
        small, large = sorted(np.roots(p), key=abs)
        if small * large <= 0:
            sigma_star[t] = large
        elif large < 0:
            sigma_star[t] = 0.
        else:
            # Keep the candidate only if it does not lose to sigma = 0,
            # whose objective is mu * S[t] ** 2. The original read the
            # undefined name `s` here and raised NameError.
            f = np.log(1 + large) + mu * (large - S[t]) ** 2
            if f > mu * S[t] ** 2:
                sigma_star[t] = 0.
            else:
                sigma_star[t] = large

    # Scaling the columns of U is the same as multiplying by diag(sigma_star).
    return np.dot(U * sigma_star, VT)
def matrix_shrink_old(X, tau, sv, use_rand_svd=False):
    """Soft-threshold the singular values of X by tau.

    Parameters
    ----------
    X : 2-D array to shrink.
    tau : threshold subtracted from every singular value that exceeds it.
    sv : number of singular triplets requested from the truncated solvers
        (cast to int).
    use_rand_svd : use ``randomized_svd`` instead of choosing between
        PROPACK and the dense SVD via ``choosvd_old``.

    Returns
    -------
    (Y, r)
        ``Y`` is the shrunk matrix and ``r`` the number of singular values
        greater than ``tau``. When ``r`` is 0, ``Y`` is an all-zero array of
        the same shape as ``X``.
    """
    m = np.min(X.shape)

    if use_rand_svd:
        U, S, V = randomized_svd(X, int(sv))
    elif choosvd_old(m, sv):
        U, S, V = svdp(X, int(sv))
    else:
        U, S, V = LA.svd(X, full_matrices=0)

    r = np.sum(S > tau)
    if r > 0:
        # assumes S is sorted in descending order, so the first r entries are
        # exactly those above tau — TODO confirm for every solver used above
        Y = np.dot(U[:, :r] * (S[:r] - tau), V[:r, :])
    else:
        # Everything was thresholded away. The original left Y unbound here
        # and crashed on return.
        Y = np.zeros(X.shape, dtype=U.dtype)
    return Y, r
def matrix_shrink(X, tau, sv, out=None, use_rand_svd=False):
    """Soft-threshold the singular values of X by tau, reusing buffers.

    NOTE: when at least one singular value exceeds ``tau``, the leading ``r``
    columns of ``X`` are overwritten and used as scratch space.

    Parameters
    ----------
    X : 2-D array to shrink (partially overwritten, see above).
    tau : threshold subtracted from every singular value that exceeds it.
    sv : number of singular triplets requested from the truncated solvers
        (cast to int).
    out : optional array that receives the result; a new array is returned
        when it is None.
    use_rand_svd : use ``randomized_svd`` instead of choosing between
        PROPACK and the dense SVD via ``choosvd``.

    Returns
    -------
    (Z, r)
        ``Z`` is the shrunk matrix (``out`` itself when it was given) and
        ``r`` the number of singular values greater than ``tau``.
    """
    m = np.min(X.shape)

    if use_rand_svd:
        U, sig, V = randomized_svd(X, int(sv))
    elif choosvd(m, sv):
        U, sig, V = svdp(X, int(sv))
    else:
        U, sig, V = LA.svd(X, full_matrices=0)

    r = np.sum(sig > tau)
    if r > 0:
        # Scale the kept left vectors into X's first r columns, then multiply.
        np.multiply(U[:, :r], (sig[:r] - tau), out=X[:, :r])
        Z = np.dot(X[:, :r], V[:r, :], out=out)
    elif out is None:
        # The original ran `out[:] = 0` even with the default out=None and
        # raised TypeError; allocate the zero result instead.
        Z = np.zeros(X.shape, dtype=X.dtype)
    else:
        out[:] = 0
        Z = out
    return (Z, r)
def matrix_shrink(X, tau, sv, out=None, use_rand_svd=False):
    """Apply singular-value soft-thresholding to X.

    Singular values above ``tau`` are reduced by ``tau`` and the rest are
    dropped. If any survive, the first ``r`` columns of ``X`` are overwritten
    as a work buffer.

    Parameters
    ----------
    X : 2-D array; modified in place as described above.
    tau : shrinkage threshold.
    sv : rank requested from ``randomized_svd`` / ``svdp`` (cast to int).
    out : optional destination array for the result.
    use_rand_svd : force ``randomized_svd``; otherwise ``choosvd`` picks
        between ``svdp`` and the dense ``LA.svd``.

    Returns
    -------
    (Z, r)
        The shrunk matrix and the count of singular values above ``tau``.
        ``Z`` is ``out`` whenever ``out`` was supplied.
    """
    m = np.min(X.shape)
    rank = int(sv)

    if use_rand_svd:
        U, sig, V = randomized_svd(X, rank)
    elif choosvd(m, sv):
        U, sig, V = svdp(X, rank)
    else:
        U, sig, V = LA.svd(X, full_matrices=0)

    r = np.sum(sig > tau)
    if r > 0:
        scratch = X[:, :r]
        np.multiply(U[:, :r], sig[:r] - tau, out=scratch)
        return (np.dot(scratch, V[:r, :], out=out), r)

    # Nothing survived the threshold: the result is the zero matrix.
    if out is None:
        # Previously `out[:] = 0` ran on None here and raised TypeError.
        return (np.zeros(X.shape, dtype=X.dtype), r)
    out[:] = 0
    return (out, r)
def _determine_svd_function(self, svd_type, n_iter, truncated_algorithm):
    """Return a callable ``x -> (U, s)`` for the requested SVD backend.

    Every returned callable keeps only the first two items of the backend's
    result.

    Parameters
    ----------
    svd_type : one of ``'linalg'``, ``'sparse'``, ``'truncated'`` or
        ``'propack'``.
    n_iter : forwarded to ``self._calc_truncated_svd`` (``'truncated'`` only).
    truncated_algorithm : forwarded to ``self._calc_truncated_svd``.
        ``'arpack'`` requests ``min(x.shape) - 1`` components. Any other value
        requests ``min(x.shape)`` and needs
        ``self.n_particle < self.n_dim_obs``.

    Raises
    ------
    ValueError
        For an unknown ``svd_type``, or for a non-arpack truncated SVD when
        ``self.n_particle >= self.n_dim_obs``.
    """
    if svd_type == 'linalg':
        return lambda x: linalg.svd(x, False)[:2]

    if svd_type == 'sparse':
        return lambda x: sparse.linalg.svds(x, np.min(x.shape) - 1)[:2]

    if svd_type == 'truncated':
        if truncated_algorithm == 'arpack':
            return lambda x: self._calc_truncated_svd(
                x, n_iter, truncated_algorithm, np.min(x.shape) - 1)
        # The size check is made once, when the callable is built.
        if self.n_particle < self.n_dim_obs:
            return lambda x: self._calc_truncated_svd(
                x, n_iter, truncated_algorithm, np.min(x.shape))
        # The original concatenation dropped the space after "of".
        raise ValueError('TruncatedSVD only use in the case of '
                         'n_particle < n_dim_obs.')

    if svd_type == 'propack':
        return lambda x: svdp(
            x, k=np.min([self.n_particle, self.n_dim_obs]))[:2]

    raise ValueError('you must check svd type.')
def svdPropack(X, k, kmax=None):
    """
    Perform the SVD of a sparse matrix X using PROPACK for the largest k
    singular values.

    :param X: The input matrix as a scipy.sparse matrix, an sppy.csarray, or
        an object svdp accepts directly (e.g. a LinearOperator).

    :param k: The number of singular vectors/values, or None for all
        (min(X.shape)).

    :param kmax: The maximal number of iterations / maximal dimension of the
        Krylov subspace. Defaults to SparseUtils.kmaxMultiplier * k.

    :return: The tuple (U, s, V), where V is the transpose of the VT that
        svdp produces.
    """
    from pypropack import svdp
    import sppy

    # Use identity tests for None; `==` can be overloaded by array types.
    if k is None:
        k = min(X.shape[0], X.shape[1])
    if kmax is None:
        kmax = SparseUtils.kmaxMultiplier * k

    # Wrap known sparse containers as linear operators; pass the rest as-is.
    if scipy.sparse.isspmatrix(X):
        L = scipy.sparse.linalg.aslinearoperator(X)
    elif isinstance(X, sppy.csarray):
        L = sppy.linalg.GeneralLinearOperator.asLinearOperator(X)
    else:
        L = X

    U, s, VT, info, sigma_bound = svdp(L, k, kmax=kmax, full_output=True)

    if info > 0:
        logging.debug(
            "An invariant subspace of dimension %s was found.", info)
    elif info == -1:
        logging.warning(
            "%s singular triplets did not converge within %s iterations",
            k, kmax)

    return U, s, VT.T
from sklearn.base import TransformerMixin, BaseEstimator
import numpy as np
import scipy.sparse as sp

try:
    from pypropack import svdp
    # NOTE(review): this unconditional raise forces the SciPy fallback below
    # even when pypropack imports, so the rest of the try body never runs.
    # It looks like a deliberate switch-off — confirm before removing.
    raise ValueError
    svd = lambda X, k: svdp(X, k, 'L', kmax=max(100, 10 * k))
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt
    # and SystemExit are no longer swallowed during import.
    from scipy.linalg import svd as svd_

    def svd(X, k=-1):
        """Thin SVD of X via SciPy; keep the leading k triplets if k >= 0."""
        U, S, V = svd_(X, full_matrices=False)
        if k < 0:
            return U, S, V
        else:
            return U[:, :k], S[:k], V[:k, :]

# The problem solved is
# min : tau * (|A|_* + \lmbda |E|_1) + .5 * |(A,E)|_F^2
# subject to: A + E = D


def _monitor(A, E, D, lmbda=0.1):
    """Write the nuclear norm and numerical rank of A to stdout.

    Both fields go on the current line, separated by spaces, with no newline.
    ``E``, ``D`` and ``lmbda`` are not used by the code visible here.
    """
    import sys

    diags = svd(A, min(A.shape))[1]
    # The original used Python 2 trailing-comma prints (space-separated, no
    # newline). stdout.write gives the same layout on Python 2 and 3.
    sys.stdout.write("|A|_* %s " % (np.abs(diags).sum(),))
    sys.stdout.write("|A|_0 %s " % ((np.abs(diags) > 1e-6).sum(),))
from sklearn.base import TransformerMixin, BaseEstimator
import numpy as np
import scipy.sparse as sp

try:
    from pypropack import svdp
    # NOTE(review): this unconditional raise forces the SciPy fallback below
    # even when pypropack imports, so the rest of the try body never runs.
    # It looks like a deliberate switch-off — confirm before removing.
    raise ValueError
    svd = lambda X, k: svdp(X, k, 'L', kmax=max(100, 10 * k))
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt
    # and SystemExit are no longer swallowed during import.
    from scipy.linalg import svd as svd_

    def svd(X, k=-1):
        """Thin SVD of X via SciPy; keep the leading k triplets if k >= 0."""
        U, S, V = svd_(X, full_matrices=False)
        if k < 0:
            return U, S, V
        else:
            return U[:, :k], S[:k], V[:k, :]

# The problem solved is
# min : tau * (|A|_* + \lmbda |E|_1) + .5 * |(A,E)|_F^2
# subject to: A + E = D


def _monitor(A, E, D, lmbda=0.1):
    """Print diagnostics for the current split D = A + E, one per line.

    Reports the sum of A's singular values (``|A|_*``), the number of
    singular values above 1e-6 (``|A|_0``) and the entry-wise L1 norm of
    ``D - A`` (``|E|_1``). ``E`` and ``lmbda`` are accepted but not used.
    """
    diags = svd(A, min(A.shape))[1]
    magnitudes = np.abs(diags)
    # Single-argument print calls give the same output on Python 2 and 3.
    print("|A|_* %s" % (magnitudes.sum(),))
    print("|A|_0 %s" % ((magnitudes > 1e-6).sum(),))
    print("|E|_1 %s" % (np.abs(D - A).sum(),))
import numpy as np
from pypropack import svdp
from scipy.sparse import csr_matrix

# Demo: compare the 3 leading PROPACK singular triplets of a random matrix
# with the dense SVD from numpy.

np.random.seed(0)

# Create a random matrix
A = np.random.random((10, 20))

# compute SVD via propack and lapack
u, sigma, v = svdp(csr_matrix(A), 3)
u1, sigma1, v1 = np.linalg.svd(A, full_matrices=False)

# print the results
# Parenthesised single-argument prints behave the same on Python 2 and 3.
# print("") replaces the bare `print` that emitted an empty line.
np.set_printoptions(suppress=True, precision=8)
print(np.dot(u.T, u1))
print("")
print(sigma)
print(sigma1)
print("")
print(np.dot(v, v1.T))
def ialm_RPCA(D, l=None, tol=1e-7, max_iter=1000, mu=1.25, rho=1.5):
    """
    Split D into a low-rank part and a sparse part by an iterative
    augmented-Lagrangian scheme (at least one iteration is always run).

    Parameters
    ----------
    D : ndarray
        Input matrix, with size (m, n).
    l : float
        lamda, will be set to 1.0 / np.sqrt(m) if not specified.
    tol : float
        Tolerance for stopping criterion
        (``norm(D - A_hat - E_hat, 'fro') / norm(D, 'fro')``).
    max_iter : int
        Maximum number of iterations.
    mu : float
        Scale of the initial penalty parameter, which starts at
        ``mu / norm(D.ravel(), 2)``.
    rho : float
        Factor by which the penalty parameter grows after every iteration;
        growth is capped at 1e7 times the initial value.

    Returns
    -------
    A_hat : ndarray
        Low-rank array.
    E_hat : ndarray
        Sparse array.

    Copy Rights
    -----------
    This is a Python version of implementation based on :
    http://perception.csl.illinois.edu/matrix-rank/sample_code.html
    I do not own the copy right of this.

    Minming Chen, October 2009. Questions? [email protected]
    Arvind Ganesh ([email protected])
    Perception and Decision Laboratory,
    University of Illinois, Urbana-Champaign
    Microsoft Research Asia, Beijing

    References
    ----------
    Kyle Kastner :
        https://kastnerkyle.github.io/posts/robust-matrix-decomposition/
    Alex Pananicolaou : https://github.com/apapanico/RPCA
    """
    m, n = D.shape
    if l is None:
        l = 1. / np.sqrt(m)

    # Scale the initial multiplier by the larger of the two norms below.
    Y = D.copy()
    norm_two = norm(Y.ravel(), 2)
    norm_inf = norm(Y.ravel(), np.inf) / l
    dual_norm = np.maximum(norm_two, norm_inf)
    Y = Y / dual_norm

    A_hat = np.zeros((m, n))
    E_hat = np.zeros((m, n))
    u = mu / norm_two
    u_bar = u * 1e7
    d_norm = norm(D, 'fro')

    sv = 10  # number of singular triplets to request next
    i = 0
    while True:
        i += 1

        # Sparse part: entry-wise soft-thresholding at l / u.
        T = D - A_hat + (1. / u) * Y
        E_hat = np.maximum(T - (l / u), 0) + np.minimum(T + (l / u), 0)

        # Low-rank part: singular-value thresholding at 1 / u.
        M = D - E_hat + (1. / u) * Y
        if choosvd(n, sv):
            U, S, V = svdp(M, sv)
        else:
            U, S, V = svd(M, full_matrices=False)

        # S is a vector of singular values, so no np.diag is needed.
        svp = int(np.count_nonzero(S > (1. / u)))

        # Predict the rank for the next iteration. Cast to a plain int:
        # Python 2's round() returns a float, and the value is later passed
        # to svdp as the number of triplets.
        if svp < sv:
            sv = int(min(svp + 1, m))
        else:
            sv = int(min(svp + round(.05 * m), m))

        # Scaling U's columns is the same as multiplying by the diagonal.
        A_hat = np.dot(U[:, :svp] * (S[:svp] - (1. / u)), V[:svp, :])

        Z = D - A_hat - E_hat
        Y = Y + u * Z
        u = np.minimum(u * rho, u_bar)

        stop_criterion = norm(Z, 'fro') / d_norm
        if stop_criterion <= tol or i >= max_iter:
            break

    return A_hat, E_hat