def svd_select(A, n, k=1, idxs=None, sps=False, **kwargs):
    """Selection function which computes the CUR indices using the SVD decomposition."""
    if idxs is None:
        idxs = []  # indexA is initially empty
    else:
        idxs = list(idxs)
    Acopy = A.copy()
    for nn in range(n):
        if len(idxs) <= nn:  # only pick a new column once the given indices are used up
            if not sps:
                (S, v, D) = np.linalg.svd(Acopy)
            else:
                (S, v, D) = svd(Acopy, k)  # sparse SVD, e.g. scipy.sparse.linalg.svds
                D = D[np.flip(np.argsort(v))]  # svds returns singular values in ascending order
            pi = (D[:k]**2.0).sum(axis=0)
            pi[idxs] = 0  # eliminate possibility of selecting same column twice
            i = pi.argmax()
            idxs.append(i)
        # orthogonalize the remaining columns against the column just selected
        v = Acopy[:, idxs[nn]] / \
            np.sqrt(np.matmul(Acopy[:, idxs[nn]], Acopy[:, idxs[nn]]))
        for i in range(Acopy.shape[1]):
            Acopy[:, i] -= v * np.dot(v, Acopy[:, i])
    return list(idxs)
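# Hedged usage sketch (the demo function below is my own, not part of the
# original code; np is assumed to be numpy imported at module level): pick CUR
# column indices from a small dense matrix. With sps=False only np.linalg.svd
# is exercised.
def _demo_svd_select():
    import numpy as np
    rng = np.random.RandomState(0)
    A = rng.rand(10, 6)
    cols = svd_select(A, n=3, k=2)  # 3 columns, scored by the top-2 right singular vectors
    print(cols)  # three distinct column indices of A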
def testMat():
    A = np.array([[1, 1, 1, 0, 0],
                  [2, 2, 2, 0, 0],
                  [3, 3, 3, 0, 0],
                  [5, 5, 3, 2, 2],
                  [0, 0, 0, 3, 3],
                  [0, 0, 0, 6, 6]], dtype=float)
    u, sigma, vt = la.svd(A)  # la is assumed to be numpy.linalg
    print("-------------")
    print(A)
    print("-------------")
    print(u)
    print("-------------")
    print(sigma)
    print("-------------")
    print(vt)
    print("-------------")
    # reconstruct A entry by entry from u * diag(sigma) * vt
    i = 0
    while i < A.shape[0]:
        j = 0
        while j < A.shape[1]:
            tmp = 0
            k = 0
            while k < len(sigma):
                tmp = tmp + u[i][k] * sigma[k] * vt[k][j]
                k = k + 1
            print(tmp, " ", end="")
            j = j + 1
        print("")
        i = i + 1
def spectral_partition(W, q, method='complete', metric='cosine'):
    n, m = W.shape
    K = Kmatrix(W)
    if n == m:
        try:
            e, v = linalg.eigen(K, q)
        except TypeError:
            e, v = linalg.eigs(K, q)
    else:
        try:
            u, e, v = linalg.svds(K, q)
        except AttributeError:
            u, e, v = linalg.svd(K, q)
        v = np.concatenate((u, v.T), 0)
    max_index = e.argmax()
    v = np.delete(v, max_index, 1)
    Obs = np.real(v)
    D = distance.pdist(Obs, metric=metric)
    D = np.multiply(D >= 0, D)  # clip any negative distances from numerical noise
    Z = linkage(D, method=method, metric=metric)
    cluster = fcluster(Z, q, criterion='maxclust')
    cluster -= 1  # zero-based cluster labels
    cluster = {'spectral': cluster}
    return cluster
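# Hedged standalone illustration (all names below are my own; Kmatrix, which
# spectral_partition relies on, is defined elsewhere in this module): the core
# pipeline the function implements -- embed rows with the top-q singular
# vectors, then hierarchically cluster the embedding.
def _demo_spectral_embedding_cluster():
    import numpy as np
    from scipy.spatial import distance
    from scipy.cluster.hierarchy import linkage, fcluster
    rng = np.random.RandomState(0)
    W = rng.rand(20, 20)
    q = 3
    u, s, vt = np.linalg.svd(W)
    Obs = np.real(u[:, :q])                 # q-dimensional spectral embedding of the rows
    D = distance.pdist(Obs, metric='cosine')
    Z = linkage(D, method='complete')
    labels = fcluster(Z, q, criterion='maxclust') - 1  # zero-based labels
    return labels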
def simple_svd_reduced(A):
    # try with full_matrices=False (thin SVD)
    U, s, Vh = linalg.svd(A, full_matrices=False, compute_uv=True)
    S = np.diag(s)
    print(np.shape(U), np.shape(S), np.shape(Vh))
    # check whether the SVD-resulting matrix is accurate enough
    A_check = np.dot(np.dot(U, S), Vh)
    print(np.allclose(A, A_check))
    return U, S, Vh
def dim_reduction(X, w, l, u=None, v_T=None):
    # print(X)
    if u is None and v_T is None:  # truth-testing arrays directly is ambiguous
        u, s, v_T = svd(X)
        print("done SVD")
    u_T = u[:, :w].transpose()
    print("done u_T")
    tmp = u_T.dot(X)
    X = None  # release the original matrix
    print("dot V")
    X_reduce = tmp.dot(v_T[:l, :].transpose())
    tmp = None
    print("cool")
    return X_reduce, u, v_T
def GrowSparse(H, params, trans):
    sys_dim = H.A.shape[0]
    dim = sys_dim * params['loc_dim']
    U = trans.A[-1]
    Vh = trans.B[-1]
    old_dim = U.shape[0]
    # enlarge each block by one site
    H_Anew = kron(H.A, sigma_0)
    H_Bnew = kron(sigma_0, H.B)
    sz_Utrans = U.conj().T.dot(
        kron(ids(old_dim // params['loc_dim'], dtype=float), sigma_z).dot(U))
    H_Adot = -params['J'] * kron(sz_Utrans, sigma_z)
    sz_Vtrans = Vh.conj().dot(
        kron(sigma_z, ids(old_dim // params['loc_dim'], dtype=float)).dot(Vh.T))
    H_dotB = -params['J'] * kron(sigma_z, sz_Vtrans)
    H_dotdot = -params['J'] * kron(
        kron(kron(ids(sys_dim, dtype=float), sigma_z), sigma_z),
        ids(sys_dim, dtype=float))
    H_dotdotA = -params['h'] * kron(ids(sys_dim, dtype=float), sigma_x)
    H_dotdotB = -params['h'] * kron(sigma_x, ids(sys_dim, dtype=float))
    H_super = kron(H_Anew, ids(dim, dtype=float)) + \
        kron(ids(dim, dtype=float), H_Bnew) + \
        kron(H_Adot, ids(dim, dtype=float)) + \
        kron(ids(dim, dtype=float), H_dotB) + \
        H_dotdot + \
        kron(H_dotdotA, ids(dim, dtype=float)) + \
        kron(ids(dim, dtype=float), H_dotdotB)
    eigval, psi = eigsh(H_super, k=1, tol=1.e-8, which='SA')
    energy.append(eigval.tolist()[0])  # energy is a module-level list
    # Since we have A <-> B symmetry, we may reshape psi here instead of
    # looking at tr_A |psi><psi|
    rho_dim = int(np.sqrt(psi.shape[0]))
    if np.abs(rho_dim - np.sqrt(psi.shape[0])) > 1e-10:  # check for integer sqrt
        print('Error: Density matrix dimension not int')
        quit()
    psi_ij = np.reshape(psi, (rho_dim, rho_dim), order='C')
    psi_ijs = sparse.coo_matrix(psi_ij)
    num_SV = min(params['maxM'], dim - 1)
    U, s, Vh = svd(psi_ijs, k=num_SV)  # sparse SVD, e.g. scipy.sparse.linalg.svds
    if sys_dim >= params['maxM']:
        U = U[:, 0:params['maxM']]
        Vh = Vh[0:params['maxM'], :]
    trans.A.append(U)
    trans.B.append(Vh)
    H_Amod = H_Anew + H_Adot + H_dotdotA
    H_Bmod = H_Bnew + H_dotB + H_dotdotB
    H_new = Ham(U.conj().T.dot(H_Amod.dot(U)),
                Vh.conj().dot(H_Bmod.dot(Vh.T)))
    return H_new
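# Hedged standalone illustration (all names below are my own): the truncation
# step GrowSparse relies on -- reshape a bipartite state |psi> into a matrix
# psi_ij and SVD it; the leading singular vectors give reduced bases for the
# two halves, truncated to at most maxM states each.
def _demo_dmrg_truncation(maxM=2):
    import numpy as np
    import scipy.sparse as sparse
    from scipy.sparse.linalg import svds
    rng = np.random.RandomState(0)
    psi = rng.rand(16)                 # a state on a 4 x 4 bipartition
    psi /= np.linalg.norm(psi)
    rho_dim = int(np.sqrt(psi.shape[0]))
    psi_ij = np.reshape(psi, (rho_dim, rho_dim), order='C')
    U, s, Vh = svds(sparse.coo_matrix(psi_ij), k=maxM)
    return U, s, Vh                    # truncated bases for the A and B halves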
def simple_svd(A):
    # try with full_matrices=True (full SVD)
    U, s, Vh = linalg.svd(A, full_matrices=True, compute_uv=True)
    M, N = np.shape(A)
    S = np.zeros((M, N), dtype=complex)
    S[:N, :N] = np.diag(s)  # assumes M >= N
    A_check = np.dot(np.dot(U, S), Vh)
    # check whether the SVD-resulting matrix is accurate enough
    for i in range(0, M):
        for j in range(0, N):
            if not abs(A[i, j] - A_check[i, j]) < 1.0e-14:
                print("Oops! A and A_check are not accurately close to each other.")
    return U, S, Vh
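# Hedged usage sketch (the demo function is my own; linalg above is assumed to
# be scipy.linalg or numpy.linalg): contrast the shapes returned by simple_svd
# (full_matrices=True) and simple_svd_reduced (full_matrices=False) on a tall
# 5 x 3 matrix.
def _demo_svd_shapes():
    A = np.random.rand(5, 3)
    U, S, Vh = simple_svd(A)             # U: (5, 5), S: (5, 3), Vh: (3, 3)
    Ur, Sr, Vhr = simple_svd_reduced(A)  # Ur: (5, 3), Sr: (3, 3), Vhr: (3, 3)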
def filter_sparse(g1, n_c=5, max_edges=-1, last_component=False):
    '''
    Filter a sparse version of the network by PCA.

    g1: The input network graph.
    n_c: The number of principal components to compute.
    max_edges: The maximum number of edges to keep. -1 => keep all.
    last_component: Keep only the final principal component.
    '''
    import scipy.sparse.linalg as las
    import scipy.sparse.lil as ll
    import scipy.sparse as ssp
    adj = ssp.csr_matrix(nx.to_scipy_sparse_matrix(g1))  # networkx < 3 API
    nodes = list(g1.nodes())
    U, s, Vh = las.svds(adj, n_c)
    U[np.abs(U) < .001] = 0
    Vh[np.abs(Vh) < .001] = 0
    if last_component:
        s_last = s
        s_last[1:] *= 0
        filtered = ll.lil_matrix(U) * ll.lil_matrix(np.diag(s_last)) * ll.lil_matrix(Vh)
    else:
        filtered = ll.lil_matrix(U) * ll.lil_matrix(np.diag(s)) * ll.lil_matrix(Vh)
    if max_edges != -1:
        # zero out all but the max_edges largest-magnitude entries
        filtered.data[np.argsort(np.abs(filtered.data))[:-1 * max_edges]] = 0
    filtered.eliminate_zeros()
    g = nx.DiGraph()
    g.add_nodes_from(nodes)
    g.add_weighted_edges_from([(nodes[nz[0]], nodes[nz[1]], nz[2])
                               for nz in zip(*ssp.find(filtered))])
    return g
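# Hedged usage sketch (the demo function is my own; nx is assumed to be
# networkx imported at module level): build a small weighted digraph and keep
# only the 10 strongest PCA-filtered edges.
def _demo_filter_sparse():
    import networkx as nx
    import numpy as np
    rng = np.random.RandomState(0)
    g1 = nx.gnp_random_graph(20, 0.3, seed=0, directed=True)
    for u, v in g1.edges():
        g1[u][v]['weight'] = rng.rand()
    g = filter_sparse(g1, n_c=5, max_edges=10)
    return g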
def whiten_with_filled_zeros(data, k):
    judgement_columns = data.columns[2:]
    raw_data = data[judgement_columns]
    # missing judgements need to be interpolated;
    # here they are simply filled in with zeros
    matrix = raw_data.fillna(0).values
    # matrix = raw_data.values
    U, S, V = svd(matrix, k)
    # actual whitening: zero out all but the top-k singular values
    S[k:] = 0
    # multiply stuff back out
    W = np.dot(U, np.dot(np.diag(S), V))
    # we should renull the values?
    whitened_data = data.copy()
    for i, j in enumerate(judgement_columns):
        whitened_data[j] = W[:, i]
        # nulls = data[j].isnull()
        # whitened_data[j][nulls] = float("nan")
    return whitened_data
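# Hedged standalone illustration (all names below are my own; the svd wrapper
# used above is defined elsewhere in this module): the core whitening step --
# zero the trailing singular values and multiply the factors back out.
def _demo_rank_k_whiten(k=2):
    import numpy as np
    M = np.random.rand(6, 4)
    U, S, V = np.linalg.svd(M, full_matrices=False)
    S[k:] = 0  # keep only the top-k singular values
    return np.dot(U, np.dot(np.diag(S), V))  # rank-k reconstruction of M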
def CUR(mat, numr, numc, nrows, ncols):
    A_T = np.transpose(mat)
    A = sp.csr_matrix(mat)
    A_T = sp.csr_matrix(A_T)
    rows = np.zeros(nrows, dtype=float)
    r = []
    rowsprob = np.zeros(nrows, dtype=float)
    total = 0
    tempr = sp.coo_matrix(A)
    # calculating the probability of each row being selected
    for i, j, v in zip(tempr.row, tempr.col, tempr.data):
        rowsprob[i] = rowsprob[i] + v * v
        total = total + v * v
    rowsprob = rowsprob / total
    # calculating the cumulative probabilities
    cumrowsprob = np.zeros(nrows, dtype=float)
    cumrowsprob[0] = rowsprob[0]
    for i in range(1, rowsprob.size):
        cumrowsprob[i] = cumrowsprob[i - 1] + rowsprob[i]
    # generating random rows and building the r matrix
    for i in range(0, numr):
        rand = random.random()
        entry = np.searchsorted(cumrowsprob, rand)
        rows[entry] = rows[entry] + 1
    # handling duplicates by multiplying duplicate rows with the square root
    # of the number of duplications and removing the duplicates
    selectedrows = []
    rows = np.sqrt(rows)
    for i in range(0, nrows):
        if rows[i] > 0:
            r.append((A[i].toarray() / ((numr * rowsprob[i])**0.5)) * rows[i])
            selectedrows.append(i)
    cols = np.zeros(ncols, dtype=float)
    c = []
    colsprob = np.zeros(ncols, dtype=float)
    total = 0
    tempc = sp.coo_matrix(A_T)
    # calculating the probability of each column being selected
    for i, j, v in zip(tempc.row, tempc.col, tempc.data):
        colsprob[i] = colsprob[i] + v * v
        total = total + v * v
    colsprob = colsprob / total
    # calculating the cumulative probabilities
    cumcolsprob = np.zeros(ncols, dtype=float)
    cumcolsprob[0] = colsprob[0]
    for i in range(1, colsprob.size):
        cumcolsprob[i] = cumcolsprob[i - 1] + colsprob[i]
    # generating random columns and building the c matrix
    for i in range(0, numc):
        rand = random.random()
        entry = np.searchsorted(cumcolsprob, rand)
        cols[entry] = cols[entry] + 1
    # handling duplicates by multiplying duplicate columns with the square
    # root of the number of duplications and removing the duplicates
    selectedcols = []
    cols = np.sqrt(cols)
    for i in range(0, ncols):
        if cols[i] > 0:
            c.append((A_T[i].toarray() / ((numc * colsprob[i])**0.5)) * cols[i])
            selectedcols.append(i)
    c = np.vstack(c)
    r = np.vstack(r)
    # finding the intersection of c and r = w
    w = np.zeros(shape=(len(selectedrows), len(selectedcols)))
    for i in range(0, len(selectedrows)):
        for j in range(0, len(selectedcols)):
            w[i][j] = mat[selectedrows[i]][selectedcols[j]]
    c = sp.csr_matrix(c)
    c = c.transpose()
    r = sp.csr_matrix(r)
    # computing the SVD decomposition of the w matrix
    x, z, y_T = linalg.svd(w)
    z = linalg.diagsvd(z, x.shape[1], y_T.shape[0])
    y = np.transpose(y_T)
    # computing the u matrix
    zplus = linalg.pinv(np.matrix(z))
    zplussquare = zplus * zplus
    u = np.matmul(y, np.matmul(zplussquare, np.transpose(x)))
    # computing the reconstructed matrix and the error
    reconstructedmatrix = c * (u * r)
    errormatrix = sp.csr_matrix(A - reconstructedmatrix)
    reconstructionerror = norm(errormatrix)
    return (c, u, r, reconstructionerror)
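# Hedged usage sketch (the demo function is my own; CUR above assumes np, sp =
# scipy.sparse, random, linalg = scipy.linalg, and norm =
# scipy.sparse.linalg.norm are available at module level): decompose a small
# random matrix and report the reconstruction error.
def _demo_CUR():
    import numpy as np
    np.random.seed(0)
    mat = np.random.rand(8, 6)
    c, u, r, err = CUR(mat, numr=4, numc=4, nrows=8, ncols=6)
    print(err)  # Frobenius norm of A - C U R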
def calc(self):
    # calculate the SVD of self.A
    self.U, self.S, self.Vt = svd(self.A)
def dim_reduction(X, w, l, u=None, v_T=None):
    if u is None and v_T is None:  # truth-testing arrays directly is ambiguous
        u, s, v_T = svd(X)
    X_reduce = (u[:, :w].transpose().dot(X)).dot(v_T[:l, :].transpose())
    return X_reduce, u, v_T
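# Hedged usage sketch (the demo function is my own; svd above is assumed to
# return (u, s, v_T) like numpy.linalg.svd): project a matrix onto its leading
# w left and l right singular directions.
def _demo_dim_reduction():
    import numpy as np
    X = np.random.rand(10, 8)
    X_reduce, u, v_T = dim_reduction(X, w=4, l=3)
    print(X_reduce.shape)  # (4, 3): u[:, :4].T . X . v_T[:3, :].T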