def blend_svd(mats, factors=None, k=50):
    '''
    SVD-only variant of blend.

    The row labels and the column labels of every matrix in `mats` are merged
    into two shared label sets. The per-matrix index mappings into those sets
    are handed to `svd_sum` together with the blend factors, so the
    decomposition is computed by `svd_sum` directly from the inputs.

    :param mats: the labeled matrices to blend
    :param factors: one weight per matrix; when None, `blend_factor(mat)` is
        computed for each matrix
    :param k: forwarded unchanged to `svd_sum` (presumably the number of
        singular values to keep -- confirm against `svd_sum`)

    Like matrix.svd, the result is a triple of:
    - U as a dense matrix labeled with the merged row labels
    - S, a dense vector representing the diagonal of Sigma
    - V as a dense matrix labeled with the merged column labels
    '''
    weights = factors
    if weights is None:
        weights = [blend_factor(m) for m in mats]

    # Align matrices.
    # FIXME: only works for fully labeled matrices right now.
    # TODO: could micro-optimize by using the first ordered set's indices.
    from csc_utils.ordered_set import OrderedSet

    # OrderedSet.add is used for the value it returns: the position of the
    # label inside the merged set.
    row_labels = OrderedSet()
    row_mappings = [
        np.array([row_labels.add(label) for label in m.row_labels],
                 dtype=np.uint64)
        for m in mats
    ]

    col_labels = OrderedSet()
    col_mappings = [
        np.array([col_labels.add(label) for label in m.col_labels],
                 dtype=np.uint64)
        for m in mats
    ]

    # Elide zero row tests, etc.
    from divisi2._svdlib import svd_sum
    from divisi2 import DenseMatrix

    u_t, sigma, v_t = svd_sum(mats, k, weights, row_mappings, col_mappings)

    # svd_sum's first and third results are transposed before being wrapped
    # as labeled matrices.
    return (
        DenseMatrix(u_t.T, row_labels, None),
        sigma,
        DenseMatrix(v_t.T, col_labels, None),
    )
def load_model(self, filename):
    """
    Loads SVD transformation (U, Sigma and V matrices) from a ZIP file

    The archive is expected to contain a tab-separated ``README`` member with
    the options (only ``k`` is used here), the members ``.U``, ``.S`` and
    ``.V`` and, optionally, ``.shifts.row``, ``.shifts.col`` and
    ``.shifts.total``. When ``.U`` or ``.V`` cannot be read with ``loads``,
    the member is extracted to ``TMPDIR``, read with ``fromfile`` and cut
    into rows of ``k`` values labeled with the ids listed in ``.row_ids`` /
    ``.col_ids``.

    After loading, the matrix and the similarity matrix are rebuilt through
    ``self._reconstruct_matrix`` and ``self._reconstruct_similarity``.

    NOTE(review): ``loads`` looks like an unpickler. If so, only open model
    files from trusted sources -- confirm against the file's imports.

    :param filename: path to the SVD matrix transformation (a ZIP file); if it
        cannot be opened as given, ``filename + '.zip'`` is tried
    :type filename: string
    """

    def _native(raw):
        # ZipFile.read() returns bytes. On Python 2 that is already ``str``
        # and is returned untouched; on Python 3 it has to be decoded before
        # it can be split on '\t' or '\n'.
        if isinstance(raw, str):
            return raw
        return raw.decode('utf-8')

    def _read_labels(archive, member):
        # One id per line; integer ids are preferred, otherwise the stripped
        # strings are kept as they are.
        entries = [
            entry.strip()
            for entry in _native(archive.read(member)).split('\n')
            if entry
        ]
        try:
            return [int(entry) for entry in entries]
        except ValueError:
            return entries

    def _read_factor(archive, member, labels_member, k):
        # Preferred format: a serialized matrix readable with ``loads``.
        try:
            return loads(archive.read(member))
        except Exception:
            # Fallback format: a flat array dump, rebuilt as rows of k values.
            flat = fromfile(archive.extract(member, TMPDIR))
            # Floor division keeps the Python 2 behaviour of the original
            # ``len(matrix) / k`` loop bound: a trailing partial row is
            # dropped.
            vectors = [
                DenseVector(flat[k * i:k * (i + 1)])
                for i in range(len(flat) // k)
            ]
            labels = _read_labels(archive, labels_member)
            return DenseMatrix(vectors, OrderedSet(labels), None)

    try:
        archive = zipfile.ZipFile(filename, allowZip64=True)
    except Exception:
        # Callers may pass the path without its '.zip' extension.
        archive = zipfile.ZipFile(filename + '.zip', allowZip64=True)

    try:
        # Options file: "<name>\t<value>" per line.
        options = {}
        for line in _native(archive.read('README')).split('\n'):
            fields = line.strip().split('\t')
            if len(fields) < 2:
                # Blank or malformed line: nothing to record.
                continue
            options[fields[0]] = fields[1]

        try:
            k = int(options['k'])
        except (KeyError, ValueError):
            k = 100  # TODO: nasty!!!

        # Load U, S, and V
        self._U = _read_factor(archive, '.U', '.row_ids', k)
        self._V = _read_factor(archive, '.V', '.col_ids', k)
        self._S = loads(archive.read('.S'))

        # Shifts for Mean Centerer Matrix
        self._shifts = None
        if '.shifts.row' in archive.namelist():
            self._shifts = [
                loads(archive.read('.shifts.row')),
                loads(archive.read('.shifts.col')),
                loads(archive.read('.shifts.total')),
            ]
    finally:
        # Release the file handle even when a member is missing or corrupt.
        archive.close()

    self._reconstruct_matrix(shifts=self._shifts, force=True)
    self._reconstruct_similarity(force=True)