def inDegreeSequence(self):
    """
    Return a vector of the (in)degree sequence for each vertex.

    Multiplies the native adjacency matrix by a column vector of ones;
    with pysparse, spmatrix.dot(A, j) computes A^T * j (column sums of A),
    which yields the number of incoming edges per vertex.

    :returns: a 1-D numpy integer array of length self.vList.getNumVertices().
    """
    A = self.nativeAdjacencyMatrix()
    # Column vector of ones, one entry per vertex.
    j = spmatrix.ll_mat(self.vList.getNumVertices(), 1)
    j[:, 0] = 1
    degrees = spmatrix.dot(A, j)
    degrees = PysparseMatrix(matrix=degrees)
    # Fix: numpy.int was a deprecated alias for the builtin int and was
    # removed in NumPy 1.24 (numpy.array(..., numpy.int) now raises an
    # AttributeError). The builtin int is the documented replacement and
    # produces the identical dtype.
    degrees = numpy.array(degrees.getNumpyArray().ravel(), int)
    return degrees
def association_matrix_to_similarity_matrix(self, metric="cosine", dataset="FREESOUND", save_sim=False, training_set=None, out_name_prefix="", is_general_recommender=False): if self.verbose: print "Loading association matrix and tag names, ids files..." try: M = spmatrix.ll_mat_from_mtx(RECOMMENDATION_TMP_DATA_DIR + dataset + "_ASSOCIATION_MATRIX.mtx") resource_ids = load(RECOMMENDATION_TMP_DATA_DIR + dataset + "_RESOURCE_IDS.npy") tag_names = load(RECOMMENDATION_TMP_DATA_DIR + dataset + "_TAG_NAMES.npy") except Exception: raise Exception( "Error loading association matrix and tag names, ids data") if metric not in ['cosine', 'binary', 'coocurrence', 'jaccard']: raise Exception("Wrong similarity metric specified") if self.verbose: print "Computing similarity matrix from a resource subset of the whole association matrix..." # Get index of resources to train (usable index for M) resource_id_positions = where( in1d(resource_ids, training_set, assume_unique=True))[0] # Matrix multiplication (only taking in account resources in training set and ALL tags) MM = spmatrix.dot(M[resource_id_positions, :], M[resource_id_positions, :]) # Get similarity matrix sim_matrix = spmatrix.ll_mat(MM.shape[0], MM.shape[0]) non_zero_index = MM.keys() for index in non_zero_index: if metric == 'cosine': sim_matrix[index[0], index[1]] = MM[index[0], index[1]] * ( 1 / (sqrt(MM[index[0], index[0]]) * sqrt(MM[index[1], index[1]]))) elif metric == 'coocurrence': sim_matrix[index[0], index[1]] = MM[index[0], index[1]] elif metric == 'binary': sim_matrix[index[0], index[1]] = MM[index[0], index[1]] / MM[index[0], index[1]] elif metric == 'jaccard': sim_matrix[index[0], index[1]] = MM[index[0], index[1]] * ( 1 / (MM[index[0], index[0]] + MM[index[1], index[1]] - MM[index[0], index[1]])) # Clean out similarity matrix (clean tags that are not used) tag_positions = [] for i in range(0, sim_matrix.shape[0]): if sim_matrix[i, i] != 0.0: tag_positions.append(i) # Transform sparse similarity matrix to 
npy format sim_matrix_npy = mtx2npy(sim_matrix[tag_positions, tag_positions]) tag_names_sim_matrix = tag_names[tag_positions] if save_sim: if not is_general_recommender: # Save sim path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_%s_SIMILARITY_MATRIX_" % out_name_prefix + metric + "_SUBSET.npy" if self.verbose: print "Saving to " + path + "..." save(path, sim_matrix_npy) # Save tag names path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_%s_SIMILARITY_MATRIX_" % out_name_prefix + metric + "_SUBSET_TAG_NAMES.npy" if self.verbose: print "Saving to " + path + "..." save(path, tag_names_sim_matrix) else: # Save sim path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_SIMILARITY_MATRIX_" + metric + ".npy" if self.verbose: print "Saving to " + path + "..." save(path, sim_matrix_npy) # Save tag names path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_SIMILARITY_MATRIX_" + metric + "_TAG_NAMES.npy" if self.verbose: print "Saving to " + path + "..." save(path, tag_names_sim_matrix) return { 'SIMILARITY_MATRIX': sim_matrix_npy, 'TAG_NAMES': tag_names_sim_matrix }
def association_matrix_to_similarity_matrix(self, metric="cosine", dataset="FREESOUND", save_sim=False, training_set=None, out_name_prefix="", is_general_recommender=False):
    """
    Build a tag-tag similarity matrix from the stored association matrix.

    NOTE(review): this definition appears byte-for-byte identical to another
    association_matrix_to_similarity_matrix earlier in this file; if both
    live in the same class/module scope, only the later one takes effect,
    so the duplicate should probably be removed — confirm before deleting.

    Loads the dataset's association matrix plus the resource-id and
    tag-name arrays, restricts rows to training_set, computes tag-tag
    similarities with the chosen metric, drops tags with a zero diagonal,
    optionally saves the result, and returns the dense similarity matrix
    together with the surviving tag names.

    :raises Exception: when the input files cannot be loaded or the metric is unknown
    """
    if self.verbose:
        print "Loading association matrix and tag names, ids files..."
    try:
        M = spmatrix.ll_mat_from_mtx(RECOMMENDATION_TMP_DATA_DIR + dataset + "_ASSOCIATION_MATRIX.mtx")
        resource_ids = load(RECOMMENDATION_TMP_DATA_DIR + dataset + "_RESOURCE_IDS.npy")
        tag_names = load(RECOMMENDATION_TMP_DATA_DIR + dataset + "_TAG_NAMES.npy")
    except Exception:
        raise Exception("Error loading association matrix and tag names, ids data")
    if metric not in ['cosine', 'binary', 'coocurrence', 'jaccard']:
        raise Exception("Wrong similarity metric specified")
    if self.verbose:
        print "Computing similarity matrix from a resource subset of the whole association matrix..."
    # Get index of resources to train (usable index for M)
    resource_id_positions = where(in1d(resource_ids, training_set, assume_unique=True))[0]
    # Matrix multiplication (only taking in account resources in training set and ALL tags)
    # NOTE(review): pysparse's spmatrix.dot(A, B) computes A^T * B, so MM is
    # presumably a tag-by-tag co-occurrence matrix over the training rows.
    MM = spmatrix.dot(M[resource_id_positions, :], M[resource_id_positions, :])
    # Get similarity matrix
    sim_matrix = spmatrix.ll_mat(MM.shape[0],MM.shape[0])
    non_zero_index = MM.keys()
    for index in non_zero_index:
        if metric == 'cosine':
            sim_matrix[index[0], index[1]] = MM[index[0], index[1]] * (1 / (sqrt(MM[index[0], index[0]]) * sqrt(MM[index[1], index[1]])))
        elif metric == 'coocurrence':
            sim_matrix[index[0], index[1]] = MM[index[0], index[1]]
        elif metric == 'binary':
            # Always 1.0 for any non-zero co-occurrence entry.
            sim_matrix[index[0], index[1]] = MM[index[0], index[1]]/MM[index[0], index[1]]
        elif metric == 'jaccard':
            sim_matrix[index[0], index[1]] = MM[index[0], index[1]] * (1 / (MM[index[0], index[0]] + MM[index[1], index[1]] - MM[index[0], index[1]]))
    # Clean out similarity matrix (clean tags that are not used)
    tag_positions = []
    for i in range(0, sim_matrix.shape[0]):
        if sim_matrix[i, i] != 0.0:
            tag_positions.append(i)
    # Transform sparse similarity matrix to npy format
    sim_matrix_npy = mtx2npy(sim_matrix[tag_positions,tag_positions])
    tag_names_sim_matrix = tag_names[tag_positions]
    if save_sim:
        if not is_general_recommender:
            # Save sim
            path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_%s_SIMILARITY_MATRIX_" % out_name_prefix + metric + "_SUBSET.npy"
            if self.verbose:
                print "Saving to " + path + "..."
            save(path, sim_matrix_npy)
            # Save tag names
            path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_%s_SIMILARITY_MATRIX_" % out_name_prefix + metric + "_SUBSET_TAG_NAMES.npy"
            if self.verbose:
                print "Saving to " + path + "..."
            save(path, tag_names_sim_matrix)
        else:
            # Save sim
            path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_SIMILARITY_MATRIX_" + metric + ".npy"
            if self.verbose:
                print "Saving to " + path + "..."
            save(path, sim_matrix_npy)
            # Save tag names
            path = RECOMMENDATION_TMP_DATA_DIR + dataset + "_SIMILARITY_MATRIX_" + metric + "_TAG_NAMES.npy"
            if self.verbose:
                print "Saving to " + path + "..."
            save(path, tag_names_sim_matrix)
    return {'SIMILARITY_MATRIX': sim_matrix_npy, 'TAG_NAMES': tag_names_sim_matrix}
# NOTE(review): this chunk starts mid-script — the leading
# traceback.print_exc() is the tail of an except block whose try/except
# header lies before this excerpt; indentation below is reconstructed
# from syntax, so confirm against the full file.
traceback.print_exc()

# Slice-assignment between two sparse matrices; errors are printed but
# deliberately swallowed (best-effort demo/test script style).
try:
    T[5:9, 4:10] = A[5:9, 4:10]
except:
    traceback.print_exc()

print 'Matrix multiplications'
printMatrix(spmatrix.matrixmultiply(I, A))
printMatrix(spmatrix.matrixmultiply(Is, A))
printMatrix(spmatrix.matrixmultiply(O, O))
printMatrix(spmatrix.matrixmultiply(Os, O))

print 'Dot product'
# pysparse's spmatrix.dot(A, B) computes A^T * B (distinct from matrixmultiply).
printMatrix(spmatrix.dot(I, A))

print 'Matrix export'
# Second argument is presumably the output precision — confirm against
# the pysparse export_mtx documentation.
A[:4, :4].export_mtx('A.mtx', 3)
As[:4, :4].export_mtx('As.mtx', 3)
print open('A.mtx').read()
print open('As.mtx').read()

print 'Matrix import'
# Round-trip: re-read the files just written and display them.
printMatrix(spmatrix.ll_mat_from_mtx('A.mtx'))
printMatrix(spmatrix.ll_mat_from_mtx('As.mtx'))

print 'Conversion to CSR'
print A[:4, :4]
print A[:4, :4].to_csr()
# NOTE(review): this chunk appears to duplicate the preceding demo chunk
# in this file (only bracket spacing differs) — confirm whether one copy
# can be removed. It also starts mid-script: the leading
# traceback.print_exc() is the tail of an except block whose try/except
# header lies before this excerpt; indentation is reconstructed.
traceback.print_exc()

# Slice-assignment between two sparse matrices; errors are printed but
# deliberately swallowed (best-effort demo/test script style).
try:
    T[5:9, 4:10] = A[5:9, 4:10]
except:
    traceback.print_exc()

print 'Matrix multiplications'
printMatrix(spmatrix.matrixmultiply(I, A))
printMatrix(spmatrix.matrixmultiply(Is, A))
printMatrix(spmatrix.matrixmultiply(O, O))
printMatrix(spmatrix.matrixmultiply(Os, O))

print 'Dot product'
# pysparse's spmatrix.dot(A, B) computes A^T * B (distinct from matrixmultiply).
printMatrix(spmatrix.dot(I, A))

print 'Matrix export'
# Second argument is presumably the output precision — confirm against
# the pysparse export_mtx documentation.
A[:4,:4].export_mtx('A.mtx', 3)
As[:4,:4].export_mtx('As.mtx', 3)
print open('A.mtx').read()
print open('As.mtx').read()

print 'Matrix import'
# Round-trip: re-read the files just written and display them.
printMatrix(spmatrix.ll_mat_from_mtx('A.mtx'))
printMatrix(spmatrix.ll_mat_from_mtx('As.mtx'))

print 'Conversion to CSR'
print A[:4,:4]
print A[:4,:4].to_csr()