def inDegreeSequence(self):
        """
        Return a vector of the (in)degree sequence for each vertex.

        The native adjacency matrix is combined with an all-ones column
        vector (one row per vertex, as reported by ``self.vList``) through
        ``spmatrix.dot``. The product is wrapped in a ``PysparseMatrix``,
        converted to a numpy array, flattened with ``ravel()`` and cast to
        integers.

        NOTE(review): pysparse documents ``spmatrix.dot(A, B)`` as computing
        transpose(A) * B, which would make each entry a column sum of the
        adjacency matrix -- confirm against the installed pysparse version.

        :returns: flattened integer ``numpy`` array of degrees.
        """
        adjacency = self.nativeAdjacencyMatrix()
        ones = spmatrix.ll_mat(self.vList.getNumVertices(), 1)
        ones[:, 0] = 1

        degrees = PysparseMatrix(matrix=spmatrix.dot(adjacency, ones))
        # Use the builtin ``int`` as dtype: ``numpy.int`` was merely an alias
        # for it and has been removed from recent NumPy releases.
        return numpy.array(degrees.getNumpyArray().ravel(), int)
    def inDegreeSequence(self):
        """
        Return a vector of the (in)degree sequence for each vertex.

        The native adjacency matrix is combined with an all-ones column
        vector (one row per vertex, as reported by ``self.vList``) through
        ``spmatrix.dot``. The product is wrapped in a ``PysparseMatrix``,
        converted to a numpy array, flattened with ``ravel()`` and cast to
        integers.

        NOTE(review): pysparse documents ``spmatrix.dot(A, B)`` as computing
        transpose(A) * B, which would make each entry a column sum of the
        adjacency matrix -- confirm against the installed pysparse version.

        :returns: flattened integer ``numpy`` array of degrees.
        """
        adjacency = self.nativeAdjacencyMatrix()
        ones = spmatrix.ll_mat(self.vList.getNumVertices(), 1)
        ones[:, 0] = 1

        degrees = PysparseMatrix(matrix=spmatrix.dot(adjacency, ones))
        # Use the builtin ``int`` as dtype: ``numpy.int`` was merely an alias
        # for it and has been removed from recent NumPy releases.
        return numpy.array(degrees.getNumpyArray().ravel(), int)
示例#3
0
    def association_matrix_to_similarity_matrix(self,
                                                metric="cosine",
                                                dataset="FREESOUND",
                                                save_sim=False,
                                                training_set=None,
                                                out_name_prefix="",
                                                is_general_recommender=False):
        """
        Build a similarity matrix from a stored association matrix.

        Loads ``<dataset>_ASSOCIATION_MATRIX.mtx``,
        ``<dataset>_RESOURCE_IDS.npy`` and ``<dataset>_TAG_NAMES.npy`` from
        ``RECOMMENDATION_TMP_DATA_DIR``, keeps only the association-matrix
        rows whose resource id appears in ``training_set``, multiplies that
        submatrix with itself through ``spmatrix.dot`` and scores every
        stored entry of the product with the requested metric. Positions
        whose diagonal similarity is zero are dropped before the result is
        converted with ``mtx2npy``.

        :param metric: one of 'cosine', 'binary', 'coocurrence', 'jaccard'.
        :param dataset: prefix of the data files read (and written).
        :param save_sim: when true, the matrix and its tag names are written
            with ``save`` next to the input files.
        :param training_set: resource ids selecting the rows to use. It is
            handed to ``in1d`` with ``assume_unique=True``.
            NOTE(review): the default ``None`` is not special-cased here;
            presumably callers always pass a collection of ids -- confirm.
        :param out_name_prefix: inserted in the output file names; only used
            when ``is_general_recommender`` is false.
        :param is_general_recommender: selects the output file naming scheme
            (plain names instead of the prefixed ``_SUBSET`` names).
        :returns: dict with keys 'SIMILARITY_MATRIX' (converted matrix) and
            'TAG_NAMES' (the tag names kept in it).
        :raises Exception: if ``metric`` is unknown or the input files cannot
            be loaded.
        """
        # Validate first: a bad metric should fail immediately rather than
        # after (or hidden behind) the expensive file loading below.
        if metric not in ['cosine', 'binary', 'coocurrence', 'jaccard']:
            raise Exception("Wrong similarity metric specified")

        if self.verbose:
            print("Loading association matrix and tag names, ids files...")
        try:
            base_path = RECOMMENDATION_TMP_DATA_DIR + dataset
            M = spmatrix.ll_mat_from_mtx(base_path + "_ASSOCIATION_MATRIX.mtx")
            resource_ids = load(base_path + "_RESOURCE_IDS.npy")
            tag_names = load(base_path + "_TAG_NAMES.npy")
        except Exception:
            raise Exception(
                "Error loading association matrix and tag names, ids data")

        if self.verbose:
            print("Computing similarity matrix from a resource subset of the whole association matrix...")
        # Get index of resources to train (usable index for M)
        resource_id_positions = where(
            in1d(resource_ids, training_set, assume_unique=True))[0]

        # Matrix multiplication (only taking in account resources in training
        # set and ALL tags). The row subset is extracted once and reused for
        # both operands.
        training_rows = M[resource_id_positions, :]
        MM = spmatrix.dot(training_rows, training_rows)

        # Get similarity matrix: score each stored entry of the product.
        sim_matrix = spmatrix.ll_mat(MM.shape[0], MM.shape[0])
        for index in MM.keys():
            row, col = index[0], index[1]
            cooccurrence = MM[row, col]
            if metric == 'cosine':
                sim_matrix[row, col] = cooccurrence * (
                    1 / (sqrt(MM[row, row]) * sqrt(MM[col, col])))
            elif metric == 'coocurrence':
                sim_matrix[row, col] = cooccurrence
            elif metric == 'binary':
                # A value divided by itself: every scored entry becomes 1.
                sim_matrix[row, col] = cooccurrence / cooccurrence
            elif metric == 'jaccard':
                sim_matrix[row, col] = cooccurrence * (
                    1 / (MM[row, row] + MM[col, col] - cooccurrence))

        # Clean out similarity matrix (clean tags that are not used)
        tag_positions = [i for i in range(sim_matrix.shape[0])
                         if sim_matrix[i, i] != 0.0]

        # Transform sparse similarity matrix to npy format
        sim_matrix_npy = mtx2npy(sim_matrix[tag_positions, tag_positions])
        tag_names_sim_matrix = tag_names[tag_positions]

        if save_sim:
            if not is_general_recommender:
                stem = RECOMMENDATION_TMP_DATA_DIR + dataset + "_%s_SIMILARITY_MATRIX_" % out_name_prefix + metric
                outputs = [(stem + "_SUBSET.npy", sim_matrix_npy),
                           (stem + "_SUBSET_TAG_NAMES.npy", tag_names_sim_matrix)]
            else:
                stem = RECOMMENDATION_TMP_DATA_DIR + dataset + "_SIMILARITY_MATRIX_" + metric
                outputs = [(stem + ".npy", sim_matrix_npy),
                           (stem + "_TAG_NAMES.npy", tag_names_sim_matrix)]

            # Matrix first, then tag names.
            for path, payload in outputs:
                if self.verbose:
                    print("Saving to " + path + "...")
                save(path, payload)

        return {
            'SIMILARITY_MATRIX': sim_matrix_npy,
            'TAG_NAMES': tag_names_sim_matrix
        }
示例#4
0
文件: __init__.py 项目: MTG/freesound
    def association_matrix_to_similarity_matrix(self,
                                                metric="cosine",
                                                dataset="FREESOUND",
                                                save_sim=False,
                                                training_set=None,
                                                out_name_prefix="",
                                                is_general_recommender=False):
        """
        Build a similarity matrix from a stored association matrix.

        Loads ``<dataset>_ASSOCIATION_MATRIX.mtx``,
        ``<dataset>_RESOURCE_IDS.npy`` and ``<dataset>_TAG_NAMES.npy`` from
        ``RECOMMENDATION_TMP_DATA_DIR``, keeps only the association-matrix
        rows whose resource id appears in ``training_set``, multiplies that
        submatrix with itself through ``spmatrix.dot`` and scores every
        stored entry of the product with the requested metric. Positions
        whose diagonal similarity is zero are dropped before the result is
        converted with ``mtx2npy``.

        :param metric: one of 'cosine', 'binary', 'coocurrence', 'jaccard'.
        :param dataset: prefix of the data files read (and written).
        :param save_sim: when true, the matrix and its tag names are written
            with ``save`` next to the input files.
        :param training_set: resource ids selecting the rows to use. It is
            handed to ``in1d`` with ``assume_unique=True``.
            NOTE(review): the default ``None`` is not special-cased here;
            presumably callers always pass a collection of ids -- confirm.
        :param out_name_prefix: inserted in the output file names; only used
            when ``is_general_recommender`` is false.
        :param is_general_recommender: selects the output file naming scheme
            (plain names instead of the prefixed ``_SUBSET`` names).
        :returns: dict with keys 'SIMILARITY_MATRIX' (converted matrix) and
            'TAG_NAMES' (the tag names kept in it).
        :raises Exception: if ``metric`` is unknown or the input files cannot
            be loaded.
        """
        # Validate first: a bad metric should fail immediately rather than
        # after (or hidden behind) the expensive file loading below.
        if metric not in ['cosine', 'binary', 'coocurrence', 'jaccard']:
            raise Exception("Wrong similarity metric specified")

        if self.verbose:
            print("Loading association matrix and tag names, ids files...")
        try:
            base_path = RECOMMENDATION_TMP_DATA_DIR + dataset
            M = spmatrix.ll_mat_from_mtx(base_path + "_ASSOCIATION_MATRIX.mtx")
            resource_ids = load(base_path + "_RESOURCE_IDS.npy")
            tag_names = load(base_path + "_TAG_NAMES.npy")
        except Exception:
            raise Exception("Error loading association matrix and tag names, ids data")

        if self.verbose:
            print("Computing similarity matrix from a resource subset of the whole association matrix...")
        # Get index of resources to train (usable index for M)
        resource_id_positions = where(in1d(resource_ids, training_set, assume_unique=True))[0]

        # Matrix multiplication (only taking in account resources in training
        # set and ALL tags). The row subset is extracted once and reused for
        # both operands.
        training_rows = M[resource_id_positions, :]
        MM = spmatrix.dot(training_rows, training_rows)

        # Get similarity matrix: score each stored entry of the product.
        sim_matrix = spmatrix.ll_mat(MM.shape[0], MM.shape[0])
        for index in MM.keys():
            row, col = index[0], index[1]
            cooccurrence = MM[row, col]
            if metric == 'cosine':
                sim_matrix[row, col] = cooccurrence * (1 / (sqrt(MM[row, row]) * sqrt(MM[col, col])))
            elif metric == 'coocurrence':
                sim_matrix[row, col] = cooccurrence
            elif metric == 'binary':
                # A value divided by itself: every scored entry becomes 1.
                sim_matrix[row, col] = cooccurrence / cooccurrence
            elif metric == 'jaccard':
                sim_matrix[row, col] = cooccurrence * (1 / (MM[row, row] + MM[col, col] - cooccurrence))

        # Clean out similarity matrix (clean tags that are not used)
        tag_positions = [i for i in range(sim_matrix.shape[0]) if sim_matrix[i, i] != 0.0]

        # Transform sparse similarity matrix to npy format
        sim_matrix_npy = mtx2npy(sim_matrix[tag_positions, tag_positions])
        tag_names_sim_matrix = tag_names[tag_positions]

        if save_sim:
            if not is_general_recommender:
                stem = RECOMMENDATION_TMP_DATA_DIR + dataset + "_%s_SIMILARITY_MATRIX_" % out_name_prefix + metric
                outputs = [(stem + "_SUBSET.npy", sim_matrix_npy),
                           (stem + "_SUBSET_TAG_NAMES.npy", tag_names_sim_matrix)]
            else:
                stem = RECOMMENDATION_TMP_DATA_DIR + dataset + "_SIMILARITY_MATRIX_" + metric
                outputs = [(stem + ".npy", sim_matrix_npy),
                           (stem + "_TAG_NAMES.npy", tag_names_sim_matrix)]

            # Matrix first, then tag names.
            for path, payload in outputs:
                if self.verbose:
                    print("Saving to " + path + "...")
                save(path, payload)

        return {'SIMILARITY_MATRIX': sim_matrix_npy, 'TAG_NAMES': tag_names_sim_matrix}
示例#5
0
    traceback.print_exc()
# Demo of pysparse ``spmatrix`` operations. The matrices (T, A, As, I, Is,
# O, Os) and ``printMatrix`` are defined earlier in the script.

# Attempt a block slice assignment; on failure the traceback is printed and
# the demo carries on.
try:
    T[5:9, 4:10] = A[5:9, 4:10]
except Exception:
    traceback.print_exc()

print('Matrix multiplications')

printMatrix(spmatrix.matrixmultiply(I, A))
printMatrix(spmatrix.matrixmultiply(Is, A))

printMatrix(spmatrix.matrixmultiply(O, O))
printMatrix(spmatrix.matrixmultiply(Os, O))

print('Dot product')
printMatrix(spmatrix.dot(I, A))

print('Matrix export')
# NOTE(review): the second argument (3) is presumably the output precision --
# confirm against the pysparse ``export_mtx`` documentation.
A[:4, :4].export_mtx('A.mtx', 3)
As[:4, :4].export_mtx('As.mtx', 3)

# Echo both exported files; ``with`` closes each handle right after reading.
for mtx_path in ('A.mtx', 'As.mtx'):
    with open(mtx_path) as mtx_file:
        print(mtx_file.read())

print('Matrix import')
printMatrix(spmatrix.ll_mat_from_mtx('A.mtx'))
printMatrix(spmatrix.ll_mat_from_mtx('As.mtx'))

print('Conversion to CSR')
print(A[:4, :4])
print(A[:4, :4].to_csr())
示例#6
0
    traceback.print_exc()
# Demo of pysparse ``spmatrix`` operations. The matrices (T, A, As, I, Is,
# O, Os) and ``printMatrix`` are defined earlier in the script.

# Attempt a block slice assignment; on failure the traceback is printed and
# the demo carries on.
try:
    T[5:9, 4:10] = A[5:9, 4:10]
except Exception:
    traceback.print_exc()

print('Matrix multiplications')

printMatrix(spmatrix.matrixmultiply(I, A))
printMatrix(spmatrix.matrixmultiply(Is, A))

printMatrix(spmatrix.matrixmultiply(O, O))
printMatrix(spmatrix.matrixmultiply(Os, O))

print('Dot product')
printMatrix(spmatrix.dot(I, A))

print('Matrix export')
# NOTE(review): the second argument (3) is presumably the output precision --
# confirm against the pysparse ``export_mtx`` documentation.
A[:4,:4].export_mtx('A.mtx', 3)
As[:4,:4].export_mtx('As.mtx', 3)

# Echo both exported files; ``with`` closes each handle right after reading.
for mtx_path in ('A.mtx', 'As.mtx'):
    with open(mtx_path) as mtx_file:
        print(mtx_file.read())

print('Matrix import')
printMatrix(spmatrix.ll_mat_from_mtx('A.mtx'))
printMatrix(spmatrix.ll_mat_from_mtx('As.mtx'))

print('Conversion to CSR')
print(A[:4,:4])
print(A[:4,:4].to_csr())