示例#1
0
 def __generateSet(self):
     """Index ``self.relation`` and build the social sparse matrix.

     Every entry of ``self.relation`` is unpacked as
     ``(userId1, userId2, weight)``. For each entry the method

     * stores ``weight`` under ``self.followees[userId1][userId2]`` and
       ``self.followers[userId2][userId1]``;
     * gives each user id not yet in ``self.user`` the next free integer
       index (the current size of ``self.user``);
     * appends ``[index of userId1, index of userId2, weight]`` to the
       triple list.

     Returns:
         The object produced by ``new_sparseMatrix.SparseMatrix(triple)``.
     """
     triple = []
     for userId1, userId2, weight in self.relation:
         # add relations to dict
         # Membership is tested on the dict itself; ``key in d.keys()``
         # builds a temporary list on Python 2 for every lookup.
         if userId1 not in self.followees:
             self.followees[userId1] = {}
         self.followees[userId1][userId2] = weight
         if userId2 not in self.followers:
             self.followers[userId2] = {}
         self.followers[userId2][userId1] = weight
         # order the user
         if userId1 not in self.user:
             self.user[userId1] = len(self.user)
         if userId2 not in self.user:
             self.user[userId2] = len(self.user)
         triple.append([self.user[userId1], self.user[userId2], weight])
     return new_sparseMatrix.SparseMatrix(triple)
示例#2
0
文件: data.py 项目: wsdflink/RecQ
    def loadRatings(self, file, bTest=False):
        """Read a rating file and index its users, items and ratings.

        Each line is split on a space, a comma or a tab. The positions of
        the user, item and rating fields come from the first three tokens
        of ``self.ratingConfig['-columns']``. The first line of the file is
        dropped when ``self.ratingConfig.contains('-header')`` is true.

        Side effects (applied for both training and test files):
            * ``self.rScale[0]`` is raised to the largest rating seen and
              ``self.rScale[1]`` is lowered to the smallest rating seen;
            * unseen user / item ids receive the next free integer index
              in ``self.user`` / ``self.item``.

        Args:
            file: Path of the rating file to read.
            bTest: When false, return a sparse matrix; when true, return
                the two rating dictionaries.

        Returns:
            ``new_sparseMatrix.SparseMatrix(triple, (len(self.user),
            len(self.item)))`` when ``bTest`` is false, where ``triple``
            holds ``[user index, item index, rating]`` rows. Otherwise the
            tuple ``(u_i_r, i_u_r)``: ``u_i_r`` maps a user id to a list of
            ``[itemId, rating]`` pairs and ``i_u_r`` maps an item id to a
            list of ``[userId, rating]`` pairs.
        """
        with open(file) as f:
            ratings = f.readlines()
        # ignore the headline
        if self.ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns (converted once instead of once per line)
        order = self.ratingConfig['-columns'].strip().split()
        columns = [int(c) for c in order[:3]]
        # split data
        u_i_r = {}
        i_u_r = {}
        triple = []
        for line in ratings:
            items = split(' |,|\t', line.strip())
            userId = items[columns[0]]
            itemId = items[columns[1]]
            rating = float(items[columns[2]])
            # rScale[0] tracks the maximum, rScale[1] the minimum
            if rating > self.rScale[0]:
                self.rScale[0] = rating
            if rating < self.rScale[1]:
                self.rScale[1] = rating
            # order the user
            if userId not in self.user:
                self.user[userId] = len(self.user)
            # order the item
            if itemId not in self.item:
                self.item[itemId] = len(self.item)
            if userId not in u_i_r:
                u_i_r[userId] = []
            u_i_r[userId].append([itemId, rating])
            if itemId not in i_u_r:
                i_u_r[itemId] = []
            i_u_r[itemId].append([userId, rating])
            triple.append([self.user[userId], self.item[itemId], rating])

        if not bTest:
            # construct the sparse matrix
            return new_sparseMatrix.SparseMatrix(
                triple, (len(self.user), len(self.item)))
        else:
            # return testSet
            return u_i_r, i_u_r
示例#3
0
    def __generateSet(self):
        """Index the training and test data held on the instance.

        Training pass:
            * ``self.rScale`` becomes the ascending list of distinct rating
              values found in ``self.trainingData``;
            * every training rating is passed through ``normalize`` with
              the largest and smallest scale value and written back to
              ``self.trainingData[i][2]``;
            * unseen users / items get the next free index in ``self.user``
              / ``self.item`` and the reverse entry in ``self.id2user`` /
              ``self.id2item``;
            * ``self.trainingMatrix`` is built from the
              ``[user index, item index, normalized rating]`` triples.

        Test pass:
            * ``self.all_User`` / ``self.all_Item`` are seeded with the
              training indices and extended with ids that only occur in
              ``self.testData``;
            * ``self.testSet_u[user][item]`` and
              ``self.testSet_i[item][user]`` receive the test rating
              unchanged.
        """
        triple = []
        # find the maximum rating and minimum value
        scale = set()
        for _userName, _itemName, rating in self.trainingData:
            scale.add(float(rating))
        self.rScale = sorted(scale)

        for i, entry in enumerate(self.trainingData):
            userName, itemName, rating = entry
            # makes the rating within the range [0, 1].
            rating = normalize(float(rating), self.rScale[-1], self.rScale[0])
            self.trainingData[i][2] = rating
            # order the user
            if userName not in self.user:
                self.user[userName] = len(self.user)
                self.id2user[self.user[userName]] = userName
            # order the item
            if itemName not in self.item:
                self.item[itemName] = len(self.item)
                self.id2item[self.item[itemName]] = itemName
            triple.append([self.user[userName], self.item[itemName], rating])
        self.trainingMatrix = new_sparseMatrix.SparseMatrix(triple)

        self.all_User.update(self.user)
        self.all_Item.update(self.item)
        for entry in self.testData:
            userId, itemId, rating = entry
            # order the user
            # Check all_User (which already contains every training user):
            # checking self.user instead would hand a test-only user a new
            # index each time it re-appears in the test data.
            if userId not in self.all_User:
                self.all_User[userId] = len(self.all_User)
            # order the item (same reasoning as for users)
            if itemId not in self.all_Item:
                self.all_Item[itemId] = len(self.all_Item)

            if userId not in self.testSet_u:
                self.testSet_u[userId] = {}
            self.testSet_u[userId][itemId] = rating
            if itemId not in self.testSet_i:
                self.testSet_i[itemId] = {}
            self.testSet_i[itemId][userId] = rating
示例#4
0
文件: social.py 项目: linksboy/RecQ
 def __generateSet(self):
     """Build the follower/followee maps and the social sparse matrix.

     Each entry of ``self.relation`` is unpacked into a source user, a
     target user and a weight. The weight is recorded in
     ``self.followees[source][target]`` and
     ``self.followers[target][source]``; users not yet present in
     ``self.user`` are numbered with the current size of ``self.user``.

     Returns:
         ``new_sparseMatrix.SparseMatrix`` built from the list of
         ``[source index, target index, weight]`` rows.
     """
     rows = []
     for source, target, weight in self.relation:
         # add relations to dict
         if source not in self.followees:
             self.followees[source] = {}
         self.followees[source][target] = weight

         if target not in self.followers:
             self.followers[target] = {}
         self.followers[target][source] = weight

         # order the user
         for uid in (source, target):
             if uid not in self.user:
                 self.user[uid] = len(self.user)
         rows.append([self.user[source], self.user[target], weight])
     return new_sparseMatrix.SparseMatrix(rows)
示例#5
0
    def __generateDireSet(self):
        """Index ``self.inform2`` and build its sparse matrix.

        Each entry of ``self.inform2`` is unpacked as
        ``(movieId, direId, weight)``. ``direId`` is appended to the list
        ``self.md[movieId]`` and ``movieId`` to the list
        ``self.dm[direId]``. Ids not yet present in ``self.item`` /
        ``self.dire`` are numbered with the current size of that dict.

        Returns:
            ``new_sparseMatrix.SparseMatrix`` built from the list of
            ``[item index, dire index, weight]`` rows.
        """
        rows = []
        for movie, director, weight in self.inform2:
            # add relations to dict
            self.md.setdefault(movie, []).append(director)
            self.dm.setdefault(director, []).append(movie)

            # order the movie
            if movie not in self.item:
                self.item[movie] = len(self.item)
            if director not in self.dire:
                self.dire[director] = len(self.dire)
            rows.append([self.item[movie], self.dire[director], weight])
        return new_sparseMatrix.SparseMatrix(rows)
示例#6
0
    def __generateActSet(self):
        """Index ``self.inform1`` and build its sparse matrix.

        Each entry of ``self.inform1`` is unpacked as
        ``(movieId, actorId, weight)``. ``actorId`` is appended to the list
        ``self.actors[movieId]`` and ``movieId`` to the list
        ``self.act[actorId]``. Ids not yet present in ``self.item`` /
        ``self.actor`` are numbered with the current size of that dict.

        Returns:
            ``new_sparseMatrix.SparseMatrix`` built from the list of
            ``[item index, actor index, weight]`` rows.
        """
        rows = []
        for record in self.inform1:
            movie, performer, weight = record
            # add relations to dict
            if movie not in self.actors:
                self.actors[movie] = []
            self.actors[movie].append(performer)

            if performer not in self.act:
                self.act[performer] = []
            self.act[performer].append(movie)

            # order the movie
            if movie not in self.item:
                self.item[movie] = len(self.item)
            if performer not in self.actor:
                self.actor[performer] = len(self.actor)
            rows.append([self.item[movie], self.actor[performer], weight])
        return new_sparseMatrix.SparseMatrix(rows)
示例#7
0
 def loadRelationship(self, filePath):
     """Read a social-relation file and build the social sparse matrix.

     Each line is split on a space, a comma or a tab. Column positions
     come from ``self.socialConfig['-columns']``: the first two entries
     locate the two user ids, an optional third locates the weight. When
     no third column is configured the weight defaults to ``1``. The
     first line is dropped when ``self.socialConfig.contains('-header')``
     is true.

     Side effects:
         * ``self.followees[userId1][userId2]`` and
           ``self.followers[userId2][userId1]`` receive the weight;
         * unseen users get the next free index in ``self.user``;
         * ``[userId1, userId2, weight]`` is appended to ``self.triple``.

     Args:
         filePath: Path of the relation file to read.

     Returns:
         ``new_sparseMatrix.SparseMatrix`` built from the list of
         ``[user index 1, user index 2, weight]`` rows.
     """
     print('load social data...')
     triple = []
     with open(filePath) as f:
         relations = f.readlines()
     # ignore the headline
     if self.socialConfig.contains('-header'):
         relations = relations[1:]
     # order of the columns
     order = self.socialConfig['-columns'].strip().split()
     # NOTE(review): this message is also printed for exactly two columns,
     # which the loop below accepts (weight defaults to 1) - confirm
     # whether ``< 2`` was intended.
     if len(order) <= 2:
         print('The social file is not in a correct format.')
     # enumerate supplies lineNo, which the error message below needs;
     # without it the error path raised NameError.
     for lineNo, line in enumerate(relations):
         items = split(' |,|\t', line.strip())
         if len(order) < 2:
             print('The social file is not in a correct format. Error: Line num %d' % lineNo)
             exit(-1)
         userId1 = items[int(order[0])]
         userId2 = items[int(order[1])]
         if len(order) < 3:
             weight = 1
         else:
             weight = float(items[int(order[2])])
         # add relations to dict
         if userId1 not in self.followees:
             self.followees[userId1] = {}
         self.followees[userId1][userId2] = weight
         if userId2 not in self.followers:
             self.followers[userId2] = {}
         self.followers[userId2][userId1] = weight
         # order the user
         if userId1 not in self.user:
             self.user[userId1] = len(self.user)
         if userId2 not in self.user:
             self.user[userId2] = len(self.user)
         self.triple.append([userId1, userId2, weight])
         triple.append([self.user[userId1], self.user[userId2], weight])
     return new_sparseMatrix.SparseMatrix(triple)
示例#8
0
文件: rating.py 项目: hyliqd/RecQ
    def __loadRatings(self, file, bTest=False):
        """Read a rating file, normalize its ratings and index its ids.

        Each line is split on a space, a comma or a tab. Column positions
        of the user, item and rating fields come from
        ``self.ratingConfig['-columns']``; the first line is dropped when
        ``self.ratingConfig.contains('-header')`` is true.

        The file is processed in two stages because normalization needs
        the final scale: first every line is parsed and ``self.rScale`` is
        widened (``rScale[0]`` to the largest rating, ``rScale[1]`` to the
        smallest); then each rating is passed through ``normalize`` with
        those two bounds.

        Side effects:
            * ``self.rScale`` is updated (for training and test files);
            * unseen user / item ids get the next free index in
              ``self.user`` / ``self.item``;
            * for training files, ``[userId, itemId, normalized rating]``
              is appended to ``self.triple``.

        Args:
            file: Path of the rating file to read.
            bTest: When false, return a sparse matrix; when true, return
                the two rating dictionaries.

        Returns:
            ``new_sparseMatrix.SparseMatrix(triple)`` when ``bTest`` is
            false, where ``triple`` holds
            ``[user index, item index, normalized rating]`` rows. Otherwise
            the tuple ``(u_i_r, i_u_r)`` whose values are lists of
            ``[itemId, rating]`` / ``[userId, rating]`` pairs carrying the
            raw (un-normalized) rating.
        """
        if not bTest:
            print('load training data...')
        else:
            print('load test data...')
        with open(file) as f:
            ratings = f.readlines()
        # ignore the headline
        if self.ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns
        order = self.ratingConfig['-columns'].strip().split()
        # split data
        u_i_r = {}
        i_u_r = {}
        triple = []
        parsed = []
        # find the maximum rating and minimum value; every line is parsed
        # here exactly once and reused by the second stage
        for lineNo, line in enumerate(ratings):
            items = split(' |,|\t', line.strip())
            if len(order) < 3:
                print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)
            userId = items[int(order[0])]
            itemId = items[int(order[1])]
            rating = float(items[int(order[2])])
            if rating > self.rScale[0]:
                self.rScale[0] = rating
            if rating < self.rScale[1]:
                self.rScale[1] = rating
            parsed.append((userId, itemId, rating))

        for userId, itemId, rating in parsed:
            # makes the rating within the range [0, 1].
            normRating = normalize(rating, self.rScale[0], self.rScale[1])
            # order the user
            if userId not in self.user:
                self.user[userId] = len(self.user)
            # order the item
            if itemId not in self.item:
                self.item[itemId] = len(self.item)
            if userId not in u_i_r:
                u_i_r[userId] = []
            u_i_r[userId].append([itemId, rating])
            if itemId not in i_u_r:
                i_u_r[itemId] = []
            i_u_r[itemId].append([userId, rating])
            if not bTest:
                self.triple.append([userId, itemId, normRating])
                triple.append(
                    [self.user[userId], self.item[itemId], normRating])

        if not bTest:
            # construct the sparse matrix
            return new_sparseMatrix.SparseMatrix(triple)
        else:
            # return testSet
            return u_i_r, i_u_r