Пример #1
0
class UserKNN(Recommender):
    """User-based k-nearest-neighbour rating predictor.

    Pairwise user similarities are kept in ``self.userSim``; a rating is
    predicted as the similarity-weighted average of the ratings that the most
    similar users gave to the target item.
    """

    def __init__(self, conf):
        super(UserKNN, self).__init__(conf)
        # NOTE(review): this invokes the *parent's* readConfiguration only; the
        # override below (which sets self.neighbors) is presumably triggered
        # elsewhere -- confirm against Recommender.
        super(UserKNN, self).readConfiguration()
        self.userSim = SymmetricMatrix(len(self.dao.user))

    def readConfiguration(self):
        """Read the similarity measure, shrinkage and neighbourhood size."""
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def initModel(self):
        """Pre-compute the user-user similarities needed for prediction."""
        self.computeCorr()

    def predict(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        The ``self.neighbors`` most similar users are inspected; those with a
        non-zero rating on ``i`` contribute ``similarity * rating``.  When no
        usable neighbour exists the mean of ``u``'s positive ratings is
        returned, or 0 when ``u`` has no positive rating at all.

        Returns:
            The prediction rounded to three decimals (or the integer 0).
        """
        # rank every known neighbour of u by descending similarity
        topUsers = sorted(self.userSim[u].items(),
                          key=lambda d: d[1],
                          reverse=True)
        userCount = max(0, min(self.neighbors, len(topUsers)))

        pred = 0
        denom = 0
        for neighbor, corr in topUsers[:userCount]:
            rating = self.dao.rating(neighbor, i)
            # a rating of 0 is treated as "neighbour did not rate item i"
            if rating != 0:
                pred += corr * rating
                denom += corr

        # denom can be 0 even with contributing neighbours (similarities of
        # opposite sign cancelling); fall back instead of dividing by zero.
        if pred == 0 or denom == 0:
            ratings = self.dao.row(u)
            rated = ratings > 0
            ratedCount = rated[0].sum()
            if ratedCount == 0:  # no data about current user in training set
                return 0
            # convert before dividing so integer data is not floor-divided
            return round(float(ratings[0].sum()) / ratedCount, 3)
        return round(pred / float(denom), 3)

    def computeCorr(self):
        """Compute the similarity between each test user and every other user."""
        print('Computing user correlation...')
        for u1 in self.dao.testSet_u:
            row1 = self.dao.row(u1)  # invariant for the inner loop
            for u2 in self.dao.user:
                # skip self-pairs and pairs already stored by symmetry
                if u1 == u2 or self.userSim.contains(u1, u2):
                    continue
                sim = qmath.similarity(row1, self.dao.row(u2), self.sim)
                self.userSim.set(u1, u2, sim)
            print('%s finished.' % (u1,))
        print('The user correlation has been figured out.')
Пример #2
0
class ItemKNN(Recommender):
    """Item-based k-nearest-neighbour rating predictor.

    Pairwise item similarities are kept in ``self.itemSim``; a rating is
    predicted as the similarity-weighted average of the ratings the user gave
    to the items most similar to the target item.
    """

    def __init__(self, conf):
        super(ItemKNN, self).__init__(conf)
        # NOTE(review): this invokes the *parent's* readConfiguration only; the
        # override below (which sets self.neighbors) is presumably triggered
        # elsewhere -- confirm against Recommender.
        super(ItemKNN, self).readConfiguration()
        # the matrix relates items to items, so it is sized by the item count
        # (it was previously sized by the number of users)
        self.itemSim = SymmetricMatrix(len(self.dao.item))

    def readConfiguration(self):
        """Read the similarity measure, shrinkage and neighbourhood size."""
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def initModel(self):
        """Pre-compute the item-item similarities needed for prediction."""
        self.computeCorr()

    def predict(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        The ``self.neighbors`` items most similar to ``i`` are inspected;
        those on which ``u`` has a non-zero rating contribute
        ``similarity * rating``.  When no usable neighbour exists the mean of
        the positive ratings of item ``i`` is returned, or 0 when the item has
        no positive rating at all.

        Returns:
            The prediction rounded to three decimals (or the integer 0).
        """
        # rank every known neighbour of item i by descending similarity
        topItems = sorted(self.itemSim[i].items(),
                          key=lambda d: d[1],
                          reverse=True)
        itemCount = max(0, min(self.neighbors, len(topItems)))

        pred = 0
        denom = 0
        for neighbor, corr in topItems[:itemCount]:
            rating = self.dao.rating(u, neighbor)
            # a rating of 0 is treated as "user u did not rate this item"
            if rating != 0:
                pred += corr * rating
                denom += corr

        # denom can be 0 even with contributing neighbours (similarities of
        # opposite sign cancelling); fall back instead of dividing by zero.
        if pred == 0 or denom == 0:
            ratings = self.dao.col(i)
            rated = ratings > 0
            ratedCount = rated[0].sum()
            if ratedCount == 0:  # no data about current item in training set
                return 0
            # convert before dividing so integer data is not floor-divided
            return round(float(ratings[0].sum()) / ratedCount, 3)
        return round(pred / float(denom), 3)

    def computeCorr(self):
        """Compute the similarity between each test item and every other item."""
        print('Computing item correlation...')
        for i1 in self.dao.testSet_i:
            col1 = self.dao.col(i1)  # invariant for the inner loop
            for i2 in self.dao.item:
                # skip self-pairs and pairs already stored by symmetry
                if i1 == i2 or self.itemSim.contains(i1, i2):
                    continue
                sim = qmath.similarity(col1, self.dao.col(i2), self.sim)
                self.itemSim.set(i1, i2, sim)
            print('%s finished.' % (i1,))
        print('The item correlation has been figured out.')
Пример #3
0
class UserKNN(Recommender):
    """User-based k-nearest-neighbour recommender over listening records.

    Users are compared by the overlap of the sets of entities (of kind
    ``self.recType``) found in their records; each user's top
    ``self.neighbors`` most similar users are cached in ``self.topUsers``.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(UserKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.data.name2id['user']))
        # user -> list of (neighbour, similarity), best first
        self.topUsers = {}

    def readConfiguration(self):
        """Read the base configuration plus the neighbourhood size."""
        super(UserKNN, self).readConfiguration()
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(UserKNN, self).printAlgorConfig()
        print('Specified Arguments of', self.config['recommender'] + ':')
        print('num.neighbors:', self.config['num.neighbors'])
        print('=' * 80)

    def initModel(self):
        """Pre-compute user similarities and neighbour lists."""
        self.computeCorr()

    def predict(self, u):
        """Return the items ranked for user ``u``, best first.

        An item's score is the similarity-weighted average of the values the
        neighbours of ``u`` have for it in ``self.data.listened``; items that
        no neighbour has (or whose weighted sum is 0) are left out.  A user
        without a cached neighbour list gets an empty ranking.
        """
        neighbors = self.topUsers.get(u, [])
        if not neighbors:
            return []
        listened = self.data.listened[self.recType]
        recommendations = []
        for item in listened:
            listeners = listened[item]
            total, denom = 0, 0
            for simUser, similarity in neighbors:
                # only neighbours that have a record for this item contribute
                if simUser in listeners:
                    total += similarity * listeners[simUser]
                    denom += similarity
            if total != 0 and denom != 0:
                recommendations.append((item, total / float(denom)))
        recommendations.sort(key=lambda d: d[1], reverse=True)
        return [item for item, _ in recommendations]

    def computeCorr(self):
        """Compute pairwise user similarities and cache each user's top neighbours."""
        # user -> set of entities of kind self.recType found in the user's
        # records (only membership is needed for the set similarity)
        userListen = defaultdict(set)
        for user in self.data.userRecord:
            for item in self.data.userRecord[user]:
                userListen[user].add(item[self.recType])
        print('Computing user similarities...')
        userCount = len(userListen)
        for ind, u1 in enumerate(userListen):
            set1 = userListen[u1]
            for u2 in userListen:
                # skip self-pairs and pairs already stored by symmetry
                if u1 == u2 or self.userSim.contains(u1, u2):
                    continue
                self.userSim.set(u1, u2, self.jaccard(set1, userListen[u2]))
            self.topUsers[u1] = sorted(self.userSim[u1].items(),
                                       key=lambda d: d[1],
                                       reverse=True)[:self.neighbors]
            if ind % 100 == 0:
                print(ind, '/', userCount, 'finished.')
        print('The user correlation has been figured out.')

    def jaccard(self, s1, s2):
        """Return ``2 * |s1 & s2| / |s1 | s2|`` as a float (0.0 for two empty sets).

        NOTE(review): the factor 2 makes this twice the textbook Jaccard
        index.  It is kept for compatibility; a constant factor changes
        neither the neighbour ranking nor the weighted average in predict().
        """
        unionSize = len(s1.union(s2))
        if unionSize == 0:
            return 0.0
        return 2 * len(s1.intersection(s2)) / float(unionSize)
Пример #4
0
class UserKNN(Recommender):
    """User-based k-nearest-neighbour rating predictor with mean-centring.

    A rating is predicted as the target user's mean plus the
    similarity-weighted average deviation of the neighbours' ratings from
    their own means.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(UserKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.dao.user))

    def readConfiguration(self):
        """Read the base configuration plus similarity, shrinkage and neighbours."""
        super(UserKNN, self).readConfiguration()
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(UserKNN, self).printAlgorConfig()
        print('Specified Arguments of %s:' % self.config['recommender'])
        print('num.neighbors: %s' % self.config['num.neighbors'])
        print('num.shrinkage: %s' % self.config['num.shrinkage'])
        print('similarity: %s' % self.config['similarity'])
        print('=' * 80)

    def initModel(self):
        """Pre-compute the user-user similarities needed for prediction."""
        self.computeCorr()

    def predict(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        Falls back to the mean rating of ``u`` (or the global mean when ``u``
        is not in the training data) when the neighbours provide no usable
        evidence.
        """
        # rank every known neighbour of u by descending similarity
        topUsers = sorted(self.userSim[u].items(),
                          key=lambda d: d[1],
                          reverse=True)
        userCount = max(0, min(self.neighbors, len(topUsers)))

        total, denom = 0, 0
        for similarUser, similarity in topUsers[:userCount]:
            rating = self.dao.rating(similarUser, i)
            # a rating of 0 is treated as "neighbour did not rate item i"
            if rating != 0:
                total += similarity * (rating - self.dao.userMeans[similarUser])
                denom += similarity

        # denom can be 0 while total is not (similarities of opposite sign
        # cancelling); fall back instead of dividing by zero.
        if total == 0 or denom == 0:
            if not self.dao.containsUser(u):
                # user u has no ratings in the training set
                return self.dao.globalMean
            return self.dao.userMeans[u]
        return self.dao.userMeans[u] + total / float(denom)

    def computeCorr(self):
        """Compute the similarity between each test user and every other user."""
        print('Computing user correlation...')
        for u1 in self.dao.testSet_u:
            row1 = self.dao.row(u1)  # invariant for the inner loop
            for u2 in self.dao.user:
                # skip self-pairs and pairs already stored by symmetry
                if u1 == u2 or self.userSim.contains(u1, u2):
                    continue
                sim = qmath.similarity(row1, self.dao.row(u2), self.sim)
                self.userSim.set(u1, u2, sim)
            # formatting (rather than '+') also works for non-string user ids
            print('user %s finished.' % (u1,))
        print('The user correlation has been figured out.')
Пример #5
0
class UserKNN(Recommender):
    """User-based k-nearest-neighbour rating predictor with mean-centring.

    Neighbour lists are pre-sorted once in ``computeCorr`` and cached in
    ``self.topUsers``; a rating is predicted as the target user's mean plus
    the similarity-weighted average deviation of the neighbours' ratings from
    their own means.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(UserKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.data.user))

    def readConfiguration(self):
        """Read the base configuration plus similarity, shrinkage and neighbours."""
        super(UserKNN, self).readConfiguration()
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(UserKNN, self).printAlgorConfig()
        print('Specified Arguments of', self.config['recommender'] + ':')
        print('num.neighbors:', self.config['num.neighbors'])
        print('num.shrinkage:', self.config['num.shrinkage'])
        print('similarity:', self.config['similarity'])
        print('=' * 80)

    def initModel(self):
        """Reset the neighbour cache and compute the user similarities."""
        self.topUsers = {}
        self.computeCorr()

    def predict(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        Falls back to the mean rating of ``u`` (or the global mean when ``u``
        is not in the training data) when the neighbours provide no usable
        evidence, including when ``u`` has no cached neighbour list.
        """
        topUsers = self.topUsers.get(u, [])
        userCount = max(0, min(self.neighbors, len(topUsers)))

        total, denom = 0, 0
        for similarUser, similarity in topUsers[:userCount]:
            rating = self.data.rating(similarUser, i)
            # -1 is the value treated here as "neighbour did not rate item i"
            if rating != -1:
                total += similarity * (rating - self.data.userMeans[similarUser])
                denom += similarity

        # denom can be 0 while total is not (similarities of opposite sign
        # cancelling); fall back instead of dividing by zero.
        if total == 0 or denom == 0:
            if not self.data.containsUser(u):
                # user u has no ratings in the training set
                return self.data.globalMean
            return self.data.userMeans[u]
        return self.data.userMeans[u] + total / float(denom)

    def computeCorr(self):
        """Compute user similarities and cache each test user's sorted neighbours."""
        print('Computing user similarities...')
        testUserCount = len(self.data.testSet_u)
        for idx, u1 in enumerate(self.data.testSet_u):
            row1 = self.data.sRow(u1)  # invariant for the inner loop
            for u2 in self.data.user:
                # skip self-pairs and pairs already stored by symmetry
                if u1 == u2 or self.userSim.contains(u1, u2):
                    continue
                sim = qmath.similarity(row1, self.data.sRow(u2), self.sim)
                self.userSim.set(u1, u2, sim)
            self.topUsers[u1] = sorted(self.userSim[u1].items(),
                                       key=lambda d: d[1],
                                       reverse=True)
            if idx % 100 == 0:
                print('progress:', idx, '/', testUserCount)

        print('The user similarities have been calculated.')

    def predictForRanking(self, u):
        """Report that item ranking is unsupported and terminate the process."""
        print(
            'Using Memory based algorithms to rank items is extremely time-consuming. So ranking for all items in UserKNN is not available.'
        )
        # same effect as exit(0) without relying on the site-provided builtin
        raise SystemExit(0)
Пример #6
0
class ItemKNN(Recommender):
    """Item-based k-nearest-neighbour rating predictor with mean-centring.

    Neighbour lists are pre-sorted once in ``computeCorr`` and cached in
    ``self.topItems``; a rating is predicted as the target item's mean plus
    the similarity-weighted average deviation of the user's ratings on
    similar items from those items' means.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(ItemKNN, self).__init__(conf, trainingSet, testSet, fold)
        # stores the similarity among items, hence sized by the item count
        # (it was previously sized by the number of users)
        self.itemSim = SymmetricMatrix(len(self.data.item))

    def readConfiguration(self):
        """Read the base configuration plus similarity, shrinkage and neighbours."""
        super(ItemKNN, self).readConfiguration()
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(ItemKNN, self).printAlgorConfig()
        print('Specified Arguments of %s:' % self.config['recommender'])
        print('num.neighbors: %s' % self.config['num.neighbors'])
        print('num.shrinkage: %s' % self.config['num.shrinkage'])
        print('similarity: %s' % self.config['similarity'])
        print('=' * 80)

    def initModel(self):
        """Reset the neighbour cache and compute the item similarities."""
        self.topItems = {}
        self.computeCorr()

    def predict(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        Falls back to the mean rating of item ``i`` (or the global mean when
        the item is not in the training data) when the neighbouring items
        provide no usable evidence, including when ``i`` has no cached
        neighbour list.
        """
        topItems = self.topItems.get(i, [])
        itemCount = max(0, min(self.neighbors, len(topItems)))

        total = 0
        denom = 0
        for similarItem, similarity in topItems[:itemCount]:
            # only similar items that user u has rated contribute
            if self.data.contains(u, similarItem):
                rating = self.data.rating(u, similarItem)
                total += similarity * (rating - self.data.itemMeans[similarItem])
                denom += similarity

        # denom can be 0 while total is not (similarities of opposite sign
        # cancelling); fall back instead of dividing by zero.
        if total == 0 or denom == 0:
            if not self.data.containsItem(i):
                # item i has no ratings in the training set
                return self.data.globalMean
            return self.data.itemMeans[i]
        return self.data.itemMeans[i] + total / float(denom)

    def computeCorr(self):
        """Compute item similarities and cache each test item's sorted neighbours."""
        print('Computing item similarities...')
        testItemCount = len(self.data.testSet_i)
        for idx, i1 in enumerate(self.data.testSet_i):
            col1 = self.data.sCol(i1)  # invariant for the inner loop
            for i2 in self.data.item:
                # skip self-pairs and pairs already stored by symmetry
                if i1 == i2 or self.itemSim.contains(i1, i2):
                    continue
                sim = qmath.similarity(col1, self.data.sCol(i2), self.sim)
                self.itemSim.set(i1, i2, sim)
            self.topItems[i1] = sorted(self.itemSim[i1].items(),
                                       key=lambda d: d[1],
                                       reverse=True)
            if idx % 100 == 0:
                print('progress: %s / %s' % (idx, testItemCount))
        print('The item similarities have been calculated.')
Пример #7
0
class CUNE_BPR(IterativeRecommender):
    """Pairwise ranking model that uses implicit friends from a user network.

    ``buildModel`` runs these stages in order:

    1. build a collaborative user network (users linked by co-rated items);
    2. build a Huffman tree over the users and assign each a code;
    3. generate random walks on the network;
    4. train user embeddings along the walks;
    5. derive each user's ``topK`` most similar users from the embeddings;
    6. build positive / friend-positive / negative item sets;
    7. train the latent factors ``P`` and ``Q`` with pairwise updates.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(CUNE_BPR, self).__init__(conf, trainingSet, testSet, fold)
        self.nonLeafVec = {}
        self.leafVec = {}

    def readConfiguration(self):
        """Read the base configuration plus the ``CUNE-BPR`` option line."""
        super(CUNE_BPR, self).readConfiguration()
        options = config.LineConfig(self.config['CUNE-BPR'])
        self.walkCount = int(options['-T'])
        self.walkLength = int(options['-L'])
        self.walkDim = int(options['-l'])
        self.winSize = int(options['-w'])
        self.topK = int(options['-k'])
        self.s = float(options['-s'])

    def printAlgorConfig(self):
        """Print the walk-related settings after the base configuration."""
        super(CUNE_BPR, self).printAlgorConfig()
        print('Specified Arguments of %s:' % self.config['recommender'])
        print('Walks count per user %s' % self.walkCount)
        print('Length of each walk %s' % self.walkLength)
        print('Dimension of user embedding %s' % self.walkDim)
        print('=' * 80)

    def buildModel(self):
        """Run the whole pipeline, from network construction to ranking training."""
        print('Kind Note: This method will probably take much time.')
        self._buildUserNetwork()
        self._buildHuffmanTree()
        self._generateWalks()
        self._trainEmbedding()
        self._buildSimilarity()
        self._prepareItemSets()
        self._trainRanking()

    def _buildUserNetwork(self):
        """Build ``self.CUNet``: user -> neighbours, repeated once per shared item."""
        print('Building collaborative user network...')
        # filter isolated nodes: keep items that more than one user rated
        self.itemNet = {}
        for item in self.dao.trainSet_i:
            if len(self.dao.trainSet_i[item]) > 1:
                self.itemNet[item] = self.dao.trainSet_i[item]

        self.filteredRatings = defaultdict(list)
        for item in self.itemNet:
            for user in self.itemNet[item]:
                if self.itemNet[item][user] >= 1:
                    self.filteredRatings[user].append(item)

        # build every user's item set once instead of once per user pair
        itemSets = dict((user, set(items))
                        for user, items in self.filteredRatings.items())
        self.CUNet = defaultdict(list)
        for user1 in self.filteredRatings:
            for user2 in self.filteredRatings:
                if user1 != user2:
                    weight = len(itemSets[user1].intersection(itemSets[user2]))
                    if weight > 0:
                        # multiplicity encodes edge weight for uniform sampling
                        self.CUNet[user1] += [user2] * weight

    def _buildHuffmanTree(self):
        """Build ``self.HTree`` from the users, weighted by neighbour-list length."""
        print('Building Huffman tree...')
        # To accelerate the method, the weight is estimated roughly
        weights = {}
        for user in self.CUNet:
            weights[user] = len(self.CUNet[user])
        ranked = sorted(weights.items(), key=lambda d: d[1])
        treeNodes = [HTreeNode(None, None, weight, user)
                     for user, weight in ranked]
        nodeList = OrderedLinkList()
        for treeNode in treeNodes:
            listNode = Node()
            listNode.val = treeNode
            try:
                nodeList.insert(listNode)
            except AttributeError:
                # NOTE(review): the error is deliberately ignored by the
                # existing code; the reason is not visible here -- confirm
                # against OrderedLinkList.insert.
                pass
        self.HTree = HuffmanTree(vecLength=self.walkDim)
        self.HTree.buildTree(nodeList)
        print('Coding for all users...')
        self.HTree.coding(self.HTree.root, '', 0)

    def _generateWalks(self):
        """Generate ``walkCount`` random walks per user into ``self.walks``."""
        print('Generating random deep walks...')
        self.walks = []
        self.visited = defaultdict(dict)
        for user in self.CUNet:
            for t in range(self.walkCount):
                currentNode = user
                path = [user]
                for i in range(1, self.walkLength):
                    # .get() avoids inserting keys into the defaultdict while
                    # it is being iterated
                    candidates = self.CUNet.get(currentNode)
                    if not candidates:
                        break
                    # randint is inclusive on both ends
                    nextNode = candidates[randint(0, len(candidates) - 1)]
                    count = 0
                    while nextNode in self.visited[user]:
                        nextNode = candidates[randint(0, len(candidates) - 1)]
                        # break infinite loop
                        count += 1
                        if count == 10:
                            break
                    path.append(nextNode)
                    self.visited[user][nextNode] = 1
                    # advance, so the path is a walk rather than a bag of the
                    # start user's neighbours
                    currentNode = nextNode
                self.walks.append(path)
        shuffle(self.walks)

    def _trainEmbedding(self):
        """Train the Huffman-tree vectors on the walks for ``maxIter`` passes."""
        print('Generating user embedding...')
        iteration = 1
        while iteration <= self.maxIter:
            loss = 0
            for walk in self.walks:
                centerUser = walk[len(walk) // 2]
                centerCode = self.HTree.code[centerUser]
                for user in walk:
                    if user != centerUser:
                        code = self.HTree.code[user]
                        x = self.HTree.vector[centerCode]
                        # update every proper prefix of the user's code against
                        # the centre user's vector
                        for i in range(1, len(code)):
                            prefix = code[0:i]
                            w = self.HTree.vector[prefix]
                            self.HTree.vector[prefix] += self.lRate * (
                                1 - sigmoid(w.dot(x))) * x
                            self.HTree.vector[centerCode] += self.lRate * (
                                1 - sigmoid(w.dot(x))) * w
                            loss += -log(sigmoid(w.dot(x)), 2)
            print('iteration: %s loss: %s' % (iteration, loss))
            iteration += 1
        print('User embedding generated.')

    def _buildSimilarity(self):
        """Fill ``self.Sim`` with embedding cosines and keep each user's top-k."""
        print('Constructing similarity matrix...')
        self.Sim = SymmetricMatrix(len(self.CUNet))
        for user1 in self.CUNet:
            for user2 in self.CUNet:
                # skip self-pairs and pairs already stored by symmetry
                if user1 == user2 or self.Sim.contains(user1, user2):
                    continue
                vec1 = self.HTree.vector[self.HTree.code[user1]]
                vec2 = self.HTree.vector[self.HTree.code[user2]]
                self.Sim.set(user1, user2, cosine(vec1, vec2))
        self.topKSim = {}
        for user in self.CUNet:
            self.topKSim[user] = sorted(self.Sim[user].items(),
                                        key=lambda d: d[1],
                                        reverse=True)[:self.topK]
        print('Similarity matrix finished.')

    def _prepareItemSets(self):
        """Build the positive, friend-positive and negative item sets per user."""
        print('Preparing item sets...')
        self.PositiveSet = defaultdict(dict)
        self.IPositiveSet = defaultdict(list)
        self.NegativeSet = defaultdict(list)

        for user in self.topKSim:
            for item in self.dao.trainSet_u[user]:
                if self.dao.trainSet_u[user][item] >= 1:
                    self.PositiveSet[user][item] = 1
                else:
                    self.NegativeSet[user].append(item)

            # items of the top-k similar users that the user has no positive
            # feedback on (duplicates across friends are kept)
            for friend in self.topKSim[user]:
                for item in self.dao.trainSet_u[friend[0]]:
                    if item not in self.PositiveSet[user]:
                        self.IPositiveSet[user].append(item)

    def _trainRanking(self):
        """Train ``P`` and ``Q`` with pairwise updates over (i, k) and (k, j).

        For each positive item ``i`` a friend-positive item ``k`` and a
        negative item ``j`` are sampled; ``i`` is pushed above ``k`` and ``k``
        above ``j`` (the latter scaled by ``1 / self.s``).
        """
        print('Training...')
        allItems = list(self.dao.item)  # built once for negative sampling
        iteration = 0
        while iteration < self.maxIter:
            self.loss = 0

            for user in self.PositiveSet:
                u = self.dao.user[user]
                for item in self.PositiveSet[user]:
                    if len(self.IPositiveSet[user]) > 0:
                        item_k = self.IPositiveSet[user][randint(
                            0,
                            len(self.IPositiveSet[user]) - 1)]
                        i = self.dao.item[item]
                        k = self.dao.item[item_k]
                        self.P[u] += self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i]) -
                                        self.P[u].dot(self.Q[k]))) * (
                                            self.Q[i] - self.Q[k])
                        self.Q[i] += self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i]) -
                                        self.P[u].dot(self.Q[k]))) * self.P[u]
                        self.Q[k] -= self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i]) -
                                        self.P[u].dot(self.Q[k]))) * self.P[u]

                        if len(self.NegativeSet[user]) > 0:
                            item_j = self.NegativeSet[user][randint(
                                0,
                                len(self.NegativeSet[user]) - 1)]
                        else:
                            # NOTE(review): this loops forever if the user has
                            # positive feedback on every item.
                            item_j = allItems[randint(0, len(allItems) - 1)]
                            while item_j in self.PositiveSet[user]:
                                item_j = allItems[randint(
                                    0,
                                    len(allItems) - 1)]
                        j = self.dao.item[item_j]
                        self.P[u] += (1 / self.s) * self.lRate * (1 - sigmoid(
                            (1 / self.s) *
                            (self.P[u].dot(self.Q[k]) - self.P[u].dot(
                                self.Q[j])))) * (self.Q[k] - self.Q[j])
                        self.Q[k] += (1 / self.s) * self.lRate * (1 - sigmoid(
                            (1 / self.s) *
                            (self.P[u].dot(self.Q[k]) -
                             self.P[u].dot(self.Q[j])))) * self.P[u]
                        self.Q[j] -= (1 / self.s) * self.lRate * (1 - sigmoid(
                            (1 / self.s) *
                            (self.P[u].dot(self.Q[k]) -
                             self.P[u].dot(self.Q[j])))) * self.P[u]

                        # regularisation shrinks the factors; the previous
                        # '+=' grew them on every update
                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]
                        self.Q[k] -= self.lRate * self.regI * self.Q[k]

                        # negative log-likelihood of both pairwise orderings,
                        # so it has the same sign as the penalty added below
                        self.loss += -log(sigmoid(
                            self.P[u].dot(self.Q[i]) -
                            self.P[u].dot(self.Q[k]))) - log(sigmoid(
                                (1 / self.s) *
                                (self.P[u].dot(self.Q[k]) -
                                 self.P[u].dot(self.Q[j]))))

            self.loss += self.regU * (self.P * self.P).sum() + self.regI * (
                self.Q * self.Q).sum()
            iteration += 1
            if self.isConverged(iteration):
                break

    def predictForRanking(self, u):
        'invoked to rank all the items for the user'
        if self.dao.containsUser(u):
            u = self.dao.getUserId(u)
            return self.Q.dot(self.P[u])
        else:
            # unknown user: every item gets the global mean as its score
            return [self.dao.globalMean] * len(self.dao.item)
Пример #8
0
class CUNE_MF(IterativeRecommender):
    """Matrix factorisation regularised by implicit friends from a user network.

    ``buildModel`` runs these stages in order:

    1. build a collaborative user network (users linked by co-rated items);
    2. build a Huffman tree over the users and assign each a code;
    3. generate random walks on the network;
    4. train user embeddings along the walks;
    5. derive each user's ``topK`` most similar users from the embeddings;
    6. factorise the ratings while pulling each user's factors towards those
       of the top-k similar users (weight ``self.alpha``).
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(CUNE_MF, self).__init__(conf, trainingSet, testSet, fold)
        self.nonLeafVec = {}
        self.leafVec = {}

    def readConfiguration(self):
        """Read the base configuration plus the ``CUNE-MF`` option line."""
        super(CUNE_MF, self).readConfiguration()
        options = config.LineConfig(self.config['CUNE-MF'])
        self.walkCount = int(options['-T'])
        self.walkLength = int(options['-L'])
        self.walkDim = int(options['-l'])
        self.winSize = int(options['-w'])
        self.topK = int(options['-k'])
        self.alpha = float(options['-a'])
        self.epoch = int(options['-ep'])

    def printAlgorConfig(self):
        """Print the walk-related settings after the base configuration."""
        super(CUNE_MF, self).printAlgorConfig()
        print('Specified Arguments of %s:' % self.config['recommender'])
        print('Walks count per user %s' % self.walkCount)
        print('Length of each walk %s' % self.walkLength)
        print('Dimension of user embedding %s' % self.walkDim)
        print('=' * 80)

    def buildModel(self):
        """Run the whole pipeline, from network construction to factorisation."""
        print('Kind Note: This method will probably take much time.')
        self._buildUserNetwork()
        self._buildHuffmanTree()
        self._generateWalks()
        self._trainEmbedding()
        self._buildSimilarity()
        self._decompose()

    def _buildUserNetwork(self):
        """Build ``self.CUNet``: user -> neighbours, repeated once per shared item."""
        print('Building collaborative user network...')
        # filter isolated nodes and low ratings
        self.itemNet = {}
        for item in self.dao.trainSet_i:
            if len(self.dao.trainSet_i[item]) > 1:
                self.itemNet[item] = self.dao.trainSet_i[item]

        self.filteredRatings = defaultdict(list)
        for item in self.itemNet:
            for user in self.itemNet[item]:
                if self.itemNet[item][user] > 0.75:
                    self.filteredRatings[user].append(item)

        # build every user's item set once instead of once per user pair
        itemSets = dict((user, set(items))
                        for user, items in self.filteredRatings.items())
        self.CUNet = defaultdict(list)
        for user1 in self.filteredRatings:
            s1 = itemSets[user1]
            for user2 in self.filteredRatings:
                if user1 != user2:
                    weight = len(s1.intersection(itemSets[user2]))
                    if weight > 0:
                        # multiplicity encodes edge weight for uniform sampling
                        self.CUNet[user1] += [user2] * weight

    def _buildHuffmanTree(self):
        """Build ``self.HTree`` from the users, weighted by neighbour-list length."""
        print('Building Huffman tree...')
        # To accelerate the method, the weight is estimated roughly
        weights = {}
        for user in self.CUNet:
            weights[user] = len(self.CUNet[user])
        ranked = sorted(weights.items(), key=lambda d: d[1])
        treeNodes = [HTreeNode(None, None, weight, user)
                     for user, weight in ranked]
        nodeList = OrderedLinkList()
        for treeNode in treeNodes:
            listNode = Node()
            listNode.val = treeNode
            try:
                nodeList.insert(listNode)
            except AttributeError:
                # NOTE(review): the error is deliberately ignored by the
                # existing code; the reason is not visible here -- confirm
                # against OrderedLinkList.insert.
                pass
        self.HTree = HuffmanTree(vecLength=self.walkDim)
        self.HTree.buildTree(nodeList)
        print('Coding for all users...')
        self.HTree.coding(self.HTree.root, '', 0)

    def _generateWalks(self):
        """Generate ``walkCount`` random walks per user into ``self.walks``."""
        print('Generating random deep walks...')
        self.walks = []
        self.visited = defaultdict(dict)
        for user in self.CUNet:
            for t in range(self.walkCount):
                path = [user]
                lastNode = user
                for i in range(1, self.walkLength):
                    # .get() avoids inserting keys into the defaultdict while
                    # it is being iterated
                    candidates = self.CUNet.get(lastNode)
                    if not candidates:
                        break
                    nextNode = choice(candidates)
                    count = 0
                    while nextNode in self.visited[user]:
                        nextNode = choice(candidates)
                        # break infinite loop
                        count += 1
                        if count == 10:
                            break
                    path.append(nextNode)
                    self.visited[user][nextNode] = 1
                    # advance, so the path is a walk rather than a bag of the
                    # start user's neighbours
                    lastNode = nextNode
                self.walks.append(path)
        shuffle(self.walks)

    def _trainEmbedding(self):
        """Train the Huffman-tree vectors on the walks for ``epoch`` passes."""
        print('Generating user embedding...')
        iteration = 1
        while iteration <= self.epoch:
            loss = 0
            for walk in self.walks:
                centerUser = walk[len(walk) // 2]
                centerCode = self.HTree.code[centerUser]
                for user in walk:
                    if user != centerUser:
                        code = self.HTree.code[user]
                        x = self.HTree.vector[centerCode]
                        # update every proper prefix of the user's code against
                        # the centre user's vector
                        for i in range(1, len(code)):
                            prefix = code[0:i]
                            w = self.HTree.vector[prefix]
                            self.HTree.vector[prefix] += self.lRate * (
                                1 - sigmoid(w.dot(x))) * x
                            self.HTree.vector[centerCode] += self.lRate * (
                                1 - sigmoid(w.dot(x))) * w
                            loss += -log(sigmoid(w.dot(x)))
            print('iteration: %s loss: %s' % (iteration, loss))
            iteration += 1
        print('User embedding generated.')

    def _buildSimilarity(self):
        """Fill ``self.Sim`` with embedding cosines and keep each user's top-k."""
        print('Constructing similarity matrix...')
        self.Sim = SymmetricMatrix(len(self.CUNet))
        for user1 in self.CUNet:
            for user2 in self.CUNet:
                # skip self-pairs and pairs already stored by symmetry
                if user1 == user2 or self.Sim.contains(user1, user2):
                    continue
                vec1 = self.HTree.vector[self.HTree.code[user1]]
                vec2 = self.HTree.vector[self.HTree.code[user2]]
                self.Sim.set(user1, user2, cosine(vec1, vec2))
        self.topKSim = {}
        for user in self.CUNet:
            self.topKSim[user] = sorted(self.Sim[user].items(),
                                        key=lambda d: d[1],
                                        reverse=True)[:self.topK]
        print('Similarity matrix finished.')

    def _decompose(self):
        """Factorise the ratings with an extra pull towards similar users."""
        print('Decomposing...')
        iteration = 0
        while iteration < self.maxIter:
            self.loss = 0
            for entry in self.dao.trainingData:
                user, item, rating = entry
                u = self.dao.user[user]  # get user id
                i = self.dao.item[item]  # get item id
                error = rating - self.P[u].dot(self.Q[i])
                self.loss += error**2
                # NOTE(review): if P and Q are numpy arrays, p and q are views,
                # so the Q update below sees the already-updated P[u] --
                # statement order is kept exactly as before.
                p = self.P[u]
                q = self.Q[i]

                # update latent vectors
                self.P[u] += self.lRate * (error * q - self.regU * p)
                self.Q[i] += self.lRate * (error * p - self.regI * q)

            # pull each user's factors towards those of the top-k similar users
            for user in self.CUNet:
                u = self.dao.user[user]
                friends = self.topKSim[user]
                for friend in friends:
                    uf = self.dao.user[friend[0]]
                    self.P[u] -= self.lRate * (self.P[u] -
                                               self.P[uf]) * self.alpha
                    self.loss += self.alpha * (
                        self.P[u] - self.P[uf]).dot(self.P[u] - self.P[uf])

            self.loss += self.regU * (self.P * self.P).sum() + self.regI * (
                self.Q * self.Q).sum()
            iteration += 1
            if self.isConverged(iteration):
                break
Пример #9
0
class TSWalker(Recommender):
    """Rating predictor that random-walks over trusted (similar) users.

    Two pairwise caches are built by ``initModel``:

    * ``userSim`` holds a binary trust flag per user pair (1 when the
      configured similarity reaches the threshold ``-v``, else 0).
    * ``itemSim`` holds a mean-centred item-item correlation, damped by the
      number of users who rated both items.

    ``predict`` then averages the ratings collected by ``-tw`` random walks
    of at most ``-k`` steps each.
    """

    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(TSWalker, self).__init__(conf, trainingSet, testSet, fold)
        # Pairwise caches, filled by initModel().
        self.userSim = SymmetricMatrix(len(self.dao.user))
        self.itemSim = SymmetricMatrix(len(self.dao.item))

    def readConfiguration(self):
        """Read the similarity name and the ``TSWalker`` option line."""
        super(TSWalker, self).readConfiguration()
        self.sim = self.config['similarity']
        TW = LineConfig(self.config['TSWalker'])
        self.k = int(TW['-k'])  # maximum number of steps per walk
        self.v = float(TW['-v'])  # similarity threshold for "trusted"
        self.tw = int(TW['-tw'])  # number of walks per prediction

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(TSWalker, self).printAlgorConfig()
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('Specified Arguments of ' + self.config['recommender'] + ':')
        print('similarity: %s' % (self.config['similarity'],))
        print('step: %d' % self.k)
        print('Random Walk times: %d' % self.tw)
        print('The trust value of u: %f' % self.v)
        print('=' * 80)

    def initModel(self):
        """Build the item and user similarity caches used by ``predict``."""
        self.computeICorr()
        self.computeUCorr()

    def predict(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        Up to ``self.tw`` walks are performed.  Each step draws a not yet
        visited user uniformly at random; users that are not flagged as
        trusted by ``u`` in ``userSim`` are discarded without consuming a
        step.  A walk ends as soon as it yields a rating: either the
        neighbour's own rating of ``i``, or (with probability ``proOfK``)
        the neighbour's rating of the item most similar to ``i``.

        Returns:
            The mean of the collected ratings as a float, or ``0.0`` when
            no walk produced a rating.
        """
        # Each user is visited at most once per prediction.  Drawing from
        # the shrinking pool is equivalent to rejection-sampling visited
        # users, but it terminates once the pool is exhausted instead of
        # spinning forever.
        unvisited = [user for user in self.dao.user if user != u]
        ratingSum = 0
        finished = 0
        while finished < self.tw and unvisited:
            step = 0
            while step < self.k and unvisited:
                neighbor = choice(unvisited)
                unvisited.remove(neighbor)
                if self.userSim[u][neighbor] != 1:
                    continue  # not trusted: does not count as a step
                step += 1
                direct = self.dao.rating(neighbor, i)
                if direct != 0:
                    ratingSum += direct
                    finished += 1
                    print('Finished TSWalker for %d time in %d step-1' %
                          (finished, step))
                    break  # this walk is complete
                stopProb = self.proOfK(neighbor, i, step)
                if random() < stopProb:
                    substitute = self._mostSimilarRatedItem(neighbor, i)
                    ratingSum += self.dao.rating(neighbor, substitute)
                    finished += 1
                    print('Finished TSWalker for %d time in %d step-2' %
                          (finished, step))
                    break  # this walk is complete
        print(ratingSum)
        if finished == 0:
            return 0.0
        # Average over the walks that actually returned a rating, so the
        # prediction always stays inside the range of observed ratings.
        return ratingSum / float(finished)

    def _mostSimilarRatedItem(self, user, i):
        """Return the item rated by ``user`` that is most similar to ``i``.

        The search starts from item id 0 with similarity 0, so only a
        strictly positive similarity can replace that starting choice.
        """
        uid = self.dao.getUserId(user)
        bestId = 0
        bestSim = 0
        for j in self.dao.trainingMatrix.matrix_User[uid].keys():
            candidate = self.itemSim[i][self.dao.id2item[j]]
            if candidate > bestSim:
                bestSim = candidate
                bestId = j
        return self.dao.id2item[bestId]

    def computeUCorr(self):
        'compute correlation among users'
        print('Computing user correlation...')
        for u1 in self.dao.testSet_u:
            row1 = self.dao.sRow(u1)
            for u2 in self.dao.user:
                if u1 == u2 or self.userSim.contains(u1, u2):
                    continue
                sim = qmath.similarity(row1, self.dao.sRow(u2), self.sim)
                # Store a binary trust flag rather than the raw similarity.
                self.userSim.set(u1, u2, 1 if sim >= self.v else 0)
            print('user ' + u1 + ' finished.')
        print('The user correlation has been figured out.')

    def computeICorr(self):
        'compute correlation among items'
        for i in self.dao.item:
            ratersOfI, _ = self.dao.itemRated(i)
            ratersOfI = set(ratersOfI)
            for j in self.dao.item:
                if i == j or self.itemSim.contains(i, j):
                    continue
                ratersOfJ, _ = self.dao.itemRated(j)
                common = ratersOfI.intersection(ratersOfJ)
                numerator = 0
                sqDevI = 0
                sqDevJ = 0
                # Both deviation sums run over the users who rated both
                # items, each taken around that user's mean rating.
                for cu in common:
                    user = self.dao.id2user[cu]
                    mean = self.dao.userMeans[user]
                    devI = self.dao.rating(user, i) - mean
                    devJ = self.dao.rating(user, j) - mean
                    numerator += devI * devJ
                    sqDevI += devI ** 2
                    sqDevJ += devJ ** 2
                denom = sqrt(sqDevI * sqDevJ)
                # No common raters, or no variance: correlation is 0.
                corr = float(numerator) / denom if denom != 0 else 0
                # Damp correlations that rest on few common raters.
                half = float(len(common)) / 2
                self.itemSim.set(i, j, corr / (1 + exp(-half)))
            print('item ' + i + ' finished.')
        print('The item correlation has been figured out.')

    def proOfK(self, u, i, k):
        """Return the probability of ending the walk at user ``u``.

        This is the largest similarity between item ``i`` and any item
        rated by ``u``, divided by ``1 + exp(-k / 2)`` where ``k`` is the
        current step.  Returns ``0.0`` when ``u`` has no rated items, so
        the walk simply moves on.
        """
        ratedIds, _ = self.dao.userRated(u)
        sims = [self.itemSim[i][self.dao.id2item[j]] for j in ratedIds]
        if not sims:
            return 0.0
        # Dividing by a positive constant keeps the ordering, so scaling
        # the maximum equals taking the maximum of the scaled values.
        return max(sims) / (1 + exp(-float(k) / 2))