class UserKNN(Recommender):
    def __init__(self, conf):
        super(UserKNN, self).__init__(conf)
        super(UserKNN, self).readConfiguration()
        self.userSim = SymmetricMatrix(len(self.dao.user))

    def readConfiguration(self):
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def initModel(self):
        self.computeCorr()

    def predict(self, u, i):
        #find the closest neighbors of user u
        topUsers = sorted(self.userSim[u].iteritems(), key=lambda d: d[1], reverse=True)
        userCount = self.neighbors
        if userCount > len(topUsers):
            userCount = len(topUsers)
        #predict
        pred = 0
        denom = 0
        for n in range(userCount):
            #if neighbor n has a rating on item i
            if self.dao.rating(topUsers[n][0], i) != 0:
                corr = topUsers[n][1]
                rating = self.dao.rating(topUsers[n][0], i)
                pred += corr * rating
                denom += topUsers[n][1]
        if pred == 0:
            #no neighbors have a rating on item i, return the average rating of user u
            n = self.dao.row(u) > 0
            if sum(n[0]) == 0:
                #no data about the current user in the training set
                return 0
            pred = float(self.dao.row(u)[0].sum() / n[0].sum())
            return round(pred, 3)
        pred = pred / float(denom)
        return round(pred, 3)

    def computeCorr(self):
        'compute correlation among users'
        print 'Computing user correlation...'
        for u1 in self.dao.testSet_u:
            for u2 in self.dao.user:
                if u1 != u2:
                    if self.userSim.contains(u1, u2):
                        continue
                    sim = qmath.similarity(self.dao.row(u1), self.dao.row(u2), self.sim)
                    self.userSim.set(u1, u2, sim)
            print u1, 'finished.'
        print 'The user correlation has been figured out.'
class ItemKNN(Recommender):
    def __init__(self, conf):
        super(ItemKNN, self).__init__(conf)
        super(ItemKNN, self).readConfiguration()
        self.itemSim = SymmetricMatrix(len(self.dao.user))

    def readConfiguration(self):
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def initModel(self):
        self.computeCorr()

    def predict(self, u, i):
        #find the closest neighbors of item i
        topItems = sorted(self.itemSim[i].iteritems(), key=lambda d: d[1], reverse=True)
        itemCount = self.neighbors
        if itemCount > len(topItems):
            itemCount = len(topItems)
        #predict
        pred = 0
        denom = 0
        for n in range(itemCount):
            #if user u has a rating on the n-th similar item
            if self.dao.rating(u, topItems[n][0]) != 0:
                corr = topItems[n][1]
                rating = self.dao.rating(u, topItems[n][0])
                pred += corr * rating
                denom += topItems[n][1]
        if pred == 0:
            #user u has not rated any similar item, return the average rating of item i
            n = self.dao.col(i) > 0
            if n[0].sum() == 0:
                #no data about the current item in the training set
                return 0
            pred = float(self.dao.col(i)[0].sum() / n[0].sum())
            return round(pred, 3)
        pred = pred / float(denom)
        return round(pred, 3)

    def computeCorr(self):
        'compute correlation among items'
        print 'Computing item correlation...'
        for i1 in self.dao.testSet_i:
            for i2 in self.dao.item:
                if i1 != i2:
                    if self.itemSim.contains(i1, i2):
                        continue
                    sim = qmath.similarity(self.dao.col(i1), self.dao.col(i2), self.sim)
                    self.itemSim.set(i1, i2, sim)
            print i1, 'finished.'
        print 'The item correlation has been figured out.'
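#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library above). Both UserKNN and ItemKNN
# use the same neighborhood scheme: keep the k most similar neighbors that have
# a rating on the target and average their ratings weighted by similarity. The
# function name and dict layout below are hypothetical stand-ins for the
# dao/SymmetricMatrix objects; Python 3.
#------------------------------------------------------------------------------
def knn_predict(neighbor_sims, neighbor_ratings, k=50, fallback=0.0):
    """Weighted k-NN prediction, mirroring the predict() methods above.

    neighbor_sims:    {neighbor_id: similarity} for the active user (or item)
    neighbor_ratings: {neighbor_id: observed rating on the target}
    """
    top = sorted(neighbor_sims.items(), key=lambda d: d[1], reverse=True)[:k]
    num, denom = 0.0, 0.0
    for neighbor, sim in top:
        if neighbor in neighbor_ratings:   # the neighbor actually rated the target
            num += sim * neighbor_ratings[neighbor]
            denom += sim
    if denom == 0:
        # no usable neighbor; the classes above fall back to the active
        # user's (or item's) own mean rating at this point
        return fallback
    return num / denom


if __name__ == '__main__':
    sims = {'u2': 0.9, 'u3': 0.4, 'u4': 0.1}
    ratings_on_i = {'u2': 4.0, 'u4': 2.0}
    print(round(knn_predict(sims, ratings_on_i, k=2), 3))  # 4.0: only u2 is in the top-2 and rated the item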
class UserKNN(Recommender):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(UserKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.data.name2id['user']))
        self.topUsers = {}

    def readConfiguration(self):
        super(UserKNN, self).readConfiguration()
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(UserKNN, self).printAlgorConfig()
        print('Specified Arguments of', self.config['recommender'] + ':')
        print('num.neighbors:', self.config['num.neighbors'])
        print('=' * 80)

    def initModel(self):
        self.computeCorr()

    def predict(self, u):
        recommendations = []
        for item in self.data.listened[self.recType]:
            sum, denom = 0, 0
            for simUser in self.topUsers[u]:
                #if the similar user has listened to this item
                if simUser[0] in self.data.listened[self.recType][item]:
                    similarity = simUser[1]
                    score = self.data.listened[self.recType][item][simUser[0]]
                    sum += similarity * score
                    denom += similarity
            if sum != 0:
                score = sum / float(denom)
                recommendations.append((item, score))
        recommendations = sorted(recommendations, key=lambda d: d[1], reverse=True)
        recommendations = [item[0] for item in recommendations]
        return recommendations

    def computeCorr(self):
        'compute correlation among users'
        userListen = defaultdict(dict)
        for user in self.data.userRecord:
            for item in self.data.userRecord[user]:
                if item[self.recType] in userListen[user]:
                    userListen[user][item[self.recType]] += 1
                else:
                    userListen[user][item[self.recType]] = 0
        print('Computing user similarities...')
        for ind, u1 in enumerate(userListen):
            set1 = set(userListen[u1].keys())
            for u2 in userListen:
                if u1 != u2:
                    if self.userSim.contains(u1, u2):
                        continue
                    set2 = set(userListen[u2].keys())
                    sim = self.jaccard(set1, set2)
                    self.userSim.set(u1, u2, sim)
            self.topUsers[u1] = sorted(self.userSim[u1].items(), key=lambda d: d[1], reverse=True)[:self.neighbors]
            if ind % 100 == 0:
                print(ind, '/', len(userListen), 'finished.')
        print('The user correlation has been figured out.')

    def jaccard(self, s1, s2):
        #note: this is twice the standard Jaccard index; the factor only rescales scores
        return 2 * len(s1.intersection(s2)) / (len(s1.union(s2)) + 0.0)
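#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library). The similarity used by this
# UserKNN is a set-overlap score over listening histories: the jaccard method
# returns 2 * |A intersect B| / |A union B|, i.e. twice the standard Jaccard
# index, which changes the scale but not the neighbor ranking. The function
# name below is hypothetical.
#------------------------------------------------------------------------------
def overlap_similarity(items_a, items_b):
    """Same formula as UserKNN.jaccard above, on plain iterables of item ids."""
    a, b = set(items_a), set(items_b)
    if not a and not b:
        return 0.0
    return 2 * len(a & b) / len(a | b)


print(overlap_similarity(['t1', 't2', 't3'], ['t2', 't3', 't4']))  # 2 * 2 / 4 = 1.0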
class UserKNN(Recommender):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(UserKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.dao.user))

    def readConfiguration(self):
        super(UserKNN, self).readConfiguration()
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(UserKNN, self).printAlgorConfig()
        print 'Specified Arguments of', self.config['recommender'] + ':'
        print 'num.neighbors:', self.config['num.neighbors']
        print 'num.shrinkage:', self.config['num.shrinkage']
        print 'similarity:', self.config['similarity']
        print '=' * 80

    def initModel(self):
        self.computeCorr()

    def predict(self, u, i):
        #find the closest neighbors of user u
        topUsers = sorted(self.userSim[u].iteritems(), key=lambda d: d[1], reverse=True)
        userCount = self.neighbors
        if userCount > len(topUsers):
            userCount = len(topUsers)
        #predict
        sum, denom = 0, 0
        for n in range(userCount):
            #if the neighboring user has a rating on item i
            similarUser = topUsers[n][0]
            if self.dao.rating(similarUser, i) != 0:
                similarity = topUsers[n][1]
                rating = self.dao.rating(similarUser, i)
                sum += similarity * (rating - self.dao.userMeans[similarUser])
                denom += similarity
        if sum == 0:
            #no neighbors have a rating on item i, return the average rating of user u
            if not self.dao.containsUser(u):
                #user u has no ratings in the training set, return the global mean
                return self.dao.globalMean
            return self.dao.userMeans[u]
        pred = self.dao.userMeans[u] + sum / float(denom)
        return pred

    def computeCorr(self):
        'compute correlation among users'
        print 'Computing user correlation...'
        for u1 in self.dao.testSet_u:
            for u2 in self.dao.user:
                if u1 != u2:
                    if self.userSim.contains(u1, u2):
                        continue
                    sim = qmath.similarity(self.dao.row(u1), self.dao.row(u2), self.sim)
                    self.userSim.set(u1, u2, sim)
            print 'user ' + u1 + ' finished.'
        print 'The user correlation has been figured out.'
class UserKNN(Recommender):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(UserKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.data.user))

    def readConfiguration(self):
        super(UserKNN, self).readConfiguration()
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(UserKNN, self).printAlgorConfig()
        print('Specified Arguments of', self.config['recommender'] + ':')
        print('num.neighbors:', self.config['num.neighbors'])
        print('num.shrinkage:', self.config['num.shrinkage'])
        print('similarity:', self.config['similarity'])
        print('=' * 80)

    def initModel(self):
        self.topUsers = {}
        self.computeCorr()

    def predict(self, u, i):
        #find the closest neighbors of user u
        topUsers = self.topUsers[u]
        userCount = self.neighbors
        if userCount > len(topUsers):
            userCount = len(topUsers)
        #predict
        sum, denom = 0, 0
        for n in range(userCount):
            #if the neighboring user has a rating on item i
            similarUser = topUsers[n][0]
            if self.data.rating(similarUser, i) != -1:
                similarity = topUsers[n][1]
                rating = self.data.rating(similarUser, i)
                sum += similarity * (rating - self.data.userMeans[similarUser])
                denom += similarity
        if sum == 0:
            #no neighbors have a rating on item i, return the average rating of user u
            if not self.data.containsUser(u):
                #user u has no ratings in the training set, return the global mean
                return self.data.globalMean
            return self.data.userMeans[u]
        pred = self.data.userMeans[u] + sum / float(denom)
        return pred

    def computeCorr(self):
        'compute correlation among users'
        print('Computing user similarities...')
        for idx, u1 in enumerate(self.data.testSet_u):
            for u2 in self.data.user:
                if u1 != u2:
                    if self.userSim.contains(u1, u2):
                        continue
                    sim = qmath.similarity(self.data.sRow(u1), self.data.sRow(u2), self.sim)
                    self.userSim.set(u1, u2, sim)
            self.topUsers[u1] = sorted(self.userSim[u1].items(), key=lambda d: d[1], reverse=True)
            if idx % 100 == 0:
                print('progress:', idx, '/', len(self.data.testSet_u))
        print('The user similarities have been calculated.')

    def predictForRanking(self, u):
        print('Using Memory based algorithms to rank items is extremely time-consuming. '
              'So ranking for all items in UserKNN is not available.')
        exit(0)
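#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library). UserKNN.predict above computes
# a mean-centered weighted average:
#   r_hat(u, i) = mean(u) + sum_v sim(u, v) * (r(v, i) - mean(v)) / sum_v sim(u, v)
# over the top-k neighbors v that rated item i. The standalone function below
# reproduces that formula with plain dicts; all names are illustrative.
#------------------------------------------------------------------------------
def mean_centered_predict(user_mean, neighbor_means, neighbor_sims, neighbor_ratings, k=50):
    top = sorted(neighbor_sims.items(), key=lambda d: d[1], reverse=True)[:k]
    num, denom = 0.0, 0.0
    for v, sim in top:
        if v in neighbor_ratings:
            num += sim * (neighbor_ratings[v] - neighbor_means[v])
            denom += sim
    if denom == 0:
        return user_mean            # no usable neighbors: fall back to the user's mean
    return user_mean + num / denom


print(mean_centered_predict(
    user_mean=3.5,
    neighbor_means={'v1': 3.0, 'v2': 4.0},
    neighbor_sims={'v1': 0.8, 'v2': 0.2},
    neighbor_ratings={'v1': 4.0, 'v2': 4.0},
))  # 3.5 + (0.8 * 1.0 + 0.2 * 0.0) / 1.0 = 4.3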
class ItemKNN(Recommender):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(ItemKNN, self).__init__(conf, trainingSet, testSet, fold)
        self.itemSim = SymmetricMatrix(len(self.data.user))  #used to store the similarity among items

    def readConfiguration(self):
        super(ItemKNN, self).readConfiguration()
        self.sim = self.config['similarity']
        self.shrinkage = int(self.config['num.shrinkage'])
        self.neighbors = int(self.config['num.neighbors'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(ItemKNN, self).printAlgorConfig()
        print 'Specified Arguments of', self.config['recommender'] + ':'
        print 'num.neighbors:', self.config['num.neighbors']
        print 'num.shrinkage:', self.config['num.shrinkage']
        print 'similarity:', self.config['similarity']
        print '=' * 80

    def initModel(self):
        self.topItems = {}
        self.computeCorr()

    def predict(self, u, i):
        #find the closest neighbors of item i
        topItems = self.topItems[i]
        itemCount = self.neighbors
        if itemCount > len(topItems):
            itemCount = len(topItems)
        #predict
        sum = 0
        denom = 0
        for n in range(itemCount):
            similarItem = topItems[n][0]
            #if user u has a rating on the n-th similar item
            if self.data.contains(u, similarItem):
                similarity = topItems[n][1]
                rating = self.data.rating(u, similarItem)
                sum += similarity * (rating - self.data.itemMeans[similarItem])
                denom += similarity
        if sum == 0:
            #user u has not rated any similar item, return the average rating of item i
            if not self.data.containsItem(i):
                #item i has no ratings in the training set
                return self.data.globalMean
            return self.data.itemMeans[i]
        pred = self.data.itemMeans[i] + sum / float(denom)
        return pred

    def computeCorr(self):
        'compute correlation among items'
        print 'Computing item similarities...'
        for idx, i1 in enumerate(self.data.testSet_i):
            for i2 in self.data.item:
                if i1 != i2:
                    if self.itemSim.contains(i1, i2):
                        continue
                    sim = qmath.similarity(self.data.sCol(i1), self.data.sCol(i2), self.sim)
                    self.itemSim.set(i1, i2, sim)
            self.topItems[i1] = sorted(self.itemSim[i1].iteritems(), key=lambda d: d[1], reverse=True)
            if idx % 100 == 0:
                print 'progress:', idx, '/', len(self.data.testSet_i)
        print 'The item similarities have been calculated.'
def buildModel(self):
    print 'Kind Note: This method will probably take much time.'
    #build C-U-NET
    print 'Building collaborative user network...'
    #filter isolated nodes
    self.itemNet = {}
    for item in self.dao.trainSet_i:
        if len(self.dao.trainSet_i[item]) > 1:
            self.itemNet[item] = self.dao.trainSet_i[item]

    self.filteredRatings = defaultdict(list)
    for item in self.itemNet:
        for user in self.itemNet[item]:
            if self.itemNet[item][user] >= 1:
                self.filteredRatings[user].append(item)

    self.CUNet = defaultdict(list)
    for user1 in self.filteredRatings:
        for user2 in self.filteredRatings:
            if user1 != user2:
                weight = len(set(self.filteredRatings[user1]).intersection(set(self.filteredRatings[user2])))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight

    #build Huffman tree first
    #get weight
    print 'Building Huffman tree...'
    #to accelerate the method, the weight is estimated roughly
    nodes = {}
    for user in self.CUNet:
        nodes[user] = len(self.CUNet[user])
    nodes = sorted(nodes.iteritems(), key=lambda d: d[1])
    nodes = [HTreeNode(None, None, user[1], user[0]) for user in nodes]
    nodeList = OrderedLinkList()
    for node in nodes:
        listNode = Node()
        listNode.val = node
        try:
            nodeList.insert(listNode)
        except AttributeError:
            pass
    self.HTree = HuffmanTree(vecLength=self.walkDim)
    self.HTree.buildTree(nodeList)
    print 'Coding for all users...'
    self.HTree.coding(self.HTree.root, '', 0)

    print 'Generating random deep walks...'
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            currentNode = user
            path = [user]
            for i in range(1, self.walkLength):
                nextNode = self.CUNet[user][randint(0, len(self.CUNet[user]) - 1)]
                count = 0
                while self.visited[user].has_key(nextNode):
                    nextNode = self.CUNet[user][randint(0, len(self.CUNet[user]) - 1)]
                    #break infinite loop
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
            self.walks.append(path)
            #print path
    shuffle(self.walks)

    #training: get top-k friends
    print 'Generating user embedding...'
    iteration = 1
    while iteration <= self.maxIter:
        loss = 0
        for walk in self.walks:
            for user in walk:
                centerUser = walk[len(walk) / 2]
                if user != centerUser:
                    code = self.HTree.code[user]
                    centerCode = self.HTree.code[centerUser]
                    x = self.HTree.vector[centerCode]
                    for i in range(1, len(code)):
                        prefix = code[0:i]
                        w = self.HTree.vector[prefix]
                        self.HTree.vector[prefix] += self.lRate * (1 - sigmoid(w.dot(x))) * x
                        self.HTree.vector[centerCode] += self.lRate * (1 - sigmoid(w.dot(x))) * w
                        loss += -log(sigmoid(w.dot(x)), 2)
        print 'iteration:', iteration, 'loss:', loss
        iteration += 1
    print 'User embedding generated.'

    print 'Constructing similarity matrix...'
    self.Sim = SymmetricMatrix(len(self.CUNet))
    for user1 in self.CUNet:
        for user2 in self.CUNet:
            if user1 != user2:
                prefix1 = self.HTree.code[user1]
                vec1 = self.HTree.vector[prefix1]
                prefix2 = self.HTree.code[user2]
                vec2 = self.HTree.vector[prefix2]
                if self.Sim.contains(user1, user2):
                    continue
                sim = cosine(vec1, vec2)
                self.Sim.set(user1, user2, sim)
    self.topKSim = {}
    for user in self.CUNet:
        self.topKSim[user] = sorted(self.Sim[user].iteritems(), key=lambda d: d[1], reverse=True)[:self.topK]
    print 'Similarity matrix finished.'
    #print self.topKSim

    #prepare Pu set, IPu set, and Nu set
    print 'Preparing item sets...'
    self.PositiveSet = defaultdict(dict)
    self.IPositiveSet = defaultdict(list)
    self.NegativeSet = defaultdict(list)

    for user in self.topKSim:
        for item in self.dao.trainSet_u[user]:
            if self.dao.trainSet_u[user][item] >= 1:
                self.PositiveSet[user][item] = 1
            else:
                self.NegativeSet[user].append(item)
        for friend in self.topKSim[user]:
            for item in self.dao.trainSet_u[friend[0]]:
                if not self.PositiveSet[user].has_key(item):
                    self.IPositiveSet[user].append(item)

    print 'Training...'
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        for user in self.PositiveSet:
            u = self.dao.user[user]
            for item in self.PositiveSet[user]:
                if len(self.IPositiveSet[user]) > 0:
                    item_k = self.IPositiveSet[user][randint(0, len(self.IPositiveSet[user]) - 1)]
                    i = self.dao.item[item]
                    k = self.dao.item[item_k]
                    self.P[u] += self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * (self.Q[i] - self.Q[k])
                    self.Q[i] += self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * self.P[u]
                    self.Q[k] -= self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * self.P[u]

                    item_j = ''
                    if len(self.NegativeSet[user]) > 0:
                        item_j = self.NegativeSet[user][randint(0, len(self.NegativeSet[user]) - 1)]
                    else:
                        item_j = self.dao.item.keys()[randint(0, len(self.dao.item) - 1)]
                        while self.PositiveSet[user].has_key(item_j):
                            item_j = self.dao.item.keys()[randint(0, len(self.dao.item) - 1)]
                    j = self.dao.item[item_j]
                    self.P[u] += (1 / self.s) * self.lRate * (1 - sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))) * (self.Q[k] - self.Q[j])
                    self.Q[k] += (1 / self.s) * self.lRate * (1 - sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))) * self.P[u]
                    self.Q[j] -= (1 / self.s) * self.lRate * (1 - sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))) * self.P[u]

                    #L2 regularization on the involved factors
                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.Q[k] -= self.lRate * self.regI * self.Q[k]

                    self.loss += -log(sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) - \
                                 log(sigmoid((1 / self.s) * (self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))))
        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break
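#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library). The embedding step of
# buildModel above is a skip-gram update with hierarchical softmax: for each
# (context user, center user) pair, the vectors stored at every prefix of the
# context user's Huffman code, plus the center vector, are nudged by a
# 1 - sigmoid(w.x) factor. The numpy sketch below reproduces that inner update
# with a plain dict standing in for HTree.vector (the gradient factor is
# computed once per tree node here); all names are hypothetical. Python 3.
#------------------------------------------------------------------------------
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def huffman_path_update(prefix_vectors, context_code, center_code, lrate=0.05):
    """One hierarchical-softmax step along the Huffman path of the context user.

    prefix_vectors: {binary-prefix string: np.ndarray} inner-node / leaf vectors
    Returns the accumulated -log sigmoid loss for this (context, center) pair.
    """
    loss = 0.0
    x = prefix_vectors[center_code]
    for i in range(1, len(context_code)):
        prefix = context_code[:i]
        w = prefix_vectors[prefix]
        g = 1.0 - sigmoid(w.dot(x))
        prefix_vectors[prefix] += lrate * g * x        # update the inner-node vector
        prefix_vectors[center_code] += lrate * g * w   # update the center user's vector
        loss += -np.log(sigmoid(w.dot(x)))
    return loss


# toy usage: prefixes of a 4-bit code plus the center user's own code
rng = np.random.default_rng(0)
vecs = {p: rng.normal(scale=0.1, size=8) for p in ('0', '01', '011', '0110', '1')}
print(huffman_path_update(vecs, context_code='0110', center_code='1'))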
def buildModel(self):
    print 'Kind Note: This method will probably take much time.'
    #build C-U-NET
    print 'Building collaborative user network...'
    #filter isolated nodes and low ratings
    self.itemNet = {}
    for item in self.dao.trainSet_i:
        if len(self.dao.trainSet_i[item]) > 1:
            self.itemNet[item] = self.dao.trainSet_i[item]

    self.filteredRatings = defaultdict(list)
    for item in self.itemNet:
        for user in self.itemNet[item]:
            if self.itemNet[item][user] > 0.75:
                self.filteredRatings[user].append(item)

    self.CUNet = defaultdict(list)
    for user1 in self.filteredRatings:
        s1 = set(self.filteredRatings[user1])
        for user2 in self.filteredRatings:
            if user1 != user2:
                s2 = set(self.filteredRatings[user2])
                weight = len(s1.intersection(s2))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight

    #build Huffman tree first
    #get weight
    print 'Building Huffman tree...'
    #to accelerate the method, the weight is estimated roughly
    nodes = {}
    for user in self.CUNet:
        nodes[user] = len(self.CUNet[user])
    nodes = sorted(nodes.iteritems(), key=lambda d: d[1])
    nodes = [HTreeNode(None, None, user[1], user[0]) for user in nodes]
    nodeList = OrderedLinkList()
    for node in nodes:
        listNode = Node()
        listNode.val = node
        try:
            nodeList.insert(listNode)
        except AttributeError:
            pass
    self.HTree = HuffmanTree(vecLength=self.walkDim)
    self.HTree.buildTree(nodeList)
    print 'Coding for all users...'
    self.HTree.coding(self.HTree.root, '', 0)

    print 'Generating random deep walks...'
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            path = [user]
            for i in range(1, self.walkLength):
                nextNode = choice(self.CUNet[user])
                count = 0
                while self.visited[user].has_key(nextNode):
                    nextNode = choice(self.CUNet[user])
                    #break infinite loop
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
                self.visited[user][nextNode] = 1
            self.walks.append(path)
            #print path
    shuffle(self.walks)

    #training: get top-k friends
    print 'Generating user embedding...'
    iteration = 1
    while iteration <= self.epoch:
        loss = 0
        for walk in self.walks:
            for user in walk:
                centerUser = walk[len(walk) / 2]
                if user != centerUser:
                    code = self.HTree.code[user]
                    centerCode = self.HTree.code[centerUser]
                    x = self.HTree.vector[centerCode]
                    for i in range(1, len(code)):
                        prefix = code[0:i]
                        w = self.HTree.vector[prefix]
                        self.HTree.vector[prefix] += self.lRate * (1 - sigmoid(w.dot(x))) * x
                        self.HTree.vector[centerCode] += self.lRate * (1 - sigmoid(w.dot(x))) * w
                        loss += -log(sigmoid(w.dot(x)))
        print 'iteration:', iteration, 'loss:', loss
        iteration += 1
    print 'User embedding generated.'

    print 'Constructing similarity matrix...'
    self.Sim = SymmetricMatrix(len(self.CUNet))
    for user1 in self.CUNet:
        for user2 in self.CUNet:
            if user1 != user2:
                prefix1 = self.HTree.code[user1]
                vec1 = self.HTree.vector[prefix1]
                prefix2 = self.HTree.code[user2]
                vec2 = self.HTree.vector[prefix2]
                if self.Sim.contains(user1, user2):
                    continue
                sim = cosine(vec1, vec2)
                self.Sim.set(user1, user2, sim)
    self.topKSim = {}
    for user in self.CUNet:
        self.topKSim[user] = sorted(self.Sim[user].iteritems(), key=lambda d: d[1], reverse=True)[:self.topK]
    print 'Similarity matrix finished.'
    #print self.topKSim

    #matrix decomposition
    print 'Decomposing...'
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        for entry in self.dao.trainingData:
            user, item, rating = entry
            u = self.dao.user[user]  #get user id
            i = self.dao.item[item]  #get item id
            error = rating - self.P[u].dot(self.Q[i])
            self.loss += error ** 2
            p = self.P[u]
            q = self.Q[i]
            #update latent vectors
            self.P[u] += self.lRate * (error * q - self.regU * p)
            self.Q[i] += self.lRate * (error * p - self.regI * q)

        for user in self.CUNet:
            u = self.dao.user[user]
            friends = self.topKSim[user]
            for friend in friends:
                uf = self.dao.user[friend[0]]
                self.P[u] -= self.lRate * (self.P[u] - self.P[uf]) * self.alpha
                self.loss += self.alpha * (self.P[u] - self.P[uf]).dot(self.P[u] - self.P[uf])

        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break
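#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library). The second loop above adds a
# social-style regularizer: each user's latent vector is pulled toward the
# vectors of the top-k similar users found by the network embedding, and each
# friend f contributes alpha * ||P[u] - P[f]||^2 to the loss (measured after
# the pull, as in the loop above). Names below are hypothetical. Python 3.
#------------------------------------------------------------------------------
import numpy as np

def social_regularization_step(P, user_idx, friend_idxs, lrate=0.02, alpha=0.1):
    """Pull P[user_idx] toward each friend's vector and return the added loss.

    P: (num_users, dim) latent factor matrix, modified in place.
    """
    loss = 0.0
    for f in friend_idxs:
        P[user_idx] -= lrate * alpha * (P[user_idx] - P[f])
        diff = P[user_idx] - P[f]
        loss += alpha * diff.dot(diff)
    return loss


P = np.random.default_rng(1).normal(size=(4, 3))
print(social_regularization_step(P, user_idx=0, friend_idxs=[1, 2]))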
class TSWalker(Recommender):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(TSWalker, self).__init__(conf, trainingSet, testSet, fold)
        self.userSim = SymmetricMatrix(len(self.dao.user))
        self.itemSim = SymmetricMatrix(len(self.dao.item))

    def readConfiguration(self):
        super(TSWalker, self).readConfiguration()
        self.sim = self.config['similarity']
        TW = LineConfig(self.config['TSWalker'])
        self.k = int(TW['-k'])
        self.v = float(TW['-v'])
        self.tw = int(TW['-tw'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        super(TSWalker, self).printAlgorConfig()
        print 'Specified Arguments of', self.config['recommender'] + ':'
        print 'similarity:', self.config['similarity']
        print 'step: %d' % self.k
        print 'Random Walk times: %d' % self.tw
        print 'The trust value of u: %f' % self.v
        print '=' * 80

    def initModel(self):
        self.computeICorr()
        self.computeUCorr()

    def predict(self, u, i):
        u0 = u
        twcount = 0
        pre = []
        rating = 0
        while twcount < self.tw:
            tk = 0
            while tk < self.k:
                u1 = choice(list(self.dao.user))
                if (u0 != u1) and (u1 not in pre):
                    pre.append(u1)
                else:
                    continue
                pu = self.dao.getUserId(u1)
                if self.userSim[u][u1] != 1:
                    continue
                else:
                    if self.dao.rating(u1, i) != 0:
                        rating += self.dao.rating(u1, i)
                        tk += 1
                        twcount += 1
                        print 'Finished TSWalker for %d time in %d step-1' % (twcount, tk)
                    else:
                        tk += 1
                        pk = self.proOfK(u1, i, tk)
                        pv = random()
                        if pv < pk:
                            uj = self.dao.trainingMatrix.matrix_User[pu].keys()
                            temp = 0
                            bitem = 0
                            for j in uj:
                                if self.itemSim[i][self.dao.id2item[j]] > temp:
                                    temp = self.itemSim[i][self.dao.id2item[j]]
                                    bitem = j
                            rating += self.dao.rating(u1, self.dao.id2item[bitem])
                            twcount += 1
                            print 'Finished TSWalker for %d time in %d step-2' % (twcount, tk)
                        else:
                            u0 = u1
        print rating
        pred = rating / float(self.tw)
        return pred

    def computeUCorr(self):
        'compute correlation among users'
        print 'Computing user correlation...'
        for u1 in self.dao.testSet_u:
            for u2 in self.dao.user:
                if u1 != u2:
                    if self.userSim.contains(u1, u2):
                        continue
                    sim = qmath.similarity(self.dao.sRow(u1), self.dao.sRow(u2), self.sim)
                    if sim >= self.v:
                        self.userSim.set(u1, u2, 1)
                    else:
                        self.userSim.set(u1, u2, 0)
            tcount = 0
            for i in range(len(self.userSim[u1])):
                us = list(self.userSim[u1].iteritems())
                if us[i][1] == 1:
                    tcount += 1
            print 'user ' + u1 + ' finished.'
        print 'The user correlation has been figured out.'

    def computeICorr(self):
        'compute correlation among items'
        for i in self.dao.item:
            d1 = 0
            for r in self.dao.user:
                ui = self.dao.rating(r, i)
                um = self.dao.userMeans[r]
                d1 += (ui - um) ** 2
            for j in self.dao.item:
                if i != j:
                    if self.itemSim.contains(i, j):
                        continue
                    aui, rui = self.dao.itemRated(i)
                    auj, ruj = self.dao.itemRated(j)
                    cuser = set(aui).intersection(set(auj))
                    sum = 0
                    d2 = 0
                    for cu in cuser:
                        rui = self.dao.rating(self.dao.id2user[cu], i)
                        ruj = self.dao.rating(self.dao.id2user[cu], j)
                        umean = self.dao.userMeans[self.dao.id2user[cu]]
                        sum += (rui - umean) * (ruj - umean)
                        d2 += (rui - umean) ** 2
                    try:
                        denom = sqrt(d1 * d2)
                        corr = float(sum) / denom
                    except ZeroDivisionError:
                        corr = 0
                    finally:
                        l = float(len(cuser)) / 2
                        sim = corr / (1 + exp(-l))
                        self.itemSim.set(i, j, sim)
            print 'item ' + i + ' finished.'
        print 'The item correlation has been figured out.'

    def proOfK(self, u, i, k):
        res = []
        nk = float(k) / 2
        ui, ur = self.dao.userRated(u)
        for j in ui:
            res.append(self.itemSim[i][self.dao.id2item[j]])
        denom = 1 + exp(-nk)
        nres = map(lambda x: (x / denom), res)
        return max(nres)
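#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library). computeICorr above scores item
# pairs with a Pearson-style correlation damped by a sigmoid of the co-rating
# count, sim = corr / (1 + exp(-|common| / 2)). The sketch below uses the
# standard co-rater form of the denominator (the class accumulates d1 over all
# users), with dicts standing in for the dao accessors; names are hypothetical.
#------------------------------------------------------------------------------
from math import exp, sqrt

def shrunk_pearson(ratings_i, ratings_j, user_means):
    """Significance-weighted Pearson similarity between two items."""
    common = set(ratings_i) & set(ratings_j)
    num = d1 = d2 = 0.0
    for u in common:
        di = ratings_i[u] - user_means[u]
        dj = ratings_j[u] - user_means[u]
        num += di * dj
        d1 += di * di
        d2 += dj * dj
    try:
        corr = num / sqrt(d1 * d2)
    except ZeroDivisionError:
        corr = 0.0
    return corr / (1 + exp(-len(common) / 2.0))


ri = {'u1': 4.0, 'u2': 2.0, 'u3': 5.0}
rj = {'u1': 5.0, 'u2': 1.0}
means = {'u1': 3.0, 'u2': 3.0, 'u3': 4.0}
print(shrunk_pearson(ri, rj, means))  # corr = 1.0 damped to about 0.73 for 2 co-raters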
class CUNE_BPR(IterativeRecommender):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        super(CUNE_BPR, self).__init__(conf, trainingSet, testSet, fold)
        self.nonLeafVec = {}
        self.leafVec = {}

    def readConfiguration(self):
        super(CUNE_BPR, self).readConfiguration()
        options = config.LineConfig(self.config['CUNE-BPR'])
        self.walkCount = int(options['-T'])
        self.walkLength = int(options['-L'])
        self.walkDim = int(options['-l'])
        self.winSize = int(options['-w'])
        self.topK = int(options['-k'])
        self.s = float(options['-s'])
        self.epoch = int(options['-ep'])

    def printAlgorConfig(self):
        super(CUNE_BPR, self).printAlgorConfig()
        print 'Specified Arguments of', self.config['recommender'] + ':'
        print 'Walks count per user', self.walkCount
        print 'Length of each walk', self.walkLength
        print 'Dimension of user embedding', self.walkDim
        print '=' * 80

    def buildModel(self):
        print 'Kind Note: This method will probably take much time.'
        #build C-U-NET
        print 'Building collaborative user network...'
        #filter isolated nodes
        self.itemNet = {}
        for item in self.dao.trainSet_i:
            if len(self.dao.trainSet_i[item]) > 1:
                self.itemNet[item] = self.dao.trainSet_i[item]

        self.filteredRatings = defaultdict(list)
        for item in self.itemNet:
            for user in self.itemNet[item]:
                if self.itemNet[item][user] >= 1:
                    self.filteredRatings[user].append(item)

        self.CUNet = defaultdict(list)
        for user1 in self.filteredRatings:
            s1 = set(self.filteredRatings[user1])
            for user2 in self.filteredRatings:
                if user1 != user2:
                    s2 = set(self.filteredRatings[user2])
                    weight = len(s1.intersection(s2))
                    if weight > 0:
                        self.CUNet[user1] += [user2] * weight

        #build Huffman tree first
        #get weight
        print 'Building Huffman tree...'
        #to accelerate the method, the weight is estimated roughly
        nodes = {}
        for user in self.CUNet:
            nodes[user] = len(self.CUNet[user])
        nodes = sorted(nodes.iteritems(), key=lambda d: d[1])
        nodes = [HTreeNode(None, None, user[1], user[0]) for user in nodes]
        nodeList = OrderedLinkList()
        for node in nodes:
            listNode = Node()
            listNode.val = node
            try:
                nodeList.insert(listNode)
            except AttributeError:
                pass
        self.HTree = HuffmanTree(vecLength=self.walkDim)
        self.HTree.buildTree(nodeList)
        print 'Coding for all users...'
        self.HTree.coding(self.HTree.root, '', 0)

        print 'Generating random deep walks...'
        self.walks = []
        self.visited = defaultdict(dict)
        for user in self.CUNet:
            for t in range(self.walkCount):
                path = [user]
                for i in range(1, self.walkLength):
                    nextNode = choice(self.CUNet[user])
                    count = 0
                    while self.visited[user].has_key(nextNode):
                        nextNode = choice(self.CUNet[user])
                        #break infinite loop
                        count += 1
                        if count == 10:
                            break
                    path.append(nextNode)
                    self.visited[user][nextNode] = 1
                self.walks.append(path)
                #print path
        shuffle(self.walks)

        #training: get top-k friends
        print 'Generating user embedding...'
        iteration = 1
        while iteration <= self.epoch:
            loss = 0
            #slide windows randomly
            for n in range(self.walkLength / self.winSize):
                for walk in self.walks:
                    center = randint(0, len(walk) - 1)
                    s = max(0, center - self.winSize / 2)
                    e = min(center + self.winSize / 2, len(walk) - 1)
                    for user in walk[s:e]:
                        centerUser = walk[center]
                        if user != centerUser:
                            code = self.HTree.code[user]
                            centerCode = self.HTree.code[centerUser]
                            x = self.HTree.vector[centerCode]
                            for i in range(1, len(code)):
                                prefix = code[0:i]
                                w = self.HTree.vector[prefix]
                                self.HTree.vector[prefix] += self.lRate * (1 - sigmoid(w.dot(x))) * x
                                self.HTree.vector[centerCode] += self.lRate * (1 - sigmoid(w.dot(x))) * w
                                loss += -log(sigmoid(w.dot(x)), 2)
            print 'iteration:', iteration, 'loss:', loss
            iteration += 1
        print 'User embedding generated.'

        print 'Constructing similarity matrix...'
        self.Sim = SymmetricMatrix(len(self.CUNet))
        for user1 in self.CUNet:
            for user2 in self.CUNet:
                if user1 != user2:
                    prefix1 = self.HTree.code[user1]
                    vec1 = self.HTree.vector[prefix1]
                    prefix2 = self.HTree.code[user2]
                    vec2 = self.HTree.vector[prefix2]
                    if self.Sim.contains(user1, user2):
                        continue
                    sim = cosine(vec1, vec2)
                    self.Sim.set(user1, user2, sim)
        self.topKSim = {}
        for user in self.CUNet:
            self.topKSim[user] = sorted(self.Sim[user].iteritems(), key=lambda d: d[1], reverse=True)[:self.topK]
        print 'Similarity matrix finished.'
        #print self.topKSim

        #prepare Pu set, IPu set, and Nu set
        print 'Preparing item sets...'
        self.PositiveSet = defaultdict(dict)
        self.IPositiveSet = defaultdict(list)
        #self.NegativeSet = defaultdict(list)

        for user in self.topKSim:
            for item in self.dao.trainSet_u[user]:
                if self.dao.trainSet_u[user][item] >= 1:
                    self.PositiveSet[user][item] = 1
                # else:
                #     self.NegativeSet[user].append(item)
            for friend in self.topKSim[user]:
                for item in self.dao.trainSet_u[friend[0]]:
                    if not self.PositiveSet[user].has_key(item):
                        self.IPositiveSet[user].append(item)
        print 'Training...'
        iteration = 0
        while iteration < self.maxIter:
            self.loss = 0
            itemList = self.dao.item.keys()
            for user in self.PositiveSet:
                u = self.dao.user[user]
                for item in self.PositiveSet[user]:
                    if len(self.IPositiveSet[user]) > 0:
                        item_k = choice(self.IPositiveSet[user])
                        i = self.dao.item[item]
                        k = self.dao.item[item_k]
                        self.P[u] += self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * (self.Q[i] - self.Q[k])
                        self.Q[i] += self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * self.P[u]
                        self.Q[k] -= self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * self.P[u]

                        item_j = ''
                        # if len(self.NegativeSet[user]) > 0:
                        #     item_j = choice(self.NegativeSet[user])
                        # else:
                        item_j = choice(itemList)
                        while self.PositiveSet[user].has_key(item_j):
                            item_j = choice(itemList)
                        j = self.dao.item[item_j]
                        self.P[u] += (1 / self.s) * self.lRate * (1 - sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))) * (self.Q[k] - self.Q[j])
                        self.Q[k] += (1 / self.s) * self.lRate * (1 - sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))) * self.P[u]
                        self.Q[j] -= (1 / self.s) * self.lRate * (1 - sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))) * self.P[u]

                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]
                        self.Q[k] -= self.lRate * self.regI * self.Q[k]

                        self.loss += -log(sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) - \
                                     log(sigmoid((1 / self.s) * (self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))))
            self.loss += self.regU * (self.P * self.P).sum() + self.regI * (self.Q * self.Q).sum()
            iteration += 1
            if self.isConverged(iteration):
                break

    def predict(self, u, i):
        if self.dao.containsUser(u) and self.dao.containsItem(i):
            return sigmoid(self.P[self.dao.user[u]].dot(self.Q[self.dao.item[i]]))
        elif self.dao.containsUser(u) and not self.dao.containsItem(i):
            return self.dao.userMeans[u]
        elif not self.dao.containsUser(u) and self.dao.containsItem(i):
            return self.dao.itemMeans[i]
        else:
            return self.dao.globalMean

    def predictForRanking(self, u):
        'invoked to rank all the items for the user'
        if self.dao.containsUser(u):
            u = self.dao.getUserId(u)
            return self.Q.dot(self.P[u])
        else:
            return [self.dao.globalMean] * len(self.dao.item)
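#------------------------------------------------------------------------------
# Illustrative sketch (not part of the library). The training loop above applies
# BPR-style pairwise updates: for a user u, a preferred item i and a less
# preferred item j, each factor moves along a gradient scaled by
# 1 - sigmoid(P[u].Q[i] - P[u].Q[j]), plus L2 regularization. A minimal numpy
# version of one such step on a single (u, i, j) triple; names are hypothetical.
# Python 3.
#------------------------------------------------------------------------------
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def bpr_step(P, Q, u, i, j, lrate=0.02, reg=0.01):
    """One BPR update on a (user u, positive item i, negative item j) triple.

    Returns the -log sigmoid(x_uij) loss before regularization.
    """
    x_uij = P[u].dot(Q[i]) - P[u].dot(Q[j])
    g = 1.0 - sigmoid(x_uij)
    P[u] += lrate * (g * (Q[i] - Q[j]) - reg * P[u])
    Q[i] += lrate * (g * P[u] - reg * Q[i])
    Q[j] += lrate * (-g * P[u] - reg * Q[j])
    return -np.log(sigmoid(x_uij))


rng = np.random.default_rng(2)
P, Q = rng.normal(size=(3, 5)), rng.normal(size=(4, 5))
print(bpr_step(P, Q, u=0, i=1, j=2))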
def __init__(self, conf):
    super(ItemKNN, self).__init__(conf)
    self.itemSim = SymmetricMatrix(len(self.dao.user))  #used to store the similarity among items