def optimization_thres(self, u, i, j, user, friend):
    """One SGD step of a BPR-style pairwise update whose preference margin is
    attenuated by a per-user, per-friend gate g_theta derived from social
    similarity.

    Args (assumed from usage — confirm against callers):
        u: internal index of the user row in self.P.
        i: internal index of the preferred item in self.Q.
        j: internal index of the less-preferred item in self.Q.
        user, friend: raw user ids used to key the similarity/threshold tables.

    Side effects: updates self.P[u], self.Q[i], self.Q[j], accumulates
    self.loss, and accumulates the threshold gradient in self.thres_d[user].
    """
    #print 'inner', (self.pSimilarity[user][friend]-self.threshold[user])/(self.avg_sim[user]-self.threshold[user])
    # Gate in (0, 1): similarity normalized between the user's threshold and
    # average similarity, squashed through the sigmoid.
    try:
        g_theta = sigmoid(
            (self.pSimilarity[user][friend] - self.threshold[user])
            / (self.avg_sim[user] - self.threshold[user]))
    except OverflowError:
        # Diagnostic dump before aborting; triggers when the normalized ratio
        # is extreme (e.g. avg_sim very close to threshold).
        # NOTE(review): exiting the whole process from a library routine is
        # heavy-handed — consider raising instead; also 'smilarity' is a typo
        # in the diagnostic label (left untouched here).
        print('threshold', self.threshold[user], 'smilarity', self.pSimilarity[user][friend], 'avg', self.avg_sim[user])
        print((self.pSimilarity[user][friend] - self.threshold[user]), (self.avg_sim[user] - self.threshold[user]))
        print((self.pSimilarity[user][friend] - self.threshold[user]) / (self.avg_sim[user] - self.threshold[user]))
        exit(-1)
    #print 'g_theta',g_theta
    # BPR probability that u prefers i over j, with the margin damped by
    # (1 + g_theta).
    s = sigmoid((self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])) / (1 + g_theta))
    # Gradient ascent on log-likelihood. NOTE: self.P[u] is updated first and
    # the *updated* vector is then used for the Q updates — the statement
    # order is significant.
    self.P[u] += self.lRate * (1 - s) * (self.Q[i] - self.Q[j])
    self.Q[i] += self.lRate * (1 - s) * self.P[u]
    self.Q[j] -= self.lRate * (1 - s) * self.P[u]
    self.loss += -log(s)
    # L2 weight decay applied after the gradient step.
    self.P[u] -= self.lRate * self.regU * self.P[u]
    self.Q[i] -= self.lRate * self.regI * self.Q[i]
    self.Q[j] -= self.lRate * self.regI * self.Q[j]
    # Derivative of the loss w.r.t. the user's threshold (chain rule through
    # g_theta), plus a small 0.005 regularization term on the threshold.
    # Note this uses the already-updated P/Q vectors.
    t_derivative = -g_theta*(1-g_theta)*(1-s)*(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j]))\
        *(self.pSimilarity[user][friend]-self.avg_sim[user])/(self.avg_sim[user]- self.threshold[user])**2/(1+g_theta)**2 + 0.005*self.threshold[user]
    #print 'derivative', t_derivative
    # Accumulate; presumably averaged elsewhere using thres_count — TODO confirm.
    self.thres_d[user] += t_derivative
    self.thres_count[user] += 1
def buildModel(self):
    """Train the model with a three-level pairwise ranking scheme:
    user positives > friend-positive items (FPSet) > sampled negatives.

    FPSet is assumed to map user -> {item: strength Suk}; the strength damps
    the positive-vs-friend-positive margin by 1/(Suk + 1) — TODO confirm
    against where FPSet is built.

    Side effects: initializes self.b, updates self.P / self.Q in place, and
    accumulates self.loss per epoch.
    """
    # Item bias vector. NOTE(review): b enters the sigmoid margins below but
    # never receives a gradient update, and the pairwise loss terms omit the
    # biases — looks inconsistent, left as-is to preserve behavior.
    self.b = np.random.random(self.num_items)
    print('Training...')
    epoch = 0
    while epoch < self.maxEpoch:
        self.loss = 0
        itemList = list(self.data.item.keys())
        for user in self.PositiveSet:
            u = self.data.user[user]
            kItems = list(self.FPSet[user].keys())
            for item in self.PositiveSet[user]:
                i = self.data.item[item]
                if len(self.FPSet[user]) > 0:
                    # Sample a friend-positive item k and rank i above it.
                    item_k = choice(kItems)
                    k = self.data.item[item_k]
                    # BUG FIX: was `self.FPSet[user][kItems]`, which subscripts
                    # the dict with the whole key *list* (TypeError). Look up
                    # the sampled item instead.
                    Suk = self.FPSet[user][item_k]
                    s = sigmoid((self.P[u].dot(self.Q[i]) - self.P[u].dot(
                        self.Q[k]) + self.b[i] - self.b[k]) / (Suk + 1))
                    # Statement order matters: P[u] is updated first and the
                    # updated vector feeds the Q updates.
                    self.P[u] += 1 / (Suk + 1) * self.lRate * (1 - s) * (
                        self.Q[i] - self.Q[k])
                    self.Q[i] += 1 / (Suk + 1) * self.lRate * (1 - s) * self.P[u]
                    self.Q[k] -= 1 / (Suk + 1) * self.lRate * (1 - s) * self.P[u]
                    # Sample a true negative j: not a positive and not a
                    # friend-positive of THIS user.
                    # BUG FIX: was `item_j in self.FPSet`, which tests user
                    # keys rather than this user's friend-positive items.
                    item_j = choice(itemList)
                    while item_j in self.PositiveSet[user] or item_j in self.FPSet[user]:
                        item_j = choice(itemList)
                    j = self.data.item[item_j]
                    # Rank the friend-positive item k above the negative j.
                    s = sigmoid(self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])
                                + self.b[k] - self.b[j])
                    self.P[u] += self.lRate * (1 - s) * (self.Q[k] - self.Q[j])
                    self.Q[k] += self.lRate * (1 - s) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - s) * self.P[u]
                    # L2 weight decay.
                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.Q[k] -= self.lRate * self.regI * self.Q[k]
                    # Loss recomputed with the updated factors (bias-free, as
                    # in the original).
                    self.loss += -log(sigmoid((self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k])) / (Suk + 1))) \
                        - log(sigmoid(self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))
                else:
                    # No friend-positive items: plain BPR step i > j.
                    item_j = choice(itemList)
                    while item_j in self.PositiveSet[user]:
                        item_j = choice(itemList)
                    j = self.data.item[item_j]
                    s = sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
                                + self.b[i] - self.b[j])
                    self.P[u] += self.lRate * (1 - s) * (self.Q[i] - self.Q[j])
                    self.Q[i] += self.lRate * (1 - s) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - s) * self.P[u]
                    self.loss += -log(s)
        # Epoch-level regularization terms.
        self.loss += self.regU * (self.P * self.P).sum(
        ) + self.regI * (self.Q * self.Q).sum() + self.b.dot(self.b)
        epoch += 1
        if self.isConverged(epoch):
            break
def optimization_theta(self, u, i, j):
    """One BPR-style SGD step whose margin and learning rate are attenuated
    by the factor 1/(1 + 1/g_theta), and which accumulates the gradient of
    the loss w.r.t. the global threshold parameter theta.

    Args (assumed from usage — confirm against callers):
        u: user row index into self.P.
        i: preferred item index into self.Q.
        j: less-preferred item index into self.Q.

    Side effects: updates self.P[u], self.Q[i], self.Q[j]; accumulates
    self.loss and self.theta_derivative / self.theta_count.
    """
    # Hoist the loop-invariant damping denominator; the original recomputed
    # 1/(1 + 1/g_theta) four times. (Removed a block of commented-out,
    # un-damped update code that duplicated optimization().)
    damp = 1 + 1 / self.g_theta
    s = sigmoid((self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])) / damp)
    # Damped gradient ascent. Order matters: the updated P[u] feeds the
    # Q updates.
    step = self.lRate * (1 - s) / damp
    self.P[u] += step * (self.Q[i] - self.Q[j])
    self.Q[i] += step * self.P[u]
    self.Q[j] -= step * self.P[u]
    self.loss += -log(s)
    # L2 weight decay.
    self.P[u] -= self.lRate * self.regU * self.P[u]
    self.Q[i] -= self.lRate * self.regI * self.Q[i]
    self.Q[j] -= self.lRate * self.regI * self.Q[j]
    # d(loss)/d(theta) plus regT regularization; uses the updated P/Q vectors
    # as in the original. t_w and t_s are presumably the weak/strong tie
    # endpoints that theta interpolates between — TODO confirm.
    self.theta_derivative += self.regT * self.theta + (
        (1 - s) * (self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j]))
        * (self.t_w + self.t_s - 2 * self.theta)) / (self.g_theta + 1)**2
    self.theta_count += 1
def optimization(self, u, i, j):
    """Standard BPR SGD step: push item i above item j for user u.

    Updates self.P[u], self.Q[i], self.Q[j] in place, applies L2 weight
    decay, and adds the pairwise log-loss to self.loss.
    """
    margin = self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
    s = sigmoid(margin)
    err = self.lRate * (1 - s)
    # Gradient step; the freshly updated P[u] is reused in the item updates,
    # matching the original statement order.
    self.P[u] += err * (self.Q[i] - self.Q[j])
    self.Q[i] += err * self.P[u]
    self.Q[j] -= err * self.P[u]
    self.loss -= log(s)
    # Weight decay on all three touched factors.
    self.P[u] -= self.lRate * self.regU * self.P[u]
    self.Q[i] -= self.lRate * self.regI * self.Q[i]
    self.Q[j] -= self.lRate * self.regI * self.Q[j]
def buildModel(self):
    """Full training pipeline:

    1. Build a collaborative user network (CUNet) weighted by co-rated items.
    2. Generate truncated random walks over CUNet and embed users with
       Word2Vec.
    3. Rank each user's top-K most similar users by cosine similarity.
    4. Train a two-level BPR model: own positives > friends' positives
       (IPositiveSet) > sampled negatives.

    Side effects: sets self.itemNet, self.filteredRatings, self.CUNet,
    self.walks, self.visited, self.W, self.topKSim, self.PositiveSet,
    self.IPositiveSet; updates self.P/self.Q; accumulates self.loss.
    """
    print('Kind Note: This method will probably take much time.')
    #build C-U-NET
    print('Building collaborative user network...')
    #filter isolated nodes
    self.itemNet = {}
    for item in self.data.trainSet_i:
        if len(self.data.trainSet_i[item]) > 1:
            self.itemNet[item] = self.data.trainSet_i[item]
    self.filteredRatings = defaultdict(list)
    for item in self.itemNet:
        for user in self.itemNet[item]:
            if self.itemNet[item][user] >= 1:
                self.filteredRatings[user].append(item)
    # Edge weight = number of co-rated items; each neighbor is repeated
    # `weight` times so that random-walk sampling is weight-proportional.
    self.CUNet = defaultdict(list)
    for user1 in self.filteredRatings:
        s1 = set(self.filteredRatings[user1])
        for user2 in self.filteredRatings:
            if user1 != user2:
                s2 = set(self.filteredRatings[user2])
                weight = len(s1.intersection(s2))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight
    # (Removed a large block of commented-out Python-2 Huffman-tree /
    # hierarchical-softmax code superseded by gensim Word2Vec below.)
    print('Generating random deep walks...')
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            path = [user]
            lastNode = user
            for i in range(1, self.walkLength):
                nextNode = choice(self.CUNet[lastNode])
                count = 0
                # Resample up to 10 times to avoid revisiting; give up after
                # that to break a potential infinite loop.
                # NOTE(review): the membership test keys on lastNode but the
                # mark below keys on the walk's start user — looks
                # inconsistent; left as-is to preserve behavior.
                while nextNode in self.visited[lastNode]:
                    nextNode = choice(self.CUNet[lastNode])
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
                self.visited[user][nextNode] = 1
                lastNode = nextNode
            self.walks.append(path)
    shuffle(self.walks)
    #Training get top-k friends
    print('Generating user embedding...')
    # NOTE(review): `size`/`iter` are the gensim<4 keyword names
    # (vector_size/epochs in gensim>=4) — confirm the pinned version.
    model = w2v.Word2Vec(self.walks, size=self.walkDim, window=5,
                         min_count=0, iter=3)
    print('User embedding generated.')
    print('Constructing similarity matrix...')
    self.W = np.random.rand(self.data.trainingSize()[0], self.walkDim) / 10
    self.topKSim = {}
    i = 0
    for user1 in self.CUNet:
        sims = []
        u1 = self.data.user[user1]
        self.W[u1] = model.wv[user1]
        for user2 in self.CUNet:
            if user1 != user2:
                u2 = self.data.user[user2]
                self.W[u2] = model.wv[user2]
                sims.append((user2, cosine(self.W[u1], self.W[u2])))
        self.topKSim[user1] = sorted(sims, key=lambda d: d[1],
                                     reverse=True)[:self.topK]
        i += 1
        if i % 200 == 0:
            print('progress:', i, '/', len(self.CUNet))
    print('Similarity matrix finished.')
    #prepare Pu set, IPu set, and Nu set
    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.IPositiveSet = defaultdict(dict)
    for user in self.topKSim:
        for item in self.data.trainSet_u[user]:
            self.PositiveSet[user][item] = 1
        # IPositiveSet: items liked by top-K friends but not by the user.
        for friend in self.topKSim[user]:
            for item in self.data.trainSet_u[friend[0]]:
                if item not in self.PositiveSet[user]:
                    self.IPositiveSet[user][item] = 1
    print('Training...')
    epoch = 0
    while epoch < self.maxEpoch:
        self.loss = 0
        itemList = list(self.data.item.keys())
        for user in self.PositiveSet:
            u = self.data.user[user]
            kItems = list(self.IPositiveSet[user].keys())
            for item in self.PositiveSet[user]:
                i = self.data.item[item]
                for n in range(3):  # negative sampling for 3 times
                    if len(self.IPositiveSet[user]) > 0:
                        # Rank own positive i above friend-positive k.
                        # (The sigmoid is recomputed after each factor update,
                        # as in the original — preserved to keep numerics.)
                        item_k = choice(kItems)
                        k = self.data.item[item_k]
                        self.P[u] += self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i])
                                        - self.P[u].dot(self.Q[k]))) * (
                            self.Q[i] - self.Q[k])
                        self.Q[i] += self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i])
                                        - self.P[u].dot(self.Q[k]))) * self.P[u]
                        self.Q[k] -= self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i])
                                        - self.P[u].dot(self.Q[k]))) * self.P[u]
                        # Sample a true negative j: neither a positive nor a
                        # friend-positive of THIS user.
                        # BUG FIX: was `item_j in self.IPositiveSet`, which
                        # tests the *user* keys of the defaultdict (almost
                        # always False for an item id), so "negatives" could
                        # be drawn from the friend-positive set.
                        item_j = choice(itemList)
                        while item_j in self.PositiveSet[user] or item_j in self.IPositiveSet[user]:
                            item_j = choice(itemList)
                        j = self.data.item[item_j]
                        # Rank friend-positive k above negative j with the
                        # margin damped by 1/self.s.
                        self.P[u] += (1 / self.s) * self.lRate * (
                            1 - sigmoid(
                                (1 / self.s) * (self.P[u].dot(self.Q[k])
                                                - self.P[u].dot(self.Q[j])))) * (
                            self.Q[k] - self.Q[j])
                        self.Q[k] += (1 / self.s) * self.lRate * (
                            1 - sigmoid(
                                (1 / self.s) * (self.P[u].dot(self.Q[k])
                                                - self.P[u].dot(self.Q[j])))) * self.P[u]
                        self.Q[j] -= (1 / self.s) * self.lRate * (
                            1 - sigmoid(
                                (1 / self.s) * (self.P[u].dot(self.Q[k])
                                                - self.P[u].dot(self.Q[j])))) * self.P[u]
                        # L2 weight decay.
                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]
                        self.Q[k] -= self.lRate * self.regI * self.Q[k]
                        self.loss += -log(sigmoid(self.P[u].dot(self.Q[i])
                                                  - self.P[u].dot(self.Q[k]))) - \
                            log(sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k])
                                                        - self.P[u].dot(self.Q[j]))))
                    else:
                        # No friend-positive items: plain BPR step i > j.
                        item_j = choice(itemList)
                        while item_j in self.PositiveSet[user]:
                            item_j = choice(itemList)
                        j = self.data.item[item_j]
                        self.P[u] += self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i])
                                        - self.P[u].dot(self.Q[j]))) * (
                            self.Q[i] - self.Q[j])
                        self.Q[i] += self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i])
                                        - self.P[u].dot(self.Q[j]))) * self.P[u]
                        self.Q[j] -= self.lRate * (
                            1 - sigmoid(self.P[u].dot(self.Q[i])
                                        - self.P[u].dot(self.Q[j]))) * self.P[u]
                        self.loss += -log(
                            sigmoid(self.P[u].dot(self.Q[i])
                                    - self.P[u].dot(self.Q[j])))
        # Epoch-level regularization terms.
        self.loss += self.regU * (self.P * self.P).sum(
        ) + self.regI * (self.Q * self.Q).sum()
        epoch += 1
        if self.isConverged(epoch):
            break