예제 #1
0
    def optimization_thres(self, u, i, j, user, friend):
        """Take one pairwise SGD step whose margin is damped by a threshold gate.

        The user/friend similarity, shifted and scaled by the user's
        threshold, is squashed through ``sigmoid`` into ``g_theta``. The score
        gap between items ``i`` and ``j`` is divided by ``1 + g_theta`` before
        the sigmoid/log-loss step. The method updates ``self.P[u]``,
        ``self.Q[i]``, ``self.Q[j]`` and ``self.loss`` in place, adds the
        threshold gradient to ``self.thres_d[user]`` and increments
        ``self.thres_count[user]``.

        Args:
            u: Row index into ``self.P``.
            i: Row index into ``self.Q`` of the item whose score is pushed up.
            j: Row index into ``self.Q`` of the item whose score is pushed down.
            user: Key into ``self.pSimilarity``, ``self.threshold``,
                ``self.avg_sim``, ``self.thres_d`` and ``self.thres_count``.
            friend: Key into ``self.pSimilarity[user]``.
        """
        similarity = self.pSimilarity[user][friend]
        cutoff = self.threshold[user]
        mean_sim = self.avg_sim[user]
        try:
            g_theta = sigmoid((similarity - cutoff) / (mean_sim - cutoff))
        except OverflowError:
            # Dump the operands that blew up, then abort the whole run.
            numerator = similarity - cutoff
            denominator = mean_sim - cutoff
            print('threshold', cutoff, 'smilarity', similarity, 'avg',
                  mean_sim)
            print(numerator, denominator)
            print(numerator / denominator)
            exit(-1)

        gap = self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
        s = sigmoid(gap / (1 + g_theta))

        # Ranking step: each line deliberately sees the rows as updated by
        # the lines before it.
        step = self.lRate * (1 - s)
        self.P[u] += step * (self.Q[i] - self.Q[j])
        self.Q[i] += step * self.P[u]
        self.Q[j] -= step * self.P[u]
        self.loss += -log(s)

        # Weight decay on the three touched rows.
        self.P[u] -= self.lRate * self.regU * self.P[u]
        item_decay = self.lRate * self.regI
        self.Q[i] -= item_decay * self.Q[i]
        self.Q[j] -= item_decay * self.Q[j]

        # Gradient w.r.t. the threshold, evaluated on the updated factors,
        # plus a 0.005-weighted penalty on the threshold itself.
        updated_gap = self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
        t_derivative = (
            -g_theta * (1 - g_theta) * (1 - s) * updated_gap
            * (similarity - mean_sim) / (mean_sim - cutoff)**2
            / (1 + g_theta)**2
            + 0.005 * cutoff)
        self.thres_d[user] += t_derivative
        self.thres_count[user] += 1
예제 #2
0
 def buildModel(self):
     """Train ``self.P``/``self.Q`` with pairwise ranking over item tiers.

     For every (user, item) pair in ``self.PositiveSet`` one SGD step is
     taken:

     * If ``self.FPSet[user]`` is non-empty, an item ``k`` is drawn from it
       and an item ``j`` is drawn from outside both ``PositiveSet[user]``
       and ``FPSet[user]``. The step ranks ``i`` above ``k`` (score gap
       divided by ``Suk + 1``, the stored weight of ``k``) and ``k`` above
       ``j``.
     * Otherwise a plain ``i`` above ``j`` step is taken.

     ``self.b`` is re-initialised at random on every call and is only read,
     never updated, in this method. Training stops after ``self.maxEpoch``
     epochs or as soon as ``self.isConverged(epoch)`` is truthy.

     NOTE(review): both rejection-sampling loops never terminate if a user
     has every item in the excluded sets.
     """
     self.b = np.random.random(self.num_items)
     print('Training...')
     epoch = 0
     while epoch < self.maxEpoch:
         self.loss = 0
         itemList = list(self.data.item.keys())
         for user in self.PositiveSet:
             u = self.data.user[user]
             kItems = list(self.FPSet[user].keys())
             for item in self.PositiveSet[user]:
                 i = self.data.item[item]
                 if len(self.FPSet[user]) > 0:
                     item_k = choice(kItems)
                     k = self.data.item[item_k]
                     # Look the weight up by the sampled key; indexing with
                     # the whole ``kItems`` list raised
                     # ``TypeError: unhashable type: 'list'``.
                     Suk = self.FPSet[user][item_k]
                     s = sigmoid((self.P[u].dot(self.Q[i]) - self.P[u].dot(
                         self.Q[k]) + self.b[i] - self.b[k]) / (Suk + 1))
                     self.P[u] += 1 / (Suk + 1) * self.lRate * (1 - s) * (
                         self.Q[i] - self.Q[k])
                     self.Q[i] += 1 / (Suk +
                                       1) * self.lRate * (1 - s) * self.P[u]
                     self.Q[k] -= 1 / (Suk +
                                       1) * self.lRate * (1 - s) * self.P[u]
                     # Reject candidates found in either of THIS user's
                     # sets. The old test ``item_j in self.FPSet`` compared
                     # an item key against the user-keyed outer mapping.
                     item_j = choice(itemList)
                     while item_j in self.PositiveSet[
                             user] or item_j in self.FPSet[user]:
                         item_j = choice(itemList)
                     j = self.data.item[item_j]
                     s = sigmoid(self.P[u].dot(self.Q[k]) -
                                 self.P[u].dot(self.Q[j]) + self.b[k] -
                                 self.b[j])
                     self.P[u] += self.lRate * (1 - s) * (self.Q[k] -
                                                          self.Q[j])
                     self.Q[k] += self.lRate * (1 - s) * self.P[u]
                     self.Q[j] -= self.lRate * (1 - s) * self.P[u]
                     # Weight decay on every touched row.
                     self.P[u] -= self.lRate * self.regU * self.P[u]
                     self.Q[i] -= self.lRate * self.regI * self.Q[i]
                     self.Q[j] -= self.lRate * self.regI * self.Q[j]
                     self.Q[k] -= self.lRate * self.regI * self.Q[k]
                     # Loss is measured on the updated factors and, as in
                     # the original, leaves the bias terms out.
                     self.loss += -log(sigmoid((self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))/ (Suk+1))) \
                                  - log(sigmoid(self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j])))
                 else:
                     item_j = choice(itemList)
                     while item_j in self.PositiveSet[user]:
                         item_j = choice(itemList)
                     j = self.data.item[item_j]
                     s = sigmoid(self.P[u].dot(self.Q[i]) -
                                 self.P[u].dot(self.Q[j]) + self.b[i] -
                                 self.b[j])
                     self.P[u] += self.lRate * (1 - s) * (self.Q[i] -
                                                          self.Q[j])
                     self.Q[i] += self.lRate * (1 - s) * self.P[u]
                     self.Q[j] -= self.lRate * (1 - s) * self.P[u]
                     self.loss += -log(s)
             # NOTE(review): the full-matrix regularisation term is added
             # once per user, not once per epoch; kept as is because
             # ``isConverged`` presumably depends on this loss scale.
             self.loss += self.regU * (self.P * self.P).sum(
             ) + self.regI * (self.Q * self.Q).sum() + self.b.dot(self.b)
         epoch += 1
         if self.isConverged(epoch):
             break
예제 #3
0
 def optimization_theta(self, u, i, j):
     """Take one pairwise SGD step damped by ``1 + 1 / self.g_theta``.

     The score gap between items ``i`` and ``j`` is divided by the damping
     factor before the sigmoid, and the same factor scales the step size.
     ``self.P[u]``, ``self.Q[i]``, ``self.Q[j]`` and ``self.loss`` are
     updated in place. The gradient with respect to ``self.theta`` is added
     to ``self.theta_derivative`` and ``self.theta_count`` is incremented.

     Args:
         u: Row index into ``self.P``.
         i: Row index into ``self.Q`` of the item whose score is pushed up.
         j: Row index into ``self.Q`` of the item whose score is pushed down.
     """
     damping = 1 + 1 / self.g_theta
     gap = self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
     s = sigmoid(gap / damping)

     # Ranking step: later lines deliberately see the rows as already
     # modified by the earlier ones.
     step = self.lRate / damping * (1 - s)
     self.P[u] += step * (self.Q[i] - self.Q[j])
     self.Q[i] += step * self.P[u]
     self.Q[j] -= step * self.P[u]
     self.loss += -log(s)

     # Weight decay on the three touched rows.
     self.P[u] -= self.lRate * self.regU * self.P[u]
     item_decay = self.lRate * self.regI
     self.Q[i] -= item_decay * self.Q[i]
     self.Q[j] -= item_decay * self.Q[j]

     # Theta gradient, evaluated on the freshly updated factors.
     updated_gap = self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
     self.theta_derivative += self.regT * self.theta + (
         (1 - s) * updated_gap *
         (self.t_w + self.t_s - 2 * self.theta)) / (self.g_theta + 1)**2
     self.theta_count += 1
예제 #4
0
 def optimization(self, u, i, j):
     """Take one plain pairwise SGD step for the triple ``(u, i, j)``.

     The sigmoid of the score gap between items ``i`` and ``j`` drives a
     step that raises the score of ``i`` relative to ``j`` for user ``u``.
     The three touched rows are then shrunk by weight decay. ``self.loss``
     is decreased by ``log(s)``.

     Args:
         u: Row index into ``self.P``.
         i: Row index into ``self.Q`` of the item whose score is pushed up.
         j: Row index into ``self.Q`` of the item whose score is pushed down.
     """
     gap = self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j])
     s = sigmoid(gap)

     # Each update below intentionally uses the rows as left by the
     # previous line.
     step = self.lRate * (1 - s)
     self.P[u] += step * (self.Q[i] - self.Q[j])
     self.Q[i] += step * self.P[u]
     self.Q[j] -= step * self.P[u]
     self.loss -= log(s)

     # Weight decay.
     self.P[u] -= self.lRate * self.regU * self.P[u]
     item_decay = self.lRate * self.regI
     self.Q[i] -= item_decay * self.Q[i]
     self.Q[j] -= item_decay * self.Q[j]
예제 #5
0
    def buildModel(self):
        """Build a collaborative user network, embed users, then train.

        Steps, in order:

        1. Keep the items of ``self.data.trainSet_i`` that have more than
           one user and collect, per user, the kept items whose stored value
           is >= 1 (``self.filteredRatings``).
        2. Link every pair of users sharing at least one kept item
           (``self.CUNet``). The neighbour is repeated once per shared item,
           so a uniform ``choice`` over the list is a weighted draw.
        3. Generate ``self.walkCount`` random walks of ``self.walkLength``
           nodes from every user and shuffle them (``self.walks``).
        4. Fit ``w2v.Word2Vec`` on the walks and copy the vectors into
           ``self.W``.
        5. Keep the ``self.topK`` highest-scoring users per user
           (``self.topKSim``) and derive ``self.PositiveSet`` (the user's own
           training items) and ``self.IPositiveSet`` (items of those top
           users that the user does not have).
        6. Run pairwise SGD on ``self.P``/``self.Q`` until ``self.maxEpoch``
           is reached or ``self.isConverged(epoch)`` is truthy.
        """
        print('Kind Note: This method will probably take much time.')
        # build C-U-NET
        print('Building collaborative user network...')
        # filter isolated nodes
        self.itemNet = {}
        for item in self.data.trainSet_i:
            if len(self.data.trainSet_i[item]) > 1:
                self.itemNet[item] = self.data.trainSet_i[item]

        self.filteredRatings = defaultdict(list)
        for item in self.itemNet:
            for user in self.itemNet[item]:
                if self.itemNet[item][user] >= 1:
                    self.filteredRatings[user].append(item)

        self.CUNet = defaultdict(list)

        for user1 in self.filteredRatings:
            s1 = set(self.filteredRatings[user1])
            for user2 in self.filteredRatings:
                if user1 != user2:
                    s2 = set(self.filteredRatings[user2])
                    weight = len(s1.intersection(s2))
                    if weight > 0:
                        # One copy per shared item => weighted sampling.
                        self.CUNet[user1] += [user2] * weight

        print('Generating random deep walks...')
        self.walks = []
        self.visited = defaultdict(dict)
        for user in self.CUNet:
            for _ in range(self.walkCount):
                path = [user]
                lastNode = user
                for _ in range(1, self.walkLength):
                    nextNode = choice(self.CUNet[lastNode])
                    count = 0
                    # NOTE(review): the test reads ``visited[lastNode]`` but
                    # the write below targets ``visited[user]``; kept as is
                    # because the intended semantics are not visible here.
                    while nextNode in self.visited[lastNode]:
                        nextNode = choice(self.CUNet[lastNode])
                        # break infinite loop
                        count += 1
                        if count == 10:
                            break
                    path.append(nextNode)
                    self.visited[user][nextNode] = 1
                    lastNode = nextNode
                self.walks.append(path)
        shuffle(self.walks)

        # Training get top-k friends
        print('Generating user embedding...')
        # NOTE(review): ``size``/``iter`` are the pre-4.0 gensim keyword
        # names (assuming ``w2v`` is gensim's word2vec module); confirm the
        # pinned version before renaming them.
        model = w2v.Word2Vec(self.walks,
                             size=self.walkDim,
                             window=5,
                             min_count=0,
                             iter=3)
        print('User embedding generated.')

        print('Constructing similarity matrix...')
        self.W = np.random.rand(self.data.trainingSize()[0], self.walkDim) / 10
        # Copy each embedded user's vector exactly once; the original redid
        # this assignment inside the quadratic loop below.
        for user in self.CUNet:
            self.W[self.data.user[user]] = model.wv[user]

        self.topKSim = {}
        done = 0
        for user1 in self.CUNet:
            u1 = self.data.user[user1]
            sims = []
            for user2 in self.CUNet:
                if user1 != user2:
                    u2 = self.data.user[user2]
                    sims.append((user2, cosine(self.W[u1], self.W[u2])))
            self.topKSim[user1] = sorted(sims,
                                         key=lambda d: d[1],
                                         reverse=True)[:self.topK]
            done += 1
            if done % 200 == 0:
                print('progress:', done, '/', len(self.CUNet))
        print('Similarity matrix finished.')
        # prepare Pu set, IPu set, and Nu set
        print('Preparing item sets...')
        self.PositiveSet = defaultdict(dict)
        self.IPositiveSet = defaultdict(dict)

        for user in self.topKSim:
            for item in self.data.trainSet_u[user]:
                self.PositiveSet[user][item] = 1
            for friend in self.topKSim[user]:
                for item in self.data.trainSet_u[friend[0]]:
                    if item not in self.PositiveSet[user]:
                        self.IPositiveSet[user][item] = 1

        print('Training...')
        epoch = 0
        while epoch < self.maxEpoch:
            self.loss = 0
            itemList = list(self.data.item.keys())
            for user in self.PositiveSet:
                u = self.data.user[user]
                kItems = list(self.IPositiveSet[user].keys())
                for item in self.PositiveSet[user]:
                    i = self.data.item[item]
                    for _ in range(3):  # negative sampling for 3 times
                        if len(self.IPositiveSet[user]) > 0:
                            item_k = choice(kItems)
                            k = self.data.item[item_k]
                            # Rank i above k. The sigmoid is re-evaluated on
                            # every line, so each update sees the previous
                            # one.
                            self.P[u] += self.lRate * (
                                1 - sigmoid(self.P[u].dot(self.Q[i]) -
                                            self.P[u].dot(self.Q[k]))) * (
                                                self.Q[i] - self.Q[k])
                            self.Q[i] += self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * \
                                         self.P[u]
                            self.Q[k] -= self.lRate * (1 - sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) * \
                                         self.P[u]
                            # Reject candidates found in either of THIS
                            # user's sets. The old test
                            # ``item_j in self.IPositiveSet`` compared an
                            # item key against the user-keyed outer mapping.
                            item_j = choice(itemList)
                            while item_j in self.PositiveSet[
                                    user] or item_j in self.IPositiveSet[user]:
                                item_j = choice(itemList)
                            j = self.data.item[item_j]
                            # Rank k above j, with the gap scaled by
                            # 1 / self.s.
                            self.P[u] += (1 / self.s) * self.lRate * (
                                1 - sigmoid(
                                    (1 / self.s) *
                                    (self.P[u].dot(self.Q[k]) - self.P[u].dot(
                                        self.Q[j])))) * (self.Q[k] - self.Q[j])
                            self.Q[k] += (1 / self.s) * self.lRate * (
                                1 - sigmoid(
                                    (1 / self.s) *
                                    (self.P[u].dot(self.Q[k]) -
                                     self.P[u].dot(self.Q[j])))) * self.P[u]
                            self.Q[j] -= (1 / self.s) * self.lRate * (
                                1 - sigmoid(
                                    (1 / self.s) *
                                    (self.P[u].dot(self.Q[k]) -
                                     self.P[u].dot(self.Q[j])))) * self.P[u]

                            # Weight decay on every touched row.
                            self.P[u] -= self.lRate * self.regU * self.P[u]
                            self.Q[i] -= self.lRate * self.regI * self.Q[i]
                            self.Q[j] -= self.lRate * self.regI * self.Q[j]
                            self.Q[k] -= self.lRate * self.regI * self.Q[k]

                            self.loss += -log(sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[k]))) - \
                                         log(sigmoid((1 / self.s) * (self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j]))))
                        else:
                            # No implicit positives: plain i-above-j step.
                            item_j = choice(itemList)
                            while item_j in self.PositiveSet[user]:
                                item_j = choice(itemList)
                            j = self.data.item[item_j]
                            self.P[u] += self.lRate * (
                                1 - sigmoid(self.P[u].dot(self.Q[i]) -
                                            self.P[u].dot(self.Q[j]))) * (
                                                self.Q[i] - self.Q[j])
                            self.Q[i] += self.lRate * (
                                1 -
                                sigmoid(self.P[u].dot(self.Q[i]) -
                                        self.P[u].dot(self.Q[j]))) * self.P[u]
                            self.Q[j] -= self.lRate * (
                                1 -
                                sigmoid(self.P[u].dot(self.Q[i]) -
                                        self.P[u].dot(self.Q[j]))) * self.P[u]

                            self.loss += -log(
                                sigmoid(self.P[u].dot(self.Q[i]) -
                                        self.P[u].dot(self.Q[j])))

                # NOTE(review): the full-matrix regularisation term is added
                # once per user, not once per epoch; kept as is because
                # ``isConverged`` presumably depends on this loss scale.
                self.loss += self.regU * (self.P * self.P).sum(
                ) + self.regI * (self.Q * self.Q).sum()
            epoch += 1
            if self.isConverged(epoch):
                break