Пример #1
0
    def estimate(self, iterations=300, corrupt_value=0.):
        """Run the ranking-cost updates, then score held-out recommendations.

        Compiles a Theano function that applies the updates returned by
        ``self.get_cost_ranking_updates`` with ``self.train`` substituted for
        both ``self.x`` and ``self.o``, calls it ``iterations`` times and
        prints the cost after every call.

        Afterwards, for each user, the items whose training entry is exactly
        0 are ranked by predicted score (highest first) and compared against
        the items whose test entry is greater than 0.5.  The per-user values
        returned by ``PrecAt``, ``MapAt``, ``nDCGAt`` and ``AUC`` are averaged
        over the evaluated users, printed, and appended to
        ``./result/resultAE.txt``.

        Args:
            iterations: Number of full-batch update steps to run.
            corrupt_value: Forwarded as ``corruption_level`` to
                ``self.get_cost_ranking_updates``.

        Returns:
            None.  Results are reported on stdout and in the results file.
        """
        cost, updates = self.get_cost_ranking_updates(
            corruption_level=corrupt_value)

        # No explicit inputs: the whole training matrix is wired in via
        # `givens`, so every call is one full-batch step.
        train_da = theano.function([],
                                   cost,
                                   updates=updates,
                                   givens={
                                       self.x: self.train,
                                       self.o: self.train
                                   })

        for iteration in range(iterations):
            train_cost = train_da()
            print("iterations: %3d, cost: %.6f" % (iteration + 1, train_cost))

        # Evaluate each matrix once instead of calling `.eval()` on a fresh
        # row slice for every user; the values are the same, but only one
        # graph evaluation per matrix is needed.
        train_matrix = np.asarray(self.train.eval())
        test_matrix = np.asarray(self.test.eval())
        pred_matrix = np.asarray(self.predict(self.train).eval())

        #####################################
        ns = [5, 10, 15]  # cut-offs; the summary format below assumes three
        precisions = np.zeros(len(ns))
        ndcgs = np.zeros(len(ns))
        maps = np.zeros(len(ns))
        auc = 0.0
        num_evaluated = 0

        for user in range(self._num_user):
            train_row = train_matrix[user]
            ground_truth = np.where(test_matrix[user] > 0.5)[0]

            # Nothing to hit for this user in the test data.
            if len(ground_truth) == 0:
                continue

            # Skip users without any positive training entry.  The previous
            # check took len() of the tuple returned by np.where (always 1
            # for a 1-D row), so it never skipped anyone.
            if not np.any(train_row > 0):
                continue

            num_evaluated += 1

            # Candidates are the items with a zero training entry; argsort is
            # ascending, so reverse to put the highest score first.
            candidates = np.where(train_row == 0)[0]
            scores = pred_matrix[user][candidates]
            ranked_items = np.take(candidates, np.argsort(scores))[::-1]

            precisions += PrecAt(ranked_items, ground_truth, ns)
            maps += MapAt(ranked_items, ground_truth, ns)
            ndcgs += nDCGAt(ranked_items, ground_truth, ns)

            # Number of items excluded from the ranking for this user.
            num_dropped = self._num_item - len(ranked_items)
            auc += AUC(ranked_items, ground_truth, num_dropped)

        # With no evaluable user the sums are all zero; leave them as zeros
        # rather than dividing by zero.
        if num_evaluated:
            precisions /= float(num_evaluated)
            ndcgs /= float(num_evaluated)
            maps /= float(num_evaluated)
            auc /= float(num_evaluated)

        # `iterations` equals the old `iteration + 1` after a completed loop
        # and stays defined when the loop body never ran (iterations == 0).
        summary = ("iterations: %3d, pre5: %.6f, pre10: %.6f, pre15: %.6f, ndcg5: %.6f, ndcg10: %.6f, ndcg15: %.6f, map5: %.6f, map10: %.6f, map15: %.6f, auc: %.6f" %
                   (iterations, precisions[0], precisions[1], precisions[2],
                    ndcgs[0], ndcgs[1], ndcgs[2],
                    maps[0], maps[1], maps[2], auc))
        print(summary)

        # NOTE(review): the record is appended without a trailing newline, so
        # successive runs end up on one line; kept as-is in case something
        # parses the file in its current form.
        with open("./result/resultAE.txt", 'a') as f:
            f.write(summary)
Пример #2
0
    def estimate(self, iterations=300, alpha_value=1, batch_size=943):
        """Run weighted mini-batch updates, then score held-out recommendations.

        Builds a per-entry weight matrix from ``self.output`` and
        ``self.train``, compiles a Theano function that applies the updates
        returned by ``self.get_cost_ranking_updates`` to one row-slice
        (mini-batch) at a time, and sweeps over all batches ``iterations``
        times, printing a cost after each sweep.

        Afterwards, for each user, the items whose training entry is exactly
        0 are ranked by predicted score (highest first) and compared against
        the items whose test entry is greater than 0.5.  The per-user values
        returned by ``PrecAt``, ``MapAt``, ``nDCGAt`` and ``AUC`` are averaged
        over the evaluated users, printed, and appended to
        ``./result/resultAE.txt``.

        Args:
            iterations: Number of sweeps over all mini-batches.
            alpha_value: Weight given to entries where ``self.output`` exceeds
                ``self.train`` by 1; entries where the two agree get weight 1.
            batch_size: Number of user rows per mini-batch.

        Returns:
            None.  Results are reported on stdout and in the results file.
        """
        batch_index_var = T.lscalar()

        # Round up so a trailing partial batch is still trained on.  Floor
        # division silently dropped the last `_num_user % batch_size` users
        # and ran no training at all when batch_size > _num_user.  A slice
        # that runs past the last row is clipped, as in NumPy.
        n_train_batches = (self._num_user + batch_size - 1) // batch_size

        output_matrix = np.asarray(self.output.eval())
        observed_matrix = np.asarray(self.train.eval())

        # weight = 1 - (output - train) * (1 - alpha_value): 1 where output
        # and train agree, alpha_value where they differ by exactly 1.
        # Presumably the differing entries are imputed items (per the
        # original comment) -- TODO confirm against the caller.
        imputed_only = output_matrix - observed_matrix
        alpha_weights = imputed_only * (1 - alpha_value) * (-1) + 1

        # A plain ndarray is used instead of np.matrix; the input is already
        # 2-D, so the values are unchanged.
        alpha_matrix = shared(np.asarray(alpha_weights,
                                         dtype=theano.config.floatX),
                              borrow=True)

        cost, updates = self.get_cost_ranking_updates(corruption_level=0.)

        batch_start = batch_index_var * batch_size
        batch_stop = (batch_index_var + 1) * batch_size
        train_da = theano.function(
            [batch_index_var],
            cost,
            updates=updates,
            givens={
                self.x: self.train[batch_start:batch_stop],
                self.o: self.output[batch_start:batch_stop],
                self.alpha: alpha_matrix[batch_start:batch_stop]
            })

        for iteration in range(iterations):
            # NOTE(review): only the last batch's cost is reported; the zero
            # initialisation suggests an accumulated cost may have been
            # intended -- confirm before changing the printed value.
            train_cost = 0
            for batch_index in range(n_train_batches):
                train_cost = train_da(batch_index)

            print("iterations: %3d, cost: %.6f" % (iteration + 1, train_cost))

        # Evaluate each matrix once instead of calling `.eval()` on a fresh
        # row slice for every user; the values are the same, but only one
        # graph evaluation per matrix is needed.
        train_matrix = np.asarray(self.train.eval())
        test_matrix = np.asarray(self.test.eval())
        pred_matrix = np.asarray(self.predict(self.train).eval())

        #####################################
        ns = [5, 10, 15]  # cut-offs; the summary format below assumes three
        precisions = np.zeros(len(ns))
        ndcgs = np.zeros(len(ns))
        maps = np.zeros(len(ns))
        auc = 0.0
        num_evaluated = 0

        for user in range(self._num_user):
            train_row = train_matrix[user]
            ground_truth = np.where(test_matrix[user] > 0.5)[0]

            # Nothing to hit for this user in the test data.
            if len(ground_truth) == 0:
                continue

            # Skip users without any positive training entry.  The previous
            # check took len() of the tuple returned by np.where (always 1
            # for a 1-D row), so it never skipped anyone.
            if not np.any(train_row > 0):
                continue

            num_evaluated += 1

            # Candidates are the items with a zero training entry; argsort is
            # ascending, so reverse to put the highest score first.
            candidates = np.where(train_row == 0)[0]
            scores = pred_matrix[user][candidates]
            ranked_items = np.take(candidates, np.argsort(scores))[::-1]

            precisions += PrecAt(ranked_items, ground_truth, ns)
            maps += MapAt(ranked_items, ground_truth, ns)
            ndcgs += nDCGAt(ranked_items, ground_truth, ns)

            # Number of items excluded from the ranking for this user.
            num_dropped = self._num_item - len(ranked_items)
            auc += AUC(ranked_items, ground_truth, num_dropped)

        # With no evaluable user the sums are all zero; leave them as zeros
        # rather than dividing by zero.
        if num_evaluated:
            precisions /= float(num_evaluated)
            ndcgs /= float(num_evaluated)
            maps /= float(num_evaluated)
            auc /= float(num_evaluated)

        # `iterations` equals the old `iteration + 1` after a completed loop
        # and stays defined when the loop body never ran (iterations == 0).
        summary = ("iterations: %3d, pre5: %.6f, pre10: %.6f, pre15: %.6f, ndcg5: %.6f, ndcg10: %.6f, ndcg15: %.6f, map5: %.6f, map10: %.6f, map15: %.6f, auc: %.6f" %
                   (iterations, precisions[0], precisions[1], precisions[2],
                    ndcgs[0], ndcgs[1], ndcgs[2],
                    maps[0], maps[1], maps[2], auc))
        print(summary)

        # NOTE(review): the record is appended without a trailing newline, so
        # successive runs end up on one line; kept as-is in case something
        # parses the file in its current form.
        with open("./result/resultAE.txt", 'a') as f:
            f.write(summary)