예제 #1
0
def test_one_user(x):
    """Compute recall@k and average precision@k for a single user.

    Args:
        x: An indexable with four entries:
            x[0] -- scores for the user, indexable by item id;
            x[1] -- the user id;
            x[2] -- the total number of items (item ids are
                    ``range(x[2])``);
            x[3] -- key selecting an entry of the module-level
                    ``data_generator_all``.

    Returns:
        A numpy array of ten values: recall at 20, 40, 60, 80 and 100,
        followed by average precision at the same cutoffs.
    """
    rating, u, item_num = x[0], x[1], x[2]
    data_generator = data_generator_all[x[3]]

    # Items the user interacted with in training are excluded from ranking.
    training_items = data_generator.train_items[u]
    # Ground-truth items of the user in the test set.
    user_pos_test = data_generator.test_set[u]
    # Build the lookup once so each membership test below is O(1) even when
    # the test items are stored as a list.
    positives = set(user_pos_test)

    candidates = list(set(range(item_num)) - set(training_items))

    # Stable ascending sort followed by reverse() (rather than
    # reverse=True) keeps equally scored items in the same relative order
    # as before.
    ranked_items = sorted(candidates, key=lambda item: rating[item])
    ranked_items.reverse()

    # Binary relevance of every ranked item, best score first.
    r = [1 if item in positives else 0 for item in ranked_items]

    cutoffs = (20, 40, 60, 80, 100)
    n_positives = len(user_pos_test)
    recalls = [ut.recall_at_k(r, k, n_positives) for k in cutoffs]
    avg_precisions = [ut.average_precision(r, k) for k in cutoffs]

    return np.array(recalls + avg_precisions)
예제 #2
0
def simple_test_one_user(id, reclist):
    """Evaluate the recommendation quality for a single user.

    Items the user has in ``trainset_dict`` get their score overwritten
    with 0 (``reclist`` is modified in place), the items are ranked by
    descending score, and the metrics are computed at k = 3, 5 and 10.

    :param id: user id
    :param reclist: array of predicted scores, indexed by item id
    :return: flat list ``[recall, precision, mrr, ndcg]`` for k=3, followed
        by the same four values for k=5 and for k=10
    """
    # Mask items already seen in training.
    # NOTE(review): masking with 0 presumably assumes unseen items score
    # above 0; with negative scores masked items could still rank first --
    # confirm against the model's score range.
    if id in trainset_dict:
        for item in trainset_dict[id]:
            reclist[item] = 0

    # Item ids ordered from highest to lowest score.
    recom = np.argsort(-reclist)
    gnd = set(testset_dict[id])

    # pos[rank] is 1 when the item at that rank is a test-set item.
    pos = np.zeros(n_items)
    for rank, item in enumerate(recom):
        if item in gnd:
            pos[rank] = 1

    result = []
    for k in [3, 5, 10]:
        r = utils.recall_at_k(pos, k, len(gnd))
        p = utils.precision_at_k(pos, k)
        mrr = utils.mrr_at_k(pos, k)
        ndcg = utils.ndcg_at_k(pos, k)
        result.extend([r, p, mrr, ndcg])
    return result
예제 #3
0
def tfidf(evaluate_dev=False):
    """Run a TF-IDF cosine-similarity baseline and print its recall@k.

    A TF-IDF vectorizer is fitted on the training contexts and responses.
    Each (context, response) pair of the test set is then scored with the
    cosine similarity of its TF-IDF vectors, and the scores are passed to
    ``recall_at_k`` for group sizes 2 and 10.

    Args:
        evaluate_dev: When true, the same evaluation is also run and
            printed for the validation set (under a 'Validation set'
            header, with the test results under 'Testing set'). Defaults
            to False, which prints test results only.
    """
    print('Loading data ...')
    # The label arrays are returned by load_data but are not needed here.
    (train_c, train_r, _,
     dev_c, dev_r, _,
     test_c, test_r, _) = load_data(ratio=0.05)

    # Transform to text corpus for tfidf vectorizer
    print('Transforming to text corpuses ...')
    _, inv_word_index, _ = load_dictionary()

    train_c_corpus = dataset_to_corpus(train_c, inv_word_index)
    train_r_corpus = dataset_to_corpus(train_r, inv_word_index)
    if evaluate_dev:
        dev_c_corpus = dataset_to_corpus(dev_c, inv_word_index)
        dev_r_corpus = dataset_to_corpus(dev_r, inv_word_index)
    test_c_corpus = dataset_to_corpus(test_c, inv_word_index)
    test_r_corpus = dataset_to_corpus(test_r, inv_word_index)

    # Fit only: the transformed training matrix is never used.
    print('Fitting a tfidf model ...')
    vectorizer = TfidfVectorizer()
    vectorizer.fit(train_c_corpus + train_r_corpus)

    print('Transforming to tfidf features ...')
    if evaluate_dev:
        X_dev_c = vectorizer.transform(dev_c_corpus)
        X_dev_r = vectorizer.transform(dev_r_corpus)
    X_test_c = vectorizer.transform(test_c_corpus)
    X_test_r = vectorizer.transform(test_r_corpus)

    # Tfidf predict (cosine similarity)
    print('Predicting ...')

    def cos_sim(x, y):
        # x and y are single-row slices of the tfidf matrices.
        # NOTE(review): a row with no in-vocabulary terms would make the
        # denominator zero -- confirm how recall_at_k treats such scores.
        return x * y.transpose() / np.sqrt(
            x * x.transpose() * y * y.transpose())

    def score_pairs(contexts, responses, n_pairs):
        # Similarity of the i-th context with the i-th response.
        return np.array(
            [cos_sim(contexts[i, :], responses[i, :])
             for i in range(n_pairs)])

    def report(y_pred):
        for group_size in [2, 10]:
            for k in [1, 2, 5]:
                # k must be smaller than the number of candidates.
                if k >= group_size:
                    break
                r = recall_at_k(y_pred, k, group_size)
                print('recall@{} ({} options): {}'.format(
                    k, group_size - 1, r))

    y_pred_test = score_pairs(X_test_c, X_test_r, test_c.shape[0])

    # Evaluate results
    print('Results:\n')
    if evaluate_dev:
        y_pred_dev = score_pairs(X_dev_c, X_dev_r, dev_c.shape[0])
        print('Validation set')
        report(y_pred_dev)
        print('Testing set')
    report(y_pred_test)
예제 #4
0
 def get_full_sort_score(self, epoch, answers, pred_list):
     """Compute, print and log HIT@k / NDCG@k for k = 5, 10 and 20.

     The HIT@k values are the results of ``recall_at_k`` and the NDCG@k
     values the results of ``ndcg_k``, both called with ``answers``,
     ``pred_list`` and the cutoff. The formatted summary is printed and
     appended as one line to ``self.args.log_file``.

     Returns:
         A tuple ``(scores, summary)`` where ``scores`` is the list
         ``[hit@5, ndcg@5, hit@10, ndcg@10, hit@20, ndcg@20]`` and
         ``summary`` is the string form of the logged dictionary.
     """
     # Only the cutoffs that are actually reported are evaluated.
     cutoffs = (5, 10, 20)

     post_fix = {"Epoch": epoch}
     scores = []
     for k in cutoffs:
         hit = recall_at_k(answers, pred_list, k)
         ndcg = ndcg_k(answers, pred_list, k)
         scores.extend([hit, ndcg])
         post_fix["HIT@{}".format(k)] = '{:.4f}'.format(hit)
         post_fix["NDCG@{}".format(k)] = '{:.4f}'.format(ndcg)

     summary = str(post_fix)
     print(post_fix)
     with open(self.args.log_file, 'a') as f:
         f.write(summary + '\n')
     return scores, summary
예제 #5
0
def simple_test_one_user(x):
    """Evaluate the recommendation quality for a single user.

    Items the user has in ``loader.trainset_dict`` get their score
    overwritten with 0 (the score array is modified in place), the items
    are ranked by descending score, and the metrics are computed at
    k = 3, 5 and 10.

    :param x: indexable whose first entry is the user id and whose second
        entry is the array of predicted scores indexed by item id; any
        further entries are ignored
    :return: flat list ``[recall, precision, mrr, ndcg]`` for k=3, followed
        by the same four values for k=5 and for k=10
    """
    id, lst = x[0], x[1]

    # Mask items already seen in training.
    # NOTE(review): masking with 0 presumably assumes unseen items score
    # above 0; with negative scores masked items could still rank first --
    # confirm against the model's score range.
    if id in loader.trainset_dict:
        for item in loader.trainset_dict[id]:
            lst[item] = 0

    # Item ids ordered from highest to lowest score.
    recom = np.argsort(-lst)
    gnd = set(loader.testset_dict[id])

    # pos[rank] is 1 when the item at that rank is a test-set item.
    pos = np.zeros(opt.n_item)
    for rank, item in enumerate(recom):
        if item in gnd:
            pos[rank] = 1

    result = []
    for k in [3, 5, 10]:
        r = utils.recall_at_k(pos, k, len(gnd))
        p = utils.precision_at_k(pos, k)
        mrr = utils.mrr_at_k(pos, k)
        ndcg = utils.ndcg_at_k(pos, k)
        result.extend([r, p, mrr, ndcg])
    return result