예제 #1
0
    def get_performance(user_pos_test, r, auc, Ks):
        """
        Evaluate one user's ranked recommendation list at every cutoff in ``Ks``.

        :param user_pos_test:    items the user really interacted with in the test set
                                 (only its length is used here)
        :param r:                hit indicators of the predicted ranking, e.g.
                                 r = [1, 0, 1] marks whether each top-K prediction is a hit
        :param auc:              AUC value (a scalar, e.g. 0); returned unchanged
        :param Ks:               iterable of top-K cutoffs
        :return:                 dict with keys 'recall', 'precision', 'ndcg', 'hit_ratio'
                                 and 'MAP', each a numpy array holding one value per cutoff
                                 in the order of ``Ks``, plus 'auc'
        """
        # One growing list per metric; the key order is the order of the returned dict.
        per_cutoff = {'recall': [], 'precision': [], 'ndcg': [], 'hit_ratio': [], 'MAP': []}

        for top_k in Ks:
            per_cutoff['precision'].append(metrics.precision_at_k(r, top_k))
            # recall and AP are additionally given the number of ground-truth items
            num_pos = len(user_pos_test)
            per_cutoff['recall'].append(metrics.recall_at_k(r, top_k, num_pos))
            per_cutoff['ndcg'].append(metrics.ndcg_at_k(r, top_k))
            per_cutoff['hit_ratio'].append(metrics.hit_at_k(r, top_k))
            per_cutoff['MAP'].append(metrics.AP_at_k(r, top_k, num_pos))

        performance = {name: np.array(values) for name, values in per_cutoff.items()}
        performance['auc'] = auc
        return performance
예제 #2
0
                val_u_is[key] = random.sample(val, max_i_num)
        # Build the top-N recommendation list for every validation user:
        # preds maps user -> list of the args.topk best-scored candidate items.
        preds = {}
        for u in val_u_is.keys():
            # Store the candidates as a list so that pred_rates and the array
            # indexed below follow one fixed order.
            val_u_is[u] = list(val_u_is[u])
            # Element [0] of algo.predict's result is taken as the score of item i.
            pred_rates = [algo.predict(u, i)[0] for i in val_u_is[u]]
            # argsort is ascending, so reverse it and keep the first args.topk
            # positions: the highest-scored candidates, best first.
            rec_idx = np.argsort(pred_rates)[::-1][:args.topk]
            top_n = np.array(val_u_is[u])[rec_idx]
            preds[u] = list(top_n)
        # Ground truth: for each user in val_set, the 'item' values of that user's rows.
        ur = defaultdict(list)
        for u in val_set.user.unique():
            ur[u] = val_set.loc[val_set.user == u, 'item'].values.tolist()
        # Replace each recommended item by a 1/0 hit flag. ur is a defaultdict,
        # so a user without rows in val_set gets an empty list and only zeros.
        for u in preds.keys():
            preds[u] = [1 if e in ur[u] else 0 for e in preds[u]]
        # Average precision_at_k over all validation users and record it in val_kpi.
        # NOTE(review): precision_at_k is defined outside this snippet; presumably
        # precision over the first args.topk hit flags -- confirm.
        val_kpi_k = np.mean(
            [precision_at_k(r, args.topk) for r in preds.values()])
        val_kpi.append(val_kpi_k)

        # Same ranking procedure for the test users; preds is rebuilt from scratch.
        preds = {}
        for u in test_u_is.keys():
            test_u_is[u] = list(test_u_is[u])
            pred_rates = [algo.predict(u, i)[0] for i in test_u_is[u]]
            rec_idx = np.argsort(pred_rates)[::-1][:args.topk]
            top_n = np.array(test_u_is[u])[rec_idx]
            preds[u] = list(top_n)
        # Ground truth: for each user in test_set, the 'item' values of that user's rows.
        test_ur = defaultdict(list)
        for u in test_set.user.unique():
            test_ur[u] = test_set.loc[test_set.user == u,
                                      'item'].values.tolist()
예제 #3
0
        # NOTE(review): item_pool is not referenced in the visible code; kept in
        # case code outside this snippet relies on it.
        item_pool = list(range(dataset.item_num))
        # Rank a pool of max_i_num candidate items for every validation user and
        # keep the args.topk best-scored ones as that user's recommendations.
        for u in tqdm(val_user_set):
            if len(candidates[u]) < max_i_num:
                actual_cands = set(candidates[u])
                # Pad the pool with randomly drawn column indices of the training
                # matrix that do not appear in ur[u].
                neg_item_pool = set(range(dataset.train_list[fold].shape[1])) - set(ur[u])
                # random.sample() needs a sequence: passing a set is deprecated
                # since Python 3.9 and raises TypeError from Python 3.11 on, so
                # convert the pool to a tuple first.
                neg_cands = random.sample(tuple(neg_item_pool), max_i_num - len(candidates[u]))
                cands = actual_cands | set(neg_cands)
            else:
                # Enough candidates already: down-sample to exactly max_i_num.
                cands = random.sample(candidates[u], max_i_num)
            # Fix one ordering of the candidates so that scores and item ids line up.
            cand_list = list(cands)
            # Scores of user u for the candidate items (user vector times item matrix).
            pred_rates = algo.user_vec[u, :].dot(algo.item_vec).toarray()[0, cand_list]
            # argsort is ascending: reverse and truncate to get the top-k positions.
            rec_idx = np.argsort(pred_rates)[::-1][:args.topk]
            preds[u] = list(np.array(cand_list)[rec_idx])
        # Turn each recommendation list into 1/0 hit flags against ur[u].
        for u in preds.keys():
            preds[u] = [1 if i in ur[u] else 0 for i in preds[u]]

        # Mean of precision_at_k over all users in preds, recorded in val_kpi.
        val_kpi_k = np.mean([precision_at_k(r, args.topk) for r in preds.values()])
        val_kpi.append(val_kpi_k)

        print('Start test kpi calculation......')
        # generate top-N list for test user set
        test_user_set = dataset.test_users
        # test_ur: items each user actually interacted with, read from the
        # nonzero entries of dataset.test (first index -> user, second -> item;
        # presumably a user x item sparse matrix -- confirm).
        test_ur = defaultdict(list)
        index = dataset.test.nonzero()
        for u, i in zip(index[0], index[1]):
            test_ur[u].append(i)
        candidates = defaultdict(list)
        for u in test_user_set:
            # items the user has NOT interacted with in this training fold
            # (column indices whose entry in row u equals 0)
            unint = np.where(dataset.train_list[fold][u, :].toarray().reshape(-1) == 0)[0]
            # of those un-interacted items, keep the ones the user interacts with in the test data
            candidates[u] = list(set(unint) & set(test_ur[u]))

        # NOTE(review): presumably the per-user candidate pool size, as in the
        # validation pass earlier in this function -- its use is outside this snippet.
        max_i_num = 1000
예제 #4
0
    # Fit and evaluate a MostPopRecommender on every training fold; each fold
    # contributes one value per metric to the fnl_* lists.
    for fold, train_fold in enumerate(train_set_list):
        print(f'Start train validation [{fold + 1}]')
        reco = MostPopRecommender(k)
        reco.fit(train_fold)

        # top-N recommendations for the test users
        preds = reco.predict(test_set)

        # items every test user actually interacted with
        test_ur = defaultdict(list)
        test_users = test_set.user
        for u in test_users.unique():
            test_ur[u] = test_set.loc[test_users == u, 'item'].values.tolist()

        # swap each recommended item for a 1/0 hit flag (1 = item is in the
        # user's test interactions)
        for u, rec_items in preds.items():
            truth = test_ur[u]
            preds[u] = [int(e in truth) for e in rec_items]

        # calculate metrics, one value per fold
        hit_lists = preds.values()

        precision_k = np.mean([precision_at_k(hits, k) for hits in hit_lists])
        fnl_precision.append(precision_k)

        recall_k = np.mean([
            recall_at_k(hits, len(test_ur[user]), k)
            for user, hits in preds.items()
        ])
        fnl_recall.append(recall_k)

        map_k = map_at_k(list(hit_lists))
        fnl_map.append(map_k)

        ndcg_k = np.mean([ndcg_at_k(hits, k) for hits in hit_lists])
        fnl_ndcg.append(ndcg_k)

        hr_k = hr_at_k(list(hit_lists), list(preds.keys()), test_ur)
        fnl_hr.append(hr_k)