def generate_reward(gold_index_list, answer_index_list):
    reward = 0
    ap = 0
    reciprocal_rank = 0
    answer_list = list(answer_index_list)
    size = len(answer_index_list)
    true = sum(gold_index_list > 0)
    inp = np.zeros(size)
    for rank, val in enumerate(gold_index_list):
        if val and rank in answer_list:
            inp[answer_list.index(rank)] = val
    maxk = sum(inp > 0)
    if true:
        ap = average_precision(inp) * (maxk / true)
    reciprocal_rank = mean_reciprocal_rank([inp])
    ndcg = ndcg_at_k(inp, min(10, size))
    dcg_five = dcg_at_k(inp, 5)
    reward = (ap + reciprocal_rank + ndcg + dcg_five) / 4
    ranks = [1, 3, 5, 10]
    reward_tuple = [reward, ap, reciprocal_rank, ndcg, dcg_five]
    for r in ranks:
        reward_tuple.append(precision_at_k(inp, min(r, len(inp))))
    for r in ranks:
        reward_tuple.append(ndcg_at_k(inp, min(r, len(inp))))
    return reward_tuple
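
To make the index bookkeeping concrete, here is a minimal self-contained sketch (made-up data) of how the relevance vector `inp` is laid out before the rank_metrics helpers above are applied.

import numpy as np

# Hypothetical inputs: gold_index_list[i] is the graded relevance of candidate i,
# answer_index_list is the ranked list of candidate indices returned by a model.
gold_index_list = np.array([0, 2, 0, 1, 0])
answer_index_list = [3, 1, 0, 4, 2]

answer_list = list(answer_index_list)
inp = np.zeros(len(answer_list))
for rank, val in enumerate(gold_index_list):
    if val and rank in answer_list:
        inp[answer_list.index(rank)] = val

print(inp)   # [1. 2. 0. 0. 0.] -> relevances laid out in the model's ranked
             # order, which is what the rank_metrics helpers consume.
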
Example #2
def evalResults(results, trueRelevance, noveltyList, trainModelIDs, rev_dict,
                uid, alg, params, rec, outFile, diversity, novelty):
    params = [str(i) for i in params]
    #calculate rating precision
    mmScaler = MinMaxScaler(copy=True)
    results = mmScaler.fit_transform(results.reshape(-1, 1))
    results = results.reshape((-1, ))
    r2Sc = r2_score(trueRelevance, results)
    mae = mean_absolute_error(trueRelevance, results)

    #calculate ranking scores
    idx = (-results).argsort()

    if diversity == "yes":
        reranked = mmr_sorted(range(len(results)), 0.8, results, rev_dict, 10)
        idx1 = [k for k, v in reranked.items()]
        idx2 = [i for i in idx if i not in idx1]
        idx1.extend(idx2)
        idx = idx1

    rankedRelevance = trueRelevance[idx]
    rankedNovelty = noveltyList[idx]

    #print(rankedRelevance)

    map = rank_metrics.average_precision(rankedRelevance)
    aucSc = roc_auc_score(trueRelevance, results)
    nDCG10 = rank_metrics.ndcg_at_k(rankedRelevance, 10)
    nDCG100 = rank_metrics.ndcg_at_k(rankedRelevance, 100)
    nDCG = rank_metrics.ndcg_at_k(rankedRelevance, len(rankedRelevance))

    p5 = prec_at_n(rankedRelevance, 5)
    r5 = rec_at_n(rankedRelevance, 5)
    n5 = meanNovelty_at_n(rankedNovelty, 5)
    un5 = user_novelty_at_n(idx, trainModelIDs, 5)
    ild5 = ild_at_n(idx, rev_dict, 5)
    p10 = prec_at_n(rankedRelevance, 10)
    r10 = rec_at_n(rankedRelevance, 10)
    n10 = meanNovelty_at_n(rankedNovelty, 10)
    ild10 = ild_at_n(idx, rev_dict, 10)
    un10 = user_novelty_at_n(idx, trainModelIDs, 10)

    mrr = rank_metrics.mean_reciprocal_rank([rankedRelevance])

    #print((uid, alg, ",".join(params), rec, r2Sc, mae, map, aucSc, mrr, p5, p10, r5, r10, nDCG10, nDCG100, nDCG))

    txt = "%s;%s;%s;%s;%s;%s;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f\n" % (
        uid, alg, ",".join(params), rec, diversity, novelty, r2Sc, mae, map,
        aucSc, mrr, p5, p10, r5, r10, nDCG10, nDCG100, nDCG, n5, n10, un5,
        un10, ild5, ild10)
    outFile.write(txt)
    return (r2Sc, mae, map, aucSc, mrr, p5, p10, r5, r10, nDCG10, nDCG100,
            nDCG, n5, n10, ild5, ild10)
Example #3

    def test_ndcg(self):
        r = [3, 2, 3, 0, 1, 2]

        # Already best ranked
        r_best = [5, 5, 5, 4, 4, 3]
        self.assertLessEqual(ndcg_at_k(
            r,
            1,
        ), 1.0)
        self.assertAlmostEqual(ndcg_at_k(r, 6, method=0), 0.961, places=3)
        self.assertLessEqual(ndcg_at_k(
            r_best,
            4,
        ), 1.0)
Example #4
def per_user_rankings(test_data, test_label, scores):
    unique_users = np.unique(test_data[:, 0])
    user_array = test_data[:, 0]
    ndcg = []
    aupr_list = []
    auc_list = []
    for u in unique_users:
        indices_u = np.in1d(user_array, [u])
        labels_u = test_label[indices_u].astype(float)
        scores_u = scores[indices_u].astype(float)
        #ndcg is calculated only for the users with some positive examples
        if not all(i <= 0.001 for i in labels_u):
            tmp = np.c_[labels_u, scores_u]
            tmp = tmp[tmp[:, 1].argsort()[::-1], :]
            ordered_labels = tmp[:, 0]
            ndcg_u = rank.ndcg_at_k(ordered_labels, ordered_labels.shape[0], 1)
            ndcg.append(ndcg_u)

            prec, rec, thr = precision_recall_curve(labels_u, scores_u)
            aupr_val = auc(rec, prec)
            aupr_list.append(aupr_val)

            fpr, tpr, thr = roc_curve(labels_u, scores_u)
            auc_val = auc(fpr, tpr)
            auc_list.append(auc_val)
    return np.array([ndcg, aupr_list, auc_list])
Example #5
    def evals(self, x_test, y_test, top_n):
        user_item = {}
        m, n = x_test.shape
        for idx in range(m):
            u, i, r = x_test[idx][0], x_test[idx][1], y_test[idx]
            user_item.setdefault(u, {})
            user_item[u][i] = r
        recommend_dict = self.rec_top(top_n)

        ndcg = []
        for u in recommend_dict:
            temp = []
            for i in recommend_dict[u]:
                if i in user_item[u]:
                    temp.append(user_item[u][i])
                else:
                    temp.append(0)
            ndcg.append(rank_metrics.ndcg_at_k(temp, top_n))
        print('ndcg:%f' % np.mean(ndcg))
        p, r = 0., 0.
        for u in recommend_dict:
            cm_users = set(user_item[u]) & set(recommend_dict[u])
            p += len(cm_users) / top_n
            r += len(cm_users) / len(user_item[u])
        precision = p / len(recommend_dict)
        recall = r / len(recommend_dict)
        print("precision=%f\nrecall=%f" % (precision, recall))
Example #6
def compute_metrics(model, criterion, loader, k=5):
    global GPU_AVAILABLE

    loss = 0.
    p_at_1 = 0
    p_at_k = 0
    ndcg = 0

    for X, Y in loader:
        X = Variable(X)
        Y = [Variable(y) for y in Y]
        if GPU_AVAILABLE:
            X = X.cuda()
            Y = [y.cuda() for y in Y]

        outputs = model(Y)

        loss += criterion(outputs).data.item()

        if GPU_AVAILABLE:
            outputs = [out.cpu() for out in outputs]

        outputs = [out.data.numpy().squeeze() for out in outputs]

        idxs = [np.argsort(out)[::-1] for out in outputs]
        p_at_1 += sum([np.mean(idx[:1] < 1) for idx in idxs])
        p_at_k += sum([np.mean(idx[:k] < k) for idx in idxs])
        ndcg += sum(
            [ndcg_at_k(out.tolist(), k=k, method=0) for out in outputs])

    N = len(loader.dataset)
    return loss / N, p_at_1 / N, p_at_k / N, ndcg / N
Example #7
def compute_metrics(ranked_judgements, pr_atk, threshold_grade):
    """
    Given the ranked judgements compute the metrics for a query.
    :param ranked_judgements: list(int); graded or binary relevances in rank order.
    :param pr_atk: int; the @K value to use for computing precision and recall.
    :param threshold_grade: int; Assuming 0-3 graded relevances, threshold at some point
        and convert graded to binary relevance.
    :return:
    """
    graded_judgements = ranked_judgements
    ranked_judgements = [
        1 if rel >= threshold_grade else 0 for rel in graded_judgements
    ]
    # Use the full set of candidates, not the pr_atk.
    ndcg = rm.ndcg_at_k(graded_judgements, len(ranked_judgements))
    ndcg_pr = rm.ndcg_at_k(graded_judgements,
                           int(0.20 * len(ranked_judgements)))
    ndcg_20 = rm.ndcg_at_k(graded_judgements, 20)
    max_total_relevant = sum(ranked_judgements)
    recall = recall_at_k(ranked_rel=ranked_judgements,
                         atk=pr_atk,
                         max_total_relevant=max_total_relevant)
    precision = rm.precision_at_k(r=ranked_judgements, k=pr_atk)
    r_precision = rm.r_precision(r=ranked_judgements)
    f1 = 2 * precision * recall / (precision + recall) if (precision +
                                                           recall) > 0 else 0.0
    av_precision = rm.average_precision(r=ranked_judgements)
    reciprocal_rank = rm.mean_reciprocal_rank(rs=[ranked_judgements])
    metrics = {
        'recall': float(recall),
        'precision': float(precision),
        'f1': float(f1),
        'r_precision': float(r_precision),
        'av_precision': float(av_precision),
        'reciprocal_rank': float(reciprocal_rank),
        'ndcg': ndcg,
        'ndcg@20': ndcg_20,
        'ndcg%20': ndcg_pr
    }
    return metrics
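
As a quick illustration of the thresholding step, a hypothetical worked example assuming 0-3 grades and threshold_grade=2:

graded_judgements = [3, 0, 2, 1, 3]
threshold_grade = 2
binary = [1 if rel >= threshold_grade else 0 for rel in graded_judgements]
print(binary)   # [1, 0, 1, 0, 1] -> precision@3 is 2/3, while the nDCG values
                # above are still computed from the original grades.
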
Example #8
def compute_metrics(model, loader, k=5, mode='bilinear'):
    global GPU_AVAILABLE

    p_at_1 = 0
    p_at_k = 0
    ndcg = 0

    for X, Y in loader:
        X = Variable(X)
        Y = [Variable(y) for y in Y]
        if GPU_AVAILABLE:
            X = X.cuda()
            Y = [y.cuda() for y in Y]

        if mode == "bilinear":
            outputs = model(X, Y)
            if GPU_AVAILABLE:
                outputs = [out.cpu() for out in outputs]
            outputs = [out.data.numpy().squeeze() for out in outputs]

        elif mode == "project_x":
            X_proj = model.project_x(X).data.numpy()
            X_proj = normalize_rows(X_proj)
            Y = [y.data.numpy() for y in Y]
            outputs = [
                x.reshape(1, -1).dot(np.atleast_2d(y).T).squeeze()
                for x, y in zip(X_proj, Y)
            ]

        elif mode == "project_y":
            Y_proj = [model.project_y(y).data.numpy() for y in Y]
            Y_proj = [normalize_rows(y) for y in Y_proj]
            X = X.data.numpy()
            outputs = [
                x.reshape(1, -1).dot(np.atleast_2d(y).T).squeeze()
                for x, y in zip(X, Y_proj)
            ]

        elif mode == "random":
            outputs = [np.random.random(len(y)) for y in Y]

        else:
            raise ValueError("not a valid mode")

        idxs = [np.argsort(out)[::-1] for out in outputs]
        p_at_1 += sum([np.mean(idx[:1] < 1) for idx in idxs])
        p_at_k += sum([np.mean(idx[:k] < k) for idx in idxs])
        ndcg += sum(
            [ndcg_at_k(out.tolist(), k=k, method=0) for out in outputs])

    N = len(loader.dataset)
    return p_at_1 / N, p_at_k / N, ndcg / N
Example #9
def calc_NDCG(prediction, target, k=5):
    """Work out the relevances, compute the NDCG for each query, then sum the
    per-query NDCG values and divide by the total number of queries to give a
    mean NDCG score. All inputs must be numpy.ndarray."""

    rel = prediction == target
    all_NDCG = np.zeros(target.shape)

    for i, relevance in enumerate(rel):
        all_NDCG[i, 0] = ndcg_at_k(relevance, k, 1)

    NDCG = all_NDCG.sum() / len(all_NDCG)

    return NDCG
Example #10
def calc_NDCG(prediction, target, k=5):
    """This needs to work out the relevances and once the NDCG is calculated
	then all of the NDCG for each query is added together and divided by 
	the total amount of queries giving a mean NDCG score. All the inputs need
	to be numpy.ndarray"""

    rel = prediction == target
    all_NDCG = np.zeros(target.shape)

    for i, relevance in enumerate(rel):
        all_NDCG[i, 0] = ndcg_at_k(relevance, k, 1)

    NDCG = all_NDCG.sum() / len(all_NDCG)

    return NDCG
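
Both calc_NDCG variants above derive binary relevance by elementwise comparison; a minimal sketch of the expected 2-D inputs (made-up data):

import numpy as np

# Hypothetical inputs: one row per query, columns in ranked order.
prediction = np.array([[4, 2, 7],
                       [1, 9, 9]])
target = np.array([[4, 3, 7],
                   [9, 9, 1]])
rel = prediction == target
print(rel.astype(int))   # [[1 0 1]
                         #  [0 1 0]]
# Each boolean row is then scored with ndcg_at_k(row, k, 1) and the
# per-query scores are averaged.
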
Example #11
def normalized_discounted_cummulative_gain(test_data, test_label, scores):
    unique_users = np.unique(test_data[:, 0])
    user_array = test_data[:, 0]
    ndcg = []
    for u in unique_users:
        indices_u = np.in1d(user_array, [u])
        labels_u = test_label[indices_u].astype(float)
        scores_u = scores[indices_u].astype(float)
        #ndcg is calculated only for the users with some positive examples
        if not all(i <= 0.001 for i in labels_u):
            tmp = np.c_[labels_u, scores_u]
            tmp = tmp[tmp[:, 1].argsort()[::-1], :]
            ordered_labels = tmp[:, 0]
            ndcg_u = rank.ndcg_at_k(ordered_labels, ordered_labels.shape[0], 1)
            ndcg.append(ndcg_u)
    return np.mean(ndcg)
Example #12
def generate_reward(gold_index_list, answer_index_list, reward_type):
    reward = 0
    ap = 0.
    reciprocal_rank = 0
    answer_list = list(answer_index_list)
    size = len(answer_index_list)
    true = sum(gold_index_list > 0)
    inp = np.zeros(size)
    for rank, val in enumerate(gold_index_list):
        if val and rank in answer_list:
            inp[answer_list.index(rank)] = val
    maxk = sum(inp > 0)
    if true:
        ap = average_precision(inp) * (maxk / true)
    reciprocal_rank = mean_reciprocal_rank([inp])
    ndcg = ndcg_at_k(inp, min(10, size))
    dcg_five = dcg_at_k(inp, 5)
    reward = rewards[reward_type - 1](inp, ap, reciprocal_rank, ndcg, dcg_five)
    return reward, ap, reciprocal_rank, ndcg, dcg_five
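
This variant dispatches on a module-level `rewards` list that is not shown in this collection apart from `rew6` (Example #19 below). A minimal sketch of what such a dispatch table could look like; the two functions here are hypothetical placeholders with the same signature:

def rew1(inp, ap, reciprocal_rank, ndcg, dcg_five):
    return ap

def rew2(inp, ap, reciprocal_rank, ndcg, dcg_five):
    # the averaged combination used by the first generate_reward variant above
    return (ap + reciprocal_rank + ndcg + dcg_five) / 4

rewards = [rew1, rew2]   # indexed by reward_type - 1 in generate_reward
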
Example #13
def evaluate(model, data, logdir, epoch, out_f, gpu):
    get_embedding(model, data, logdir, gpu, test=False)
    img_embeddings, img_fns, gel_embeddings, gel_fns = get_embedding(model,
                                                                     data,
                                                                     logdir,
                                                                     gpu,
                                                                     test=True)
    precision = get_score(img_embeddings, img_fns, gel_embeddings, gel_fns)
    return precision
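    # NOTE: the early return above makes the rest of this function unreachable
    # as written.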

    nb_img = len(img_embeddings)
    nb_gel = len(gel_embeddings)
    distance_matrix = np.zeros((nb_gel, nb_img))
    img_embeddings = np.array(img_embeddings)
    gel_embeddings = np.array(gel_embeddings)
    dim_embedding = img_embeddings.shape[-1]
    img_embeddings = img_embeddings.reshape((nb_img, dim_embedding))
    gel_embeddings = gel_embeddings.reshape((nb_gel, dim_embedding))

    scores = []
    for i in range(nb_gel):
        distance_matrix[i, :] = np.mean(np.square(img_embeddings -
                                                  gel_embeddings[i, :]),
                                        axis=1).T

        r = []
        for j in range(nb_img):
            if (get_gel_id(img_fns[j]) == get_gel_id(gel_fns[i])):
                r.append(1)
            else:
                r.append(0)
        d = distance_matrix[i, :].tolist()
        a = zip(d, r)
        a = sorted(a, key=lambda d: d[0])
        r = [x[1] for x in a]
        ndcg = [rank_metrics.ndcg_at_k(r, k) for k in [10, 20, 30]]
        precision = [rank_metrics.precision_at_k(r, k) for k in [10, 20, 30]]
        scores.append(ndcg + precision)

    scores = np.array(scores)
    scores = np.mean(scores, axis=0)
    print "ndcg & precision", scores
    print >> out_f, "ndcg & precision", scores
Example #14
def evaluate_retrieval(query_dct, corpus_dct, inverted_index, method_type):
    '''
    Given a query dictionary and a corpus dictionary, go through each query and
    determine the NDCG for its retrieval with the disease labels as relevance
    measures.
    '''
    metric_dct = {}

    for query_key in query_dct:
        doc_score_dct = {}

        q_disease_list, q_symptom_list, q_herb_list = query_dct[query_key]

        for doc_key in corpus_dct:
            d_disease_list, d_symptom_list, d_herb_list = corpus_dct[doc_key]

            # With no query expansion, our document is just the set of symptoms.
            document = d_symptom_list[:]
            if 'mixed' in method_type or 'synonym' in method_type:
                document += d_herb_list

            # If expanded, q_symptom list might also contain herbs.
            doc_score = okapi_bm25(q_symptom_list, document, inverted_index,
                                   len(corpus_dct))
            # Compute the relevance judgement.
            relevance = get_rel_score(q_disease_list, d_disease_list)
            doc_score_dct[(doc_key, relevance)] = doc_score

        sorted_scores = sorted(doc_score_dct.items(),
                               key=operator.itemgetter(1),
                               reverse=True)
        # Get the relevance rankings.
        rel_list = [pair[0][1] for pair in sorted_scores]

        # Compute different rank metrics for different values of k.
        for k in k_list:
            if k not in metric_dct:
                metric_dct[k] = []
            if rank_metric == 'ndcg':
                metric_dct[k] += [ndcg_at_k(rel_list, k)]
            elif rank_metric == 'precision':
                metric_dct[k] += [precision_at_k(rel_list, k)]
    return metric_dct
Example #15
 def summarize(self):
     """Give summary statistics about the tournament."""
     res = self.run()
     # res = self.results
     # champ should be undefeated
     champ = list(np.where(res.strength == max(res.strength))[0])
     copeland = (res.wins[champ] == self.n_rounds)
     # top-k
     ranks = pd.DataFrame(data=np.transpose([
         res.strength.rank(ascending=False),
         res.wins.rank(ascending=False), res.wins
     ]),
                          columns=["str_rank", "win_rank", "wins"])
     ranks['relevant'] = ranks['str_rank'] <= self.k
     borda = (ranks.win_rank[champ] == ranks.win_rank.min())
     top_k_df = ranks.loc[ranks['str_rank'] <= self.k]
     top_k = sum(top_k_df['wins'] >= self.n_rounds - 2) / self.k
     tau, k_p = scipy.stats.kendalltau(ranks.str_rank, ranks.win_rank)
     rho, sp_p = scipy.stats.spearmanr(ranks.str_rank, ranks.win_rank)
     ranks = ranks.sort_values(by="win_rank")  # keep the win-rank ordering for the metrics below
     # using rank_metrics
     rel_vec = ranks.relevant.values
     prec = rank_metrics.r_precision(rel_vec)
     prec_at_k = rank_metrics.precision_at_k(rel_vec, self.k)
     avg_prec = rank_metrics.average_precision(rel_vec)
     dcg = rank_metrics.dcg_at_k(rel_vec, self.k)
     ndcg = rank_metrics.ndcg_at_k(rel_vec, self.k)
     df = pd.DataFrame(data=[
         list([
             int(copeland),
             int(borda),
             float(top_k), prec, prec_at_k, avg_prec, dcg, ndcg,
             float(tau),
             float(rho)
         ])
     ],
                       columns=[
                           'undef_champ', 'top_champ', 'top_k_found',
                           'precision', 'precision_at_k', 'avg_prec', 'dcg',
                           'ndcg', 'tau', 'rho'
                       ])
     return df
Example #16
    def evaluate(self, ratings: Dict[int, List[int]], negatives: Dict[int, List[int]], topN: int):
        """
        evaluate performance of models
        :param ratings: key: user, value: list of positive items
        :param negatives: key: user, value: list of negative items
        :param topN: int
        :return:
        """
        ndcgs, apks, recalls = [], [], []
        for user in sorted(ratings.keys()):
            pos_items = ratings[user]
            neg_items = negatives[user]
            assert type(pos_items) == list and type(neg_items) == list

            items = neg_items + pos_items
            users = np.full(len(items), user, dtype=np.int64)
            items = np.asarray(items)
            predictions = self.predict(users, items)
            labels = [0.0] * len(neg_items) + [1.0] * len(pos_items)
            labels = np.array(labels)
            # compute metric here

            indices = np.argsort(-predictions)[:topN]  # indices of items with highest scores
            ranklist = labels[indices]
            ndcg = rank_metrics.ndcg_at_k(ranklist, topN)
            _, recall = rank_metrics._compute_precision_recall(ranklist, topN)
            apk = rank_metrics.average_precision(ranklist[:topN])
            ndcgs.append(ndcg)
            apks.append(apk)
            recalls.append(recall)

        results = {}
        results["ndcg"] = np.nanmean(ndcgs)
        results["ndcg_list"] = ndcgs
        results["map"] = np.nanmean(apks)
        results["maps_list"] = apks
        results["recall"] = np.nanmean(recalls)
        results["recalls_list"] = recalls

        return results
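
A small illustration (made-up scores) of how the ranked label list is assembled inside evaluate before the rank metrics are applied:

import numpy as np

labels = np.array([0., 0., 0., 1., 1.])          # negatives first, then positives
predictions = np.array([0.2, 0.9, 0.1, 0.8, 0.3])
topN = 3
indices = np.argsort(-predictions)[:topN]        # array([1, 3, 4])
ranklist = labels[indices]                       # array([0., 1., 1.])
# ranklist is what ndcg_at_k / average_precision then score.
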
Example #17

def evaluate_ndcg_at_k(testing_data, k=3):
    '''
    calc the similarity based on merging term and topic model
    :param: k, top k results accounted for calculating NDCG
    :param: lambda_ratio, the ratio for blending, final_similarity = lambda*term_similarity + (1-lambda)*topic_model_similarity
            lambda_ratio = 0 means only topic_model_similarity.
            lambda_ratio = 1 means only term_similarity
    :return: return NDCG@k
    '''
    ndcg_total = 0
    query_number = 0
    '''
    Get the final similarity rank
    '''
    # print('Getting the final similarity rank')
    for iir_name, mapping_dict in testing_data.items():
        if not iir_name in similarity_matrix:
            # print ('%s not found' % iir_name)
            continue

        weighted_similarities = similarity_matrix[iir_name]
        weighted_similarities = sorted(weighted_similarities,
                                       key=lambda item: item[1],
                                       reverse=True)

        r_array = []  # array used as input of nDCG
        for entry in weighted_similarities:
            if entry[0] in mapping_dict:
                r_array.append(mapping_dict[entry[0]])
            else:
                r_array.append(0)
                # print(entry[0], entry[0], entry[1])
        ndcg = ndcg_at_k(r_array, k)
        # print(r_array)
        # print(ndcg)
        ndcg_total += ndcg
    return float(ndcg_total) / len(testing_data)
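
The globals and arguments used above are not shown; one plausible shape for them, inferred from how they are indexed (hypothetical data):

similarity_matrix = {
    'iir_001': [('doc_a', 0.91), ('doc_b', 0.42), ('doc_c', 0.10)],
}
testing_data = {
    'iir_001': {'doc_a': 3, 'doc_c': 1},   # graded relevance per document id
}
# For this query the candidates sort to [doc_a, doc_b, doc_c], so
# r_array == [3, 0, 1], and the function averages ndcg_at_k(r_array, k)
# over all queries that appear in similarity_matrix.
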
Example #18
def main(args):
    global verbose
    verbose = args.verbose

    scores = read_scores(args.path_input,
                         col=args.col,
                         reverse=args.flag_reverse)

    rel2facts = {}
    ndcgs = {}
    for fact_en in scores.index.unique():
        ss = scores.loc[fact_en]
        try:
            rel = ss['rel'].values[0]
        except:
            logger.warning('Only one Japanese fact')
            continue
        try:
            rel2facts[rel].append(fact_en)
        except KeyError:
            rel2facts[rel] = [fact_en]
        ndcgs[fact_en] = ndcg_at_k(ss['label'].values, args.k, method=0)

    if args.path_output:
        with open(args.path_output, 'w') as f:
            for k, v in sorted(ndcgs.items(), key=lambda t: t[0]):
                f.write('{}\t{}\n'.format(k, v))

    if args.flag_by_relation:
        for rel, facts in sorted(rel2facts.items(), key=lambda t: t[0]):
            l = [ndcgs[fact_en] for fact_en in facts]
            print('{}\t{}\t{}'.format(rel, sum(l) / len(l), len(l)))

    ndcg = sum(ndcgs.values()) / len(ndcgs)
    if verbose:
        logger.info('nDCG@{}: {}'.format(args.k, ndcg))
    return ndcg
Example #19
def rew6(inp, ap, reciprocal_rank, ndcg, dcg_five):
    return (ap + precision_at_k(inp, 3) + precision_at_k(inp, 5) +
            ndcg_at_k(inp, 3) + ndcg_at_k(inp, 5)) / 5
Example #20
# In[36]:

import numpy as np
import rank_metrics
import sys
relevanceVector = np.loadtxt(open(sys.argv[1] + "/rv/relevanceVector_" +
                                  sys.argv[2]),
                             delimiter=" ")
f = open(sys.argv[1] + '/em/evalMetrics_' + sys.argv[2], 'w')
for k in range(1, 16):
    total_precision_k = 0
    total_dcg_k = 0
    total_ndcg_k = 0
    for row in relevanceVector:
        precision_k = rank_metrics.precision_at_k(row, k)
        dcg_k = rank_metrics.dcg_at_k(row, k, 0)
        ndcg_k = rank_metrics.ndcg_at_k(row, k, 0)
        total_precision_k = total_precision_k + precision_k
        total_dcg_k = total_dcg_k + dcg_k
        total_ndcg_k = total_ndcg_k + ndcg_k
    f.write("precision@" + str(k) + ": " + str(total_precision_k) + "\n")
    f.write("dcg@" + str(k) + ": " + str(total_dcg_k) + "\n")
    f.write("ndcg@" + str(k) + ": " + str(total_ndcg_k) + "\n")

mrr = rank_metrics.mean_reciprocal_rank(relevanceVector)
f.write("Mean Reciprocal Rank: " + str(mrr) + "\n")
maP = rank_metrics.mean_average_precision(relevanceVector)
f.write("Mean Average Precision: " + str(maP) + "\n")
f.close()
Example #21

    def test_limitation_1(self):
        r_one = [1, 1, 1]
        r_two = [1, 1, 1, 0]

        self.assertEqual(ndcg_at_k(r_one, 3, method=1),
                         ndcg_at_k(r_two, 4, method=1))
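
A short sketch of why the two calls tie, assuming the usual rank_metrics formulation in which method=1 divides the relevance at 1-indexed position i by log2(i + 1):

import numpy as np

def dcg_method1(r):
    # method=1 style discount: every position, including the first, is discounted
    r = np.asarray(r, dtype=float)
    return np.sum(r / np.log2(np.arange(2, r.size + 2)))

print(dcg_method1([1, 1, 1]))     # ~2.131
print(dcg_method1([1, 1, 1, 0]))  # same value: the trailing 0 contributes nothing,
                                  # and both lists are already ideally ordered,
                                  # so ndcg_at_k returns 1.0 for each.
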
Example #22
 if ent not in ent_vec:
     ent_skip_count += 1
 else:
     tmp_can_count = 0
     for can in eval_query[ent]:
         if can in ent_vec:
             tmp_can_count += 1
             a = ent_vec[ent]*ent_vec[can]
             sim.append((can, a.sum()))
     if tmp_can_count > 1:
         sim_rank = sorted(sim, key=lambda sim : sim[1], reverse=True)
         r = []
         for item in sim_rank:
             r.append(eval_query[ent][item[0]])
         if len(r) >1:
             tmp_n1 = rm.ndcg_at_k(r, 1, 1)
         else:
             tmp_n1 = rm.ndcg_at_k(r, len(r), 1)
         if len(r) >5:
             tmp_n5 = rm.ndcg_at_k(r, 5, 1)
         else:
             tmp_n5 = rm.ndcg_at_k(r, len(r), 1)
         if len(r) >10:
             tmp_n10 = rm.ndcg_at_k(r, 10, 1)
         else:
             tmp_n10 = rm.ndcg_at_k(r, len(r), 1)
         tmp_ap = rm.average_precision(r)
         ndcg1_sum += tmp_n1
         ndcg5_sum += tmp_n5
         ndcg10_sum += tmp_n10
         map_sum += tmp_ap
Example #23
File: run.py  Project: zyy598/TMER
def rec_net(train_loader, test_loader, node_emb, sequence_tensor):
    best_hit_1 = 0.0
    best_hit_5 = 0.0
    best_hit_10 = 0.0
    best_hit_20 = 0.0
    best_hit_50 = 0.0
    best_ndcg_1 = 0.0
    best_ndcg_5 = 0.0
    best_ndcg_10 = 0.0
    best_ndcg_20 = 0.0
    best_ndcg_50 = 0.0
    all_pos = []
    all_neg = []
    test_data.numpy()
    for index in range(test_data.shape[0]):
        user = test_data[index][0].item()
        item = test_data[index][1].item()
        link = test_data[index][2].item()
        if link == 1:
            all_pos.append((index, user, item))
        else:
            all_neg.append((index, user, item))
    recommendation = Recommendation(100).to(device)
    optimizer = torch.optim.Adam(recommendation.parameters(), lr=1e-3)
    for epoch in range(100):
        train_start_time = time.time()
        running_loss = 0.0
        for step, batch in enumerate(train_loader):
            batch_item_emb = node_emb[batch[:, 1]].reshape(
                (batch.shape[0], 1, 100)).to(device)
            batch_labels = batch[:, 2].to(device)
            batch_sequence_tensor = sequence_tensor[batch[:, 0]].reshape(
                (batch.shape[0], 9, 100)).to(device)
            optimizer.zero_grad()
            prediction = recommendation(batch_item_emb,
                                        batch_sequence_tensor).to(device)
            loss_train = torch.nn.functional.cross_entropy(
                prediction, batch_labels).to(device)
            loss_train.backward()
            optimizer.step()
            running_loss += loss_train.item()
        train_time = time.time() - train_start_time
        print(
            f'epoch: {epoch}, training loss: {running_loss}, train time: {train_time}'
        )

        if (epoch + 1) % 50 != 0:
            continue

        testing_start_time = time.time()

        hit_num_1 = 0
        hit_num_5 = 0
        hit_num_10 = 0
        hit_num_20 = 0
        hit_num_50 = 0
        all_ndcg_1 = 0
        all_ndcg_5 = 0
        all_ndcg_10 = 0
        all_ndcg_20 = 0
        all_ndcg_50 = 0
        for i, u_v_p in enumerate(all_pos):
            start = N * i
            end = N * i + N
            p_and_n_seq = all_neg[start:end]
            p_and_n_seq.append(tuple(u_v_p))  # N+1 items

            # look up the embeddings and compute a score for each candidate
            scores = []
            for index, userid, itemid in p_and_n_seq:
                # calculate score of user and item
                user_emb = node_emb[userid].reshape((1, 1, 100)).to(device)
                this_item_emb = node_emb[itemid].reshape(
                    (1, 1, 100)).to(device)
                this_sequence_tensor = sequence_tensor[userid].reshape(
                    (1, 9, 100)).to(device)
                score = recommendation(this_item_emb,
                                       this_sequence_tensor)[:, -1].to(device)
                scores.append(score.item())
            normalized_scores = [
                ((u_i_score - min(scores)) / (max(scores) - min(scores)))
                for u_i_score in scores
            ]
            pos_id = len(scores) - 1
            s = np.array(scores)
            sorted_s = np.argsort(-s)

            if sorted_s[0] == pos_id:
                hit_num_1 += 1
                hit_num_5 += 1
                hit_num_10 += 1
                hit_num_20 += 1
                hit_num_50 += 1
            elif pos_id in sorted_s[1:5]:
                hit_num_5 += 1
                hit_num_10 += 1
                hit_num_20 += 1
                hit_num_50 += 1
            elif pos_id in sorted_s[5:10]:
                hit_num_10 += 1
                hit_num_20 += 1
                hit_num_50 += 1
            elif pos_id in sorted_s[10:20]:
                hit_num_20 += 1
                hit_num_50 += 1
            elif pos_id in sorted_s[20:50]:
                hit_num_50 += 1
            ndcg_1 = ndcg_at_k(normalized_scores, 1, 0)
            ndcg_5 = ndcg_at_k(normalized_scores, 5, 0)
            ndcg_10 = ndcg_at_k(normalized_scores, 10, 0)
            ndcg_20 = ndcg_at_k(normalized_scores, 20, 0)
            ndcg_50 = ndcg_at_k(normalized_scores, 50, 0)
            all_ndcg_1 += ndcg_1
            all_ndcg_5 += ndcg_5
            all_ndcg_10 += ndcg_10
            all_ndcg_20 += ndcg_20
            all_ndcg_50 += ndcg_50
        all_pos_num = len(all_pos)
        hit_rate_1 = hit_num_1 / all_pos_num
        hit_rate_5 = hit_num_5 / all_pos_num
        hit_rate_10 = hit_num_10 / all_pos_num
        hit_rate_20 = hit_num_20 / all_pos_num
        hit_rate_50 = hit_num_50 / all_pos_num
        all_ndcg_1 = all_ndcg_1 / all_pos_num
        all_ndcg_5 = all_ndcg_5 / all_pos_num
        all_ndcg_10 = all_ndcg_10 / all_pos_num
        all_ndcg_20 = all_ndcg_20 / all_pos_num
        all_ndcg_50 = all_ndcg_50 / all_pos_num

        if best_hit_1 < hit_rate_1:
            best_hit_1 = hit_rate_1
        if best_hit_5 < hit_rate_5:
            best_hit_5 = hit_rate_5
        if best_ndcg_1 < all_ndcg_1:
            best_ndcg_1 = all_ndcg_1
        if best_hit_10 < hit_rate_10:
            best_hit_10 = hit_rate_10
        if best_hit_20 < hit_rate_20:
            best_hit_20 = hit_rate_20
        if best_hit_50 < hit_rate_50:
            best_hit_50 = hit_rate_50
        if best_ndcg_5 < all_ndcg_5:
            best_ndcg_5 = all_ndcg_5
        if best_ndcg_10 < all_ndcg_10:
            best_ndcg_10 = all_ndcg_10
        if best_ndcg_20 < all_ndcg_20:
            best_ndcg_20 = all_ndcg_20
        if best_ndcg_50 < all_ndcg_50:
            best_ndcg_50 = all_ndcg_50

        testing_time = time.time() - testing_start_time
        print(
            f"epo:{epoch}|"
            f"HR@1:{hit_rate_1:.4f} | HR@5:{hit_rate_5:.4f} | HR@10:{hit_rate_10:.4f} | HR@20:{hit_rate_20:.4f} | HR@50:{hit_rate_50:.4f} |"
            f" NDCG@1:{all_ndcg_1:.4f} | NDCG@5:{all_ndcg_5:.4f} | NDCG@10:{all_ndcg_10:.4f}| NDCG@20:{all_ndcg_20:.4f}| NDCG@50:{all_ndcg_50:.4f}|"
            f" best_HR@1:{best_hit_1:.4f} | best_HR@5:{best_hit_5:.4f} | best_HR@10:{best_hit_10:.4f} | best_HR@20:{best_hit_20:.4f} | best_HR@50:{best_hit_50:.4f} |"
            f" best_NDCG@1:{best_ndcg_1:.4f} | best_NDCG@5:{best_ndcg_5:.4f} | best_NDCG@10:{best_ndcg_10:.4f} | best_NDCG@20:{best_ndcg_20:.4f} | best_NDCG@50:{best_ndcg_50:.4f} |"
            f" train_time:{train_time:.2f} | test_time:{testing_time:.2f}")
    print('training finish')
Example #24
def evaluate_retrieval(query_dct, corpus_dct):
    '''
    Given a query dictionary and a corpus dictionary, go through each query and
    determine the NDCG for its retrieval with the disease labels as relevance
    measures.
    '''
    # Map each symptom and herb to the number of patient visits it appears in.
    inverted_index, avg_doc_len = get_inverted_index(corpus_dct)
    corpus_size = len(corpus_dct)

    metric_dct = {}
    for query_key in query_dct:
        doc_score_dct = {}
        # Ignore the query herb set. q_disease is label, q_symptom is query.
        q_disease_set, q_symptom_set, q_herb_set = query_dct[query_key]

        for doc_key in corpus_dct:
            d_disease_set, d_symptom_set, d_herb_set = corpus_dct[doc_key]

            # With no query expansion, our document is just the set of symptoms.
            document = d_symptom_set
            # If synonym or herbs/mixed expansions, add herb list into document.
            if args.method == 'synonym' or args.term_type in [
                    'herbs', 'mixed'
            ]:
                document = document.union(d_herb_set)

            # Get the score between the query and the document.
            doc_score = okapi_bm25(q_symptom_set, document, inverted_index,
                                   corpus_size, avg_doc_len)
            # Compute the relevance judgement.
            relevance = get_rel_score(q_disease_set, d_disease_set)
            doc_score_dct[(doc_key, relevance)] = doc_score

        sorted_scores = sorted(doc_score_dct.items(),
                               key=operator.itemgetter(1),
                               reverse=True)
        # Get the relevance rankings.
        rel_list = [pair[0][1] for pair in sorted_scores]

        # Compute different rank metrics for different values of k.
        for k in k_list:
            if k not in metric_dct:
                metric_dct[k] = []
            if args.rank_metric == 'ndcg':
                metric_dct[k] += [ndcg_at_k(rel_list, k)]
            elif args.rank_metric == 'precision':
                # metric_dct[k] += [precision_at_k(rel_list, k)]
                metric_dct[k] += [sum(rel_list[:k]) / float(k)]
            elif args.rank_metric == 'recall':
                metric_dct[k] += [sum(rel_list[:k]) / float(sum(rel_list))]
            elif args.rank_metric == 'f1':
                precision = sum(rel_list[:k]) / float(k)
                recall = sum(rel_list[:k]) / float(sum(rel_list))
                if precision == 0:
                    metric_dct[k] += [0]
                else:
                    metric_dct[k] += [
                        2 * precision * recall / (precision + recall)
                    ]
            elif args.rank_metric == 'map':

                r = np.asarray(rel_list[:k]) != 0
                out = [precision_at_k(r, i + 1) for i in range(r.size) if r[i]]
                if not out:
                    metric_dct[k] += [0.0]
                else:
                    metric_dct[k] += [sum(out) / sum(rel_list)]
    return metric_dct
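
A hypothetical worked example of the per-k precision/recall/f1 branches above (k_list and args are module-level globals in the original code):

rel_list = [1, 0, 1, 1, 0]
k = 3
precision = sum(rel_list[:k]) / float(k)             # 2/3
recall = sum(rel_list[:k]) / float(sum(rel_list))    # 2/3
f1 = 2 * precision * recall / (precision + recall)   # 2/3
print(precision, recall, f1)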