Example #1
def generate_reward(gold_index_list, answer_index_list):
    """Combine several ranking metrics into a single reward.

    gold_index_list: array of graded relevances, indexed by candidate id.
    answer_index_list: candidate ids in the order the system ranked them.
    """
    reward = 0
    ap = 0
    reciprocal_rank = 0
    answer_list = list(answer_index_list)
    size = len(answer_index_list)
    # Number of candidates with a positive gold relevance grade.
    true = sum(gold_index_list > 0)
    # Relevance vector re-ordered into the predicted ranking.
    inp = np.zeros(size)
    for rank, val in enumerate(gold_index_list):
        if val and rank in answer_list:
            inp[answer_list.index(rank)] = val
    # Relevant candidates that actually appear in the ranking.
    maxk = sum(inp > 0)
    if true:
        # Rescale AP so relevant items missing from the ranking count as misses.
        ap = average_precision(inp) * (maxk / true)
    reciprocal_rank = mean_reciprocal_rank([inp])
    ndcg = ndcg_at_k(inp, min(10, size))
    dcg_five = dcg_at_k(inp, 5)
    reward = (ap + reciprocal_rank + ndcg + dcg_five) / 4
    ranks = [1, 3, 5, 10]
    reward_tuple = [reward, ap, reciprocal_rank, ndcg, dcg_five]
    for r in ranks:
        reward_tuple.append(precision_at_k(inp, min(r, len(inp))))
    for r in ranks:
        reward_tuple.append(ndcg_at_k(inp, min(r, len(inp))))
    return reward_tuple
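A minimal usage sketch (the data values are hypothetical; numpy and the rank_metrics helpers used above, such as average_precision and ndcg_at_k, are assumed to be imported at module level):

import numpy as np

gold_index_list = np.array([3, 0, 2, 0, 1])  # graded relevance per candidate id
answer_index_list = [4, 0, 2, 1, 3]          # candidate ids in predicted rank order

reward_tuple = generate_reward(gold_index_list, answer_index_list)
print(reward_tuple[0])  # combined reward: mean of AP, MRR, NDCG@10 and DCG@5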
Example #2
def evaluate(model, data, logdir, epoch, out_f, gpu):
    # The first call runs on the non-test split; its return value is discarded.
    get_embedding(model, data, logdir, gpu, test=False)
    img_embeddings, img_fns, gel_embeddings, gel_fns = get_embedding(model,
                                                                     data,
                                                                     logdir,
                                                                     gpu,
                                                                     test=True)
    precision = get_score(img_embeddings, img_fns, gel_embeddings, gel_fns)
    return precision

    # NOTE: the early return above makes everything below unreachable.
    nb_img = len(img_embeddings)
    nb_gel = len(gel_embeddings)
    distance_matrix = np.zeros((nb_gel, nb_img))
    img_embeddings = np.array(img_embeddings)
    gel_embeddings = np.array(gel_embeddings)
    dim_embedding = img_embeddings.shape[-1]
    img_embeddings = img_embeddings.reshape((nb_img, dim_embedding))
    gel_embeddings = gel_embeddings.reshape((nb_gel, dim_embedding))

    scores = []
    for i in range(nb_gel):
        distance_matrix[i, :] = np.mean(np.square(img_embeddings -
                                                  gel_embeddings[i, :]),
                                        axis=1).T

        r = []
        for j in range(nb_img):
            if (get_gel_id(img_fns[j]) == get_gel_id(gel_fns[i])):
                r.append(1)
            else:
                r.append(0)
        d = distance_matrix[i, :].tolist()
        a = zip(d, r)
        a = sorted(a, key=lambda d: d[0])
        r = [x[1] for x in a]
        ndcg = [rank_metrics.ndcg_at_k(r, k) for k in [10, 20, 30]]
        precision = [rank_metrics.precision_at_k(r, k) for k in [10, 20, 30]]
        scores.append(ndcg + precision)

    scores = np.array(scores)
    scores = np.mean(scores, axis=0)
    print "ndcg & precision", scores
    print >> out_f, "ndcg & precision", scores
Example #3
def evaluate_retrieval(query_dct, corpus_dct, inverted_index, method_type):
    '''
    Given a query dictionary and a corpus dictionary, go through each query and
    score its retrieval (NDCG or precision at k) with the disease labels as
    relevance judgements.
    '''
    metric_dct = {}

    for query_key in query_dct:
        doc_score_dct = {}

        q_disease_list, q_symptom_list, q_herb_list = query_dct[query_key]

        for doc_key in corpus_dct:
            d_disease_list, d_symptom_list, d_herb_list = corpus_dct[doc_key]

            # With no query expansion, our document is just the set of symptoms.
            document = d_symptom_list[:]
            if 'mixed' in method_type or 'synonym' in method_type:
                document += d_herb_list

            # If expanded, q_symptom list might also contain herbs.
            doc_score = okapi_bm25(q_symptom_list, document, inverted_index,
                                   len(corpus_dct))
            # Compute the relevance judgement.
            relevance = get_rel_score(q_disease_list, d_disease_list)
            doc_score_dct[(doc_key, relevance)] = doc_score

        sorted_scores = sorted(doc_score_dct.items(),
                               key=operator.itemgetter(1),
                               reverse=True)
        # Get the relevance rankings.
        rel_list = [pair[0][1] for pair in sorted_scores]

        # Compute different rank metrics for different values of k.
        for k in k_list:
            if k not in metric_dct:
                metric_dct[k] = []
            if rank_metric == 'ndcg':
                metric_dct[k] += [ndcg_at_k(rel_list, k)]
            elif rank_metric == 'precision':
                metric_dct[k] += [precision_at_k(rel_list, k)]
    return metric_dct
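The function above relies on module-level names not shown in this excerpt: k_list, rank_metric, and the project helpers okapi_bm25 and get_rel_score. A minimal sketch of the assumed setup (values are illustrative only):

import operator
from rank_metrics import ndcg_at_k, precision_at_k

k_list = [10, 20, 50]   # evaluation cutoffs (assumed)
rank_metric = 'ndcg'    # or 'precision'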
Example #4
 def summarize(self):
     """Give summary statistics about the tournament."""
     res = self.run()
     # res = self.results
     # champ should be undefeated
     champ = list(np.where(res.strength == max(res.strength))[0])
     copeland = (res.wins[champ] == self.n_rounds)
     # top-k
     ranks = pd.DataFrame(data=np.transpose([
         res.strength.rank(ascending=False),
         res.wins.rank(ascending=False), res.wins
     ]),
                          columns=["str_rank", "win_rank", "wins"])
     ranks['relevant'] = ranks['str_rank'] <= self.k
     borda = (ranks.win_rank[champ] == ranks.win_rank.min())
     top_k_df = ranks.loc[ranks['str_rank'] <= self.k]
     top_k = sum(top_k_df['wins'] >= self.n_rounds - 2) / self.k
     tau, k_p = scipy.stats.kendalltau(ranks.str_rank, ranks.win_rank)
     rho, sp_p = scipy.stats.spearmanr(ranks.str_rank, ranks.win_rank)
     ranks = ranks.sort_values(by="win_rank")
     # using rank_metrics
     rel_vec = ranks.relevant.values
     prec = rank_metrics.r_precision(rel_vec)
     prec_at_k = rank_metrics.precision_at_k(rel_vec, self.k)
     avg_prec = rank_metrics.average_precision(rel_vec)
     dcg = rank_metrics.dcg_at_k(rel_vec, self.k)
     ndcg = rank_metrics.ndcg_at_k(rel_vec, self.k)
     df = pd.DataFrame(data=[
         list([
             int(copeland),
             int(borda),
             float(top_k), prec, prec_at_k, avg_prec, dcg, ndcg,
             float(tau),
             float(rho)
         ])
     ],
                       columns=[
                           'undef_champ', 'top_champ', 'top_k_found',
                           'precision', 'precision_at_k', 'avg_prec', 'dcg',
                           'ndcg', 'tau', 'rho'
                       ])
     return df
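summarize() depends on attributes of the surrounding class that this excerpt does not show. A hypothetical minimal interface, inferred from the calls above, is a tournament object with n_rounds, k, and a run() method returning per-player results:

import pandas as pd

class ToyTournament:
    def __init__(self, strength, wins, n_rounds, k):
        self.n_rounds = n_rounds  # wins an undefeated champion should have
        self.k = k                # size of the "relevant" top-k set
        self._res = pd.DataFrame({"strength": strength, "wins": wins})

    def run(self):
        # summarize() expects one row per player with 'strength' and 'wins'.
        return self._res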
Example #5
def compute_metrics(ranked_judgements, pr_atk, threshold_grade):
    """
    Given the ranked judgements compute the metrics for a query.
    :param ranked_judgements: list(int); graded or binary relevances in rank order.
    :param pr_atk: int; the @K value to use for computing precision and recall.
    :param threshold_grade: int; Assuming 0-3 graded relevances, threshold at some point
        and convert graded to binary relevance.
    :return: dict; metric name mapped to its value for this query.
    """
    graded_judgements = ranked_judgements
    ranked_judgements = [
        1 if rel >= threshold_grade else 0 for rel in graded_judgements
    ]
    # Use the full candidate set, not just the top pr_atk.
    ndcg = rm.ndcg_at_k(graded_judgements, len(ranked_judgements))
    # NDCG over the top 20% of the ranked candidates.
    ndcg_pr = rm.ndcg_at_k(graded_judgements,
                           int(0.20 * len(ranked_judgements)))
    ndcg_20 = rm.ndcg_at_k(graded_judgements, 20)
    max_total_relevant = sum(ranked_judgements)
    recall = recall_at_k(ranked_rel=ranked_judgements,
                         atk=pr_atk,
                         max_total_relevant=max_total_relevant)
    precision = rm.precision_at_k(r=ranked_judgements, k=pr_atk)
    r_precision = rm.r_precision(r=ranked_judgements)
    f1 = 2 * precision * recall / (precision + recall) if (precision +
                                                           recall) > 0 else 0.0
    av_precision = rm.average_precision(r=ranked_judgements)
    reciprocal_rank = rm.mean_reciprocal_rank(rs=[ranked_judgements])
    metrics = {
        'recall': float(recall),
        'precision': float(precision),
        'f1': float(f1),
        'r_precision': float(r_precision),
        'av_precision': float(av_precision),
        'reciprocal_rank': float(reciprocal_rank),
        'ndcg': ndcg,
        'ndcg@20': ndcg_20,
        'ndcg%20': ndcg_pr  # NDCG at 20% of the ranking depth
    }
    return metrics
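A toy call (hypothetical judgements; rm is the rank_metrics module and recall_at_k is a project helper assumed to be defined elsewhere):

ranked_judgements = [3, 0, 2, 1, 0, 3, 0, 0, 2, 0]
metrics = compute_metrics(ranked_judgements, pr_atk=5, threshold_grade=2)
print(metrics['precision'], metrics['recall'], metrics['ndcg'])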
Example #6

import numpy as np
import rank_metrics
import sys
relevanceVector = np.loadtxt(open(sys.argv[1] + "/rv/relevanceVector_" +
                                  sys.argv[2]),
                             delimiter=" ")
f = open(sys.argv[1] + '/em/evalMetrics_' + sys.argv[2], 'w')
for k in range(1, 16):
    # Accumulate metric sums over all relevance rows (totals, not averages).
    total_precision_k = 0
    total_dcg_k = 0
    total_ndcg_k = 0
    for row in relevanceVector:
        precision_k = rank_metrics.precision_at_k(row, k)
        dcg_k = rank_metrics.dcg_at_k(row, k, 0)
        ndcg_k = rank_metrics.ndcg_at_k(row, k, 0)
        total_precision_k = total_precision_k + precision_k
        total_dcg_k = total_dcg_k + dcg_k
        total_ndcg_k = total_ndcg_k + ndcg_k
    f.write("precision@" + str(k) + ": " + str(total_precision_k) + "\n")
    f.write("dcg@" + str(k) + ": " + str(total_dcg_k) + "\n")
    f.write("ndcg@" + str(k) + ": " + str(total_ndcg_k) + "\n")

mrr = rank_metrics.mean_reciprocal_rank(relevanceVector)
f.write("Mean Reciprocal Rank: " + str(mrr) + "\n")
maP = rank_metrics.mean_average_precision(relevanceVector)
f.write("Mean Average Precision: " + str(maP) + "\n")
f.close()
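The script above expects two command-line arguments: a base directory containing rv/ and em/ subdirectories, and a file suffix for the relevance-vector file, so an invocation would look something like python eval_metrics.py results_dir 42 (the script name here is hypothetical).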
Example #7
def evaluate_retrieval(query_dct, corpus_dct):
    '''
    Given a query dictionary and a corpus dictionary, go through each query and
    score its retrieval (NDCG, precision, recall, F1, or MAP at k) with the
    disease labels as relevance judgements.
    '''
    # Map each symptom and herb to the number of patient visits it appears in.
    inverted_index, avg_doc_len = get_inverted_index(corpus_dct)
    corpus_size = len(corpus_dct)

    metric_dct = {}
    for query_key in query_dct:
        doc_score_dct = {}
        # Ignore the query herb set. q_disease is label, q_symptom is query.
        q_disease_set, q_symptom_set, q_herb_set = query_dct[query_key]

        for doc_key in corpus_dct:
            d_disease_set, d_symptom_set, d_herb_set = corpus_dct[doc_key]

            # With no query expansion, our document is just the set of symptoms.
            document = d_symptom_set
            # If synonym or herbs/mixed expansions, add herb list into document.
            if args.method == 'synonym' or args.term_type in [
                    'herbs', 'mixed'
            ]:
                document = document.union(d_herb_set)

            # Get the score between the query and the document.
            doc_score = okapi_bm25(q_symptom_set, document, inverted_index,
                                   corpus_size, avg_doc_len)
            # Compute the relevance judgement.
            relevance = get_rel_score(q_disease_set, d_disease_set)
            doc_score_dct[(doc_key, relevance)] = doc_score

        sorted_scores = sorted(doc_score_dct.items(),
                               key=operator.itemgetter(1),
                               reverse=True)
        # Get the relevance rankings.
        rel_list = [pair[0][1] for pair in sorted_scores]

        # Compute different rank metrics for different values of k.
        for k in k_list:
            if k not in metric_dct:
                metric_dct[k] = []
            if args.rank_metric == 'ndcg':
                metric_dct[k] += [ndcg_at_k(rel_list, k)]
            elif args.rank_metric == 'precision':
                # metric_dct[k] += [precision_at_k(rel_list, k)]
                metric_dct[k] += [sum(rel_list[:k]) / float(k)]
            elif args.rank_metric == 'recall':
                metric_dct[k] += [sum(rel_list[:k]) / float(sum(rel_list))]
            elif args.rank_metric == 'f1':
                precision = sum(rel_list[:k]) / float(k)
                recall = sum(rel_list[:k]) / float(sum(rel_list))
                if precision == 0:
                    metric_dct[k] += [0]
                else:
                    metric_dct[k] += [
                        2 * precision * recall / (precision + recall)
                    ]
            elif args.rank_metric == 'map':

                r = np.asarray(rel_list[:k]) != 0
                out = [precision_at_k(r, i + 1) for i in range(r.size) if r[i]]
                if not out:
                    metric_dct[k] += [0.0]
                else:
                    metric_dct[k] += [sum(out) / sum(rel_list)]
    return metric_dct
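This variant reads its configuration from an argparse namespace (args) and a module-level k_list, neither of which is shown in the excerpt. A hypothetical sketch of that setup, consistent with the branches above:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--method', default='baseline')
parser.add_argument('--term_type', default='symptoms',
                    choices=['symptoms', 'herbs', 'mixed'])
parser.add_argument('--rank_metric', default='ndcg',
                    choices=['ndcg', 'precision', 'recall', 'f1', 'map'])
args = parser.parse_args()

k_list = [10, 20, 50]   # evaluation cutoffs (assumed)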
Example #8
def rew6(inp, ap, reciprocal_rank, ndcg, dcg_five):
    # Average AP with P@3, P@5, NDCG@3 and NDCG@5; the reciprocal_rank,
    # ndcg and dcg_five arguments are accepted but unused here.
    return (ap + precision_at_k(inp, 3) + precision_at_k(inp, 5) +
            ndcg_at_k(inp, 3) + ndcg_at_k(inp, 5)) / 5
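A toy call (hypothetical data; precision_at_k, ndcg_at_k and average_precision are the rank_metrics helpers assumed to be imported at module level, as in Example #1):

inp = [1, 0, 1, 1, 0, 0]     # relevance vector in predicted rank order
ap = average_precision(inp)  # precomputed, as in the original reward pipeline
reward = rew6(inp, ap, 0.0, 0.0, 0.0)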