Exemplo n.º 1
0
 def test_parse_measure(self):
     """parse_measure maps measure strings (and Measure objects) to the
     canonical Measure instance, normalising parameter order, applying
     defaults, and letting @cutoff override a cutoff given as a parameter."""
     tests = {
         'AP': AP,
         AP: AP,
         'MAP': AP,
         MAP: MAP,
         'P@10': P @ 10,
         P @ 10: P @ 10,
         'nDCG@10': nDCG @ 10,
         'P(rel=2)@10': P(rel=2) @ 10,
         'nDCG(dcg="exp-log2")@10': nDCG(dcg='exp-log2') @ 10,
         # an explicit @cutoff takes precedence over a cutoff= parameter
         'nDCG(dcg="exp-log2", cutoff=20)@10': nDCG(dcg='exp-log2') @ 10,
         'nDCG(dcg="exp-log2", cutoff=20)': nDCG(dcg='exp-log2') @ 20,
         'nDCG(gains={0:1,1:2})': nDCG(gains={
             0: 1,
             1: 2
         }),
         # key order inside the gains mapping is irrelevant
         'nDCG(gains={1: 2, 0: 1})': nDCG(gains={
             0: 1,
             1: 2
         }),
         'nDCG(gains={0:1,1:2})@5': nDCG(gains={
             0: 1,
             1: 2
         }) @ 5,
         # NOTE(review): this key had been mangled to '[email protected]' by an
         # email-obfuscation scraper; restored to the form implied by its
         # expected value IPrec @ 0.2.
         '[email protected]': IPrec @ 0.2,
         'IPrec(rel=2)@0.2': IPrec(rel=2) @ 0.2,
         # @recall overrides a recall= parameter, mirroring the cutoff rule
         'IPrec(rel=2, recall=0.4)@0.2': IPrec(rel=2) @ 0.2,
         'IPrec(rel=2, recall=0.4)': IPrec(rel=2) @ 0.4,
         IPrec(rel=2) @ 0.4: IPrec(rel=2) @ 0.4,
     }
     for key, value in tests.items():
         with self.subTest(key):
             self.assertEqual(ir_measures.parse_measure(key), value)
Exemplo n.º 2
0
 def supports(self, metric):
     """Return True iff *metric* (stringified) names a measure that
     ir_measures can parse.

     parse_measure raises ValueError for malformed measure strings and
     NameError for unknown measure names; both mean "not supported".
     """
     try:
         # Result discarded: only parseability matters here.
         ir_measures.parse_measure(str(metric))
     except (ValueError, NameError):
         return False
     return True
Exemplo n.º 3
0
 def calc_metrics(self, qrels, run, metrics, verbose=False):
     """Evaluate *run* against *qrels* for every measure in *metrics*.

     :param qrels: relevance judgments accepted by ir_measures.iter_calc
     :param run: system run accepted by ir_measures.iter_calc
     :param metrics: iterable of measure names/objects; each is parsed via
         ir_measures.parse_measure
     :param verbose: unused, kept for interface compatibility
     :return: {original metric string: {query_id: value}}
     """
     # Map each parsed Measure back to the caller's original string so the
     # result dict is keyed the way the caller asked for it.
     name_by_measure = {ir_measures.parse_measure(str(m)): str(m) for m in metrics}
     per_metric = {}
     for scored in ir_measures.iter_calc(list(name_by_measure), qrels, run):
         label = name_by_measure[scored.measure]
         per_metric.setdefault(label, {})[scored.query_id] = scored.value
     return per_metric
Exemplo n.º 4
0
    def _query_differences(self, run1, run2, *args, **kwargs):
        """
        :param run1: TREC run. Has the format {qid: {docid: score}, ...}
        :param run2: Same as above
        :param args: Unused.
        :param kwargs: Expects a 'dataset' parameter. This is an instance of ir-datasets
        :return: Tuple of (top-k qids sorted by |metric difference|,
                 {qid: |difference|}, the metric string, and
                 {qid: [run1 score, run2 score]})
        """
        assert "dataset" in kwargs, "Dataset object not supplied for qrel measure"
        dataset = kwargs["dataset"]
        assert dataset.has_qrels(
        ), "Dataset object does not have the qrels files"

        # Restrict both runs to the queries they have in common.
        overlapping_keys = set(run1.keys()).intersection(set(run2.keys()))
        run1 = {
            qid: doc_id_to_score
            for qid, doc_id_to_score in run1.items() if qid in overlapping_keys
        }
        run2 = {
            qid: doc_id_to_score
            for qid, doc_id_to_score in run2.items() if qid in overlapping_keys
        }

        qrels = dataset.qrels_dict()
        try:
            metric = parse_measure(self.metric)
        except (ValueError, NameError):
            # parse_measure raises ValueError for malformed measure strings
            # and NameError for unknown measure names; previously only
            # NameError was caught, so a syntax error escaped as a raw
            # traceback instead of this friendly message.
            print(
                "Unknown measure: {}. Please provide a measure supported by https://ir-measur.es/"
                .format(self.metric))
            sys.exit(1)

        eval_run_1 = self.convert_to_nested_dict(
            iter_calc([metric], qrels, run1))
        eval_run_2 = self.convert_to_nested_dict(
            iter_calc([metric], qrels, run2))

        # Compute each per-query |difference| once, then rank by it.
        common_qids = eval_run_1.keys() & eval_run_2.keys()
        diffs = {
            qid: abs(eval_run_1[qid][metric] - eval_run_2[qid][metric])
            for qid in common_qids
        }
        query_ids = sorted(diffs, key=diffs.get, reverse=True)[:self.topk]
        id2diff = {qid: diffs[qid] for qid in query_ids}
        id2qrelscores = {
            qid: [eval_run_1[qid][metric], eval_run_2[qid][metric]]
            for qid in query_ids
        }
        return query_ids, id2diff, self.metric, id2qrelscores
Exemplo n.º 5
0
def _get_measures(args):
    measures, errors = [], []
    for mstr in args.measures:
        for m in mstr.split():
            try:
                measure = ir_measures.parse_measure(m)
                if measure not in measures:
                    measures.append(measure)
            except ValueError:
                errors.append(f'syntax error: {m}')
            except NameError:
                errors.append(f'unknown measure: {m}')
    if errors:
        sys.stderr.write('\n'.join(['error parsing measures'] + errors + ['']))
        sys.exit(-1)
    return measures