Example #1
 def test_prf1_metrics(self) -> None:
     self.assertMetricsAlmostEqual(
         compute_classification_metrics(PREDICTIONS1,
                                        LABEL_NAMES1,
                                        loss=2.0,
                                        average_precisions=False),
         ClassificationMetrics(
             accuracy=0.5,
             macro_prf1_metrics=MacroPRF1Metrics(
                 per_label_scores={
                     # label1: TP = 1, FP = 0, FN = 1
                     "label1": PRF1Scores(1, 0, 1, 1.0, 0.5, 2.0 / 3),
                     # label2: TP = 1, FP = 1, FN = 1
                     "label2": PRF1Scores(1, 1, 1, 0.5, 0.5, 0.5),
                     # label3: TP = 0, FP = 1, FN = 0
                     "label3": PRF1Scores(0, 1, 0, 0.0, 0.0, 0.0),
                 },
                 macro_scores=MacroPRF1Scores(3, 0.5, 1.0 / 3, 7.0 / 18),
             ),
             per_label_soft_scores=None,
             mcc=None,
             roc_auc=None,
             loss=2.0,
         ),
     )
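The expected numbers in this test follow directly from the standard definitions. Below is a minimal, library-independent sketch of that arithmetic, assuming precision = TP / (TP + FP), recall = TP / (TP + FN), F1 is their harmonic mean, and the macro scores are unweighted averages over the three labels (consistent with the 7/18 macro F1 above):

    counts = {"label1": (1, 0, 1), "label2": (1, 1, 1), "label3": (0, 1, 0)}  # (TP, FP, FN)

    def prf1(tp, fp, fn):
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        return precision, recall, f1

    per_label = {name: prf1(*c) for name, c in counts.items()}
    # label1 -> (1.0, 0.5, 2/3), label2 -> (0.5, 0.5, 0.5), label3 -> (0.0, 0.0, 0.0)
    macro_precision, macro_recall, macro_f1 = (
        sum(scores[i] for scores in per_label.values()) / len(per_label) for i in range(3)
    )
    # (0.5, 1/3, 7/18), matching MacroPRF1Scores(3, 0.5, 1.0 / 3, 7.0 / 18)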
Example #2
 def test_compute_mcc(self) -> None:
     metrics = compute_classification_metrics(PREDICTIONS2,
                                              LABEL_NAMES2,
                                              loss=5.0)
     self.assertAlmostEqual(metrics.mcc, 1.0 / 6)
     # Just check that the metrics print without errors
     metrics.print_metrics()
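PREDICTIONS2 and LABEL_NAMES2 are defined elsewhere, so the 1/6 expectation cannot be re-derived from this snippet alone. For reference, here is a minimal sketch of the standard binary Matthews correlation coefficient computed from confusion-matrix counts (the counts below are purely illustrative):

    import math

    def mcc(tp, tn, fp, fn):
        # MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
        denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
        return (tp * tn - fp * fn) / denom if denom else 0.0

    print(mcc(tp=3, tn=2, fp=2, fn=1))  # ~0.258 for these illustrative counts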
Example #3
 def calculate_metric(self):
     return compute_classification_metrics(
         [
             LabelPrediction(scores, pred, expect) for scores, pred, expect
             in zip(self.all_scores, self.all_preds, self.all_targets)
         ],
         self.label_names,
         self.calculate_loss(),
     )
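This variant assumes self.all_scores, self.all_preds and self.all_targets are parallel per-example lists. Here is a self-contained sketch of how such a list is assembled; the LabelPrediction stand-in and its field names are assumptions for illustration, not necessarily the library's exact definition:

    from typing import List, NamedTuple

    class LabelPrediction(NamedTuple):  # illustrative stand-in
        label_scores: List[float]
        predicted_label: int
        expected_label: int

    all_scores = [[0.9, 0.1], [0.2, 0.8]]
    all_preds = [0, 1]
    all_targets = [0, 0]
    label_predictions = [
        LabelPrediction(scores, pred, expect)
        for scores, pred, expect in zip(all_scores, all_preds, all_targets)
    ]
    # [LabelPrediction(label_scores=[0.9, 0.1], predicted_label=0, expected_label=0), ...]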
Example #4
 def calculate_metric(self):
     return compute_classification_metrics(
         list(
             itertools.chain.from_iterable(
                 (LabelPrediction(s, p, e)
                  for s, p, e in zip(scores, pred, expect))
                 for scores, pred, expect in zip(
                     self.all_scores, self.all_preds, self.all_targets))),
         self.label_names,
         self.calculate_loss(),
     )
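Here each element of all_scores / all_preds / all_targets is itself a per-batch sequence, so the triples have to be flattened before they are wrapped in LabelPrediction. The chain.from_iterable call is equivalent to a nested comprehension; a self-contained sketch with illustrative nested lists (plain tuples stand in for LabelPrediction):

    import itertools

    all_scores = [[[0.9, 0.1], [0.2, 0.8]], [[0.6, 0.4]]]  # batches of per-example scores
    all_preds = [[0, 1], [0]]
    all_targets = [[0, 0], [1]]

    flat = [
        (s, p, e)
        for scores, pred, expect in zip(all_scores, all_preds, all_targets)
        for s, p, e in zip(scores, pred, expect)
    ]
    flat_chained = list(
        itertools.chain.from_iterable(
            ((s, p, e) for s, p, e in zip(scores, pred, expect))
            for scores, pred, expect in zip(all_scores, all_preds, all_targets)
        )
    )
    assert flat == flat_chained  # three flattened (scores, pred, target) triples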
Example #5
 def calculate_metric(self):
     # If we are running in memory efficient mode, then scores in
     # LabelPrediction should be an empty list
     label_predictions = [
         LabelPrediction(scores, pred, expect)
         for scores, pred, expect in zip_longest(
             self.all_scores, self.all_preds, self.all_targets, fillvalue=[]
         )
     ]
     return compute_classification_metrics(
         label_predictions,
         self.label_names,
         self.calculate_loss(),
         # Compute soft-metrics only if self.is_memory_efficient is False
         average_precisions=(not self.is_memory_efficient),
         recall_at_precision_thresholds=self.recall_at_precision_thresholds,
     )
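The zip_longest(..., fillvalue=[]) call is what supplies the empty score lists in memory-efficient mode: when self.all_scores is empty (or shorter than the other lists), every missing position is padded with []. A small self-contained sketch of that behaviour:

    from itertools import zip_longest

    all_scores = []          # memory-efficient mode: no per-example scores were kept
    all_preds = [1, 0, 1]
    all_targets = [1, 1, 1]

    rows = list(zip_longest(all_scores, all_preds, all_targets, fillvalue=[]))
    # [([], 1, 1), ([], 0, 1), ([], 1, 1)] -- each missing score becomes an empty list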
Example #6
def compute_length_metrics(
    all_target_lens: List[int],
    all_target_length_preds: List[List[int]],
    select_length_beam: int,
    log_per_label_metrics: bool = True,
):
    length_metrics = {}
    length_report = {}
    if all_target_length_preds:
        all_length_pred_agg = {}
        beam = len(all_target_length_preds[0])
        for i in range(beam):
            all_length_pred_agg[i] = []
        for label, preds in zip(all_target_lens, all_target_length_preds):
            for l in range(beam):
                if label in preds[0 : l + 1]:
                    all_length_pred_agg[l].append(label)
                else:
                    all_length_pred_agg[l].append(preds[0])
        for i in range(beam):
            length_metrics[i] = accuracy_score(all_target_lens, all_length_pred_agg[i])

        max_len = max(all_target_lens + all_length_pred_agg[select_length_beam])
        all_pairs = [
            LabelPrediction(
                [1 if idx == pred else 0 for idx in range(max_len + 1)], pred, expect
            )
            for pred, expect in zip(
                all_length_pred_agg[select_length_beam], all_target_lens
            )
        ]

        length_report = compute_classification_metrics(
            all_pairs,
            [str(l) for l in range(max_len + 1)],
            0.0,  # Placeholder loss
            log_per_label_metrics=log_per_label_metrics,
        )

    return length_metrics, length_report
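The inner loop is a top-k style aggregation: for beam position l, a prediction counts as correct if the true length appears anywhere in the first l + 1 beam candidates, otherwise the top candidate is kept. A self-contained sketch of just that aggregation on tiny illustrative data, with accuracy computed inline so the sketch does not depend on accuracy_score:

    all_target_lens = [2, 3]
    all_target_length_preds = [[2, 3], [1, 3]]  # beam of 2 length candidates per example
    beam = len(all_target_length_preds[0])

    all_length_pred_agg = {i: [] for i in range(beam)}
    for label, preds in zip(all_target_lens, all_target_length_preds):
        for k in range(beam):
            # credit the prediction if the true length is within the top k + 1 candidates
            all_length_pred_agg[k].append(label if label in preds[: k + 1] else preds[0])

    length_metrics = {
        k: sum(p == t for p, t in zip(agg, all_target_lens)) / len(all_target_lens)
        for k, agg in all_length_pred_agg.items()
    }
    # length_metrics == {0: 0.5, 1: 1.0}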
Example #7
    def calculate_metric(self):
        all_rows = zip(
            self.all_context[self.ROW_INDEX],
            self.all_context[self.ANSWERS_COLUMN],
            self.all_context[self.QUES_COLUMN],
            self.all_context[self.DOC_COLUMN],
            self.all_pred_answers,
            self.all_start_pos_preds,
            self.all_end_pos_preds,
            self.all_has_answer_preds,
            self.all_start_pos_targets,
            self.all_end_pos_targets,
            self.all_has_answer_targets,
            self.all_start_pos_scores,
            self.all_end_pos_scores,
            self.all_has_answer_scores,
        )

        all_rows_dict = {}
        for row in all_rows:
            try:
                all_rows_dict[row[0]].append(row)
            except KeyError:
                all_rows_dict[row[0]] = [row]

        all_rows = []
        for rows in all_rows_dict.values():
            argmax = np.argmax([row[11] + row[12] for row in rows])
            all_rows.append(rows[argmax])

        all_rows = sorted(all_rows, key=lambda x: int(x[0]))

        (
            self.all_context[self.ROW_INDEX],
            self.all_context[self.ANSWERS_COLUMN],
            self.all_context[self.QUES_COLUMN],
            self.all_context[self.DOC_COLUMN],
            self.all_pred_answers,
            self.all_start_pos_preds,
            self.all_end_pos_preds,
            self.all_has_answer_preds,
            self.all_start_pos_targets,
            self.all_end_pos_targets,
            self.all_has_answer_targets,
            self.all_start_pos_scores,
            self.all_end_pos_scores,
            self.all_has_answer_scores,
        ) = zip(*all_rows)

        exact_matches = self._compute_exact_matches(
            self.all_pred_answers,
            self.all_context[self.ANSWERS_COLUMN],
            self.all_has_answer_preds,
            self.all_has_answer_targets,
        )
        f1_score = self._compute_f1_score(
            self.all_pred_answers,
            self.all_context[self.ANSWERS_COLUMN],
            self.all_has_answer_preds,
            self.all_has_answer_targets,
        )
        count = len(self.all_has_answer_preds)
        self.all_preds = (
            self.all_pred_answers,
            self.all_start_pos_preds,
            self.all_end_pos_preds,
            self.all_has_answer_preds,
        )
        self.all_targets = (
            self.all_context[self.ANSWERS_COLUMN],
            self.all_start_pos_targets,
            self.all_end_pos_targets,
            self.all_has_answer_targets,
        )
        self.all_scores = (
            self.all_start_pos_scores,
            self.all_end_pos_scores,
            self.all_has_answer_scores,
        )
        label_predictions = None
        if not self.ignore_impossible:
            label_predictions = [
                LabelPrediction(scores, pred, expect)
                for scores, pred, expect in zip_longest(
                    self.all_has_answer_scores,
                    self.all_has_answer_preds,
                    self.all_has_answer_targets,
                    fillvalue=[],
                )
            ]

        metrics = SquadMetrics(
            exact_matches=100.0 * exact_matches / count,
            f1_score=100.0 * f1_score / count,
            num_examples=count,
            classification_metrics=compute_classification_metrics(
                label_predictions,
                self.has_answer_labels,
                self.calculate_loss(),
            ) if label_predictions else None,
        )
        return metrics
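The pre-processing above groups all candidate rows by example index (row[0]) and keeps, per example, the row with the highest combined start/end position score (row[11] + row[12]) before sorting by example index. A minimal sketch of that grouping-and-argmax pattern on illustrative tuples:

    from collections import defaultdict

    # (row_index, start_score, end_score) -- only the fields the selection uses
    rows = [("1", 0.2, 0.5), ("1", 0.4, 0.6), ("2", 0.9, 0.1)]

    by_example = defaultdict(list)
    for row in rows:
        by_example[row[0]].append(row)

    best = [max(group, key=lambda r: r[1] + r[2]) for group in by_example.values()]
    best.sort(key=lambda r: int(r[0]))  # a bare sorted(...) whose result is discarded would be a no-op
    # best == [("1", 0.4, 0.6), ("2", 0.9, 0.1)]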
Example #8
 def test_compute_roc_auc(self) -> None:
     metrics = compute_classification_metrics(PREDICTIONS2,
                                              LABEL_NAMES2,
                                              loss=5.0)
     self.assertAlmostEqual(metrics.roc_auc, 1.0 / 6)
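As with the MCC test, PREDICTIONS2 is defined elsewhere, so the 1/6 expectation cannot be re-derived here. For reference, ROC AUC can be cross-checked against its rank-based definition (the probability that a randomly chosen positive example is scored above a randomly chosen negative one); a small self-contained sketch on illustrative binary scores:

    def roc_auc(scores, labels):
        # fraction of (positive, negative) pairs ranked correctly; ties count as 0.5
        pos = [s for s, y in zip(scores, labels) if y == 1]
        neg = [s for s, y in zip(scores, labels) if y == 0]
        pairs = [(p, n) for p in pos for n in neg]
        return sum(1.0 if p > n else 0.5 if p == n else 0.0 for p, n in pairs) / len(pairs)

    print(roc_auc([0.9, 0.4, 0.35, 0.8], [1, 0, 0, 1]))  # 1.0: every positive outranks every negative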