Example #1
 def compute_metrics(pred_labels) -> Dict:
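     # `self`, `is_regression`, and `metric` come from the enclosing scope;
     # the per-batch (preds, labels) pairs are merged into single arrays before scoring.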
     preds, labels = zip(*pred_labels)
     preds = utils.expand_like(preds)
     labels = utils.expand_like(labels)
     preds = np.squeeze(preds) if is_regression else np.argmax(preds,
                                                               axis=1)
     if self.hparams.finetuning_task is not None:
         result = metric.compute(predictions=preds, references=labels)
         if len(result) > 1:
             result["combined_score"] = np.mean(list(
                 result.values())).item()
         return result
     elif is_regression:
         return {"mse": ((preds - labels)**2).mean().item()}
     else:
         return {
             "accuracy":
             (preds == labels).astype(np.float32).mean().item()
         }
Example #2
def compute_metrics(label_list: List[Any], pred_labels: List[Any]) -> Dict:
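    # Token-classification metrics: map predicted class indices back to label
    # strings, skipping positions labeled -100 (special tokens).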
    predictions, labels = zip(*pred_labels)
    predictions = utils.expand_like(predictions)
    predictions = np.argmax(predictions, axis=2)

    labels = utils.expand_like(labels)

    # Remove ignored index (special tokens)
    true_predictions = [[
        label_list[pr] for (pr, la) in zip(prediction, label) if la != -100
    ] for prediction, label in zip(predictions, labels)]
    true_labels = [[
        label_list[la] for (pr, la) in zip(prediction, label) if la != -100
    ] for prediction, label in zip(predictions, labels)]

    return {
        "accuracy_score": seq_metrics.accuracy_score(true_labels,
                                                     true_predictions),
        "precision": seq_metrics.precision_score(true_labels,
                                                 true_predictions),
        "recall": seq_metrics.recall_score(true_labels, true_predictions),
        "f1": seq_metrics.f1_score(true_labels, true_predictions),
    }
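
A quick toy check of the -100 filtering used above (the values and names here
are illustrative only, not taken from the source):

label_list = ["O", "B-PER"]
predictions = [[0, 1, 1]]   # class indices after argmax
labels = [[0, 1, -100]]     # -100 marks a special-token position to skip
true_predictions = [[label_list[pr] for (pr, la) in zip(prediction, label) if la != -100]
                    for prediction, label in zip(predictions, labels)]
assert true_predictions == [["O", "B-PER"]]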
Example #3
def compute_metrics(
    data_config,
    column_names,
    post_processing_function,
    raw_datasets,
    tokenized_datasets,
    model,
    metric,
    predictions,
):
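    # Sort the gathered predictions back into dataset order, run the task's
    # post-processing on the validation split, then score with `metric`.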
    inds, predictions = zip(*predictions)
    inds = np.hstack(inds)
    sorted_inds = np.argsort(inds)
    predictions = zip(*predictions)
    predictions = [utils.expand_like(p) for p in predictions]
    predictions = [p[sorted_inds] for p in predictions]

    # We need to add back in columns needed for validation.
    tokenized_datasets["validation"].set_format(
        type=tokenized_datasets["validation"].format["type"],
        columns=list(tokenized_datasets["validation"].features.keys()),
    )
    output = post_processing_function(
        examples=raw_datasets["validation"],
        features=tokenized_datasets["validation"],
        predictions=predictions,
        data_args=data_config,
        column_names=column_names,
        prefix="eval",
        model=model,
    )
    result = metric.compute(predictions=output.predictions,
                            references=output.label_ids)
    # Then remove them again so that data collation doesn't break.
    hf.remove_unused_columns(model, tokenized_datasets["validation"])
    return result
Example #4
 def compute_metrics(predictions):
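     # Same flow as Example #3, written against attributes on `self`;
     # `metric` is captured from the enclosing scope.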
     predictions = zip(*predictions)
     predictions = [utils.expand_like(p) for p in predictions]
     # We need to add back in columns needed for validation.
     self.tokenized_datasets["validation"].set_format(
         type=self.tokenized_datasets["validation"].format["type"],
         columns=list(
             self.tokenized_datasets["validation"].features.keys()),
     )
     output = self.data_processors.post_processing_function(
         examples=self.raw_datasets["validation"],
         features=self.tokenized_datasets["validation"],
         predictions=predictions,
         data_args=self.data_config,
         column_names=self.column_names,
         prefix="eval",
         model=self.model,
     )
     result = metric.compute(predictions=output.predictions,
                             references=output.label_ids)
     # Then remove them again so that data collation doesn't break.
     hf.remove_unused_columns(self.model,
                              self.tokenized_datasets["validation"])
     return result
Example #5
 def compute_metrics(pred_labels) -> Dict:
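     # `metric` comes from the enclosing scope: take the argmax over the class
     # scores and delegate scoring to metric.compute.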
     preds, labels = zip(*pred_labels)
     preds = utils.expand_like(preds)
     labels = utils.expand_like(labels)
     preds = np.argmax(preds, axis=1)
     return metric.compute(predictions=preds, references=labels)
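
If `metric` is a Hugging Face evaluate/datasets metric (an assumption; the
snippet does not show how it is loaded), the compute call returns a plain
dict, for example:

import numpy as np
import evaluate

metric = evaluate.load("accuracy")  # assumed metric type, for illustration
logits = np.array([[0.1, 0.9], [0.8, 0.2]])
preds = np.argmax(logits, axis=1)  # -> array([1, 0])
print(metric.compute(predictions=preds, references=[1, 1]))  # {'accuracy': 0.5}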
Example #6
def test_expand_like() -> None:
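    # expand_like should right-pad the narrower array with -100 and stack the
    # inputs row-wise.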
    array_list = [np.array([[1, 2], [3, 4]]), np.array([[2, 3, 4], [3, 4, 5]])]
    result = utils.expand_like(array_list)
    assert np.array_equal(
        result, np.array([[1, 2, -100], [3, 4, -100], [2, 3, 4], [3, 4, 5]]))
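
The assertion above pins down the padding behavior: each 2-D array is
right-padded with -100 to the width of the widest input, and the results are
stacked row-wise. A minimal sketch of such a helper, inferred from this test
rather than taken from the actual utils.expand_like source:

from typing import List

import numpy as np


def expand_like_sketch(arrays: List[np.ndarray], fill_value: int = -100) -> np.ndarray:
    # Right-pad each array along axis 1 to the widest input, then stack along axis 0.
    max_len = max(a.shape[1] for a in arrays)
    padded = [
        np.pad(a, ((0, 0), (0, max_len - a.shape[1])), constant_values=fill_value)
        for a in arrays
    ]
    return np.concatenate(padded, axis=0)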