Example #1
def eval_f1(self, outputs):
    # Collect gold and predicted label sequences across all batches.
    target_list: StrListList = []
    preds_list: StrListList = []
    for output in outputs:
        golds_batch, preds_batch = self.convert_labels_pair(
            output["target"], output["prediction"])
        target_list.extend(golds_batch)
        preds_list.extend(preds_batch)
    # Overall accuracy plus micro-averaged precision/recall/F1 over entities.
    accuracy = accuracy_score(target_list, preds_list)
    if self.bilou:
        # BILOU-tagged data is scored with the scheme-aware variant.
        precision, recall, f1, support = precision_recall_fscore_support_v1(
            target_list, preds_list, scheme=BILOU, average="micro")
    else:
        precision, recall, f1, support = precision_recall_fscore_support(
            target_list, preds_list, average="micro")
    return accuracy, precision, recall, f1, support
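
A minimal standalone sketch of the same aggregation pattern. It assumes seqeval is installed and that precision_recall_fscore_support is imported from seqeval.metrics.sequence_labeling (the snippet above omits its imports); the batches and labels below are made up for illustration, and convert_labels_pair is replaced by pre-decoded label lists:

from seqeval.metrics import accuracy_score
from seqeval.metrics.sequence_labeling import precision_recall_fscore_support

# Gold and predicted label sequences, pre-split into two "batches".
golds_batches = [
    [["B-PER", "I-PER", "O"], ["O", "B-LOC"]],
    [["B-ORG", "O", "O"]],
]
preds_batches = [
    [["B-PER", "I-PER", "O"], ["O", "O"]],
    [["B-ORG", "O", "O"]],
]

# Flatten the per-batch lists into one list of sequences, as eval_f1 does.
target_list, preds_list = [], []
for golds_batch, preds_batch in zip(golds_batches, preds_batches):
    target_list.extend(golds_batch)
    preds_list.extend(preds_batch)

accuracy = accuracy_score(target_list, preds_list)
precision, recall, f1, _ = precision_recall_fscore_support(
    target_list, preds_list, average="micro")
print(accuracy, precision, recall, f1)
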
Example #2
def recall_score(y_true: List[List[str]], y_pred: List[List[str]],
                 *,
                 average: Optional[str] = 'micro',
                 suffix: bool = False,
                 mode: Optional[str] = None,
                 sample_weight: Optional[List[int]] = None,
                 zero_division: str = 'warn',
                 scheme: Optional[Type[Token]] = None):
    """Compute the recall.

    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
    true positives and ``fn`` the number of false negatives. The recall is
    intuitively the ability of the classifier to find all the positive samples.

    The best value is 1 and the worst value is 0.

    Args:
        y_true : 2d array. Ground truth (correct) target values.

        y_pred : 2d array. Estimated targets as returned by a tagger.

        average : string, [None, 'micro' (default), 'macro', 'weighted']
            If ``None``, the scores for each class are returned. Otherwise, this
            determines the type of averaging performed on the data:
            ``'micro'``:
                Calculate metrics globally by counting the total true positives,
                false negatives and false positives.
            ``'macro'``:
                Calculate metrics for each label, and find their unweighted
                mean.  This does not take label imbalance into account.
            ``'weighted'``:
                Calculate metrics for each label, and find their average weighted
                by support (the number of true instances for each label). This
                alters 'macro' to account for label imbalance; it can result in an
                F-score that is not between precision and recall.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        zero_division : "warn", 0 or 1, default="warn"
            Sets the value to return when there is a zero division:
               - recall: when there are no positive labels
               - precision: when there are no positive predictions
               - f-score: both

            If set to "warn", this acts as 0, but warnings are also raised.

        mode : str, [None (default), 'strict'].
            If ``None``, the score is compatible with conlleval.pl. Otherwise,
            the score is calculated strictly.

        scheme : Token class, [IOB2, IOE2, IOBES].
            The tagging scheme used to extract entities when ``mode`` is
            ``'strict'``; ignored otherwise.

        suffix : bool, False by default.
            Set to True if the entity type follows the chunk tag
            (e.g. ``PER-B`` instead of ``B-PER``).

    Returns:
        score : float, or an array of per-class scores when ``average`` is ``None``.

    Example:
        >>> from seqeval.metrics import recall_score
        >>> y_true = [['O', 'O', 'B-MISC', 'I-MISC', 'B-MISC', 'O', 'O'], ['B-PER', 'I-PER', 'O']]
        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'B-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
        >>> recall_score(y_true, y_pred, average=None)
        array([0.5, 1. ])
        >>> recall_score(y_true, y_pred, average='micro')
        0.6666666666666666
        >>> recall_score(y_true, y_pred, average='macro')
        0.75
        >>> recall_score(y_true, y_pred, average='weighted')
        0.6666666666666666
    """
    if mode == 'strict' and scheme:
        _, r, _, _ = precision_recall_fscore_support_v1(y_true, y_pred,
                                                        average=average,
                                                        warn_for=('recall',),
                                                        sample_weight=sample_weight,
                                                        zero_division=zero_division,
                                                        scheme=scheme,
                                                        suffix=suffix)
    else:
        _, r, _, _ = precision_recall_fscore_support(y_true, y_pred,
                                                     average=average,
                                                     warn_for=('recall',),
                                                     sample_weight=sample_weight,
                                                     zero_division=zero_division,
                                                     suffix=suffix)
    return r
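
For reference, a short usage sketch contrasting the two code paths above, reusing the docstring's example data; importing IOB2 from seqeval.scheme is an assumption here:

from seqeval.metrics import recall_score
from seqeval.scheme import IOB2

y_true = [['O', 'O', 'B-MISC', 'I-MISC', 'B-MISC', 'O', 'O'], ['B-PER', 'I-PER', 'O']]
y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'B-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]

# Default mode: conlleval-compatible scoring (the else branch above).
print(recall_score(y_true, y_pred, average='micro'))
# 0.6666666666666666

# Strict mode: entities must be well-formed under the given scheme
# (the first branch above). Same value for this data, since every
# span is already valid IOB2.
print(recall_score(y_true, y_pred, mode='strict', scheme=IOB2, average='micro'))
# 0.6666666666666666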