Пример #1
0
def _cer_update(
    preds: Union[str, List[str]],
    target: Union[str, List[str]],
) -> Tuple[Tensor, Tensor]:
    """Update the cer score with the current set of references and predictions.

    Args:
        preds: Transcription(s) to score as a string or list of strings
        target: Reference(s) for each speech input as a string or list of strings
    Returns:
        Number of edit operations to get from the reference to the prediction, summed over all samples
        Number of character overall references
    """
    if isinstance(preds, str):
        preds = [preds]
    if isinstance(target, str):
        target = [target]
    errors = tensor(0, dtype=torch.float)
    total = tensor(0, dtype=torch.float)
    for pred, tgt in zip(preds, target):
        pred_tokens = pred
        tgt_tokens = tgt
        errors += _edit_distance(list(pred_tokens), list(tgt_tokens))
        total += len(tgt_tokens)
    return errors, total
Пример #2
0
def _wip_update(
    preds: Union[str, List[str]],
    target: Union[str, List[str]],
) -> Tuple[Tensor, Tensor, Tensor]:
    """Update the wip score with the current set of references and predictions.

    Args:
        preds: Transcription(s) to score as a string or list of strings
        target: Reference(s) for each speech input as a string or list of strings
    Returns:
        Number of edit operations to get from the reference to the prediction, summed over all samples
        Number of words overall references
        Number of words overall prediction
    """
    if isinstance(preds, str):
        preds = [preds]
    if isinstance(target, str):
        target = [target]
    total = tensor(0.0)
    errors = tensor(0.0)
    target_total = tensor(0.0)
    preds_total = tensor(0.0)
    for pred, tgt in zip(preds, target):
        pred_tokens = pred.split()
        target_tokens = tgt.split()
        errors += _edit_distance(pred_tokens, target_tokens)
        target_total += len(target_tokens)
        preds_total += len(pred_tokens)
        total += max(len(target_tokens), len(pred_tokens))

    return errors - total, target_total, preds_total