>>> wer_score = wer.compute(predictions=predictions, references=references) >>> print(wer_score) 0.5 """ class AddSpacesToPunctuation(tr.AbstractTransform): def process_string(self, s: str): return re.sub("(['.,:;?!&])", r" \1 ", s) _transform = tr.Compose( [ AddSpacesToPunctuation(), tr.RemoveMultipleSpaces(), tr.Strip(), tr.SentencesToListOfWords(), tr.RemoveEmptyStrings(), ] ) class WER_punctuation(datasets.Metric): def _info(self): return datasets.MetricInfo( description=_DESCRIPTION, citation=_CITATION, inputs_description=_KWARGS_DESCRIPTION, features=datasets.Features( { "predictions": datasets.Value("string", id="sequence"),
def __init__(self, sentence_delimiter: str = " "): self.sentence_delimiter = sentence_delimiter def process_string(self, s: str): return list(s) def process_list(self, inp: List[str]): chars = [] for sent_idx, sentence in enumerate(inp): chars.extend(self.process_string(sentence)) if self.sentence_delimiter is not None and self.sentence_delimiter != "" and sent_idx < len(inp) - 1: chars.append(self.sentence_delimiter) return chars cer_transform = tr.Compose( [tr.RemoveMultipleSpaces(), tr.Strip(), SentencesToListOfCharacters(SENTENCE_DELIMITER)] ) else: cer_transform = tr.Compose( [ tr.RemoveMultipleSpaces(), tr.Strip(), tr.ReduceToSingleSentence(SENTENCE_DELIMITER), tr.ReduceToListOfListOfChars(), ] ) _CITATION = """\ @inproceedings{inproceedings, author = {Morris, Andrew and Maier, Viktoria and Green, Phil},