def test_fmeasure_error(self):
  # hyp and ref must segment the same character sequence; here they differ
  # ("aaab" vs. "aaa"), so the evaluator raises.
  with self.assertRaises(Exception) as context:
    metrics.SegmentationFMeasureEvaluator().evaluate_one_sent(
      "aaa b".split(), "aaa".split())
def read_data(loc_, post_process=None):
  """Reads the lines in the file specified by ``loc_``, optionally applying ``post_process`` to each."""
  data = []
  with open(loc_) as fp:
    for line in fp:
      t = line.strip()
      if post_process is not None:
        t = post_process(t)
      data.append(t)
  return data

eval_shortcuts = {
  "bleu": lambda: metrics.BLEUEvaluator(),
  "gleu": lambda: metrics.GLEUEvaluator(),
  "wer": lambda: metrics.WEREvaluator(),
  "cer": lambda: metrics.CEREvaluator(),
  "recall": lambda: metrics.RecallEvaluator(),
  "accuracy": lambda: metrics.SequenceAccuracyEvaluator(),
  "fmeasure": lambda: metrics.FMeasureEvaluator(),
  "seg_fmeasure": lambda: metrics.SegmentationFMeasureEvaluator(),
  "rnng_parse_fmeasure": lambda: metrics.RNNGParseFMeasureEvaluator(),
}

def xnmt_evaluate(ref_file: Union[str, Sequence[str]],
                  hyp_file: Union[str, Sequence[str]],
                  evaluators: Sequence[metrics.Evaluator],
                  desc: Any = None) -> Sequence[metrics.EvalScore]:
  """Returns the eval scores (e.g. BLEU) of the hyp sents against the reference trg sents.

  Args:
    ref_file: path of the reference file
    hyp_file: path of the hypothesis trg file
    evaluators: Evaluation metrics. Can be a list of evaluator objects, or a shortcut string.
    desc: descriptive string passed on to the evaluators

  Returns:
    A sequence of evaluation scores, one per evaluator.
  """
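# A minimal usage sketch (the file names "ref.txt"/"hyp.txt" are hypothetical,
# and routing shortcut strings through eval_shortcuts is an assumption, not
# something shown in this extract):
#
#   evaluators = [eval_shortcuts["bleu"](), eval_shortcuts["wer"]()]
#   scores = xnmt_evaluate("ref.txt", "hyp.txt", evaluators)
#   for score in scores:
#     print(score)  # each score is a metrics.EvalScore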
def test_fmeasure(self):
  self.assertEqual(
    metrics.SegmentationFMeasureEvaluator().evaluate_one_sent(
      "ab c def".split(), "a bc def".split()).value(),
    0.80)
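# Worked arithmetic behind the expected 0.80, under one plausible char-level
# tagging scheme (an assumption; the evaluator's exact bookkeeping may differ):
# both sides cover the characters "abcdef"; tag each char 1 if it ends a
# segment, else 0:
#   hyp "ab c def" -> [0, 1, 1, 0, 0, 1]
#   ref "a bc def" -> [1, 0, 1, 0, 0, 1]
# Counting the 4 agreeing positions as true positives, the hyp-only boundary
# (index 1) as a false positive, and the ref-only boundary (index 0) as a
# false negative gives F1 = 2*4 / (2*4 + 1 + 1) = 0.80.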