def score(self, target, prediction): """Calculates rouge scores between the target and prediction. Args: target: Text containing the target (ground truth) text. prediction: Text containing the predicted text. Returns: A dict mapping each rouge type to a Score object. Raises: ValueError: If an invalid rouge type is encountered. """ target_tokens = tokenize.tokenize(target, self._stemmer) prediction_tokens = tokenize.tokenize(prediction, self._stemmer) result = {} for rouge_type in self.rouge_types: if rouge_type == "rougeL": # Rouge from longest common subsequences. scores = _score_lcs(target_tokens, prediction_tokens) elif re.match(r"rouge[0-9]$", rouge_type): # Rouge from n-grams. n = int(rouge_type[5:]) if n <= 0: raise ValueError("rougen requires positive n: %s" % rouge_type) target_ngrams = _create_ngrams(target_tokens, n) prediction_ngrams = _create_ngrams(prediction_tokens, n) scores = _score_ngrams(target_ngrams, prediction_ngrams) else: raise ValueError("Invalid rouge type: %s" % rouge_type) result[rouge_type] = scores return result
def score(self, target, prediction): """Calculates rouge scores between the target and prediction. Args: target: Text containing the target (ground truth) text. prediction: Text containing the predicted text. Returns: A dict mapping each rouge type to a Score object. Raises: ValueError: If an invalid rouge type is encountered. """ # Pre-compute target tokens and prediction tokens for use by different # types, except if only "rougeLsum" is requested. if len(self.rouge_types) == 1 and self.rouge_types[0] == "rougeLsum": target_tokens = None prediction_tokens = None else: target_tokens = tokenize.tokenize(target, self._stemmer) prediction_tokens = tokenize.tokenize(prediction, self._stemmer) result = {} for rouge_type in self.rouge_types: if rouge_type == "rougeL": # Rouge from longest common subsequences. scores = _score_lcs(target_tokens, prediction_tokens) elif rouge_type == "rougeLsum": # Note: Does not support multi-line text. def get_sents(text): # Assume sentences are separated by newline. sents = six.ensure_str(text).split("\n") sents = [x for x in sents if len(x)] return sents target_tokens_list = [ tokenize.tokenize(s, self._stemmer) for s in get_sents(target)] prediction_tokens_list = [ tokenize.tokenize(s, self._stemmer) for s in get_sents(prediction)] scores = _summary_level_lcs(target_tokens_list, prediction_tokens_list) elif re.match(r"rouge[0-9]$", six.ensure_str(rouge_type)): # Rouge from n-grams. n = int(rouge_type[5:]) if n <= 0: raise ValueError("rougen requires positive n: %s" % rouge_type) target_ngrams = _create_ngrams(target_tokens, n) prediction_ngrams = _create_ngrams(prediction_tokens, n) scores = _score_ngrams(target_ngrams, prediction_ngrams) else: raise ValueError("Invalid rouge type: %s" % rouge_type) result[rouge_type] = scores return result
def test_give_me_a_name(self):
  # Tokenization lowercases the text and splits on whitespace, including
  # newlines; no stemmer is passed here.
  self.assertEqual(['one', 'two', 'three'],
                   tokenize.tokenize('one Two three', None))
  self.assertEqual(['one', 'two', 'three'],
                   tokenize.tokenize('one\n Two \nthree', None))
def tokenize(self, text):
  return tokenize.tokenize(text, self._stemmer)
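# Context sketch (an assumption; the surrounding class is not shown above):
# this wrapper reads naturally as a method of a tokenizer class that holds an
# optional Porter stemmer and forwards to the module-level helper.
from nltk.stem import porter

from rouge_score import tokenize


class DefaultTokenizer:
  """Forwards to tokenize.tokenize(), optionally applying a stemmer."""

  def __init__(self, use_stemmer=False):
    # When a stemmer is provided, tokenize.tokenize() applies it to the
    # tokens; passing None disables stemming.
    self._stemmer = porter.PorterStemmer() if use_stemmer else None

  def tokenize(self, text):
    return tokenize.tokenize(text, self._stemmer)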