def test_modified_precision_empty():
    for k in range(1, 5):
        n, d = modified_precision([[]], [], k)
        assert n == 0 and d == 0
        n, d = modified_precision([[]], [0], k)
        assert n == 0 and d == (k == 1)
        n, d = modified_precision([[0]], [], k)
        assert n == 0 and d == 0
        n, d = modified_precision([[]], list(range(k)), k)
        assert n == 0 and d == 1
        n, d = modified_precision([list(range(k))], [], k)
        assert n == 0 and d == 0
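# A minimal, stand-alone sketch of the contract that ``modified_precision`` is
# assumed to satisfy in the test above: it returns the clipped n-gram match
# count (numerator) and the total number of candidate n-grams (denominator).
# ``modified_precision_sketch`` and ``_ngram_counts`` are hypothetical names
# used only for illustration; they are not the library's implementation.
from collections import Counter
from typing import Any, Sequence, Tuple


def _ngram_counts(sequence: Sequence[Any], n: int) -> Counter:
    # Count every contiguous n-gram of the sequence.
    return Counter(tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1))


def modified_precision_sketch(
    references: Sequence[Sequence[Any]], candidate: Sequence[Any], n: int
) -> Tuple[int, int]:
    counts = _ngram_counts(candidate, n)
    # Clip each candidate n-gram count by its maximum count across the references.
    max_counts: Counter = Counter()
    for reference in references:
        ref_counts = _ngram_counts(reference, n)
        for ngram in counts:
            max_counts[ngram] = max(max_counts[ngram], ref_counts[ngram])
    clipped = {ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()}
    # An empty candidate yields a zero denominator, matching the edge cases above.
    return sum(clipped.values()), sum(counts.values())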
def _n_gram_counter(
    self,
    references: Sequence[Sequence[Sequence[Any]]],
    candidates: Sequence[Sequence[Any]],
    p_numerators: torch.Tensor,
    p_denominators: torch.Tensor,
) -> Tuple[int, int]:
    if len(references) != len(candidates):
        raise ValueError(
            f"nb of candidates should be equal to nb of reference lists ({len(candidates)} != "
            f"{len(references)})"
        )

    hyp_lengths = 0
    ref_lengths = 0

    # Iterate through each hypothesis and its corresponding references.
    for refs, hyp in zip(references, candidates):
        # For each order of n-gram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i in range(1, self.ngrams_order + 1):
            numerator, denominator = modified_precision(refs, hyp, i)
            p_numerators[i] += numerator
            p_denominators[i] += denominator

        # Accumulate the hypothesis lengths.
        hyp_lengths += len(hyp)

        # Accumulate the closest reference lengths.
        ref_lengths += _closest_ref_length(refs, len(hyp))

    return hyp_lengths, ref_lengths
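# A small sketch of the behaviour assumed for ``_closest_ref_length`` above:
# among all reference lengths, pick the one closest to the hypothesis length,
# breaking ties in favour of the shorter reference (the usual corpus-BLEU
# convention). ``closest_ref_length_sketch`` is a hypothetical stand-in, not
# the library's helper.
from typing import Any, Sequence


def closest_ref_length_sketch(references: Sequence[Sequence[Any]], hyp_len: int) -> int:
    ref_lens = (len(reference) for reference in references)
    return min(ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len))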
def _corpus_bleu(
    self,
    references: Sequence[Sequence[Any]],
    candidates: Sequence[Sequence[Any]],
) -> float:
    p_numerators: Counter = Counter()
    p_denominators: Counter = Counter()

    if len(references) != len(candidates):
        raise ValueError(
            f"nb of candidates should be equal to nb of reference lists ({len(candidates)} != "
            f"{len(references)})"
        )

    # Iterate through each hypothesis and its corresponding references.
    for refs, hyp in zip(references, candidates):
        # For each order of n-gram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i in range(1, self.ngrams_order + 1):
            numerator, denominator = modified_precision(refs, hyp, i)
            p_numerators[i] += numerator
            p_denominators[i] += denominator

    # Return 0 if there are no matching n-grams.
    # We only need to check p_numerators[1] == 0: if there are no matching
    # unigrams, there cannot be any matching higher-order n-grams.
    if p_numerators[1] == 0:
        return 0

    # Without smoothing, return 0 if at least one n-gram order has no matches.
    if self.smoother.smooth == "no_smooth" and min(p_numerators.values()) == 0:
        return 0

    # Calculate the hypothesis lengths.
    hyp_lengths = [len(hyp) for hyp in candidates]

    # Calculate the closest reference lengths.
    ref_lengths = [
        _closest_ref_length(refs, hyp_len) for refs, hyp_len in zip(references, hyp_lengths)
    ]

    # Sum of hypothesis and reference lengths.
    hyp_len = sum(hyp_lengths)
    ref_len = sum(ref_lengths)

    # Calculate the corpus-level brevity penalty.
    if hyp_len < ref_len:
        bp = math.exp(1 - ref_len / hyp_len) if hyp_len > 0 else 0.0
    else:
        bp = 1.0

    # Smoothing.
    p_n = self.smoother(p_numerators, p_denominators)

    # Compute the weighted geometric mean of the modified precisions.
    s = [w_i * math.log(p_i) for w_i, p_i in zip(self.weights, p_n)]
    gm = bp * math.exp(math.fsum(s))
    return gm
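# A hand-worked example of the final combination step in ``_corpus_bleu``
# above: brevity penalty times the exponentiated weighted sum of log
# precisions. All numbers here are illustrative assumptions, not outputs of
# the metric.
import math

weights = [0.25, 0.25, 0.25, 0.25]   # uniform weights for orders 1..4
p_n = [0.75, 0.5, 0.3, 0.2]          # hypothetical smoothed precisions p_1..p_4
hyp_len, ref_len = 9, 10             # corpus-level hypothesis / reference lengths

bp = math.exp(1 - ref_len / hyp_len) if hyp_len < ref_len else 1.0
score = bp * math.exp(math.fsum(w * math.log(p) for w, p in zip(weights, p_n)))
print(round(score, 4))  # ≈ 0.3466 with these illustrative numbers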
def test_modified_precision(references, candidate, expected):
    for n, (expected_numerator, expected_denominator) in enumerate(expected, start=1):
        numerator, denominator = modified_precision(references, candidate, n)
        assert numerator == expected_numerator and denominator == expected_denominator