def test_bleu_4(self):
    """Check ``Bleu(4)`` against two published reference scores.

    The sentences and the expected values are the example from the README
    of https://github.com/neural-dialogue-metrics/Bleu.
    """

    def tokenize(sentence):
        # Lower-case, then split on single spaces.
        return sentence.lower().split(' ')

    hypotheses = [
        tokenize('It is to insure the troops forever hearing the activity guidebook that party direct'),
        tokenize('It is a guide to action which ensures that the military always obeys the commands of the party'),
    ]
    reference_sentences = [
        tokenize('It is a guide to action that ensures that the military will forever heed Party commands'),
        tokenize('It is the guiding principle which guarantees the military forces always being under the command of the Party'),
        tokenize('It is the practical guide for the army always to heed the directions of the party'),
    ]
    targets = [0.1327211341271203, 0.5401725898595141]

    for hypothesis, target in zip(hypotheses, targets):
        # The vocabulary is rebuilt for every candidate: candidate words
        # first, then the words of all references.
        vocab = add_to_vocabulary(reference_sentences,
                                  add_to_vocabulary(hypothesis))
        word_to_cls, _ = build_conversions_tables(vocab)

        hypothesis_tensor = sentence_to_tensor(hypothesis, word_to_cls)
        reference_tensors = []
        for sentence in reference_sentences:
            reference_tensors.append(sentence_to_tensor(sentence, word_to_cls))

        metric = Bleu(4)
        # The metric is called with a batch of one candidate and its
        # list of references.
        result = metric([hypothesis_tensor], [reference_tensors]).item()
        self.assertAlmostEqual(result, target)
def test_tf_idf(self):
    """Check ``TFIDF`` scores of the word 'qui' over three French documents.

    Example from https://fr.wikipedia.org/wiki/TF-IDF. The expected values
    assume that 'qui' occurs twice among the 38 tokens of the first
    document, never in the second one and once among the 40 tokens of the
    third one, i.e. in 2 of the 3 documents (idf = log(3 / 2)).
    """
    # Remove '.', ',', '!' and ';' and turn the typographic apostrophe into
    # a space, so that e.g. "l’arc" yields the two tokens "l" and "arc".
    punctuation_table = str.maketrans({
        '.': None,
        ',': None,
        '!': None,
        ';': None,
        '’': ' ',
    })

    def tokenize(text):
        # split() without an argument collapses the runs of spaces left
        # behind by removed punctuation ("Ah ! le" -> "ah  le"); splitting
        # on ' ' would produce empty tokens there and inflate the document
        # lengths that the expected term frequencies rely on.
        return text.lower().translate(punctuation_table).split()

    tf_idf = TFIDF()
    word = 'qui'
    documents = [
        tokenize(
            'Son nom est célébré par le bocage qui frémit, et par le ruisseau qui murmure, les vents l’emportent '
            'jusqu’à l’arc céleste, l’arc de grâce et de consolation que sa main tendit dans les nuages.'
        ),
        tokenize(
            'À peine distinguait-on deux buts à l’extrémité de la carrière : des chênes ombrageaient l’un, autour de '
            'l’autre des palmiers se dessinaient dans l’éclat du soir.'
        ),
        tokenize(
            'Ah ! le beau temps de mes travaux poétiques ! les beaux jours que j’ai passés près de toi ! Les premiers, '
            'inépuisables de joie, de paix et de liberté ; les derniers, empreints d’une mélancolie qui eut bien aussi '
            'ses charmes.'
        ),
    ]

    vocabulary = add_to_vocabulary(word)
    vocabulary = add_to_vocabulary(documents, vocabulary)
    word_to_cls_table, _ = build_conversions_tables(vocabulary)

    word_tensor = sentence_to_tensor([word], word_to_cls_table)
    document_tensors = [
        sentence_to_tensor(document, word_to_cls_table)
        for document in documents
    ]
    scores = tf_idf(word_tensor, document_tensors)

    idf = torch.scalar_tensor(3 / 2)
    expected = torch.as_tensor([
        2 / 38 * idf.log(),
        0.0,
        1 / 40 * idf.log(),
    ])
    # Compare with a tolerance: exact float equality depends on the order
    # of operations inside the implementation. The cast keeps allclose
    # from rejecting a mere dtype difference.
    self.assertTrue(torch.allclose(scores, expected.to(scores.dtype)))
def test_2(self):
    """Check that ``Bleu(1)`` scores 1.0 when the candidate is a reference.

    The candidate is itself the first of the two references, and the
    expected score is 1.0.
    """
    candidate = 'a b c d e f'.split(' ')
    # Every reference must be a list of tokens, like the candidate; the
    # second one used to be passed as a raw, unsplit string.
    references = [candidate, 'a g b e d'.split(' ')]

    vocabulary = add_to_vocabulary(candidate)
    vocabulary = add_to_vocabulary(references, vocabulary)
    word_to_cls_table, _ = build_conversions_tables(vocabulary)

    candidate_tensor = sentence_to_tensor(candidate, word_to_cls_table)
    reference_tensors = [
        sentence_to_tensor(reference, word_to_cls_table)
        for reference in references
    ]

    bleu_metric = Bleu(1)
    expected = 1.0
    # The metric is called with a batch of one candidate and its list of
    # references.
    score = bleu_metric([candidate_tensor], [reference_tensors]).item()
    self.assertAlmostEqual(score, expected)
def test_1(self):
    """Check ``Bleu(1)`` on the degenerate "the the the ..." candidate.

    Example from the original paper,
    https://www.aclweb.org/anthology/P02-1040.pdf: the candidate is the
    word 'the' repeated seven times and the expected score is 2 / 7.
    """
    hypothesis = 7 * ['the']
    raw_references = (
        'The cat is on the mat',
        'There is a cat on the mat',
    )
    reference_sentences = [
        raw.lower().split(' ') for raw in raw_references
    ]

    # Candidate words are added first, then the words of the references.
    vocab = add_to_vocabulary(reference_sentences,
                              add_to_vocabulary(hypothesis))
    word_to_cls, _ = build_conversions_tables(vocab)

    hypothesis_tensor = sentence_to_tensor(hypothesis, word_to_cls)
    reference_tensors = []
    for sentence in reference_sentences:
        reference_tensors.append(sentence_to_tensor(sentence, word_to_cls))

    metric = Bleu(1)
    result = metric([hypothesis_tensor], [reference_tensors]).item()
    self.assertAlmostEqual(result, 2.0 / 7.0)
def test_3(self):
    """Check ``Bleu(1)`` on a single candidate with three references.

    The expected score is 17 / 18.

    A second candidate ('It is to insure the troops forever hearing the
    activity guidebook that party direct') is deliberately left out: the
    value 8 / 14 known for it is its precision without the brevity
    penalty, so it is not the score to compare against.
    """

    def tokenize(sentence):
        # Lower-case, then split on single spaces.
        return sentence.lower().split(' ')

    hypotheses = [
        tokenize('It is a guide to action which ensures that the military always obeys the commands of the party'),
    ]
    reference_sentences = [
        tokenize('It is a guide to action that ensures that the military will forever heed Party commands'),
        tokenize('It is the guiding principle which guarantees the military forces always being under the command of the Party'),
        tokenize('It is the practical guide for the army always to heed the directions of the party'),
    ]
    targets = [
        17 / 18,
    ]

    # One metric instance is shared by all candidates.
    metric = Bleu(1)
    for hypothesis, target in zip(hypotheses, targets):
        vocab = add_to_vocabulary(reference_sentences,
                                  add_to_vocabulary(hypothesis))
        word_to_cls, _ = build_conversions_tables(vocab)

        hypothesis_tensor = sentence_to_tensor(hypothesis, word_to_cls)
        reference_tensors = []
        for sentence in reference_sentences:
            reference_tensors.append(sentence_to_tensor(sentence, word_to_cls))

        result = metric([hypothesis_tensor], [reference_tensors]).item()
        self.assertAlmostEqual(result, target)