Exemplo n.º 1
0
 def test_dictionary_lemmatizer_no_guess(self):
     """Check ``lemmatize`` output when called with ``best_guess=False``.

     The first entry's second element must hold exactly 'nesan' and 'næs'
     (order is irrelevant), and the last entry must pair 'ecean' with an
     empty list.
     """
     sentence = 'Næs him fruma æfre, or geworden, ne nu ende cymþ ecean'
     dictionary_lemmatizer = OldEnglishDictionaryLemmatizer()
     result = dictionary_lemmatizer.lemmatize(sentence, best_guess=False)
     # assertCountEqual compares contents without regard to ordering.
     self.assertCountEqual(result[0][1], ['nesan', 'næs'])
     self.assertEqual(result[-1], ('ecean', []))
Exemplo n.º 2
0
 def test_dictionary_lemmatizer_evaluate(self):
     """Check that ``evaluate`` returns a value above 0.5 for Beowulf.

     The text is read from the user's ``~/cltk_data`` directory, so the
     Old English models corpus must already be present there
     (NOTE(review): presumably installed by an earlier setup step --
     confirm against the test fixture).
     """
     lemmatizer = OldEnglishDictionaryLemmatizer()
     test_file = os.path.expanduser(
         '~/cltk_data/old_english/model/old_english_models_cltk/texts/oe/beowulf.txt'
     )
     coverage = lemmatizer.evaluate(test_file)
     # assertGreater reports the actual coverage value when the check fails,
     # unlike assertTrue on a pre-evaluated comparison.
     self.assertGreater(coverage, 0.5)
Exemplo n.º 3
0
 def test_dictionary_lemmatizer_list(self):
     """Check ``lemmatize`` when given a pre-split list of tokens.

     The expected result is a list of ``(token, lemma)`` tuples, one per
     input token, in input order.
     """
     tokens = 'Him ða Scyld gewat to gescæphwile'.split()
     lemmata = ['he', 'þa', 'scyld', 'gewitan', 'to', 'gescæphwile']
     # Pair every token with its expected lemma before calling lemmatize.
     expected = list(zip(tokens, lemmata))
     dictionary_lemmatizer = OldEnglishDictionaryLemmatizer()
     self.assertEqual(dictionary_lemmatizer.lemmatize(tokens), expected)
Exemplo n.º 4
0
 def test_dictionary_lemmatizer(self):
     """Check ``lemmatize`` on a raw sentence string with default options.

     The expected result is a list of ``(token, lemma)`` tuples; the two
     commas in the sentence appear as their own entries.
     """
     sentence = 'Næs him fruma æfre, or geworden, ne nu ende cymþ ecean'
     expected = [
         ('Næs', 'næs'),
         ('him', 'he'),
         ('fruma', 'fruma'),
         ('æfre', 'æfre'),
         (',', ','),
         ('or', 'or'),
         ('geworden', 'weorþan'),
         (',', ','),
         ('ne', 'ne'),
         ('nu', 'nu'),
         ('ende', 'ende'),
         ('cymþ', 'cuman'),
         ('ecean', 'ecean'),
     ]
     actual = OldEnglishDictionaryLemmatizer().lemmatize(sentence)
     self.assertEqual(actual, expected)
Exemplo n.º 5
0
 def test_dictionary_lemmatizer_frequencies_no_guess(self):
     """Check the frequency value returned alongside a candidate lemma.

     ``lemmatize`` is called with ``return_frequencies=True`` and
     ``best_guess=False``; the second element of the first candidate of
     the first token must be negative.
     """
     lemmatizer = OldEnglishDictionaryLemmatizer()
     test_sentence = 'Him ða Scyld gewat to gescæphwile'
     lemmas = lemmatizer.lemmatize(test_sentence,
                                   return_frequencies=True,
                                   best_guess=False)
     # Log relative frequencies are always less than zero.
     # assertLess shows the offending value if the check fails.
     self.assertLess(lemmas[0][1][0][1], 0)
Exemplo n.º 6
0
 def test_dictionary_lemmatizer(self):
     """Check ``lemmatize`` on a raw sentence string with default options.

     The expected ``(token, lemma)`` tuples are assembled from two parallel
     lists; punctuation marks are listed as tokens of their own.
     """
     forms = ['Næs', 'him', 'fruma', 'æfre', ',', 'or', 'geworden', ',',
              'ne', 'nu', 'ende', 'cymþ', 'ecean']
     lemmata = ['næs', 'he', 'fruma', 'æfre', ',', 'or', 'weorþan', ',',
                'ne', 'nu', 'ende', 'cuman', 'ecean']
     expected = list(zip(forms, lemmata))
     sentence = 'Næs him fruma æfre, or geworden, ne nu ende cymþ ecean'
     dictionary_lemmatizer = OldEnglishDictionaryLemmatizer()
     self.assertEqual(dictionary_lemmatizer.lemmatize(sentence), expected)
Exemplo n.º 7
0
 def test_dictionary_lemmatizer_invalid_input(self):
     """Check that ``lemmatize`` raises ``TypeError`` for the integer 1."""
     dictionary_lemmatizer = OldEnglishDictionaryLemmatizer()
     # Callable form of assertRaises: the method is invoked with argument 1.
     self.assertRaises(TypeError, dictionary_lemmatizer.lemmatize, 1)
Exemplo n.º 8
0
 def test_dictionary_lemmatizer_frequencies_no_guess(self):
     """Check the frequency value returned alongside a candidate lemma.

     With ``return_frequencies=True`` and ``best_guess=False``, the second
     element of the first candidate of the first token must be negative.
     """
     lemmatizer = OldEnglishDictionaryLemmatizer()
     test_sentence = 'Him ða Scyld gewat to gescæphwile'
     lemmas = lemmatizer.lemmatize(test_sentence, return_frequencies=True, best_guess=False)
     # Log relative frequencies are always less than zero.
     # assertLess includes the actual value in the failure message.
     self.assertLess(lemmas[0][1][0][1], 0)
Exemplo n.º 9
0
 def test_dictionary_lemmatizer_evaluate(self):
     """Check that ``evaluate`` returns a value above 0.5 for Beowulf.

     The input file is looked up under the user's ``~/cltk_data``
     directory (NOTE(review): presumably the corpus has to be downloaded
     beforehand -- confirm how the test environment provides it).
     """
     lemmatizer = OldEnglishDictionaryLemmatizer()
     test_file = os.path.expanduser('~/cltk_data/old_english/model/old_english_models_cltk/texts/oe/beowulf.txt')
     coverage = lemmatizer.evaluate(test_file)
     # assertGreater prints the measured coverage on failure, which a bare
     # assertTrue on the comparison result would not.
     self.assertGreater(coverage, 0.5)
Exemplo n.º 10
0
 def test_dictionary_lemmatizer_invalid_input(self):
     """Check that passing the integer 1 to ``lemmatize`` raises ``TypeError``."""
     subject = OldEnglishDictionaryLemmatizer()
     not_text = 1
     with self.assertRaises(TypeError):
         subject.lemmatize(not_text)
Exemplo n.º 11
0
 def test_dictionary_lemmatizer_list(self):
     """Check ``lemmatize`` when the sentence is passed as a token list.

     The sentence is split on whitespace before the call; the expected
     result is one ``(token, lemma)`` tuple per token.
     """
     expected = [
         ('Him', 'he'),
         ('ða', 'þa'),
         ('Scyld', 'scyld'),
         ('gewat', 'gewitan'),
         ('to', 'to'),
         ('gescæphwile', 'gescæphwile'),
     ]
     token_list = 'Him ða Scyld gewat to gescæphwile'.split()
     actual = OldEnglishDictionaryLemmatizer().lemmatize(token_list)
     self.assertEqual(actual, expected)
Exemplo n.º 12
0
 def test_dictionary_lemmatizer_no_guess(self):
     """Check ``lemmatize`` results when ``best_guess`` is disabled.

     Expects 'nesan' and 'næs' (in any order) as the second element of the
     first entry, and ``('ecean', [])`` as the final entry.
     """
     sentence = 'Næs him fruma æfre, or geworden, ne nu ende cymþ ecean'
     analysed = OldEnglishDictionaryLemmatizer().lemmatize(
         sentence,
         best_guess=False,
     )
     # Order of the candidate lemmas is not significant here.
     self.assertCountEqual(analysed[0][1], ['nesan', 'næs'])
     self.assertEqual(analysed[-1], ('ecean', []))