Exemplo n.º 1
0
    def macronize(self, mode='tag_ngram_123_backoff'):
        """Adds macrons (long vowel marks).

        Macrons distinguish long vowels from short. Distinguishing them is
        critical for the study of Latin poetry and occasionally is important
        in prose. Note that once you add macrons, long vowels are, for all
        intents and purposes, different letters than their short equivalents.

        Args:
            mode (:obj:`str`, optional): POS tagging method to use, one of
                'tag_ngram_123_backoff', 'tag_tnt', or 'tag_crf'. The value
                is lower-cased before it is checked.

        Returns:
            :obj:`self.__class__` New text with macrons added to long vowels,
            or ``False`` when ``mode`` is not one of the supported taggers.

        Example:
            >>> text = LatinText('Arma virumque cano, Troiae qui primus ab oris')
            >>> print(text.macronize())
            arma virumque cano , trojae quī prīmus ab ōrīs
        """ # noqa
        mode = mode.lower()
        # Validate first so an unsupported mode is rejected without having to
        # import the macronizer module at all.
        if mode not in ('tag_ngram_123_backoff', 'tag_tnt', 'tag_crf'):
            return False
        # Imported lazily, only once we know the macronizer will be used.
        from cltk.prosody.latin.macronizer import Macronizer
        return self.__class__(
            Macronizer(tagger=mode).macronize_text(self.data),
            self.options
        )
Exemplo n.º 2
0
 def test_retrieve_morpheus_entry(self):
     """Test Macronizer()._retrieve_morpheus_entry()"""
     # Look up the entries for a single word form.
     macronizer = Macronizer("tag_ngram_123_backoff")
     actual = macronizer._retrieve_morpheus_entry("puella")
     # Each expected entry is a (tag, form, macronized form) triple.
     expected = [
         ('n-s---fb-', 'puella', 'puellā'),
         ('n-s---fn-', 'puella', 'puella'),
         ('n-s---fv-', 'puella', 'puella'),
     ]
     self.assertEqual(actual, expected)
Exemplo n.º 3
0
 def test_macronize_tags(self):
     """Test Macronizer().macronize_tags()"""
     sentence = "Quo usque tandem, O Catilina, abutere nostra patientia?"
     macronizer = Macronizer("tag_ngram_123_backoff")
     actual = macronizer.macronize_tags(sentence)
     # One (token, tag, macronized token) triple per token, punctuation
     # included; the final '?' is expected to carry no tag.
     expected = [
         ('quo', 'd--------', 'quō'),
         ('usque', 'd--------', 'usque'),
         ('tandem', 'd--------', 'tandem'),
         (',', 'u--------', ','),
         ('o', 'e--------', 'ō'),
         ('catilina', 'n-s---mb-', 'catilīnā'),
         (',', 'u--------', ','),
         ('abutere', 'v2sfip---', 'abūtēre'),
         ('nostra', 'a-s---fb-', 'nostrā'),
         ('patientia', 'n-s---fn-', 'patientia'),
         ('?', None, '?'),
     ]
     self.assertEqual(actual, expected)
Exemplo n.º 4
0
 def test_macronize_text(self):
     """Test Macronizer().macronize_text()"""
     sentence = "Quo usque tandem, O Catilina, abutere nostra patientia?"
     macronizer = Macronizer("tag_ngram_123_backoff")
     actual = macronizer.macronize_text(sentence)
     # The expected output is lower-cased, with punctuation as separate
     # space-delimited tokens.
     expected = "quō usque tandem , ō catilīnā , abūtēre nostrā patientia ?"
     self.assertEqual(actual, expected)
Exemplo n.º 5
0
 def test_macronize_word(self):
     """Test Macronizer()._macronize_word()"""
     # Input is a (word, tag) pair; the result appends the macronized form.
     tagged_word = ('flumine', 'n-s---nb-')
     macronizer = Macronizer("tag_ngram_123_backoff")
     actual = macronizer._macronize_word(tagged_word)
     expected = ('flumine', 'n-s---nb-', 'flūmine')
     self.assertEqual(actual, expected)
from cltk.stem.latin.syllabifier import Syllabifier
from cltk.tokenize.word import WordTokenizer

# Sample Latin prose used as input for the tokenizer below.
cato_agri_praef = "Est interdum praestare mercaturis rem quaerere, nisi tam periculosum sit, et item foenerari, si tam honestum. Maiores nostri sic habuerunt et ita in legibus posiverunt: furem dupli condemnari, foeneratorem quadrupli. Quanto peiorem civem existimarint foeneratorem quam furem, hinc licet existimare. Et virum bonum quom laudabant, ita laudabant: bonum agricolam bonumque colonum; amplissime laudari existimabatur qui ita laudabatur. Mercatorem autem strenuum studiosumque rei quaerendae existimo, verum, ut supra dixi, periculosum et calamitosum. At ex agricolis et viri fortissimi et milites strenuissimi gignuntur, maximeque pius quaestus stabilissimusque consequitur minimeque invidiosus, minimeque male cogitantes sunt qui in eo studio occupati sunt. Nunc, ut ad rem redeam, quod promisi institutum principium hoc erit."

# Split the passage into word tokens with the Latin tokenizer.
word_tokenizer = WordTokenizer('latin')
cato_word_tokens = word_tokenizer.tokenize(cato_agri_praef)

# Keep every token except the four punctuation marks listed here.
cato_word_tokens_no_punt = list(
    filter(
        lambda token: token not in ('.', ',', ':', ';'),
        cato_word_tokens,
    )
)

# Debugging aid (disabled): show the punctuation-free token list.
# print(cato_word_tokens_no_punt)

syllabifier = Syllabifier()

# Disabled example: syllabify each punctuation-free token and print it.
# for word in cato_word_tokens_no_punt:
#     syllables = syllabifier.syllabify(word)
#     print(word, syllables)

############################################################

# Use the macronizer to add long-vowel marks to a sample sentence.
from cltk.prosody.latin.macronizer import Macronizer

# Sample sentence to macronize.
text = 'Quo usque tandem, O Catilina, abutere nostra patientia?'

# Build a macronizer that uses the 'tag_ngram_123_backoff' tagger.
macronizer = Macronizer('tag_ngram_123_backoff')

# Macronize the sentence and print the result.
prose_text = macronizer.macronize_text(text)
print(prose_text)