class OldNorseSyllabifier:
    """
    Syllabifier for Old Norse.

    Wraps a generic ``Syllabifier`` created with ``language="non"`` and
    ``break_geminants=True``, on which ``ons.invalid_onsets`` is registered.
    Instances are callable: ``syllabifier(word)`` is the same as
    ``syllabifier.syllabify(word)``.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="non", break_geminants=True)
        self.syllabifier.set_invalid_onsets(ons.invalid_onsets)

    def syllabify(self, word: str) -> List[str]:
        """
        >>> non_syllabifier = OldNorseSyllabifier()
        >>> non_syllabifier.syllabify('Miðgarðr'.lower())
        ['mið', 'garðr']

        :param word: word to syllabify
        :return: syllabified word
        """
        return self.syllabifier.syllabify(word)

    def __repr__(self):
        # Previously reported "<OldNorseScanner>", which named a different
        # class; the repr now identifies this class.
        return "<OldNorseSyllabifier>"

    def __call__(self, word: str) -> List[str]:
        """
        Shortcut for :meth:`syllabify`.

        :param word: word to syllabify
        :return: syllabified word
        """
        return self.syllabify(word)
def syllabify(self):
    """
    Syllabify every word of the imported text.

    Syllables may play a role in verse classification.

    The result is stored in ``self.syllabified_text``, nested in the same
    way as ``self.long_lines``: one list per long line, one entry per short
    line, each entry being a one-element list that wraps the list of
    syllabified words. When no text has been imported an error is logged and
    ``self.syllabified_text`` is set to an empty list.

    :return: None
    """
    if not self.long_lines:
        logger.error("No text was imported")
        self.syllabified_text = []
        return

    syllabifier = Syllabifier(language="old_norse", break_geminants=True)
    # punctuation is not necessary here: a single translate() pass removes
    # every punctuation character at once.
    strip_punctuation = str.maketrans("", "", ",.;!?-:")

    syllabified_text = []
    for line in self.long_lines:
        syllabified_line = []
        for viisuordh in line:
            words = []
            for token in tokenize_old_norse_words(viisuordh):
                word = token.translate(strip_punctuation)
                # tokens made only of punctuation are dropped
                if word != "":
                    words.append(syllabifier.syllabify(word.lower()))
            # the words of a short line are kept wrapped in an extra list
            syllabified_line.append([words])
        syllabified_text.append(syllabified_line)
    self.syllabified_text = syllabified_text
def test_syllable_length_1(self):
    """Check the measured length of each syllable of a phoneme-level word."""
    ipa_syllabifier = Syllabifier(language="old_norse_ipa")
    # asgarðr (normally it is ásgarðr)
    phonemes = [ont.a, ont.s, ont.g, ont.a, ont.r, ont.dh, ont.r]
    measured = [
        ont.measure_old_norse_syllable(syllable)
        for syllable in ipa_syllabifier.syllabify_phonemes(phonemes)
    ]
    self.assertListEqual(measured, [ut.Length.short, ut.Length.long])
def test_middle_high_german_syllabification(self):
    """
    Test Middle High German syllabification
    """
    syllabifier = Syllabifier(language='middle_high_german')
    self.assertEqual(
        syllabifier.syllabify('lobebæren'),
        ['lo', 'be', 'bæ', 'ren'],
    )
def test_syllabification_old_norse(self):
    """Syllabify a tokenized stanza with SSP followed by the legal-onset pass."""
    syllabifier = Syllabifier(language="old_norse", break_geminants=True)
    text = (
        "Gefjun dró frá Gylfa glöð djúpröðul óðla, svá at af rennirauknum rauk, Danmarkar auka. Báru öxn ok átta"
        " ennitungl, þars gengu fyrir vineyjar víðri valrauf, fjögur höfuð."
    )
    tokens = tokenize_old_norse_words(text)

    syllabified_words = []
    for token in tokens:
        # punctuation tokens are not syllabified
        if token in ",.":
            continue
        by_sonority = syllabifier.syllabify_SSP(token.lower())
        syllabified_words.append(syllabifier.legal_onsets(by_sonority, invalid_onsets))

    expected = [
        ['gef', 'jun'], ['dró'], ['frá'], ['gyl', 'fa'], ['glöð'],
        ['djúp', 'rö', 'ðul'], ['óðl', 'a'], ['svá'], ['at'], ['af'],
        ['ren', 'ni', 'rauk', 'num'], ['rauk'], ['dan', 'mar', 'kar'],
        ['auk', 'a'], ['bár', 'u'], ['öxn'], ['ok'], ['át', 'ta'],
        ['en', 'ni', 'tungl'], ['þars'], ['geng', 'u'], ['fy', 'rir'],
        ['vi', 'ney', 'jar'], ['víðr', 'i'], ['val', 'rauf'],
        ['fjö', 'gur'], ['hö', 'fuð'],
    ]
    self.assertListEqual(syllabified_words, expected)
def test_syllabification_old_norse(self):
    """Syllabify a tokenized stanza with ``syllabify_ssp`` after registering invalid onsets."""
    syllabifier = Syllabifier(language="non", break_geminants=True)
    text = (
        "Gefjun dró frá Gylfa glöð djúpröðul óðla, svá at af rennirauknum rauk, "
        "Danmarkar auka. Báru öxn ok átta ennitungl, þars gengu fyrir vineyjar "
        "víðri valrauf, fjögur höfuð."
    )
    tokens = OldNorseWordTokenizer().tokenize(text)
    syllabifier.set_invalid_onsets(invalid_onsets)

    syllabified_words = []
    for token in tokens:
        # punctuation tokens are not syllabified
        if token in ",.":
            continue
        syllabified_words.append(syllabifier.syllabify_ssp(token.lower()))

    expected = [
        ["gef", "jun"], ["dró"], ["frá"], ["gyl", "fa"], ["glöð"],
        ["djúp", "rö", "ðul"], ["óðl", "a"], ["svá"], ["at"], ["af"],
        ["ren", "ni", "rauk", "num"], ["rauk"], ["dan", "mar", "kar"],
        ["auk", "a"], ["bár", "u"], ["öxn"], ["ok"], ["át", "ta"],
        ["en", "ni", "tungl"], ["þars"], ["geng", "u"], ["fy", "rir"],
        ["vi", "ney", "jar"], ["víðr", "i"], ["val", "rauf"],
        ["fjö", "gur"], ["hö", "fuð"],
    ]
    self.assertListEqual(syllabified_words, expected)
def syllabify(self, hierarchy):
    """
    Syllabify every line of the text and store the result in
    ``self.syllabified_text`` (one list per long line, holding the
    ``syllabified`` value of each of its lines).

    Syllables may play a role in verse classification.

    :param hierarchy: hierarchy handed to the syllabifier's ``set_hierarchy``
    :return: None
    """
    # Nothing to do without text: report it and leave an empty result.
    if len(self.long_lines) == 0:
        logger.error("No text was imported")
        self.syllabified_text = []
        return

    syllabifier = Syllabifier(language="old_norse", break_geminants=True)
    syllabifier.set_hierarchy(hierarchy)

    result = []
    for long_line in self.long_lines:
        syllabified_line = []
        for short_line in long_line:
            assert isinstance(short_line, (ShortLine, LongLine))
            # each line object syllabifies itself and exposes `syllabified`
            short_line.syllabify(syllabifier)
            syllabified_line.append(short_line.syllabified)
        result.append(syllabified_line)
    self.syllabified_text = result
def syllabify(self, hierarchy: Dict[str, int]):
    """
    Syllabify each short line and collect the results in
    ``self.syllabified_text``.

    >>> stanza = "Ein sat hon úti,\\nþá er inn aldni kom\\nyggjungr ása\\nok í augu leit.\\nHvers fregnið mik?\\nHví freistið mín?\\nAllt veit ek, Óðinn,\\nhvar þú auga falt,\\ní inum mæra\\nMímisbrunni.\\nDrekkr mjöð Mímir\\nmorgun hverjan\\naf veði Valföðrs.\\nVituð ér enn - eða hvat?"
    >>> us = UnspecifiedStanza()
    >>> us.from_short_lines_text(stanza)
    >>> us.syllabify(old_norse_syllabifier.hierarchy)
    >>> us.syllabified_text
    [[['ein'], ['sat'], ['hon'], ['út', 'i']], [['þá'], ['er'], ['inn'], ['al', 'dni'], ['kom']], [['yg', 'gjungr'], ['ás', 'a']], [['ok'], ['í'], ['aug', 'u'], ['leit']], [['hvers'], ['freg', 'nið'], ['mik']], [['hví'], ['freis', 'tið'], ['mín']], [['allt'], ['veit'], ['ek'], ['ó', 'ðinn']], [['hvar'], ['þú'], ['aug', 'a'], ['falt']], [['í'], ['i', 'num'], ['mær', 'a']], [['mí', 'mis', 'brun', 'ni']], [['drekkr'], ['mjöð'], ['mí', 'mir']], [['mor', 'gun'], ['hver', 'jan']], [['af'], ['veð', 'i'], ['val', 'föðrs']], [['vi', 'tuð'], ['ér'], ['enn'], ['eð', 'a'], ['hvat']]]

    :param hierarchy: phonetic hierarchy
    :return:
    """
    # One syllabifier, configured once, is shared by all short lines.
    non_syllabifier = Syllabifier(language="non", break_geminants=True)
    non_syllabifier.set_hierarchy(hierarchy)

    collected = []
    for line in self.short_lines:
        assert isinstance(line, ShortLine)
        # the line syllabifies itself and exposes the result as `syllabified`
        line.syllabify(non_syllabifier)
        collected.append(line.syllabified)
    self.syllabified_text = collected
def syllabify(self, hierarchy):
    """
    Syllabify each short line and collect the results in
    ``self.syllabified_text``.

    >>> stanza = "Ein sat hon úti,\\nþá er inn aldni kom\\nyggjungr ása\\nok í augu leit.\\nHvers fregnið mik?\\nHví freistið mín?\\nAllt veit ek, Óðinn,\\nhvar þú auga falt,\\ní inum mæra\\nMímisbrunni.\\nDrekkr mjöð Mímir\\nmorgun hverjan\\naf veði Valföðrs.\\nVituð ér enn - eða hvat?"
    >>> us = UnspecifiedStanza()
    >>> us.from_short_lines_text(stanza)
    >>> us.syllabify(old_norse_syllabifier.hierarchy)
    >>> us.syllabified_text
    [[['ein'], ['sat'], ['hon'], ['út', 'i']], [['þá'], ['er'], ['inn'], ['al', 'dni'], ['kom']], [['yg', 'gjungr'], ['ás', 'a']], [['ok'], ['í'], ['aug', 'u'], ['leit']], [['hvers'], ['freg', 'nið'], ['mik']], [['hví'], ['freis', 'tið'], ['mín']], [['allt'], ['veit'], ['ek'], ['ó', 'ðinn']], [['hvar'], ['þú'], ['aug', 'a'], ['falt']], [['í'], ['i', 'num'], ['mær', 'a']], [['mí', 'mis', 'brun', 'ni']], [['drekkr'], ['mjöð'], ['mí', 'mir']], [['mor', 'gun'], ['hver', 'jan']], [['af'], ['veð', 'i'], ['val', 'föðrs']], [['vi', 'tuð'], ['ér'], ['enn'], ['eð', 'a'], ['hvat']]]

    :param hierarchy: hierarchy handed to the syllabifier's ``set_hierarchy``
    :return:
    """
    # One syllabifier, configured once, is shared by all short lines.
    norse_syllabifier = Syllabifier(language="old_norse", break_geminants=True)
    norse_syllabifier.set_hierarchy(hierarchy)

    collected = []
    for line in self.short_lines:
        assert isinstance(line, ShortLine)
        # the line syllabifies itself and exposes the result as `syllabified`
        line.syllabify(norse_syllabifier)
        collected.append(line.syllabified)
    self.syllabified_text = collected
def test_syllabify_phonemes(self):
    """Syllabify a list of phoneme objects with a hand-configured syllabifier."""
    vowels = ["a", "ɛ", "i", "ɔ", "ɒ", "ø", "u", "y", "œ", "e", "o", "j"]
    ipa_hierarchy = [
        vowels,
        ["r"],
        ["l"],
        ["m", "n"],
        ["f", "v", "θ", "ð", "s", "h"],
        ["b", "d", "g", "k", "p", "t"],
    ]

    # No language given: hierarchy and vowels are supplied explicitly.
    syllabifier = Syllabifier()
    syllabifier.set_hierarchy(ipa_hierarchy)
    syllabifier.set_vowels(vowels)

    phonemes = [ont.a, ont.s, ont.g, ont.a, ont.r, ont.dh, ont.r]
    self.assertListEqual(
        syllabifier.syllabify_phonemes(phonemes),
        [[ont.a, ont.s], [ont.g, ont.a, ont.r, ont.dh, ont.r]],
    )
def test_middle_english_syllabify(self):
    """Test syllabification for middle english"""
    words = ["marchall", "content", "thyne", "greef", "commaundyd"]

    # Without a separator the expected values are lists of syllables.
    list_syllabifier = Syllabifier(language="enm")
    as_lists = []
    for w in words:
        as_lists.append(list_syllabifier.syllabify(w, mode="MOP"))
    self.assertListEqual(
        as_lists,
        [
            ["mar", "chall"],
            ["con", "tent"],
            ["thyne"],
            ["greef"],
            ["com", "mau", "ndyd"],
        ],
    )

    # With sep="." the expected values are single dot-joined strings.
    dotted_syllabifier = Syllabifier(language="enm", sep=".")
    as_strings = []
    for w in words:
        as_strings.append(dotted_syllabifier.syllabify(w, "MOP"))
    self.assertListEqual(
        as_strings,
        [
            "mar.chall",
            "con.tent",
            "thyne",
            "greef",
            "com.mau.ndyd",
        ],
    )
class MiddleEnglishSyllabifier:
    """
    Middle English syllabifier.

    Wraps a generic ``Syllabifier`` created with ``language="enm"``.
    Instances are callable.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="enm")

    def __call__(self, word: str) -> List[str]:
        """Shortcut for :meth:`syllabify`."""
        return self.syllabify(word)

    def __repr__(self):
        return "<MiddleEnglishSyllabifier>"

    def syllabify(self, word: str) -> List[str]:
        """
        :param word: word to syllabify
        :return: whatever the wrapped syllabifier returns for ``word``
        """
        syllables = self.syllabifier.syllabify(word)
        return syllables
class MiddleHighGermanSyllabifier:
    """
    Middle High German syllabifier based on sonority phoneme hierarchy for MHG.

    Source: Resonances in Middle High German: New Methodologies in Prosody,
    Christopher Leo Hench, 2017

    Wraps a generic ``Syllabifier`` created with ``language="gmh"``.
    Instances are callable.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="gmh")

    def __call__(self, word):
        """Shortcut for :meth:`syllabify`."""
        return self.syllabify(word)

    def __repr__(self):
        return "<MiddleHighGermanSyllabifier>"

    def syllabify(self, word: str) -> List[str]:
        """
        >>> MiddleHighGermanSyllabifier().syllabify("Gunther")
        ['Gunt', 'her']

        :param word: word to syllabify
        :return: syllabified word
        """
        # The wrapped syllabifier is always invoked in "MOP" mode here.
        syllables = self.syllabifier.syllabify(word, mode="MOP")
        return syllables
class OldEnglishSyllabifier:
    """
    Old English syllabifier.

    Wraps a generic ``Syllabifier`` created with ``language="ang"``.
    Instances are callable.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="ang")

    def __call__(self, word):
        """Shortcut for :meth:`syllabify`."""
        return self.syllabify(word)

    def __repr__(self):
        return "<OldEnglishSyllabifier>"

    def syllabify(self, word: str) -> List[str]:
        """
        >>> ang_syllabifier = OldEnglishSyllabifier()
        >>> ang_syllabifier.syllabify("Beowulf".lower())
        ['beo', 'wulf']

        :param word: word to syllabify
        :return: syllabified word
        """
        syllables = self.syllabifier.syllabify(word)
        return syllables
def __init__(self):
    """
    Set up the three helpers kept on the instance: a syllabifier created
    with ``language="old_norse_ipa"``, a transcriber built from the IPA
    tables and rules, and a POS tagger created with ``'old_norse'``.
    """
    self.syllabifier = Syllabifier(language="old_norse_ipa")
    self.tr = Transcriber(
        DIPHTHONGS_IPA,
        DIPHTHONGS_IPA_class,
        IPA_class,
        old_norse_rules,
    )
    self.tagger = POSTag('old_norse')
from cltk.phonology import utils as phu from cltk.phonology.old_norse import transcription as ont from cltk.phonology.syllabify import Syllabifier from cltk.tokenize.word import WordTokenizer from cltk.corpus.old_norse.syllabifier import hierarchy, invalid_onsets from cltk.text_reuse.levenshtein import Levenshtein from zoegas.constants import postags, dictionary_name, pos_verbose # phonetic transcriber phonetic_transcriber = phu.Transcriber(ont.DIPHTHONGS_IPA, ont.DIPHTHONGS_IPA_class, ont.IPA_class, ont.old_norse_rules) # Old Norse syllabifier s = Syllabifier(language="old_norse", break_geminants=True) s.set_invalid_onsets(invalid_onsets) s.set_hierarchy(hierarchy) old_norse_word_tokenizer = WordTokenizer("old_norse") def clean(text: str) -> Optional[str]: """ :param text: :return: """ if text is not None: text = re.sub(r"\t", "", text) text = re.sub(r"\n", "", text)
def test_syllabification_old_english(self):
    """Syllabify one Old English word and compare with the expected syllables."""
    syllabifier = Syllabifier(language='old_english')
    syllables = syllabifier.syllabify('geardagum')
    self.assertEqual(syllables, ['gear', 'da', 'gum'])
Commented doctests do not work as expected, because there is no way, for now, to guess correctly all the forms. """ import cltk.inflection.utils as decl_utils from cltk.phonology.syllabify import Syllabifier, Syllable from cltk.corpus.old_norse.syllabifier import invalid_onsets, BACK_TO_FRONT_VOWELS, VOWELS, CONSONANTS from cltk.inflection.old_norse.phonemic_rules import extract_common_stem, apply_u_umlaut, has_u_umlaut __author__ = ["Clément Besnier <*****@*****.**>", ] sumar = [["sumar", "sumar", "sumri", "sumars"], ["sumur", "sumur", "sumrum", "sumra"]] noun_sumar = decl_utils.Noun("sumar", decl_utils.Gender.neuter) noun_sumar.set_declension(sumar) s = Syllabifier(language="old_norse", break_geminants=True) s.set_invalid_onsets(invalid_onsets) class OldNorseNoun(decl_utils.Noun): def __init__(self, name: str, gender: decl_utils.Gender): super().__init__(name, gender) def set_representative_cases(self, ns, gs, np): """ >>> armr = OldNorseNoun("armr", decl_utils.Gender.masculine) >>> armr.set_representative_cases("armr", "arms", "armar") >>> armr.declension [['armr', '', '', 'arms'], ['armar', '', '', '']]
def __init__(self):
    """Create the wrapped generic syllabifier with ``language="gmh"``."""
    gmh_syllabifier = Syllabifier(language="gmh")
    self.syllabifier = gmh_syllabifier
from cltk.phonology.syllabify import Syllabifier, Syllable from cltk.phonology.non.syllabifier import invalid_onsets, BACK_TO_FRONT_VOWELS, VOWELS, CONSONANTS from zoegas.inflection.old_norse.phonemic_rules import extract_common_stem, apply_u_umlaut, has_u_umlaut import zoegas.inflection.utils as decl_utils __author__ = [ "Clément Besnier <*****@*****.**>", ] sumar = [["sumar", "sumar", "sumri", "sumars"], ["sumur", "sumur", "sumrum", "sumra"]] noun_sumar = decl_utils.Noun("sumar", decl_utils.Gender.neuter) noun_sumar.set_declension(sumar) s = Syllabifier(language="non", break_geminants=True) s.set_invalid_onsets(invalid_onsets) class OldNorseNoun(decl_utils.Noun): def __init__(self, name: str, gender: decl_utils.Gender): super().__init__(name, gender) def set_representative_cases(self, ns, gs, np): """ >>> armr = OldNorseNoun("armr", decl_utils.Gender.masculine) >>> armr.set_representative_cases("armr", "arms", "armar") >>> armr.declension [['armr', '', '', 'arms'], ['armar', '', '', '']]
"""Module for calculating rhyme scheme for a MHG stanza.""" from cltk.alphabet.gmh import normalize_middle_high_german as normalizer from cltk.phonology.gmh.transcription import Transcriber from cltk.phonology.syllabify import Syllabifier syllabifier = Syllabifier(language="gmh") class Verse: """Calculate rhyme scheme for a MHG stanza.""" def __init__(self, text): self.text = [ normalizer(line, to_lower_all=True, punct=True, alpha_conv=True).split(" ") for line in text ] self.syllabified = [[syllabifier.syllabify(w) for w in line] for line in self.text] self.transcribed_phonetics = None def to_phonetics(self): """Transcribe phonetics.""" tr = Transcriber() self.transcribed_phonetics = [ tr.transcribe(line) for line in self.text ] def rhyme_scheme(self, rhyme_size=3): """ Calculates the rhyme scheme of a given stanza. It doesn't yet support phonetical rhyming (homophones) and thus is still error-prone
from typing import List

from cltk.phonology.old_norse.transcription import measure_old_norse_syllable, DIPHTHONGS_IPA, DIPHTHONGS_IPA_class, \
    IPA_class, old_norse_rules
from cltk.phonology.syllabify import Syllabifier, Syllable
from cltk.corpus.old_norse.syllabifier import invalid_onsets, VOWELS, CONSONANTS, LONG_VOWELS, BACK_TO_FRONT_VOWELS
from cltk.inflection.utils import Number
from cltk.phonology.utils import Length, Transcriber
from cltk.inflection.old_norse.phonemic_rules import apply_i_umlaut, apply_u_umlaut, add_r_ending

__author__ = [
    "Clément Besnier <*****@*****.**>",
]

# Module-level syllabifier created with language="old_norse" and geminant
# breaking enabled; the imported invalid onsets are registered on it.
s = Syllabifier(language="old_norse", break_geminants=True)
s.set_invalid_onsets(invalid_onsets)

# Same configuration, but created with language="old_norse_ipa".
s_ipa = Syllabifier(language="old_norse_ipa", break_geminants=True)
s_ipa.set_invalid_onsets(invalid_onsets)

# Module-level transcriber built from the IPA diphthong/class tables and the
# Old Norse rules imported above.
transcriber = Transcriber(DIPHTHONGS_IPA, DIPHTHONGS_IPA_class, IPA_class, old_norse_rules)


# NOTE(review): Enum and auto are not imported in this excerpt — presumably
# imported elsewhere in the file; confirm.
class Person(Enum):
    """Enumeration with three members: first, second and third (presumably grammatical person — confirm)."""

    first = auto()
    second = auto()
    third = auto()
def __init__(self):
    """
    Create the wrapped generic syllabifier with ``language="non"`` and
    geminant breaking enabled, then register ``ons.invalid_onsets`` on it.
    """
    non_syllabifier = Syllabifier(language="non", break_geminants=True)
    self.syllabifier = non_syllabifier
    self.syllabifier.set_invalid_onsets(ons.invalid_onsets)