示例#1
0
class OldNorseSyllabifier:
    """
    Syllabifier for Old Norse.

    Thin wrapper around a ``Syllabifier`` created with ``language="non"`` and
    ``break_geminants=True`` on which ``ons.invalid_onsets`` is registered.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="non", break_geminants=True)

        self.syllabifier.set_invalid_onsets(ons.invalid_onsets)

    def syllabify(self, word: str) -> List[str]:
        """
        >>> non_syllabifier = OldNorseSyllabifier()
        >>> non_syllabifier.syllabify('Miðgarðr'.lower())
        ['mið', 'garðr']

        :param word: word to syllabify
        :return: syllabified word
        """
        return self.syllabifier.syllabify(word)

    def __repr__(self):
        # The representation must name this class, not a different one.
        return "<OldNorseSyllabifier>"

    def __call__(self, word: str) -> List[str]:
        """Calling the instance is shorthand for :meth:`syllabify`."""
        return self.syllabify(word)
示例#2
0
文件: verse.py 项目: TylerKirby/cltk
 def syllabify(self):
     """
     Syllabify every word of the imported text; syllables may play a role
     in verse classification.

     The result is stored in ``self.syllabified_text`` and nothing is
     returned. When ``self.long_lines`` is empty, an error is logged and the
     stored result is an empty list.
     """
     if len(self.long_lines) == 0:
         logger.error("No text was imported")
         self.syllabified_text = []
         return

     syllabifier = Syllabifier(language="old_norse", break_geminants=True)
     # punctuation is not necessary here: these marks are deleted from each token
     punctuation_remover = str.maketrans("", "", ",.;!?-:")
     result = []
     for line in self.long_lines:
         line_syllables = []
         for viisuordh in line:
             words = []
             for token in tokenize_old_norse_words(viisuordh):
                 cleaned = token.translate(punctuation_remover)
                 if cleaned != '':
                     words.append(syllabifier.syllabify(cleaned.lower()))
             # each short line is stored as a one-element list wrapping its words
             line_syllables.append([words])
         result.append(line_syllables)
     self.syllabified_text = result
示例#3
0
文件: verse.py 项目: snerus11/cltk
 def syllabify(self):
     """
     Syllabify every word of the imported text; syllables may play a role
     in verse classification.

     The result is stored in ``self.syllabified_text`` and nothing is
     returned. When ``self.long_lines`` is empty, an error is logged and the
     stored result is an empty list.
     """
     if len(self.long_lines) == 0:
         logger.error("No text was imported")
         self.syllabified_text = []
         return

     syllabifier = Syllabifier(language="old_norse", break_geminants=True)
     # punctuation is not necessary here: these marks are deleted from each token
     punctuation_remover = str.maketrans("", "", ",.;!?-:")
     result = []
     for line in self.long_lines:
         line_syllables = []
         for viisuordh in line:
             words = []
             for token in tokenize_old_norse_words(viisuordh):
                 cleaned = token.translate(punctuation_remover)
                 if cleaned != '':
                     words.append(syllabifier.syllabify(cleaned.lower()))
             # each short line is stored as a one-element list wrapping its words
             line_syllables.append([words])
         result.append(line_syllables)
     self.syllabified_text = result
示例#4
0
 def test_syllable_length_1(self):
     """Syllabify a phoneme sequence and check the measured length of each syllable."""
     syllabifier = Syllabifier(language="old_norse_ipa")
     # asgarðr (normally it is ásgarðr)
     word = [ont.a, ont.s, ont.g, ont.a, ont.r, ont.dh, ont.r]
     lengths = [
         ont.measure_old_norse_syllable(syllable)
         for syllable in syllabifier.syllabify_phonemes(word)
     ]
     self.assertListEqual(lengths, [ut.Length.short, ut.Length.long])
示例#5
0
 def test_syllable_length_1(self):
     """Syllabify a phoneme sequence and check the measured length of each syllable."""
     syllabifier = Syllabifier(language="old_norse_ipa")
     # asgarðr (normally it is ásgarðr)
     word = [ont.a, ont.s, ont.g, ont.a, ont.r, ont.dh, ont.r]
     lengths = [
         ont.measure_old_norse_syllable(syllable)
         for syllable in syllabifier.syllabify_phonemes(word)
     ]
     self.assertListEqual(lengths, [ut.Length.short, ut.Length.long])
示例#6
0
    def test_middle_high_german_syllabification(self):
        """
        Check that 'lobebæren' is split into the expected Middle High German
        syllables.
        """
        expected = ['lo', 'be', 'bæ', 'ren']
        actual = Syllabifier(language='middle_high_german').syllabify('lobebæren')

        self.assertEqual(actual, expected)
示例#7
0
    def test_middle_high_german_syllabification(self):
        """
        Check that 'lobebæren' is split into the expected Middle High German
        syllables.
        """
        expected = ['lo', 'be', 'bæ', 'ren']
        actual = Syllabifier(language='middle_high_german').syllabify('lobebæren')

        self.assertEqual(actual, expected)
示例#8
0
    def test_syllabification_old_norse(self):
        """
        Syllabify a tokenized Old Norse passage with ``syllabify_SSP`` followed
        by ``legal_onsets`` and compare with the expected syllables.
        """
        old_norse_syllabifier = Syllabifier(language="old_norse", break_geminants=True)
        text = (
            "Gefjun dró frá Gylfa glöð djúpröðul óðla, svá at af rennirauknum rauk, Danmarkar auka. Báru öxn ok átta"
            " ennitungl, þars gengu fyrir vineyjar víðri valrauf, fjögur höfuð."
        )
        syllabified_words = []
        for word in tokenize_old_norse_words(text):
            # tokens that are substrings of ",." are left out
            if word in ",.":
                continue
            by_sonority = old_norse_syllabifier.syllabify_SSP(word.lower())
            syllabified_words.append(old_norse_syllabifier.legal_onsets(by_sonority, invalid_onsets))

        target = [
            ['gef', 'jun'], ['dró'], ['frá'], ['gyl', 'fa'], ['glöð'],
            ['djúp', 'rö', 'ðul'], ['óðl', 'a'], ['svá'], ['at'], ['af'],
            ['ren', 'ni', 'rauk', 'num'], ['rauk'], ['dan', 'mar', 'kar'],
            ['auk', 'a'], ['bár', 'u'], ['öxn'], ['ok'], ['át', 'ta'],
            ['en', 'ni', 'tungl'], ['þars'], ['geng', 'u'], ['fy', 'rir'],
            ['vi', 'ney', 'jar'], ['víðr', 'i'], ['val', 'rauf'],
            ['fjö', 'gur'], ['hö', 'fuð'],
        ]
        self.assertListEqual(syllabified_words, target)
示例#9
0
    def test_syllabification_old_norse(self):
        """
        Syllabify a tokenized Old Norse passage with ``syllabify_ssp`` (after
        registering the invalid onsets) and compare with the expected syllables.
        """
        old_norse_syllabifier = Syllabifier(language="non", break_geminants=True)
        text = (
            "Gefjun dró frá Gylfa glöð djúpröðul óðla, svá at af rennirauknum rauk, Danmarkar auka. Báru öxn ok "
            "átta ennitungl, þars gengu fyrir vineyjar víðri valrauf, fjögur höfuð."
        )
        words = OldNorseWordTokenizer().tokenize(text)
        old_norse_syllabifier.set_invalid_onsets(invalid_onsets)

        syllabified_words = []
        for word in words:
            # tokens that are substrings of ",." are left out
            if word in ",.":
                continue
            syllabified_words.append(old_norse_syllabifier.syllabify_ssp(word.lower()))

        target = [
            ["gef", "jun"], ["dró"], ["frá"], ["gyl", "fa"], ["glöð"],
            ["djúp", "rö", "ðul"], ["óðl", "a"], ["svá"], ["at"], ["af"],
            ["ren", "ni", "rauk", "num"], ["rauk"], ["dan", "mar", "kar"],
            ["auk", "a"], ["bár", "u"], ["öxn"], ["ok"], ["át", "ta"],
            ["en", "ni", "tungl"], ["þars"], ["geng", "u"], ["fy", "rir"],
            ["vi", "ney", "jar"], ["víðr", "i"], ["val", "rauf"],
            ["fjö", "gur"], ["hö", "fuð"],
        ]
        self.assertListEqual(syllabified_words, target)
示例#10
0
文件: verse.py 项目: cltk/cltk
 def syllabify(self, hierarchy):
     """
     Syllabify every line of the text; syllables may play a role in verse
     classification.

     The result is stored in ``self.syllabified_text`` as one list per long
     line, holding the ``syllabified`` attribute of each of its lines. When
     ``self.long_lines`` is empty, an error is logged and the stored result
     is an empty list.

     :param hierarchy: hierarchy handed to the syllabifier's ``set_hierarchy``
     """
     if len(self.long_lines) == 0:
         logger.error("No text was imported")
         self.syllabified_text = []
         return

     syllabifier = Syllabifier(language="old_norse", break_geminants=True)
     syllabifier.set_hierarchy(hierarchy)
     result = []
     for long_line in self.long_lines:
         row = []
         for short_line in long_line:
             assert isinstance(short_line, (ShortLine, LongLine))
             short_line.syllabify(syllabifier)
             row.append(short_line.syllabified)
         result.append(row)
     self.syllabified_text = result
示例#11
0
 def syllabify(self, hierarchy):
     """
     Syllabify every line of the text; syllables may play a role in verse
     classification.

     The result is stored in ``self.syllabified_text`` as one list per long
     line, holding the ``syllabified`` attribute of each of its lines. When
     ``self.long_lines`` is empty, an error is logged and the stored result
     is an empty list.

     :param hierarchy: hierarchy handed to the syllabifier's ``set_hierarchy``
     """
     if len(self.long_lines) == 0:
         logger.error("No text was imported")
         self.syllabified_text = []
         return

     syllabifier = Syllabifier(language="old_norse", break_geminants=True)
     syllabifier.set_hierarchy(hierarchy)
     result = []
     for long_line in self.long_lines:
         row = []
         for short_line in long_line:
             assert isinstance(short_line, (ShortLine, LongLine))
             short_line.syllabify(syllabifier)
             row.append(short_line.syllabified)
         result.append(row)
     self.syllabified_text = result
示例#12
0
    def syllabify(self, hierarchy: Dict[str, int]):
        """
        Syllabify each short line and store the collected ``syllabified``
        values in ``self.syllabified_text``.

        >>> stanza = "Ein sat hon úti,\\nþá er inn aldni kom\\nyggjungr ása\\nok í augu leit.\\nHvers fregnið mik?\\nHví freistið mín?\\nAllt veit ek, Óðinn,\\nhvar þú auga falt,\\ní inum mæra\\nMímisbrunni.\\nDrekkr mjöð Mímir\\nmorgun hverjan\\naf veði Valföðrs.\\nVituð ér enn - eða hvat?"
        >>> us = UnspecifiedStanza()
        >>> us.from_short_lines_text(stanza)
        >>> us.syllabify(old_norse_syllabifier.hierarchy)
        >>> us.syllabified_text
        [[['ein'], ['sat'], ['hon'], ['út', 'i']], [['þá'], ['er'], ['inn'], ['al', 'dni'], ['kom']], [['yg', 'gjungr'], ['ás', 'a']], [['ok'], ['í'], ['aug', 'u'], ['leit']], [['hvers'], ['freg', 'nið'], ['mik']], [['hví'], ['freis', 'tið'], ['mín']], [['allt'], ['veit'], ['ek'], ['ó', 'ðinn']], [['hvar'], ['þú'], ['aug', 'a'], ['falt']], [['í'], ['i', 'num'], ['mær', 'a']], [['mí', 'mis', 'brun', 'ni']], [['drekkr'], ['mjöð'], ['mí', 'mir']], [['mor', 'gun'], ['hver', 'jan']], [['af'], ['veð', 'i'], ['val', 'föðrs']], [['vi', 'tuð'], ['ér'], ['enn'], ['eð', 'a'], ['hvat']]]

        :param hierarchy: phonetic hierarchy
        :return:
        """
        line_syllabifier = Syllabifier(language="non", break_geminants=True)
        line_syllabifier.set_hierarchy(hierarchy)
        collected = []
        for short_line in self.short_lines:
            assert isinstance(short_line, ShortLine)
            # the line syllabifies itself; its result is read back from `syllabified`
            short_line.syllabify(line_syllabifier)
            collected.append(short_line.syllabified)
        self.syllabified_text = collected
示例#13
0
文件: verse.py 项目: cltk/cltk
    def syllabify(self, hierarchy):
        """
        Syllabify each short line and store the collected ``syllabified``
        values in ``self.syllabified_text``.

        >>> stanza = "Ein sat hon úti,\\nþá er inn aldni kom\\nyggjungr ása\\nok í augu leit.\\nHvers fregnið mik?\\nHví freistið mín?\\nAllt veit ek, Óðinn,\\nhvar þú auga falt,\\ní inum mæra\\nMímisbrunni.\\nDrekkr mjöð Mímir\\nmorgun hverjan\\naf veði Valföðrs.\\nVituð ér enn - eða hvat?"
        >>> us = UnspecifiedStanza()
        >>> us.from_short_lines_text(stanza)
        >>> us.syllabify(old_norse_syllabifier.hierarchy)
        >>> us.syllabified_text
        [[['ein'], ['sat'], ['hon'], ['út', 'i']], [['þá'], ['er'], ['inn'], ['al', 'dni'], ['kom']], [['yg', 'gjungr'], ['ás', 'a']], [['ok'], ['í'], ['aug', 'u'], ['leit']], [['hvers'], ['freg', 'nið'], ['mik']], [['hví'], ['freis', 'tið'], ['mín']], [['allt'], ['veit'], ['ek'], ['ó', 'ðinn']], [['hvar'], ['þú'], ['aug', 'a'], ['falt']], [['í'], ['i', 'num'], ['mær', 'a']], [['mí', 'mis', 'brun', 'ni']], [['drekkr'], ['mjöð'], ['mí', 'mir']], [['mor', 'gun'], ['hver', 'jan']], [['af'], ['veð', 'i'], ['val', 'föðrs']], [['vi', 'tuð'], ['ér'], ['enn'], ['eð', 'a'], ['hvat']]]

        :param hierarchy: hierarchy handed to the syllabifier's ``set_hierarchy``
        :return:
        """
        line_syllabifier = Syllabifier(language="old_norse", break_geminants=True)
        line_syllabifier.set_hierarchy(hierarchy)
        collected = []
        for short_line in self.short_lines:
            assert isinstance(short_line, ShortLine)
            # the line syllabifies itself; its result is read back from `syllabified`
            short_line.syllabify(line_syllabifier)
            collected.append(short_line.syllabified)
        self.syllabified_text = collected
示例#14
0
 def test_syllabify_phonemes(self):
     """Syllabify a phoneme list with a hand-built IPA hierarchy and vowel set."""
     vowels = ["a", "ɛ", "i", "ɔ", "ɒ", "ø", "u", "y", "œ", "e", "o", "j"]
     ipa_hierarchy = [
         vowels,
         ["r"],
         ["l"],
         ["m", "n"],
         ["f", "v", "θ", "ð", "s", "h"],
         ["b", "d", "g", "k", "p", "t"],
     ]
     syllabifier = Syllabifier()
     syllabifier.set_hierarchy(ipa_hierarchy)
     syllabifier.set_vowels(vowels)

     phonemes = [ont.a, ont.s, ont.g, ont.a, ont.r, ont.dh, ont.r]
     actual = syllabifier.syllabify_phonemes(phonemes)
     expected = [[ont.a, ont.s], [ont.g, ont.a, ont.r, ont.dh, ont.r]]
     self.assertListEqual(actual, expected)
示例#15
0
    def test_middle_english_syllabify(self):
        """Test syllabification for middle english, both as lists and as separator-joined strings."""
        words = ["marchall", "content", "thyne", "greef", "commaundyd"]

        # Without a separator, the expected values are lists of syllables.
        list_syllabifier = Syllabifier(language="enm")
        syllabified = []
        for w in words:
            syllabified.append(list_syllabifier.syllabify(w, mode="MOP"))
        self.assertListEqual(
            syllabified,
            [
                ["mar", "chall"],
                ["con", "tent"],
                ["thyne"],
                ["greef"],
                ["com", "mau", "ndyd"],
            ],
        )

        # With sep=".", the expected values are single strings.
        str_syllabifier = Syllabifier(language="enm", sep=".")
        syllabified_str = []
        for w in words:
            syllabified_str.append(str_syllabifier.syllabify(w, "MOP"))
        self.assertListEqual(
            syllabified_str,
            [
                "mar.chall",
                "con.tent",
                "thyne",
                "greef",
                "com.mau.ndyd",
            ],
        )
示例#16
0
class MiddleEnglishSyllabifier:
    """
    Middle English syllabifier.

    Delegates to a ``Syllabifier`` created with ``language="enm"``.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="enm")

    def syllabify(self, word: str) -> List[str]:
        """
        :param word: word to syllabify
        :return: whatever the wrapped syllabifier returns for ``word``
        """
        syllables = self.syllabifier.syllabify(word)
        return syllables

    def __repr__(self):
        return "<MiddleEnglishSyllabifier>"

    def __call__(self, word: str) -> List[str]:
        """Calling the instance is shorthand for :meth:`syllabify`."""
        return self.syllabify(word)
示例#17
0
 def test_syllabify_phonemes(self):
     """Syllabify a phoneme list with a hand-built IPA hierarchy and vowel set."""
     vowels = ["a", "ɛ", "i", "ɔ", "ɒ", "ø", "u", "y", "œ", "e", "o", "j"]
     ipa_hierarchy = [
         vowels,
         ["r"],
         ["l"],
         ["m", "n"],
         ["f", "v", "θ", "ð", "s", "h"],
         ["b", "d", "g", "k", "p", "t"],
     ]
     syllabifier = Syllabifier()
     syllabifier.set_hierarchy(ipa_hierarchy)
     syllabifier.set_vowels(vowels)

     phonemes = [ont.a, ont.s, ont.g, ont.a, ont.r, ont.dh, ont.r]
     actual = syllabifier.syllabify_phonemes(phonemes)
     expected = [[ont.a, ont.s], [ont.g, ont.a, ont.r, ont.dh, ont.r]]
     self.assertListEqual(actual, expected)
示例#18
0
class MiddleHighGermanSyllabifier:
    """
    Middle High German syllabifier based on sonority phoneme hierarchy for MHG.
    Source: Resonances in Middle High German: New Methodologies in Prosody, Christopher Leo Hench, 2017

    Delegates to a ``Syllabifier`` created with ``language="gmh"`` and always
    requests ``mode="MOP"``.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="gmh")

    def syllabify(self, word: str) -> List[str]:
        """
        >>> MiddleHighGermanSyllabifier().syllabify("Gunther")
        ['Gunt', 'her']

        :param word: word to syllabify
        :return: syllabified word
        """
        syllables = self.syllabifier.syllabify(word, mode="MOP")
        return syllables

    def __repr__(self):
        return "<MiddleHighGermanSyllabifier>"

    def __call__(self, word):
        """Calling the instance is shorthand for :meth:`syllabify`."""
        return self.syllabify(word)
示例#19
0
class OldEnglishSyllabifier:
    """
    Old English syllabifier.

    Delegates to a ``Syllabifier`` created with ``language="ang"``.
    """

    def __init__(self):
        self.syllabifier = Syllabifier(language="ang")

    def syllabify(self, word: str) -> List[str]:
        """
        >>> ang_syllabifier = OldEnglishSyllabifier()
        >>> ang_syllabifier.syllabify("Beowulf".lower())
        ['beo', 'wulf']

        :param word: word to syllabify
        :return: syllabified word
        """
        syllables = self.syllabifier.syllabify(word)
        return syllables

    def __repr__(self):
        return "<OldEnglishSyllabifier>"

    def __call__(self, word):
        """Calling the instance is shorthand for :meth:`syllabify`."""
        return self.syllabify(word)
示例#20
0
 def __init__(self):
     """Create the syllabifier, the phonetic transcriber and the POS tagger used by this object."""
     # Syllabifier selected with the "old_norse_ipa" language key.
     self.syllabifier = Syllabifier(language="old_norse_ipa")
     # Transcriber built from the diphthong/IPA tables and the Old Norse rules.
     self.tr = Transcriber(DIPHTHONGS_IPA, DIPHTHONGS_IPA_class, IPA_class,
                           old_norse_rules)
     # POS tagger selected with the 'old_norse' key.
     self.tagger = POSTag('old_norse')
示例#21
0
from cltk.phonology import utils as phu
from cltk.phonology.old_norse import transcription as ont
from cltk.phonology.syllabify import Syllabifier
from cltk.tokenize.word import WordTokenizer
from cltk.corpus.old_norse.syllabifier import hierarchy, invalid_onsets
from cltk.text_reuse.levenshtein import Levenshtein

from zoegas.constants import postags, dictionary_name, pos_verbose

# Module-level phonetic transcriber, built from the diphthong/IPA tables and
# the rules exposed by the Old Norse transcription module (`ont`).
phonetic_transcriber = phu.Transcriber(ont.DIPHTHONGS_IPA,
                                       ont.DIPHTHONGS_IPA_class, ont.IPA_class,
                                       ont.old_norse_rules)

# Module-level Old Norse syllabifier: breaks geminants and is configured with
# the imported invalid onsets and hierarchy.
s = Syllabifier(language="old_norse", break_geminants=True)
s.set_invalid_onsets(invalid_onsets)
s.set_hierarchy(hierarchy)

# Module-level word tokenizer selected with the "old_norse" key.
old_norse_word_tokenizer = WordTokenizer("old_norse")


def clean(text: str) -> Optional[str]:
    """

    :param text:
    :return:
    """
    if text is not None:
        text = re.sub(r"\t", "", text)
        text = re.sub(r"\n", "", text)
示例#22
0
 def test_syllabification_old_english(self):
     """Check that 'geardagum' is split into the expected Old English syllables."""
     expected = ['gear', 'da', 'gum']
     syllabifier = Syllabifier(language='old_english')
     self.assertEqual(syllabifier.syllabify('geardagum'), expected)
示例#23
0
文件: nouns.py 项目: cltk/cltk
The commented-out doctests do not work as expected because, for now, there is no way to correctly guess all the forms.

"""

import cltk.inflection.utils as decl_utils
from cltk.phonology.syllabify import Syllabifier, Syllable
from cltk.corpus.old_norse.syllabifier import invalid_onsets, BACK_TO_FRONT_VOWELS, VOWELS, CONSONANTS
from cltk.inflection.old_norse.phonemic_rules import extract_common_stem, apply_u_umlaut, has_u_umlaut

__author__ = ["Clément Besnier <*****@*****.**>", ]

# Declension table of "sumar": two rows of four forms each
# (presumably singular then plural, one form per case — TODO confirm the case order).
sumar = [["sumar", "sumar", "sumri", "sumars"], ["sumur", "sumur", "sumrum", "sumra"]]
# Module-level neuter noun carrying the declension table above.
noun_sumar = decl_utils.Noun("sumar", decl_utils.Gender.neuter)
noun_sumar.set_declension(sumar)

# Module-level Old Norse syllabifier: breaks geminants and is configured with
# the imported invalid onsets.
s = Syllabifier(language="old_norse", break_geminants=True)
s.set_invalid_onsets(invalid_onsets)


class OldNorseNoun(decl_utils.Noun):
    def __init__(self, name: str, gender: decl_utils.Gender):

        super().__init__(name, gender)

    def set_representative_cases(self, ns, gs, np):
        """
        >>> armr = OldNorseNoun("armr", decl_utils.Gender.masculine)
        >>> armr.set_representative_cases("armr", "arms", "armar")

        >>> armr.declension
        [['armr', '', '', 'arms'], ['armar', '', '', '']]
示例#24
0
 def __init__(self):
     """Create the wrapped syllabifier, selected with the "gmh" language key."""
     self.syllabifier = Syllabifier(language="gmh")
示例#25
0
from cltk.phonology.syllabify import Syllabifier, Syllable
from cltk.phonology.non.syllabifier import invalid_onsets, BACK_TO_FRONT_VOWELS, VOWELS, CONSONANTS

from zoegas.inflection.old_norse.phonemic_rules import extract_common_stem, apply_u_umlaut, has_u_umlaut
import zoegas.inflection.utils as decl_utils

__author__ = [
    "Clément Besnier <*****@*****.**>",
]

# Declension table of "sumar": two rows of four forms each
# (presumably singular then plural, one form per case — TODO confirm the case order).
sumar = [["sumar", "sumar", "sumri", "sumars"],
         ["sumur", "sumur", "sumrum", "sumra"]]
# Module-level neuter noun carrying the declension table above.
noun_sumar = decl_utils.Noun("sumar", decl_utils.Gender.neuter)
noun_sumar.set_declension(sumar)

# Module-level Old Norse syllabifier: breaks geminants and is configured with
# the imported invalid onsets.
s = Syllabifier(language="non", break_geminants=True)
s.set_invalid_onsets(invalid_onsets)


class OldNorseNoun(decl_utils.Noun):
    def __init__(self, name: str, gender: decl_utils.Gender):

        super().__init__(name, gender)

    def set_representative_cases(self, ns, gs, np):
        """
        >>> armr = OldNorseNoun("armr", decl_utils.Gender.masculine)
        >>> armr.set_representative_cases("armr", "arms", "armar")

        >>> armr.declension
        [['armr', '', '', 'arms'], ['armar', '', '', '']]
示例#26
0
文件: gmh.py 项目: yelircaasi/cltk
"""Module for calculating rhyme scheme for a MHG stanza."""

from cltk.alphabet.gmh import normalize_middle_high_german as normalizer
from cltk.phonology.gmh.transcription import Transcriber
from cltk.phonology.syllabify import Syllabifier

# Module-level syllabifier selected with the "gmh" language key; used by Verse.
syllabifier = Syllabifier(language="gmh")


class Verse:
    """Calculate rhyme scheme for a MHG stanza."""
    def __init__(self, text):
        """
        Normalize each line of the stanza, split it on single spaces and
        syllabify every resulting word.

        :param text: iterable of lines; each line is passed to the normalizer
        """
        normalized_lines = []
        for line in text:
            normalized = normalizer(line, to_lower_all=True, punct=True,
                                    alpha_conv=True)
            normalized_lines.append(normalized.split(" "))
        self.text = normalized_lines

        syllabified_lines = []
        for words in self.text:
            syllabified_lines.append([syllabifier.syllabify(w) for w in words])
        self.syllabified = syllabified_lines

        # filled in by to_phonetics()
        self.transcribed_phonetics = None

    def to_phonetics(self):
        """
        Transcribe phonetics.

        Each entry of ``self.text`` is transcribed with a fresh ``Transcriber``
        and the results are stored in ``self.transcribed_phonetics``.
        """
        tr = Transcriber()
        transcriptions = []
        for line in self.text:
            transcriptions.append(tr.transcribe(line))
        self.transcribed_phonetics = transcriptions

    def rhyme_scheme(self, rhyme_size=3):
        """
        Calculates the rhyme scheme of a given stanza. It doesn't yet support
        phonetical rhyming (homophones) and thus is still error-prone
示例#27
0
from typing import List

from cltk.phonology.old_norse.transcription import measure_old_norse_syllable, DIPHTHONGS_IPA, DIPHTHONGS_IPA_class, \
    IPA_class, old_norse_rules

from cltk.phonology.syllabify import Syllabifier, Syllable
from cltk.corpus.old_norse.syllabifier import invalid_onsets, VOWELS, CONSONANTS, LONG_VOWELS, BACK_TO_FRONT_VOWELS
from cltk.inflection.utils import Number
from cltk.phonology.utils import Length, Transcriber
from cltk.inflection.old_norse.phonemic_rules import apply_i_umlaut, apply_u_umlaut, add_r_ending

__author__ = [
    "Clément Besnier <*****@*****.**>",
]

# Module-level Old Norse syllabifier: breaks geminants and is configured with
# the imported invalid onsets.
s = Syllabifier(language="old_norse", break_geminants=True)
s.set_invalid_onsets(invalid_onsets)

# Same configuration, but selected with the "old_norse_ipa" language key.
s_ipa = Syllabifier(language="old_norse_ipa", break_geminants=True)
s_ipa.set_invalid_onsets(invalid_onsets)

# Module-level transcriber built from the diphthong/IPA tables and the Old Norse rules.
transcriber = Transcriber(DIPHTHONGS_IPA, DIPHTHONGS_IPA_class, IPA_class,
                          old_norse_rules)


class Person(Enum):
    """
    Enumeration with the members ``first``, ``second`` and ``third``
    (presumably grammatical person — confirm against callers).
    """
    # Values are assigned automatically by auto().
    first = auto()
    second = auto()
    third = auto()

示例#28
0
 def test_syllabification_old_english(self):
     """Check that 'geardagum' is split into the expected Old English syllables."""
     expected = ['gear', 'da', 'gum']
     syllabifier = Syllabifier(language='old_english')
     self.assertEqual(syllabifier.syllabify('geardagum'), expected)
示例#29
0
    def __init__(self):
        """Create the wrapped syllabifier ("non" language key, geminants broken) and register the invalid onsets."""
        self.syllabifier = Syllabifier(language="non", break_geminants=True)

        # Register the invalid onsets defined in `ons` on the syllabifier.
        self.syllabifier.set_invalid_onsets(ons.invalid_onsets)