예제 #1
0
 def test_zajtso(self):
     # 'zajtšo' is expected to come back as 'zaj' + 'tšo', with the input
     # phonotypes grouped the same way.
     letter_types = [CONS, VOWEL, SONOR, SUBUNIT, CONS, VOWEL]
     result = run_through_module(TextPhonotypes('zajtšo', letter_types))

     self.assertEqual(result.get_syllables(), ['zaj', 'tšo'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL, SONOR],
             [SUBUNIT, CONS, VOWEL],
         ],
     )
예제 #2
0
 def test_sahac(self):
     # 'sahać' is expected to come back as 'sa' + 'hać'.
     letter_types = [CONS, VOWEL, CONS, VOWEL, CONS]
     result = run_through_module(TextPhonotypes('sahać', letter_types))

     self.assertEqual(result.get_syllables(), ['sa', 'hać'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL],
             [CONS, VOWEL, CONS],
         ],
     )
예제 #3
0
 def test_neurcity(self):
     # 'neurčitý' is expected to come back as four syllables, with 'e' and
     # 'u' (both given as VOWEL) landing in separate syllables.
     letter_types = [SONOR, VOWEL, VOWEL, SONOR, CONS, VOWEL, CONS, VOWEL]
     result = run_through_module(TextPhonotypes('neurčitý', letter_types))

     self.assertEqual(result.get_syllables(), ['ne', 'ur', 'či', 'tý'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [SONOR, VOWEL],
             [VOWEL, SONOR],
             [CONS, VOWEL],
             [CONS, VOWEL],
         ],
     )
예제 #4
0
 def test_агульным(self):
     # 'агульным' is expected to come back as 'а' + 'гу' + 'льным'.
     letter_types = [VOWEL, CONS, VOWEL, SONOR, SPEC, SONOR, VOWEL, SONOR]
     result = run_through_module(TextPhonotypes('агульным', letter_types))

     self.assertEqual(result.get_syllables(), ['а', 'гу', 'льным'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [VOWEL],
             [CONS, VOWEL],
             [SONOR, SPEC, SONOR, VOWEL, SONOR],
         ],
     )
예제 #5
0
 def test_колькасць(self):
     # 'колькасць' is expected to come back as 'коль' + 'касць'.
     letter_types = [
         CONS, VOWEL, SONOR, SPEC, CONS, VOWEL, CONS, CONS, SPEC,
     ]
     result = run_through_module(TextPhonotypes('колькасць', letter_types))

     self.assertEqual(result.get_syllables(), ['коль', 'касць'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL, SONOR, SPEC],
             [CONS, VOWEL, CONS, CONS, SPEC],
         ],
     )
예제 #6
0
 def test_srozpadem(self):
     # 'srozpadem' is expected to come back as 'sro' + 'zpa' + 'dem'.
     letter_types = [
         CONS, SONOR, VOWEL, CONS, CONS, VOWEL, CONS, VOWEL, SONOR,
     ]
     result = run_through_module(TextPhonotypes('srozpadem', letter_types))

     self.assertEqual(result.get_syllables(), ['sro', 'zpa', 'dem'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, SONOR, VOWEL],
             [CONS, CONS, VOWEL],
             [CONS, VOWEL, SONOR],
         ],
     )
예제 #7
0
 def test_automaticky(self):
     # 'automatický' is expected to come back as five syllables; the leading
     # 'a' is given as SUBUNIT and stays in the first syllable with 'u'.
     letter_types = [
         SUBUNIT, VOWEL, CONS, VOWEL, SONOR, VOWEL,
         CONS, VOWEL, CONS, CONS, VOWEL,
     ]
     result = run_through_module(TextPhonotypes('automatický', letter_types))

     self.assertEqual(
         result.get_syllables(), ['au', 'to', 'ma', 'ti', 'cký'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [SUBUNIT, VOWEL],
             [CONS, VOWEL],
             [SONOR, VOWEL],
             [CONS, VOWEL],
             [CONS, CONS, VOWEL],
         ],
     )
예제 #8
0
 def test_bijologije(self):
     # 'bijologije' is expected to come back as five two-letter syllables.
     letter_types = [
         CONS, VOWEL, SONOR, VOWEL, SONOR,
         VOWEL, CONS, VOWEL, SONOR, VOWEL,
     ]
     result = run_through_module(TextPhonotypes('bijologije', letter_types))

     self.assertEqual(
         result.get_syllables(), ['bi', 'jo', 'lo', 'gi', 'je'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL],
             [SONOR, VOWEL],
             [SONOR, VOWEL],
             [CONS, VOWEL],
             [SONOR, VOWEL],
         ],
     )
예제 #9
0
 def test_зяўляецца(self):
     # 'з’яўляецца' (apostrophe given as SPEC) is expected to come back as
     # 'з’я' + 'ўля' + 'е' + 'цца'.
     letter_types = [
         CONS, SPEC, VOWEL, SONOR, SONOR,
         VOWEL, VOWEL, CONS, CONS, VOWEL,
     ]
     result = run_through_module(TextPhonotypes('з’яўляецца', letter_types))

     self.assertEqual(result.get_syllables(), ['з’я', 'ўля', 'е', 'цца'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, SPEC, VOWEL],
             [SONOR, SONOR, VOWEL],
             [VOWEL],
             [CONS, CONS, VOWEL],
         ],
     )
예제 #10
0
    def set_phonotypes(self, word) -> TextPhonotypes:
        """Assign phonotypes to the letters of ``word`` and return the result.

        The text from ``word.get_text()`` is first passed through
        ``self.text_changes_func``. The changed text is then scanned letter by
        letter: a letter is looked up in ``self.signs`` and appended to
        ``self.phonotypes`` unless one of the handlers (``self.clusters_func``
        or ``self.phonotype_changes_func``) returned a truthy flag for it.
        Presumably the handlers append the phonotypes themselves in that case;
        they are not visible here, so verify against their implementation.

        Args:
            word: object exposing ``get_text()``.

        Returns:
            ``TextPhonotypes`` built from the changed text and the list
            accumulated in ``self.phonotypes``. ``self.phonotypes`` is rebound
            to a fresh empty list before returning.
        """
        cluster_letters = self.cluster_letters
        phono_changes = self.phono_changes
        signs = self.signs

        word_text = self.text_changes_func(word.get_text())  # TEXT CHANGES
        word_length = len(word_text)

        # Skip flags carried over from a previous iteration. Each one, when
        # set, causes exactly one following letter to be skipped; if both are
        # set, two consecutive letters are skipped (one per flag).
        cluster_exception = False
        cluster_croatian_exception = False

        for i in range(word_length):

            if cluster_exception:
                cluster_exception = False
                continue

            if cluster_croatian_exception:
                cluster_croatian_exception = False
                continue

            # Per-letter flags, reset on every letter that is actually processed.
            current_to_subunit_next_unchanged = phonotype_exception = False

            # Neighbouring letters; "" stands for "no such letter" at the edges.
            previous_letter = word_text[i - 1] if i - 1 >= 0 else ""
            current_letter = word_text[i]
            next_letter = word_text[i + 1] if i + 1 < word_length else ""
            exception_next_letter = word_text[
                i +
                2] if i + 2 < word_length else ""  # second look-ahead letter, needed for the Croatian exception (ije -> [SUBUNIT, SUBUNIT, VOWEL])

            if current_letter in cluster_letters:  # CLUSTERS

                # Three-letter cluster: the two following letters together are
                # listed as a continuation of the current letter.
                if exception_next_letter != "" and next_letter + exception_next_letter in cluster_letters[
                        current_letter]:
                    cluster_exception, current_to_subunit_next_unchanged = \
                        self.clusters_func(word_text, current_letter, next_letter, exception_next_letter)
                    cluster_croatian_exception = True

                # Two-letter cluster.
                # NOTE(review): this is an independent `if`, not `elif`. When
                # both conditions hold, clusters_func is called twice and the
                # flags returned by the three-letter call are overwritten --
                # confirm this is intended.
                if next_letter in cluster_letters[current_letter]:
                    cluster_exception, current_to_subunit_next_unchanged = \
                        self.clusters_func(word_text, current_letter, next_letter, "")

            elif current_letter in phono_changes:  # PHONOTYPE CHANGES
                phonotype_exception = self.phonotype_changes_func(
                    word_text, previous_letter, current_letter, next_letter)

            # The plain lookup in `signs` applies only when no handler flagged
            # this letter.
            no_exceptions = not current_to_subunit_next_unchanged and not cluster_exception and not phonotype_exception

            if no_exceptions:
                self.phonotypes.append(signs[current_letter])

        # Hand the accumulated list to the result and start the next word with
        # an empty accumulator. NOTE(review): if anything above raises, the
        # partially filled self.phonotypes is not reset.
        phonotypes = self.phonotypes
        self.phonotypes = []

        return TextPhonotypes(word_text, phonotypes)
예제 #11
0
 def test_українськогож(self):
     # 'українськогож' is expected to come back as
     # 'у' + 'кра' + 'їн' + 'сько' + 'гож'.
     letter_types = [
         VOWEL, CONS, SONOR, VOWEL, VOWEL, SONOR, CONS,
         SPEC, CONS, VOWEL, CONS, VOWEL, CONS,
     ]
     result = run_through_module(
         TextPhonotypes('українськогож', letter_types))

     self.assertEqual(
         result.get_syllables(), ['у', 'кра', 'їн', 'сько', 'гож'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [VOWEL],
             [CONS, SONOR, VOWEL],
             [VOWEL, SONOR],
             [CONS, SPEC, CONS, VOWEL],
             [CONS, VOWEL, CONS],
         ],
     )
예제 #12
0
 def test_farmaceuticky(self):
     # 'farmaceutický' is expected to come back as
     # 'fa' + 'rma' + 'ceu' + 'ti' + 'cký'.
     letter_types = [
         CONS, VOWEL, SONOR, SONOR, VOWEL, CONS, SUBUNIT,
         VOWEL, CONS, VOWEL, CONS, CONS, VOWEL,
     ]
     result = run_through_module(
         TextPhonotypes('farmaceutický', letter_types))

     self.assertEqual(
         result.get_syllables(), ['fa', 'rma', 'ceu', 'ti', 'cký'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL],
             [SONOR, SONOR, VOWEL],
             [CONS, SUBUNIT, VOWEL],
             [CONS, VOWEL],
             [CONS, CONS, VOWEL],
         ],
     )
예제 #13
0
    def set_phonotypes(self, word) -> TextPhonotypes:
        """Assign phonotypes to the letters of ``word`` and return the result.

        The text from ``word.get_text()`` is run through
        ``self.text_changes_func`` and then scanned sign by sign. A sign gets
        its phonotype from ``self.signs`` unless ``self.clusters_func`` or
        ``self.phonotype_changes_func`` returned a truthy flag for it.

        Returns:
            ``TextPhonotypes`` built from the changed text and the list
            accumulated in ``self.phonotypes``, which is rebound to a new
            empty list before returning.
        """
        clusters = self.cluster_letters
        changes = self.phono_changes
        sign_table = self.signs

        # TEXT CHANGES
        text = self.text_changes_func(word.get_text())
        last_index = len(text) - 1

        # When set by a cluster, the following sign is skipped once.
        skip_next = False

        for index, sign in enumerate(text):
            if skip_next:
                skip_next = False
                continue

            to_subunit = False
            changed = False

            before = text[index - 1] if index > 0 else None
            after = text[index + 1] if index < last_index else None

            if sign in clusters and after in clusters[sign]:
                # CLUSTERS
                skip_next, to_subunit = self.clusters_func(text, sign, after)
            elif sign in changes:
                # PHONOTYPE CHANGES
                changed = self.phonotype_changes_func(
                    text, before, sign, after)

            # Plain table lookup only when no handler flagged this sign.
            if not (to_subunit or skip_next or changed):
                self.phonotypes.append(sign_table[sign])

        collected, self.phonotypes = self.phonotypes, []
        return TextPhonotypes(text, collected)
예제 #14
0
 def test_sneh(self):
     # 'sněh' is expected to stay a single syllable.
     letter_types = [CONS, SONOR, VOWEL, CONS]
     result = run_through_module(TextPhonotypes('sněh', letter_types))

     self.assertEqual(result.get_syllables(), ['sněh'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, SONOR, VOWEL, CONS],
         ],
     )
예제 #15
0
 def test_osm(self):
     # 'osm' with the final 'm' given as VOWEL is expected to come back as
     # 'o' + 'sm'.
     letter_types = [VOWEL, CONS, VOWEL]
     result = run_through_module(TextPhonotypes('osm', letter_types))

     self.assertEqual(result.get_syllables(), ['o', 'sm'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [VOWEL],
             [CONS, VOWEL],
         ],
     )
예제 #16
0
 def test_rz(self):
     # 'rž' with 'r' given as VOWEL is expected to stay a single syllable.
     letter_types = [VOWEL, CONS]
     result = run_through_module(TextPhonotypes('rž', letter_types))

     self.assertEqual(result.get_syllables(), ['rž'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [VOWEL, CONS],
         ],
     )
예제 #17
0
 def test_maria(self):
     # 'maria' with 'i' given as SUBUNIT is expected to come back as
     # 'ma' + 'ria'.
     letter_types = [SONOR, VOWEL, SONOR, SUBUNIT, VOWEL]
     result = run_through_module(TextPhonotypes('maria', letter_types))

     self.assertEqual(result.get_syllables(), ['ma', 'ria'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [SONOR, VOWEL],
             [SONOR, SUBUNIT, VOWEL],
         ],
     )
예제 #18
0
 def test_vlna(self):
     # 'vlna' with 'l' given as VOWEL is expected to come back as
     # 'vl' + 'na'.
     letter_types = [CONS, VOWEL, SONOR, VOWEL]
     result = run_through_module(TextPhonotypes('vlna', letter_types))

     self.assertEqual(result.get_syllables(), ['vl', 'na'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL],
             [SONOR, VOWEL],
         ],
     )
예제 #19
0
 def test_późniejszy(self):
     # 'późniejszy' is expected to come back as 'pó' + 'źniej' + 'szy'.
     # Phonotypes are given as raw numeric codes here rather than through
     # the named constants used by the neighbouring tests.
     letter_types = [0, 2, 0, 1, 4, 2, 1, 4, 0, 2]
     result = run_through_module(TextPhonotypes('późniejszy', letter_types))

     self.assertEqual(result.get_syllables(), ['pó', 'źniej', 'szy'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [0, 2],
             [0, 1, 4, 2, 1],
             [4, 0, 2],
         ],
     )
예제 #20
0
 def test_pouzil(self):
     # 'použil' with 'o' and 'u' both given as VOWEL is expected to come
     # back as 'po' + 'u' + 'žil'.
     letter_types = [CONS, VOWEL, VOWEL, CONS, VOWEL, SONOR]
     result = run_through_module(TextPhonotypes('použil', letter_types))

     self.assertEqual(result.get_syllables(), ['po', 'u', 'žil'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL],
             [VOWEL],
             [CONS, VOWEL, SONOR],
         ],
     )
예제 #21
0
 def test_pouze(self):
     # 'pouze' with 'o' given as SUBUNIT is expected to come back as
     # 'pou' + 'ze'.
     letter_types = [CONS, SUBUNIT, VOWEL, CONS, VOWEL]
     result = run_through_module(TextPhonotypes('pouze', letter_types))

     self.assertEqual(result.get_syllables(), ['pou', 'ze'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, SUBUNIT, VOWEL],
             [CONS, VOWEL],
         ],
     )
예제 #22
0
from config_data import ConfigData
from pipe import Pipe
from queue import Queue
from threading import Condition
from count_module import CountModule
from syllabify_module import SyllabifyModule
from word import TextPhonotypes, SyllablesLengths
from constants import SONOR, CONS, VOWEL, SPEC, SUBUNIT
from end import End
import unittest
import time

# Input fixtures: each word together with one phonotype per letter.
words_to_do = [
    TextPhonotypes(
        'farmaceutický',
        [
            CONS, VOWEL, SONOR, SONOR, VOWEL, CONS, SUBUNIT,
            VOWEL, CONS, VOWEL, CONS, CONS, VOWEL,
        ],
    ),
    TextPhonotypes(
        'pouze',
        [CONS, SUBUNIT, VOWEL, CONS, VOWEL],
    ),
    TextPhonotypes(
        'použil',
        [CONS, VOWEL, VOWEL, CONS, VOWEL, SONOR],
    ),
    TextPhonotypes(
        'neurčitý',
        [SONOR, VOWEL, VOWEL, SONOR, CONS, VOWEL, CONS, VOWEL],
    ),
    TextPhonotypes(
        'automatický',
        [
            SUBUNIT, VOWEL, CONS, VOWEL, SONOR, VOWEL,
            CONS, VOWEL, CONS, CONS, VOWEL,
        ],
    ),
    TextPhonotypes(
        'vlna',
        [CONS, VOWEL, SONOR, VOWEL],
    ),
    TextPhonotypes(
        'osm',
        [VOWEL, CONS, VOWEL],
    ),
    TextPhonotypes(
        'bijologije',
        [
            CONS, VOWEL, SONOR, VOWEL, SONOR,
            VOWEL, CONS, VOWEL, SONOR, VOWEL,
        ],
    ),
    TextPhonotypes(
        'srozpadem',
        [CONS, SONOR, VOWEL, CONS, CONS, VOWEL, CONS, VOWEL, SONOR],
    ),
    TextPhonotypes(
        'vědomí',
        [CONS, VOWEL, CONS, VOWEL, SONOR, VOWEL],
    ),
    TextPhonotypes(
        'sex',
        [CONS, VOWEL, CONS],
    ),
]

# Syllable splits, in the same order as words_to_do: syllables[i] is the
# split of the word at words_to_do[i].
syllables = [
    ['fa', 'rma', 'ceu', 'ti', 'cký'],
    ['pou', 'ze'],
    ['po', 'u', 'žil'],
    ['ne', 'ur', 'či', 'tý'],
    ['au', 'to', 'ma', 'ti', 'cký'],
    ['vl', 'na'],
    ['o', 'sm'],
    ['bi', 'jo', 'lo', 'gi', 'je'],
    ['sro', 'zpa', 'dem'],
    ['vě', 'do', 'mí'],
    ['sex'],
]
예제 #23
0
 def test_wrobl(self):
     # 'wrobl' with the final 'l' given as VOWEL is expected to come back
     # as 'wro' + 'bl'.
     letter_types = [SONOR, SONOR, VOWEL, CONS, VOWEL]
     result = run_through_module(TextPhonotypes('wrobl', letter_types))

     self.assertEqual(result.get_syllables(), ['wro', 'bl'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [SONOR, SONOR, VOWEL],
             [CONS, VOWEL],
         ],
     )
예제 #24
0
 def test_zastojnstwa(self):
     # Named with the `test` prefix so that unittest collects and runs it.
     #
     # 'zastojnstwa' is expected to come back as 'za' + 'stojn' + 'stwa'.
     # The input carries one numeric phonotype code per letter (11 in
     # total); it is exactly the flattened form of the expected groups
     # asserted below.
     letter_types = [0, 2, 0, 0, 2, 1, 1, 0, 0, 1, 2]
     word = run_through_module(TextPhonotypes('zastojnstwa', letter_types))

     self.assertEqual(word.get_syllables(), ['za', 'stojn', 'stwa'])
     self.assertEqual(word.get_phonotypes(),
                      [[0, 2], [0, 0, 2, 1, 1], [0, 0, 1, 2]])
예제 #25
0
 def test_vedomi(self):
     # 'vědomí' is expected to come back as 'vě' + 'do' + 'mí'.
     letter_types = [CONS, VOWEL, CONS, VOWEL, SONOR, VOWEL]
     result = run_through_module(TextPhonotypes('vědomí', letter_types))

     self.assertEqual(result.get_syllables(), ['vě', 'do', 'mí'])
     self.assertEqual(
         result.get_phonotypes(),
         [
             [CONS, VOWEL],
             [CONS, VOWEL],
             [SONOR, VOWEL],
         ],
     )