def apply_T1(word):
    '''T1: there is a syllable boundary in front of every CV-sequence.

    The word is broken into pieces by _split_consonants_and_vowels() and the
    pieces are visited in order:

    * a first piece that starts with a consonant is left untouched;
    * any other consonant-initial piece that is not the last piece gets a
      '.' inserted in front of its final character;
    * a vowel-initial piece is only counted: pieces longer than two
      characters bump the VVV counter, pieces of exactly two characters bump
      the VV counter.

    Returns a 4-tuple (word, vv_count, vvv_count, rule), where rule is
    always ' T1', whether or not a boundary was inserted.
    '''
    pieces = _split_consonants_and_vowels(word)
    vv_total = 0
    vvv_total = 0

    for pos, piece in enumerate(pieces):
        head = piece[0]

        # Word-initial consonant piece: nothing precedes it, so no boundary.
        if pos == 0 and is_consonant(head[0]):
            continue

        if is_consonant(head) and pos != len(pieces) - 1:
            # Only the last consonant of the piece joins the following vowel.
            pieces[pos] = piece[:-1] + '.' + piece[-1]

        elif is_vowel(head):
            size = len(piece)

            if size >= 3:
                vvv_total += 1

            elif size == 2:
                vv_total += 1

    return ''.join(pieces), vv_total, vvv_total, ' T1'
def apply_T6(word):
    '''T6: if a VVV-sequence contains a long vowel, there is a syllable
    boundary between it and the third vowel, e.g. [kor.ke.aa], [yh.ti.öön],
    [ruu.an], [mää.yt.te].

    The word is expected to carry the '.' boundaries inserted so far; each
    dot-delimited chunk for which contains_VVV() holds is searched for the
    entries of LONG_VOWELS. Only matches at an offset greater than zero are
    considered, and the first such match (in LONG_VOWELS order) decides where
    the new '.' goes.

    Returns (word, rule), where rule is ' T6' if a chunk was rewritten and
    '' otherwise.
    '''
    # NOTE(review): this file contains a second apply_T6 definition that
    # returns a bare string; whichever is defined later wins -- confirm which
    # one is meant to be live.
    rule = ''
    chunks = word.split('.')

    for idx, chunk in enumerate(chunks):
        if not contains_VVV(chunk):
            continue

        offsets = [chunk.find(long_v) for long_v in LONG_VOWELS]
        hits = [off for off in offsets if off > 0]

        if not hits:
            continue

        start = hits[0]
        end = start + 2
        rule = ' T6'

        if end == len(chunk) or is_vowel(chunk[end]):
            # Boundary after the long vowel.
            # TODO: when the long vowel ends the chunk this leaves a
            # trailing '.' on the chunk.
            cut = end

        else:
            # Boundary in front of the long vowel.
            cut = start

        chunks[idx] = chunk[:cut] + '.' + chunk[cut:]

    # TODO: strip() only removes dots at the very edges of the whole word.
    return '.'.join(chunks).strip('.'), rule
def apply_T7(word):
    '''T7: if a VVV-sequence does not contain a potential /i/-final
    diphthong, there is a syllable boundary between the second and third
    vowels, e.g. [kau.an], [leu.an], [kiu.as].

    Every segment produced by _split_consonants_and_vowels() that is exactly
    three characters long and starts with a vowel is split after its second
    character. This function does not itself test for /i/-final diphthongs.

    Returns the word rebuilt by _compile_dict_into_word().
    '''
    # NOTE(review): this file contains a second apply_T7 definition that
    # returns a (word, rule) tuple; whichever is defined later wins --
    # confirm which one is meant to be live.
    segments = _split_consonants_and_vowels(word)

    for key, segment in segments.iteritems():
        if len(segment) != 3 or not is_vowel(segment[0]):
            continue

        segments[key] = '.'.join((segment[:2], segment[2:]))

    return _compile_dict_into_word(segments)
def apply_T2(word):
    '''T2: there is a syllable boundary within a sequence VV of two
    nonidentical vowels that are not a genuine diphthong, e.g., [ta.e],
    [ko.et.taa].

    Segments come from _split_consonants_and_vowels(). A segment is split
    between its two characters when is_diphthong() rejects it, it is exactly
    two characters long, it starts with a vowel, and the two characters
    differ.

    Returns the word rebuilt by _compile_dict_into_word().
    '''
    segments = _split_consonants_and_vowels(word)

    for key, segment in segments.iteritems():
        # Genuine diphthongs stay together.
        if is_diphthong(segment):
            continue

        is_vowel_pair = len(segment) == 2 and is_vowel(segment[0])

        # Identical vowels form a long vowel and are not split here.
        if is_vowel_pair and segment[0] != segment[1]:
            segments[key] = segment[0] + '.' + segment[1]

    return _compile_dict_into_word(segments)
def apply_T7(word):
    '''T7: if a VVV-sequence does not contain a potential /i/-final
    diphthong, there is a syllable boundary between the second and third
    vowels, e.g. [kau.an], [leu.an], [kiu.as].

    The word is expected to carry the '.' boundaries inserted so far. In each
    dot-delimited chunk for which contains_VVV() holds, the first run of
    three consecutive vowels is located and a '.' is inserted in front of
    its third vowel.

    Returns (word, rule), where rule is ' T7' if a boundary was inserted and
    '' otherwise.
    '''
    # NOTE(review): this file contains a second apply_T7 definition that
    # returns a bare string; whichever is defined later wins -- confirm which
    # one is meant to be live.
    T7 = ''
    WORD = word.split('.')

    for i, v in enumerate(WORD):

        if contains_VVV(v):
            # The previous implementation enumerated the *reversed* chunk but
            # sliced the forward chunk with that index, and never stopped at
            # the first hit. That only put the boundary in the right place
            # when the chunk had as many consonants before the vowels as
            # after them ('kauan'); 'kaua' became 'ka.ua'. Scan forward
            # instead and cut in front of the third vowel of the run.
            # Assumes contains_VVV() agrees with is_vowel() on what counts as
            # a vowel; if no run is found the chunk is left untouched.
            run = 0

            for I, V in enumerate(v):
                run = run + 1 if is_vowel(V) else 0

                if run == 3:
                    WORD[i] = v[:I] + '.' + v[I:]
                    T7 = ' T7'
                    break

    word = '.'.join(WORD)

    return word, T7
def apply_T5(word):
    '''T5: if a (V)VVV sequence contains a VV sequence that could be an
    /i/-final diphthong, there is a syllable boundary between it and the
    third vowel, e.g., [raa.ois.sa], [huo.uim.me], [la.eis.sa],
    [sel.vi.äi.si], [tai.an], [säi.e], [oi.om.me].

    Iterates over the match objects yielded by
    i_final_diphthong_vvv_sequences() for the unmodified word. For each
    match the later of the start offsets of groups 1 and 2 is taken as the
    anchor; the '.' goes two characters past the anchor when the character
    there is a vowel, otherwise directly at the anchor. An anchor at offset
    zero with no following vowel is skipped, since it would put a '.' at the
    very start of the word.

    Returns (word, rule), where rule is ' T5' if the word changed and ''
    otherwise.
    '''
    # NOTE(review): this file contains a second apply_T5 definition that
    # returns a bare string; whichever is defined later wins -- confirm which
    # one is meant to be live.
    result = word
    inserted = 0  # dots added so far; shifts offsets measured on `word`

    for match in i_final_diphthong_vvv_sequences(result):
        anchor = max(match.start(1), match.start(2))
        after = anchor + 2

        if after < len(word) and is_vowel(word[after]):
            shift = 2
        else:
            shift = 0

        if anchor == 0 and shift == 0:
            continue

        cut = anchor + shift + inserted
        result = result[:cut] + '.' + result[cut:]
        inserted += 1

    if result != word:
        return result, ' T5'

    return result, ''
def apply_T6(word):
    '''T6: if a VVV-sequence contains a long vowel, there is a syllable
    boundary between it and the third vowel, e.g. [kor.ke.aa], [yh.ti.öön],
    [ruu.an], [mää.yt.te].

    Every segment produced by _split_consonants_and_vowels() that is exactly
    three characters long and starts with a vowel is searched for the entries
    of LONG_VOWELS. The first entry found (in LONG_VOWELS order) decides the
    split: a long vowel at the start of the segment is followed by the
    boundary ('uua' -> 'uu.a'), a long vowel later in the segment is preceded
    by it ('eaa' -> 'e.aa').

    Returns the word rebuilt by _compile_dict_into_word().
    '''
    # NOTE(review): this file contains a second apply_T6 definition that
    # returns a (word, rule) tuple; whichever is defined later wins --
    # confirm which one is meant to be live.
    WORD = _split_consonants_and_vowels(word)

    # items() exists on both Python 2 and 3 dicts; only values of existing
    # keys are replaced, so mutating while iterating is safe.
    for k, v in WORD.items():

        if len(v) == 3 and is_vowel(v[0]):
            # str.find() returns -1 for "absent" and 0 for "at the start".
            # The previous '> 0' filter, the any() truth test and the
            # int-to-character comparison 'vv == v[0]' together meant a
            # segment-initial long vowel was never split.
            offsets = [v.find(long_v) for long_v in LONG_VOWELS]
            hits = [off for off in offsets if off >= 0]

            if hits:
                start = hits[0]

                if start == 0:
                    WORD[k] = v[:2] + '.' + v[2:]

                else:
                    WORD[k] = v[:start] + '.' + v[start:]

    word = _compile_dict_into_word(WORD)

    return word
def apply_T5(word):
    '''T5: if a (V)VVV-sequence contains a VV-sequence that could be an
    /i/-final diphthong, there is a syllable boundary between it and the
    third vowel, e.g., [raa.ois.sa], [huo.uim.me], [la.eis.sa],
    [sel.vi.äi.si], [tai.an], [säi.e], [oi.om.me].

    Every segment produced by _split_consonants_and_vowels() that is at
    least three characters long and starts with a vowel is searched for the
    entries of i_DIPHTHONGS. The first entry found (in i_DIPHTHONGS order)
    decides the split: a diphthong at the start of the segment is followed by
    the boundary ('aia' -> 'ai.a'), a diphthong later in the segment is
    preceded by it ('aaoi' -> 'aa.oi').

    Returns the word rebuilt by _compile_dict_into_word().
    '''
    # NOTE(review): this file contains a second apply_T5 definition that
    # returns a (word, rule) tuple; whichever is defined later wins --
    # confirm which one is meant to be live.
    WORD = _split_consonants_and_vowels(word)

    # items() exists on both Python 2 and 3 dicts; only values of existing
    # keys are replaced, so mutating while iterating is safe.
    for k, v in WORD.items():

        if len(v) >= 3 and is_vowel(v[0]):
            # str.find() returns -1 for "absent" and 0 for "at the start".
            # The previous '> 0' filter, the any() truth test and the
            # int-to-character comparison 'vv == v[0]' together meant a
            # segment-initial diphthong ([tai.an], [oi.om.me]) was never
            # split.
            offsets = [v.find(diphthong) for diphthong in i_DIPHTHONGS]
            hits = [off for off in offsets if off >= 0]

            if hits:
                start = hits[0]

                if start == 0:
                    WORD[k] = v[:2] + '.' + v[2:]

                else:
                    WORD[k] = v[:start] + '.' + v[start:]

    word = _compile_dict_into_word(WORD)

    return word