def apply_T1(word):
    '''Insert a syllable boundary in front of every CV-sequence.

    Returns a 4-tuple: the word with '.' boundaries inserted, the number of
    vowel segments exactly two characters long, the number of vowel segments
    longer than two characters, and the rule label (always ' T1' in this
    variant).
    '''
    segments = _split_consonants_and_vowels(word)
    total = len(segments)
    vv_count = 0
    vvv_count = 0

    for idx, seg in enumerate(segments):
        head = seg[0]

        # a consonant segment in first position is left untouched
        if idx == 0 and is_consonant(head[0]):
            continue

        if is_consonant(head) and idx + 1 != total:
            # non-final consonant segment: its last consonant opens the next
            # syllable (VCV > V.CV, VCCV > VC.CV)
            segments[idx] = seg[:-1] + '.' + seg[-1]

        elif is_vowel(head):
            size = len(seg)

            if size > 2:
                vvv_count += 1

            elif size > 1:
                vv_count += 1

    return ''.join(segments), vv_count, vvv_count, ' T1'
def apply_T4(word):
    '''Split /u, y/-final diphthongs that precede -C# or -CCV.

    An agglutination diphthong that ends in /u, y/ usually contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    Takes a word whose syllables are separated by '.' and returns a tuple of
    the (possibly further split) word and ' T4' if the rule applied, else ''.
    '''
    syllables = word.split('.')
    rule = ''

    for idx, syll in enumerate(syllables):
        # only consonant-final syllables at odd indices qualify: the
        # idx % 2 test keeps the rule away from the first, third, etc.
        # syllables, which receive stress (WSP)
        if not is_consonant(syll[-1]) or idx % 2 == 0:
            continue

        # the syllable must be word-final or be followed by a consonant
        if idx + 1 != len(syllables) \
                and not is_consonant(syllables[idx + 1][0]):
            continue

        if contains_Vu_diphthong(syll):
            cut = syll.rfind('u')

        elif contains_Vy_diphthong(syll):
            cut = syll.rfind('y')

        else:
            continue

        # the boundary goes directly in front of the final u/y
        syllables[idx] = syll[:cut] + '.' + syll[cut:]
        rule = ' T4'

    return '.'.join(syllables), rule
def apply_T4(word):  # OPTIMIZE
    '''An agglutination diphthong that ends in /u, y/ usually contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    Dict-based variant: the word is split by _split_consonants_and_vowels,
    two-character segments ending in 'u' or 'y' are split as 'V.V' depending
    on the segments that follow, and the result is reassembled with
    _compile_dict_into_word. Returns the resulting word.
    '''
    # NOTE(review): this definition was recovered from a whitespace-collapsed
    # line; the attachment of the two `elif` branches below is the most
    # plausible reading, not a certainty -- confirm against version control.
    # presumably WORD maps consecutive integer positions to segments; verify
    # against _split_consonants_and_vowels
    WORD = _split_consonants_and_vowels(word)

    # NOTE(review): dict.iteritems() exists on Python 2 only
    for k, v in WORD.iteritems():

        # candidate: a two-character segment ending in u or y
        if len(v) == 2 and v.endswith(('u', 'y')):

            # a (non-empty) segment exists two positions further on
            if WORD.get(k + 2, 0):

                # ... and it is the last one: split only if it is a single
                # consonant
                if not WORD.get(k + 3, 0):
                    if len(WORD[k + 2]) == 1 and is_consonant(WORD[k + 2]):
                        WORD[k] = v[0] + '.' + v[1]

                # the next segment is one character long and a segment exists
                # at k + 3: split if that segment starts with a consonant
                elif len(WORD[k + 1]) == 1 and WORD.get(k + 3, 0):
                    if is_consonant(WORD[k + 3][0]):
                        WORD[k] = v[0] + '.' + v[1]

                # otherwise split when the segment at k + 2 has two characters
                elif len(WORD[k + 2]) == 2:
                    WORD[k] = v[0] + '.' + v[1]

    word = _compile_dict_into_word(WORD)

    return word
def apply_T4(word):  # TODO: toggle variation
    '''Yield every variant of the word under the optional T4 rule.

    An agglutination diphthong that ends in /u, y/ optionally contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    Takes a word whose syllables are separated by '.'. For each combination
    of applying or not applying the rule to the eligible syllables, yields a
    tuple of the variant and ' T4' if it differs from the input, else ''.
    '''
    syllables = word.split('.')
    options = []

    for idx, syll in enumerate(syllables):
        variants = []

        # the idx % 2 test keeps the rule away from the first, third, etc.
        # syllables, which receive stress (WSP)
        if is_consonant(syll[-1]) and idx % 2 != 0:
            word_final = idx + 1 == len(syllables)

            if word_final or is_consonant(syllables[idx + 1][0]):
                match = u_y_final_diphthongs(syll)

                if match:
                    # the boundary goes one character into the matched group
                    cut = match.start(1) + 1
                    variants.append(syll[:cut] + '.' + syll[cut:])

        # the unsplit syllable is always an option (rule not applied); it is
        # listed after the split form
        variants.append(syll)
        options.append(variants)

    for combination in product(*options):
        candidate = '.'.join(combination)
        yield candidate, (' T4' if word != candidate else '')
def apply_T1(word):
    '''Insert a syllable boundary in front of every CV-sequence.

    Word-medial consonant segments recognised by is_cluster are split
    according to how many medial consonant segments came before them (WSP).

    Returns a tuple of the syllabified word and ' T1' if the word changed,
    else ''.
    '''
    # alternate consonant and vowel runs:
    # 'balloon' -> ['b', 'a', 'll', 'oo', 'n']
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    total = len(segments)

    # number of medial consonant segments handled so far
    handled = 0

    for idx, seg in enumerate(segments):

        # a consonant segment in first position is left untouched
        if idx == 0 and is_consonant(seg[0]):
            continue

        # vowel segments and the final segment are left untouched
        if not is_consonant(seg[0]) or idx + 1 == total:
            continue

        if not is_cluster(seg):
            split = seg[:-1] + '.' + seg[-1]  # CC > C.C, CCC > CC.C

        elif handled % 2 == 0:  # WSP
            split = seg[0] + '.' + seg[1:]  # CC > C.C, CCC > C.CC

        else:
            split = '.' + seg  # CC > .CC, CCC > .CCC

        segments[idx] = split
        handled += 1

    result = ''.join(segments)

    return result, (' T1' if word != result else '')
def apply_T1(word):
    '''Insert a syllable boundary in front of every CV-sequence.

    Returns a tuple of the syllabified word and ' T1' if the word changed,
    else ''.
    '''
    # alternate consonant and vowel runs:
    # 'balloon' -> ['b', 'a', 'll', 'oo', 'n']
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    last = len(segments) - 1

    for idx, seg in enumerate(segments):

        # a consonant segment in first position is left untouched
        if idx == 0 and is_consonant(seg[0]):
            continue

        # non-final consonant segment: the boundary goes in front of its last
        # consonant
        if is_consonant(seg[0]) and idx != last:
            segments[idx] = '.'.join((seg[:-1], seg[-1]))

    result = ''.join(segments)

    if word != result:
        return result, ' T1'

    return result, ''
def apply_T4(word):
    '''Split short /u, y/-final diphthongs that precede -C# or -CCV.

    An agglutination diphthong that ends in /u, y/ usually contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    Takes a word whose syllables are separated by '.' and returns a tuple of
    the resulting word and ' T4' if the word changed, else ''.
    '''
    syllables = word.split('.')

    for idx, syll in enumerate(syllables):
        # only consonant-final syllables at odd indices qualify: the
        # idx % 2 test keeps the rule away from the first, third, etc.
        # syllables, which receive stress (WSP)
        if not is_consonant(syll[-1]) or idx % 2 == 0:
            continue

        # the syllable must be word-final or be followed by a consonant
        if idx + 1 != len(syllables) \
                and not is_consonant(syllables[idx + 1][0]):
            continue

        match = u_y_final_diphthongs(syll)

        # no match, or the matched group is rejected by is_long: leave as is
        if not match or is_long(match.group(1)):
            continue

        # the boundary goes one character into the matched group
        cut = match.start(1) + 1
        syllables[idx] = syll[:cut] + '.' + syll[cut:]

    result = '.'.join(syllables)

    return result, (' T4' if word != result else '')
def apply_T1(word):
    '''Insert a syllable boundary in front of every CV-sequence.

    Dict-based variant: the word is split by _split_consonants_and_vowels,
    boundaries are inserted into the individual segments, and the result is
    reassembled with _compile_dict_into_word. Returns the resulting word.
    '''
    # presumably maps consecutive integer positions, starting at 1, to
    # consonant/vowel segments; verify against _split_consonants_and_vowels
    WORD = _split_consonants_and_vowels(word)

    # items() rather than the Python-2-only iteritems(): it is available on
    # both Python 2 and 3, and since only values of existing keys are
    # reassigned, updating WORD while iterating is safe on either version
    for k, v in WORD.items():

        # a segment at position 1 accepted by is_consonantal_onset gets the
        # boundary in front of it
        if k == 1 and is_consonantal_onset(v):
            WORD[k] = '.' + v

        # consonant segment with a (non-empty) segment after it: the boundary
        # goes in front of its last consonant
        elif is_consonant(v[0]) and WORD.get(k + 1, 0):
            WORD[k] = v[:-1] + '.' + v[-1]

    return _compile_dict_into_word(WORD)
def apply_T5(word):  # BROKEN
    '''Separate an /i/-final diphthong from a third vowel.

    If a (V)VVV-sequence contains a VV-sequence that could be an /i/-final
    diphthong, there is a syllable boundary between it and the third vowel,
    e.g., [raa.ois.sa], [huo.uim.me], [la.eis.sa], [sel.vi.äi.si], [tai.an],
    [säi.e], [oi.om.me].

    Takes a word whose syllables are separated by '.' and returns a tuple of
    the resulting word and ' T5' if the rule applied, else ''.
    '''
    rule = ''
    syllables = word.split('.')

    for idx, syll in enumerate(syllables):

        if not contains_VVV(syll):
            continue

        # at least one of the i_DIPHTHONGS entries must occur in the syllable
        if not any(diph for diph in i_DIPHTHONGS if diph in syll):
            continue

        # one position before the last 'i'; 2 is used instead when that
        # position would be 0
        cut = syll.rfind('i') - 1 or 2

        # move the cut two positions right if a consonant precedes it
        if is_consonant(syll[cut - 1]):
            cut += 2

        syllables[idx] = syll[:cut] + '.' + syll[cut:]
        rule = ' T5'

    return '.'.join(syllables), rule
def apply_T1(word, T1E=True):
    '''Insert syllable boundaries around consonant segments (rule T1).

    T1E toggles the optional sub-rule T1E. Returns a tuple of the
    syllabified word and a label: ' T1' followed by the letters (a-f, in
    alphabetical order) of the sub-rules that fired, or '' if the word is
    unchanged.
    '''
    # alternate consonant and vowel runs:
    # 'balloon' -> ['b', 'a', 'll', 'oo', 'n']
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    total = len(segments)

    # letters of the sub-rules that have applied
    applied = set()

    # a count divisible by 2 indicates an even syllable
    syllable = 1

    for idx, seg in enumerate(segments):

        # T1B
        # If there is a consonant cluster word-initially, the entire cluster
        # forms the onset of the first syllable:
        # CCV > #CCV
        if idx == 0 and is_consonant(seg[0]):
            applied.add('b')
            continue

        # vowel segments are never modified
        if not is_consonant(seg[0]):
            continue

        syllable += 1

        # True if the current syllable is unstressed, else False
        unstressed = syllable % 2 == 0

        # T1C
        # If there is a consonant cluster word-finally, the entire cluster
        # forms the coda of the final syllable:
        # VCC# > VCC#
        if idx + 1 == total:
            applied.add('c')

        # T1D
        # If there is a bare "Finnish" consonant cluster word-medially and
        # the previous syllable receives stress, the first consonant of the
        # cluster forms the coda of the previous syllable (to create a heavy
        # syllable); otherwise, the whole cluster forms the onset of the
        # current syllable (this is the /kr/ rule):
        # 'VCCV > 'VC.CV, VCCV > V.CCV
        elif is_cluster(seg):
            applied.add('d')

            if unstressed:
                segments[idx] = seg[0] + '.' + seg[1:]

            else:
                segments[idx] = '.' + seg

        elif is_cluster(seg[1:]):

            # T1E (optional)
            # If there is a word-medial "Finnish" consonant cluster that is
            # preceded by a sonorant consonant, if the previous syllable
            # receives stress, the sonorant consonant and the first consonant
            # of the cluster form the coda of the previous syllable, and the
            # remainder of the cluster forms the onset of the current
            # syllable:
            # 'VlCC > VlC.C
            if T1E and is_sonorant(seg[0]) and unstressed:
                applied.add('e')
                segments[idx] = seg[:2] + '.' + seg[2:]

            # T1F
            # If there is a word-medial "Finnish" cluster that follows a
            # consonant, that first consonant forms the coda of the previous
            # syllable, and the cluster forms the onset of the current
            # syllable:
            # VCkr > VC.kr
            else:
                applied.add('f')
                segments[idx] = seg[0] + '.' + seg[1:]

        # T1A
        # There is a syllable boundary in front of every CV sequence:
        # VCV > V.CV, CCV > C.CV
        else:
            segments[idx] = seg[:-1] + '.' + seg[-1]
            applied.add('a')

    result = ''.join(segments)

    if word == result:
        return result, ''

    # sorting restores the fixed a-f order of the label
    return result, ' T1' + ''.join(sorted(applied))