def apply_T1(word):
    '''Insert a syllable boundary in front of every CV-sequence.

    The word is cut into alternating consonant and vowel runs, and a '.' is
    placed inside (or in front of) each word-medial consonant run.  Runs at
    the very start or very end of the word are never touched.

    Returns a (syllabified_word, rule) tuple, where rule is ' T1' if the
    word was changed and '' otherwise.
    '''
    # alternate consonant/vowel runs: 'balloon' -> ['b', 'a', 'll', 'oo', 'n']
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    last = len(segments) - 1

    # number of word-medial consonant runs processed so far; its parity
    # decides how a cluster is divided
    medial_seen = 0

    for idx, seg in enumerate(segments):

        # vowel runs never receive a boundary of their own
        if not is_consonant(seg[0]):
            continue

        # word-initial and word-final consonant runs stay intact
        if idx == 0 or idx == last:
            continue

        if is_cluster(seg):  # WSP
            # even count: CC > C.C, CCC > C.CC
            # odd count:  CC > .CC, CCC > .CCC
            cut = 1 if medial_seen % 2 == 0 else 0

        # NOTE: a separate treatment of sonorant + cluster sequences was
        # drafted at this point but is not enabled.

        else:
            # boundary before the last consonant: CC > C.C, CCC > CC.C
            cut = len(seg) - 1

        segments[idx] = seg[:cut] + '.' + seg[cut:]
        medial_seen += 1

    syllabified = ''.join(segments)

    if syllabified == word:
        return syllabified, ''

    return syllabified, ' T1'
def apply_T1(word, T1E=True):
    '''Insert syllable boundaries around word-medial consonant runs (T1).

    The word is cut into alternating consonant and vowel runs.  Each
    consonant run is then handled by exactly one sub-rule:

    T1B  word-initial run: left intact as the onset of the first syllable
         (CCV > #CCV).
    T1C  word-final run: left intact as the coda of the final syllable
         (VCC# > VCC#).
    T1D  word-medial run that is itself a "Finnish" cluster: if the current
         syllable is unstressed (i.e., the previous one is stressed), the
         first consonant closes the previous syllable; otherwise the whole
         cluster opens the current syllable ('VCCV > 'VC.CV, VCCV > V.CCV).
    T1E  (optional, enabled by the T1E argument) sonorant consonant followed
         by a cluster, current syllable unstressed: the sonorant and the
         first cluster consonant close the previous syllable ('VlCC > VlC.C).
    T1F  any other consonant followed by a cluster: the first consonant
         closes the previous syllable and the cluster opens the current one
         (VCkr > VC.kr).
    T1A  everything else: the boundary goes in front of the last consonant
         (VCV > V.CV, CCV > C.CV).

    Returns a (syllabified_word, rule) tuple.  rule is '' if the word is
    unchanged; otherwise it is ' T1' followed by the lower-case letters of
    all sub-rules that fired, in alphabetical order.
    '''
    # alternate consonant/vowel runs: 'balloon' -> ['b', 'a', 'll', 'oo', 'n']
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    final = len(segments) - 1

    # letters of the sub-rules that have applied so far
    fired = set()

    # bumped once per non-initial consonant run; an even value indicates an
    # even syllable
    parity = 1

    for idx, seg in enumerate(segments):

        # vowel runs are not touched
        if not is_consonant(seg[0]):
            continue

        # T1B: word-initial consonants form the first onset
        if idx == 0:
            fired.add('b')
            continue

        parity += 1

        # True if the current syllable is unstressed, else False
        unstressed = parity % 2 == 0

        # T1C: word-final consonants form the last coda
        if idx == final:
            fired.add('c')

        # T1D: bare cluster, divided according to stress
        elif is_cluster(seg):
            fired.add('d')
            if unstressed:
                segments[idx] = seg[0] + '.' + seg[1:]
            else:
                segments[idx] = '.' + seg

        # T1A: no cluster after the first consonant either, so the boundary
        # falls in front of the last consonant
        elif not is_cluster(seg[1:]):
            segments[idx] = seg[:-1] + '.' + seg[-1]
            fired.add('a')

        # T1E (optional): sonorant + cluster after a stressed syllable
        elif T1E and is_sonorant(seg[0]) and unstressed:
            fired.add('e')
            segments[idx] = seg[:2] + '.' + seg[2:]

        # T1F: consonant + cluster
        else:
            fired.add('f')
            segments[idx] = seg[0] + '.' + seg[1:]

    syllabified = ''.join(segments)

    if syllabified == word:
        return syllabified, ''

    suffix = ''.join(letter for letter in 'abcdef' if letter in fired)

    return syllabified, ' T1' + suffix