示例#1
0
def apply_T1(word):
    '''Place a syllable boundary in front of every CV-sequence.

    The word is cut into segments by _split_consonants_and_vowels
    (presumably alternating consonant and vowel runs -- confirm against that
    helper). Each consonant-initial segment that is neither the first nor the
    last segment gets a '.' inserted before its final character.

    Returns a 4-tuple:
        - the re-joined word,
        - the number of vowel-initial segments exactly two characters long,
        - the number of vowel-initial segments longer than two characters,
        - the label ' T1', which is returned whether or not anything changed.
    '''
    segments = _split_consonants_and_vowels(word)
    total = len(segments)
    vv_count = 0
    vvv_count = 0

    for index, segment in enumerate(segments):
        head = segment[0]

        # a consonant segment at the very start of the word is left alone
        if index == 0 and is_consonant(head[0]):
            continue

        if is_consonant(head) and index + 1 != total:
            # the last consonant of the run opens the next syllable
            segments[index] = '.'.join((segment[:-1], segment[-1]))

        elif is_vowel(head):
            size = len(segment)

            if size > 2:
                vvv_count += 1

            elif size > 1:
                vv_count += 1

    return ''.join(segments), vv_count, vvv_count, ' T1'
示例#2
0
def apply_T4(word):
    '''Split agglutination diphthongs ending in /u, y/ before -C# or -CCV.

    An agglutination diphthong that ends in /u, y/ usually contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    `word` is expected to be already divided into syllables with '.'.
    Returns a 2-tuple of the (possibly further divided) word and the label
    ' T4' if at least one syllable was split, otherwise ''.
    '''
    syllables = word.split('.')
    total = len(syllables)
    label = ''

    for index, syll in enumerate(syllables):

        # Only consonant-final syllables at odd indices qualify; the
        # even-indexed (first, third, etc.) syllables receive stress (WSP).
        if not is_consonant(syll[-1]) or index % 2 == 0:
            continue

        # The syllable must be word-final or be followed by a consonant.
        if index + 1 != total and not is_consonant(syllables[index + 1][0]):
            continue

        if contains_Vu_diphthong(syll):
            cut = syll.rfind('u')

        elif contains_Vy_diphthong(syll):
            cut = syll.rfind('y')

        else:
            continue

        # the boundary goes directly in front of the last 'u' / 'y'
        syllables[index] = syll[:cut] + '.' + syll[cut:]
        label = ' T4'

    return '.'.join(syllables), label
示例#3
0
文件: v01.py 项目: tsnaomi/finnsyll
def apply_T4(word):  # OPTIMIZE
    '''Split two-character segments ending in 'u' or 'y' in some contexts.

    An agglutination diphthong that ends in /u, y/ usually contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    The word is turned into a mapping of integer positions to segments by
    _split_consonants_and_vowels. A segment at position k that is two
    characters long and ends in 'u' or 'y' becomes 'V.V' when a segment
    exists at k + 2 and one of the following holds:
        - nothing exists at k + 3 and the segment at k + 2 is a single
          consonant;
        - the segment at k + 1 is one character long and the segment at
          k + 3 starts with a consonant;
        - the segment at k + 1 is not one character long, something exists
          at k + 3, and the segment at k + 2 is two characters long.

    Returns the word rebuilt by _compile_dict_into_word.
    '''
    segments = _split_consonants_and_vowels(word)

    for key, seg in segments.iteritems():

        if len(seg) != 2 or not seg.endswith(('u', 'y')):
            continue

        if not segments.get(key + 2, 0):
            continue

        divided = seg[0] + '.' + seg[1]
        third = segments.get(key + 3, 0)

        if not third:
            second = segments[key + 2]

            if len(second) == 1 and is_consonant(second):
                segments[key] = divided

        elif len(segments[key + 1]) == 1:
            if is_consonant(segments[key + 3][0]):
                segments[key] = divided

        elif len(segments[key + 2]) == 2:
            segments[key] = divided

    return _compile_dict_into_word(segments)
示例#4
0
def apply_T4(word):  # TODO: toggle variation
    '''Generate every variant of `word` under the optional T4 split.

    An agglutination diphthong that ends in /u, y/ optionally contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    `word` is expected to be already divided into syllables with '.'. For
    each qualifying syllable both the split and the unsplit form are kept,
    and the cartesian product of all per-syllable choices is produced (split
    forms first).

    Yields 2-tuples of a candidate word and ' T4' if the candidate differs
    from `word`, otherwise ''.
    '''
    syllables = word.split('.')
    total = len(syllables)
    alternatives = []

    for index, syll in enumerate(syllables):
        choices = []

        # Only consonant-final syllables at odd indices qualify; the
        # even-indexed (first, third, etc.) syllables receive stress (WSP).
        if is_consonant(syll[-1]) and index % 2 != 0:
            if index + 1 == total or is_consonant(syllables[index + 1][0]):
                match = u_y_final_diphthongs(syll)

                if match:
                    # boundary one character past the start of group 1
                    cut = match.start(1) + 1
                    choices.append(syll[:cut] + '.' + syll[cut:])

        # the untouched syllable is always an option (rule not applied)
        choices.append(syll)
        alternatives.append(choices)

    for combination in product(*alternatives):
        candidate = '.'.join(combination)

        yield candidate, (' T4' if word != candidate else '')
示例#5
0
def apply_T1(word):
    '''Place a syllable boundary in front of every CV-sequence.

    The word is split on runs of the characters in 'ieAyOauo', e.g.
    'balloon' -> ['b', 'a', 'll', 'oo', 'n']. Every consonant segment that
    is neither first nor last receives a '.':
        - if is_cluster() accepts the segment, the boundary alternates (WSP):
          after the first consonant for the 1st, 3rd, ... medial consonant
          segment (CC > C.C, CCC > C.CC) and in front of the whole segment
          for the 2nd, 4th, ... (CC > .CC, CCC > .CCC);
        - otherwise the boundary goes before the last consonant
          (CC > C.C, CCC > CC.C).

    Returns a 2-tuple of the divided word and ' T1' if it differs from
    `word`, otherwise ''.
    '''
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    final = len(segments) - 1
    medial_seen = 0  # medial consonant segments handled so far

    for index, seg in enumerate(segments):

        if not is_consonant(seg[0]):
            continue

        # word-initial and word-final consonant segments stay intact
        if index == 0 or index == final:
            continue

        if is_cluster(seg):  # WSP
            if medial_seen % 2 == 0:
                segments[index] = seg[0] + '.' + seg[1:]

            else:
                segments[index] = '.' + seg

        # NOTE: a sonorant + cluster sub-rule was sketched here by the
        # original author but never enabled.
        else:
            segments[index] = seg[:-1] + '.' + seg[-1]

        medial_seen += 1

    divided = ''.join(segments)

    return divided, (' T1' if word != divided else '')
示例#6
0
def apply_T1(word):
    '''Place a syllable boundary in front of every CV-sequence.

    The word is split on runs of the characters in 'ieAyOauo', e.g.
    'balloon' -> ['b', 'a', 'll', 'oo', 'n']. Every consonant segment that
    is neither the first nor the last segment gets a '.' before its final
    character.

    Returns a 2-tuple of the divided word and ' T1' if it differs from
    `word`, otherwise ''.
    '''
    pieces = [piece for piece in re.split('([ieAyOauo]+)', word) if piece]
    final = len(pieces) - 1

    def _divide(index, piece):
        # only word-medial consonant segments receive a boundary
        if is_consonant(piece[0]) and 0 < index < final:
            return piece[:-1] + '.' + piece[-1]

        return piece

    divided = ''.join(_divide(i, piece) for i, piece in enumerate(pieces))

    return divided, (' T1' if word != divided else '')
示例#7
0
def apply_T4(word):
    '''Split agglutination diphthongs ending in /u, y/ before -C# or -CCV.

    An agglutination diphthong that ends in /u, y/ usually contains a
    syllable boundary when -C# or -CCV follow, e.g., [lau.ka.us],
    [va.ka.ut.taa].

    `word` is expected to be already divided into syllables with '.'. A
    syllable is split when u_y_final_diphthongs() matches it and is_long()
    rejects the text of the match's first group.

    Returns a 2-tuple of the divided word and ' T4' if it differs from
    `word`, otherwise ''.
    '''
    syllables = word.split('.')
    total = len(syllables)

    for index, syll in enumerate(syllables):

        # Only consonant-final syllables at odd indices qualify; the
        # even-indexed (first, third, etc.) syllables receive stress (WSP).
        if not (is_consonant(syll[-1]) and index % 2 != 0):
            continue

        # The syllable must be word-final or be followed by a consonant.
        if index + 1 != total and not is_consonant(syllables[index + 1][0]):
            continue

        match = u_y_final_diphthongs(syll)

        if match and not is_long(match.group(1)):
            # boundary one character past the start of group 1
            cut = match.start(1) + 1
            syllables[index] = syll[:cut] + '.' + syll[cut:]

    divided = '.'.join(syllables)

    return divided, (' T4' if word != divided else '')
示例#8
0
文件: v01.py 项目: tsnaomi/finnsyll
def apply_T1(word):
    '''Place a syllable boundary in front of every CV-sequence.

    The word is turned into a mapping of integer positions to segments by
    _split_consonants_and_vowels. The segment at position 1 is prefixed with
    '.' when is_consonantal_onset() accepts it. Any other consonant-initial
    segment that has a non-empty successor at the next position gets a '.'
    before its final character.

    Returns the word rebuilt by _compile_dict_into_word.
    '''
    segments = _split_consonants_and_vowels(word)

    for key, seg in segments.iteritems():

        if key == 1 and is_consonantal_onset(seg):
            segments[key] = '.' + seg
            continue

        if is_consonant(seg[0]) and segments.get(key + 1, 0):
            segments[key] = seg[:-1] + '.' + seg[-1]

    return _compile_dict_into_word(segments)
示例#9
0
def apply_T5(word):  # BROKEN
    '''Separate an /i/-final diphthong from a third vowel in (V)VVV.

    If a (V)VVV-sequence contains a VV-sequence that could be an /i/-final
    diphthong, there is a syllable boundary between it and the third vowel,
    e.g., [raa.ois.sa], [huo.uim.me], [la.eis.sa], [sel.vi.äi.si], [tai.an],
    [säi.e], [oi.om.me].

    `word` is expected to be already divided into syllables with '.'.
    Returns a 2-tuple of the (possibly further divided) word and the label
    ' T5' if at least one syllable was split, otherwise ''.

    NOTE(review): the original author flagged this rule as BROKEN; the
    position arithmetic below is kept exactly as written.
    '''
    label = ''
    syllables = word.split('.')

    for index, syll in enumerate(syllables):

        if not contains_VVV(syll):
            continue

        if not any(vv for vv in i_DIPHTHONGS if vv in syll):
            continue

        # start one character before the last 'i'; a result of 0 becomes 2
        cut = syll.rfind('i') - 1

        if cut == 0:
            cut = 2

        # shift two characters right when a consonant precedes the cut
        if is_consonant(syll[cut - 1]):
            cut += 2

        syllables[index] = syll[:cut] + '.' + syll[cut:]
        label = ' T5'

    return '.'.join(syllables), label
示例#10
0
文件: v07.py 项目: tsnaomi/finnsyll
def apply_T1(word, T1E=True):
    '''Insert syllable boundaries around consonant segments (rule T1).

    The word is split on runs of the characters in 'ieAyOauo', e.g.
    'balloon' -> ['b', 'a', 'll', 'oo', 'n'], and each consonant segment is
    handled by exactly one sub-rule (a-f, described inline).

    Parameters:
        word -- the string to divide.
        T1E  -- when true (the default), the optional sub-rule T1E may apply.

    Returns a 2-tuple of the divided word and a label. The label is '' when
    the word is unchanged; otherwise it is ' T1' followed by the letters of
    all sub-rules that fired, in alphabetical order.
    '''
    segments = [seg for seg in re.split('([ieAyOauo]+)', word) if seg]
    total = len(segments)

    # letters of the sub-rules that applied
    fired = set()

    # bumped for every non-initial consonant segment; an even value marks
    # an even (unstressed) syllable
    count = 1

    for index, seg in enumerate(segments):

        if not is_consonant(seg[0]):
            continue

        # T1B
        # A word-initial consonant segment stays whole and forms the onset
        # of the first syllable:
        # CCV > #CCV
        if index == 0:
            fired.add('b')
            continue

        count += 1
        unstressed = count % 2 == 0

        # T1C
        # A word-final consonant segment stays whole and forms the coda of
        # the final syllable:
        # VCC# > VCC#
        if index + 1 == total:
            fired.add('c')

        # T1D
        # The segment is accepted by is_cluster() (described by the original
        # author as a bare "Finnish" consonant cluster; this is the /kr/
        # rule). When `unstressed` holds, the first consonant closes the
        # previous syllable; otherwise the whole cluster opens the current
        # one:
        # 'VCCV > 'VC.CV,  VCCV > V.CCV
        elif is_cluster(seg):
            fired.add('d')

            if unstressed:
                segments[index] = seg[0] + '.' + seg[1:]

            else:
                segments[index] = '.' + seg

        elif is_cluster(seg[1:]):

            # T1E (optional)
            # A cluster preceded by a sonorant consonant: when `unstressed`
            # holds, the sonorant and the first consonant of the cluster
            # close the previous syllable and the rest opens the current
            # one:
            # 'VlCC > VlC.C
            if T1E and is_sonorant(seg[0]) and unstressed:
                fired.add('e')
                segments[index] = seg[:2] + '.' + seg[2:]

            # T1F
            # A cluster preceded by any other consonant: that consonant
            # closes the previous syllable and the cluster opens the current
            # one:
            # VCkr > VC.kr
            else:
                fired.add('f')
                segments[index] = seg[0] + '.' + seg[1:]

        # T1A
        # There is a syllable boundary in front of every CV sequence:
        # VCV > V.CV, CCV > C.CV
        else:
            segments[index] = seg[:-1] + '.' + seg[-1]
            fired.add('a')

    divided = ''.join(segments)
    label = ' T1' + ''.join(sorted(fired)) if word != divided else ''

    return divided, label