def cluster_fact(cluster_list, phenome):
     current_cluster = cluster_list.pop()
     #print(current_cluster)  # debug
     #print(phenome)  # debug
     '''Consonants must be grouped together into clusters '''
     if current_cluster.type() == Consonant and type(
             phenome) == Consonant or current_cluster.type() == None:
         # Adjacent phenomes of type consonant belong to the same cluster, if the
         # current cluster.last_phenome == None that means it's empty
         # update current cluster
         ## AC 2017-08-12: provided it's not NG (should not be clustered)
         if NG in current_cluster.get_phoneme_string():
             # create new cluster add phenome to it and return
             cluster_list.append(current_cluster)
             cluster_list.append(Cluster(phenome))
         else:
             current_cluster.add_phenome(phenome)
             # append to cluster list
             cluster_list.append(current_cluster)
         # return cluster list
         return cluster_list
     else:
         # create new cluster add phenome to it and return
         cluster_list.append(current_cluster)
         cluster_list.append(Cluster(phenome))
         return cluster_list
def onset_rules(cluster):
    ''' checks if the cluster is a valid onset or whther it needs to be split'''

    phonemes = map(str, cluster.get_phoneme())
    #print('onset rules')  # debug
    phonemelist = list(
        phonemes
    )  # AC 2017-09-05: grabbed list of phonemes to move away from Py3 map problem, and strip trailing spaces
    list_of_phonemes = []
    for phone in phonemelist:
        list_of_phonemes.append(phone.rstrip())
    #print(list_of_phonemes)  # debug
    coda_cluster = Cluster()

    def _split_and_update(phoneme,
                          phonemes=list_of_phonemes,
                          coda_cluster=coda_cluster):
        #_get index of phoneme
        index = phonemes.index(phoneme)
        # split on phoneme and add tail coda cluster
        tail = cluster.phoneme_list[:index]
        # remaining phonemes
        #head = cluster.phoneme_list[index+1:]
        head = cluster.phoneme_list[index:]
        #extend list
        coda_cluster.phoneme_list.extend(tail)
        #update cluster
        cluster.phoneme_list = head
        #update string list
        #phonemes = phonemes[index+1:]
        phonemes = phonemes[index:]
        return (phonemes, coda_cluster)

    def _remove_and_update(phonemes=list_of_phonemes,
                           coda_cluster=coda_cluster):
        head = cluster.phoneme_list[0]
        rest = cluster.phoneme_list[1:]
        #extend list
        coda_cluster.phoneme_list.extend([head])
        #update cluster
        cluster.phoneme_list = rest
        #update string list
        phonemes = phonemes[1:]
        return (phonemes, coda_cluster)

    # rule 1 - /NG/ cannot exist in a valid onset
    # does /NG? exist? split on NG add NG to cod
    # AC tests: ringing
    #if NG in ' '.join(phonemes):
    if NG in list_of_phonemes:
        #phonemes, coda_cluster = _split_and_update(NG)
        #print("onset rule 1")
        list_of_phonemes, coda_cluster = _remove_and_update(
            'NG'
        )  # AC 2017-08-12: corrected to remove_and_update; 2017-09-05: added speech marks to phoneme

    # rule 2a - no affricates in complex onsets
    # /CH/ exist? split on affricate
    # AC tests:
    #if CH in ' '.join(phonemes):
    if CH in list_of_phonemes:
        #print("onset rule 2a")
        list_of_phonemes, coda_cluster = _split_and_update('CH')

    # rule 2b - no affricates in complex onsets
    # /JH/ exist? split on affricate
    # AC tests:
    # if JH in ' '.join(phonemes):
    if JH in list_of_phonemes:
        #print("onset rule 2b")
        list_of_phonemes, coda_cluster = _split_and_update('JH')

    # rule 3 - first consonant in a complex onset must be obstruent
    # if first consonant stop or fricative or nasal
    # AC tests:
    #if len(list(phonemes)) > 1 and not phonemes[0] in [B,D,G,K,P,T,DH,F,S,SH,TH,V,ZH,M,N]:
    if len(list_of_phonemes) > 1 and not list_of_phonemes[0] in [
            B, D, G, K, P, T, DH, F, S, SH, TH, V, ZH, M, N
    ]:
        #print("onset rule 3")
        list_of_phonemes, coda_cluster = _remove_and_update()

    # rule 4 - second consonant in a complex onset must be a voiced obstruent
    # if not OBSTRUENT and VOICED? split on second consonant
    # AC tests: describe (added check for 0=S), attract & playground (added 1=R), amused & therapeutic (added 1=Y)
    #if len(list(phonemes)) > 1 and not phonemes[1] in [B,M,V,D,N,Z,ZH]:
    if len(list_of_phonemes) > 1 and not list_of_phonemes[
            0] == S and not list_of_phonemes[1] in [
                B, M, V, D, N, Z, ZH, R, Y
            ]:
        #print("onset rule 4")
        list_of_phonemes, coda_cluster = _remove_and_update()

    # rule 5 - if first consonant in a complex onset is not /s/
    # the second consonant must be liquid or glide /L/ /R/ /W/ /Y/
    # AC tests:
    #if len(list(phonemes)) > 1 and not phonemes[0] == S and not phonemes[1] in [L,R,W,Y]:
    if len(list_of_phonemes) > 1 and not list_of_phonemes[
            0] == S and not list_of_phonemes[1] in [
                L, R, W, Y
            ] and len(list_of_phonemes) < 3:
        #print("onset rule 5")
        list_of_phonemes, coda_cluster = _remove_and_update()

    # rule 6 - deal with N|DR, ND|L, T|BR clusters
    # AC tests: endless, undress, heartbreak, grandmother, toothbrush, handbag, handling
    if len(list_of_phonemes) > 2 and list_of_phonemes[0] in [
            'N', 'T', 'TH'
    ] and list_of_phonemes[1] in ['D', 'B']:
        #print("onset rule 6")
        if list_of_phonemes[0] in ['R', 'T'] and list_of_phonemes[1] in [
                'B'
        ] and list_of_phonemes[2] in ['R']:  # heartbreak
            list_of_phonemes, coda_cluster = _split_and_update(
                list_of_phonemes[0])
        elif list_of_phonemes[0] in ['TH']:  # toothbrush
            list_of_phonemes, coda_cluster = _split_and_update(
                list_of_phonemes[1])
        elif list_of_phonemes[0] in ['N'] or list_of_phonemes[2] in ['L', 'M']:
            if list_of_phonemes[1] in ['D'] and list_of_phonemes[2] in [
                    'R'
            ]:  # undress
                list_of_phonemes, coda_cluster = _split_and_update(
                    list_of_phonemes[1])
            else:  # endless, handbag
                list_of_phonemes, coda_cluster = _split_and_update(
                    list_of_phonemes[2])

    if coda_cluster.get_phoneme() == []:
        coda_cluster = None

    if cluster.get_phoneme() == []:
        cluster = None

    return (coda_cluster, cluster)
def factory(phoneme):
    # argument is a string of phonemes e.g.'B IH0 K AH0 Z'
    phoneme_list = phoneme.split()

    #print(phoneme_list)  # debug

    def phoneme_fact(phoneme):
        # match against regular expression
        phoneme_feature = re.match(phoneme_classify, phoneme).groupdict()
        #print(phoneme_feature)  # debug

        #input is phoneme feature dictionary
        if phoneme_feature['Consonant']:
            # return consonant object
            return Consonant(**phoneme_feature)
        elif phoneme_feature['Vowel']:
            # return vowel object
            return Vowel(**phoneme_feature)
        else:
            # unknown phoneme class
            raise Exception(
                'unkown Phoneme Class: cannot create appropriate Phoneme object'
            )

    def cluster_fact(cluster_list, phenome):
        current_cluster = cluster_list.pop()
        #print(current_cluster)  # debug
        #print(phenome)  # debug
        '''Consonants must be grouped together into clusters '''
        if current_cluster.type() == Consonant and type(
                phenome) == Consonant or current_cluster.type() == None:
            # Adjacent phenomes of type consonant belong to the same cluster, if the
            # current cluster.last_phenome == None that means it's empty
            # update current cluster
            ## AC 2017-08-12: provided it's not NG (should not be clustered)
            if NG in current_cluster.get_phoneme_string():
                # create new cluster add phenome to it and return
                cluster_list.append(current_cluster)
                cluster_list.append(Cluster(phenome))
            else:
                current_cluster.add_phenome(phenome)
                # append to cluster list
                cluster_list.append(current_cluster)
            # return cluster list
            return cluster_list
        else:
            # create new cluster add phenome to it and return
            cluster_list.append(current_cluster)
            cluster_list.append(Cluster(phenome))
            return cluster_list

    def syllable_fact(syllable_list, cluster):
        syllable = syllable_list.pop()
        #print(syllable)  # debug
        push = syllable_list.append

        if syllable.onset_is_empty() and syllable.nucleus_is_empty(
        ) and cluster.type() == Consonant:
            # cluster is assigned to the onset of the current syllable
            syllable.set_onset(cluster)
            push(syllable)
            return syllable_list

        if cluster.type() == Vowel:
            if syllable.has_nucleus():
                # this cluster becomes the nucleus of a new syllable
                # push current syllable back on the syllable stack
                push(syllable)
                # create new syllable
                new_syllable = Syllable(nucleus=cluster)
                # push new_syllable onto the stack
                push(new_syllable)
                return syllable_list
            else:
                # syllable does not have nucleus so this cluster becomes the
                # nucleus on the current syllable
                syllable.set_nucleus(cluster)
                push(syllable)
                return syllable_list

        if syllable.has_nucleus() and cluster.type() == Consonant:
            if syllable.has_coda():
                # this cluster is the onset of the next syllable
                new_syllable = Syllable(onset=cluster)
                # push syllable onto stack
                push(new_syllable)
                return syllable_list
            elif syllable.coda_is_empty():
                # Onset Maximalism dictates we push consonant clusters to
                # the onset of the next syllable, unless the nuclear cluster is LIGHT and
                # has primary stress
                maximal_coda, maximal_onset = onset_rules(cluster)

                ## AC 2017-09-15: removed ambisyllabicity as a theoretical stance
                #if syllable.is_short() and syllable.get_stress() == '1' and not maximal_coda:
                # The syllable is LIGHT and the consonat cluster is therefore ambisyllabic
                # it belongs to both the current syllable and the next
                # coda is empty
                #    light_coda = coda_rules(maximal_onset)
                #    syllable.set_coda(light_coda)
                #    push(syllable)
                #    new_syllable = Syllable(onset = maximal_onset)
                #    push(new_syllable)
                #    return syllable_list
                #else:
                # add cluster only to the next syllable
                if maximal_coda:
                    syllable.set_coda(maximal_coda)
                    push(syllable)
                else:
                    push(syllable)
                if maximal_onset:
                    new_syllable = Syllable(onset=maximal_onset)
                else:
                    new_syllable = Syllable()
                push(new_syllable)
                return syllable_list

    def check_last_syllable(syllable_list):
        # the syllable algorithm may assign a consonant cluster to a syllable that does not have
        # a nucleus, this is not allowed in the English language.

        # check the last syllable
        syllable = syllable_list.pop()
        #print('last syll:')  # debug
        #print(syllable)  # debug
        push = syllable_list.append

        if syllable.nucleus_is_empty():
            if syllable.has_onset():
                # pop the previous syllable
                prev_syllable = syllable_list.pop()
                onset = syllable.get_onset()
                # set the coda of the previous syllable to the value of the orphan onset
                if prev_syllable.has_coda():
                    #add phoneme
                    coda_cluster = prev_syllable.get_coda()
                    if coda_cluster != onset:
                        for phoneme in onset.phoneme_list:
                            coda_cluster.add_phoneme(phoneme)
                            push(prev_syllable)
                        # for phoneme in phonemes: coda_cluster.add_phoneme(phoneme)
                    else:
                        push(prev_syllable)
                else:
                    prev_syllable.set_coda(onset)
                    push(prev_syllable)
        else:
            # There is no violation, push syllable back on the stack
            push(syllable)

        return syllable_list

    # Create a list of phoneme clusters from phoneme list
    cluster_list = functools.reduce(cluster_fact,
                                    map(phoneme_fact, phoneme_list),
                                    [Cluster()])

    # Apply syllable creation rules to list of phoneme clusters
    syllable_list = functools.reduce(syllable_fact, cluster_list, [Syllable()])

    # Validate last syllable, and return completed syllable list
    return check_last_syllable(syllable_list)