예제 #1
0
def get_transformation_list(key, im, case=0):
    """
        Return the list of transformations bound to the entered key.

        `im` maps a key to either a single transformation string or a
        list of them.  The key is looked up exactly as entered first and
        lower-cased otherwise.  Every transformation that starts with
        u'<' is rebuilt as u'<' followed by its second character passed
        through utils.change_case(..., case).

        If the entered key is not in im, return [u"+key"], meaning
        appending the entered key to the current text.

        The returned list is always a new list object: the entry stored
        in `im` is never modified, so a case adjustment made for one key
        press cannot leak into later calls.
    """
    lkey = key if key in im else key.lower()

    if lkey not in im:
        return [u'+' + unicode(key)]

    entry = im[lkey]
    # Work on a copy: rewriting the stored list in place would corrupt
    # the shared key map for every subsequent lookup.
    trans_list = list(entry) if isinstance(entry, list) else [entry]

    for i, trans in enumerate(trans_list):
        if trans[0] == u'<':
            trans_list[i] = trans[0] + utils.change_case(trans[1], case)
    return trans_list
예제 #2
0
def rep_chars(word, offset):
    """
    Advance `offset` across the leading run of repeated characters.

    The scan always walks `word` from its first character, adding one to
    `offset` for every character equal to word[0] or to
    utils.change_case(word[0]).  At the first character that is neither
    (while word[offset:] is non-empty) the current offset is returned.
    When the whole word is consumed, the final offset minus one is
    returned.
    """
    first = word[0]
    flipped = utils.change_case(word[0])

    for letter in word:
        is_repeat = letter == first or letter == flipped
        if not is_repeat and len(word[offset:]) != 0:
            return offset
        offset += 1

    return offset - 1
예제 #3
0
def _next(possible, word, prev):
    """
    Pick the next character out of `possible` that matches `word`.

    possible -- collection of candidate characters
    word     -- remaining input, indexed from 0
    prev     -- previously chosen character

    Returns a (character, offset) pair.  If word[0] equals `prev` (or its
    utils.change_case counterpart) the search starts at the position
    reported by rep_chars; it then walks backwards towards index 0.  At
    each position the character itself, its case-changed form, or (for an
    ASCII vowel) any vowel present in `possible` is accepted, and the
    returned offset is one past that position.  When nothing matches, the
    result of check_final_run is returned.
    """
    vowel_set = {'A', 'E', 'I', 'O', 'U', 'a', 'e', 'i', 'o', 'u'}

    pos = 0
    head = word[pos]
    head_flipped = utils.change_case(head)

    # The word opens with a repeat of the previous character:
    # run to the end of that repetition first.
    if prev in (head, head_flipped):
        pos = rep_chars(word, pos)

    while True:
        letter = word[pos]
        flipped = utils.change_case(letter)

        if letter in possible:
            return letter, pos + 1

        if flipped in possible:
            return flipped, pos + 1

        if letter in vowel_set:
            # Any vowel from `possible` may stand in for a vowel.
            candidates = vowel_set.intersection(possible)
            if candidates:
                return candidates.pop(), pos + 1

        if pos < 1:
            break
        pos -= 1

    final_char, final_pos = check_final_run(word, prev, pos)
    return final_char, final_pos
예제 #4
0
def add_accent_char(char, accent):
    """
    Return `char` carrying the given accent.

    `accent` is a member of class Accent; it is used here as an integer
    offset into utils.VOWELS.  An empty string is returned unchanged, and
    a character that is not found in utils.VOWELS only goes through the
    final utils.change_case call.
    """
    if char == u'':
        return u''

    was_upper = char.isupper()
    result = char.lower()

    position = utils.VOWELS.find(result)
    if position >= 0:
        # Jump to the last slot of the 6-wide group holding this vowel,
        # then step back by the accent value.
        group_end = position // 6 * 6 + 5
        result = utils.VOWELS[group_end - accent]

    return utils.change_case(result, was_upper)
예제 #5
0
def add_mark_char(char, mark):
    """
    Return `char` carrying the given mark.

    The accent of the character is read first and removed, the mark is
    applied to the bare lower-case letter according to the FAMILY_*
    group it belongs to, and then the accent and the original case are
    put back.  A letter that belongs to no family listed for the mark is
    left without a mark change.  An empty string is returned unchanged.
    """
    if char == u'':
        return u''

    was_upper = char.isupper()
    original_accent = accent.get_accent_char(char)
    base = accent.add_accent_char(char.lower(), Accent.NONE)

    # For each mark: (family, replacement) pairs, tried in order.
    if mark == Mark.HAT:
        rules = ((FAMILY_A, u"â"), (FAMILY_O, u"ô"), (FAMILY_E, u"ê"))
    elif mark == Mark.HORN:
        rules = ((FAMILY_O, u"ơ"), (FAMILY_U, u"ư"))
    elif mark == Mark.BREVE:
        rules = ((FAMILY_A, u"ă"),)
    elif mark == Mark.BAR:
        rules = ((FAMILY_D, u"đ"),)
    elif mark == Mark.NONE:
        rules = (
            (FAMILY_A, u"a"),
            (FAMILY_E, u"e"),
            (FAMILY_O, u"o"),
            (FAMILY_U, u"u"),
            (FAMILY_D, u"d"),
        )
    else:
        rules = ()

    marked = base
    for family, replacement in rules:
        if base in family:
            marked = replacement
            break

    marked = accent.add_accent_char(marked, original_accent)
    return utils.change_case(marked, was_upper)
예제 #6
0
def gen_error(word):
    """
    Return a copy of `word` with randomly injected errors.

    Non-alphabetic characters are copied as they are.  For every
    alphabetic character a number from 1 to 4 is drawn and compared with
    the VOWEL, CAPITALIZATION and REPETITION constants:

    * VOWEL (only when utils.is_vowel accepts the character): replace it
      with a random element of `vowels`;
    * CAPITALIZATION: replace it with utils.change_case(char);
    * REPETITION: emit the character one or more times, stopping after
      the first non-zero draw of random.randint(0, 1);
    * anything else: keep the character.
    """
    pieces = []
    for symbol in word:
        if symbol.isalpha():
            action = random.randint(1, 4)

            if action == VOWEL and utils.is_vowel(symbol):
                pieces.append(random.choice(vowels))
                continue

            if action == CAPITALIZATION:
                pieces.append(utils.change_case(symbol))
                continue

            if action == REPETITION:
                pieces.append(symbol)
                while not random.randint(0, 1):
                    pieces.append(symbol)
                continue

        pieces.append(symbol)

    return ''.join(pieces)
예제 #7
0
def is_valid_combination(components):
    """Check whether a sound combination follows Vietnamese spelling rules.

    Input:
        components - a list of the form [u'c', u'a', u'm']: the start
                     consonant, the vowel part and the ending consonant.
    Output:
        True if OK, False otherwise.
    """
    # We only work with lower case
    comps = [utils.change_case(part, 0) for part in list(components)]

    # A start sound on its own is always accepted, which lets 'đ' appear
    # in abbreviations like 'đm', 'đc', 'kgcđ', etc.
    if comps[0] and not (comps[1] or comps[2]):
        return True

    # The start sound, when present, must be a proper consonant
    if comps[0] != u'' and comps[0] not in CONSONANTS:
        return False

    # Likewise the ending sound must be a proper ending consonant
    if comps[2] != u'' and comps[2] not in ENDING_CONSONANTS:
        return False

    vowel = accent.remove_accent_string(comps[1])
    is_open = vowel in OPEN_COMPOUND_VOWELS
    is_closed = vowel in CLOSED_COMPOUND_VOWELS

    # A vowel part longer than one letter must be a known compound vowel
    if len(vowel) > 1 and not (is_open or is_closed):
        return False

    # A compound vowel listed only as closed takes no ending consonant
    if is_closed and not is_open and comps[2] != u'':
        return False

    # 'ăch'?
    if comps[2] == u'ch':
        if vowel in u'ăâeôơuư' or (is_open and not is_closed):
            return False

    # 'ương' is ok but 'ơng' ?
    # NOTE: this is a substring test against a one-character string, so
    # an empty vowel matches as well.
    if comps[2] == u'ng' and vowel in u'ơ':
        return False

    # A rule rejecting u'ê' before u'c' is deliberately absent:
    # sadly, it interferes with 'nhếch' :<

    # Get the first accent
    first_accent = Accent.NONE
    for letter in comps[1]:
        found = accent.get_accent_char(letter)
        if found != Accent.NONE:
            first_accent = found
            break

    # These consonants can only go with ACUTE, DOT or NONE accents
    if comps[2] in (u'c', u'p', u't', u'ch') and \
            first_accent not in (Accent.NONE, Accent.ACUTE, Accent.DOT):
        return False

    return True
예제 #8
0
def is_valid_combination(components):
    """Check whether a sound combination follows Vietnamese spelling rules.

    Input:
        components - a list of the form [u'c', u'a', u'm']: the start
                     consonant, the vowel part and the ending consonant.
    Output:
        True if OK, False otherwise.
    """
    # We only work with lower case
    comps = [utils.change_case(part, 0) for part in list(components)]

    # A start sound on its own is always accepted, which lets 'đ' appear
    # in abbreviations like 'đm', 'đc', 'kgcđ', etc.
    if comps[0] and not (comps[1] or comps[2]):
        return True

    # The start sound, when present, must be a proper consonant
    if comps[0] != u'' and comps[0] not in CONSONANTS:
        return False

    # Likewise the ending sound must be a proper ending consonant
    if comps[2] != u'' and comps[2] not in ENDING_CONSONANTS:
        return False

    vowel = accent.remove_accent_string(comps[1])
    is_open = vowel in OPEN_COMPOUND_VOWELS
    is_closed = vowel in CLOSED_COMPOUND_VOWELS

    # A vowel part longer than one letter must be a known compound vowel
    if len(vowel) > 1 and not (is_open or is_closed):
        return False

    # A compound vowel listed only as closed takes no ending consonant
    if is_closed and not is_open and comps[2] != u'':
        return False

    # 'ăch'?
    if comps[2] == u'ch':
        if vowel in u'ăâeôơuư' or (is_open and not is_closed):
            return False

    # 'ương' is ok but 'ơng' ?
    # NOTE: this is a substring test against a one-character string, so
    # an empty vowel matches as well.
    if comps[2] == u'ng' and vowel in u'ơ':
        return False

    # A rule rejecting u'ê' before u'c' is deliberately absent:
    # sadly, it interferes with 'nhếch' :<

    # Get the first accent
    first_accent = Accent.NONE
    for letter in comps[1]:
        found = accent.get_accent_char(letter)
        if found != Accent.NONE:
            first_accent = found
            break

    # These consonants can only go with ACUTE, DOT or NONE accents
    if comps[2] in (u'c', u'p', u't', u'ch') and \
            first_accent not in (Accent.NONE, Accent.ACUTE, Accent.DOT):
        return False

    return True