示例#1
0
def add_mark(components, mark):
    """
    Return a copy of `components` with `mark` applied.

    `components` is copied into a new list; the input is not rebound.
    Index 0 is the part that may receive Mark.BAR, index 1 is the vowel
    part that may receive Mark.HAT, Mark.BREVE or Mark.HORN.

    Mark.NONE has no branch here; the caller must handle it separately.
    """
    parts = list(components)

    # BAR applies to the last character of the first part, and only when
    # that character (case-insensitively) belongs to FAMILY_D.
    if mark == Mark.BAR:
        head = parts[0]
        if head and head[-1].lower() in FAMILY_D:
            parts[0] = add_mark_at(head, len(head) - 1, Mark.BAR)
            return parts

    # Lower-cased vowel part with its accent and all marks stripped;
    # used only to decide where the mark goes.
    unaccented = accent.add_accent(parts, Accent.NONE)[1].lower()
    bare = utils.join([add_mark_char(ch, Mark.NONE) for ch in unaccented])

    if mark == Mark.HAT:
        # Largest of the first-occurrence indexes of a, o and e
        # (-1 when none of them is present).
        target = max(bare.find(letter) for letter in (u"a", u"o", u"e"))
        parts[1] = add_mark_at(parts[1], target, Mark.HAT)
    elif mark == Mark.BREVE:
        # "ua" is deliberately left untouched.
        if bare != u"ua":
            parts[1] = add_mark_at(parts[1], bare.find(u"a"), Mark.BREVE)
    elif mark == Mark.HORN:
        if bare in (u"uo", u"uoi", u"uou"):
            # Both leading vowels receive the horn.
            horned = [add_mark_char(ch, Mark.HORN) for ch in parts[1][:2]]
            parts[1] = utils.join(horned) + parts[1][2:]
        elif bare == u"oa":
            parts[1] = add_mark_at(parts[1], 1, Mark.HORN)
        else:
            target = max(bare.find(u"u"), bare.find(u"o"))
            parts[1] = add_mark_at(parts[1], target, Mark.HORN)
    return parts
示例#2
0
def add_mark(components, mark):
    """
    Apply `mark` to a copy of `components` and return that copy.

    Mark.BAR is placed on the last character of components[0] when that
    character is in FAMILY_D. Every other case works on the vowel part
    (components[1]). Mark.NONE is not handled here and is left to the
    caller.
    """
    result = list(components)

    bar_applies = (mark == Mark.BAR and result[0]
                   and result[0][-1].lower() in FAMILY_D)
    if bar_applies:
        last_index = len(result[0]) - 1
        result[0] = add_mark_at(result[0], last_index, Mark.BAR)
        return result

    # Strip the accent, then every mark, from the lower-cased vowel part
    # so positions can be looked up on plain letters.
    no_accent = accent.add_accent(result, Accent.NONE)[1].lower()
    plain = utils.join([add_mark_char(ch, Mark.NONE) for ch in no_accent])

    if mark == Mark.HAT:
        where = max(map(plain.find, (u"a", u"o", u"e")))
        result[1] = add_mark_at(result[1], where, Mark.HAT)
    elif mark == Mark.BREVE:
        # No breve is added when the plain vowel part is exactly "ua".
        if plain != u"ua":
            where = plain.find(u"a")
            result[1] = add_mark_at(result[1], where, Mark.BREVE)
    elif mark == Mark.HORN:
        if plain == u"oa":
            # Horn goes on the second letter.
            result[1] = add_mark_at(result[1], 1, Mark.HORN)
        elif plain in (u"uo", u"uoi", u"uou"):
            # Horn goes on each of the first two letters.
            first_two = [add_mark_char(ch, Mark.HORN) for ch in result[1][:2]]
            result[1] = utils.join(first_two) + result[1][2:]
        else:
            where = max(plain.find(u"u"), plain.find(u"o"))
            result[1] = add_mark_at(result[1], where, Mark.HORN)
    return result
def reverse(components, trans):
    """
    Reverse the effect of transformation `trans` on `components`.

    Returns a new list of components. If the transformation does not
    affect the components, the returned list is an unchanged copy.

    Workflow:
    - ADD_CHAR: when the joined components end with the added character
      (trans[1]), drop that character from the last non-empty component.
    - ADD_ACCENT: re-apply the components with Accent.NONE.
    - ADD_MARK: for Mark.BAR, strip the mark from the last character of
      components[0]; for any other mark, strip all marks from the vowel
      part, but only if mark.is_valid_mark() accepts `trans`.
    """
    action, factor = get_action(trans)
    comps = list(components)
    string = utils.join(comps)

    # Slice rather than index: with all components empty, string[-1]
    # would raise IndexError, whereas string[-1:] is simply u"" and the
    # comparison fails cleanly.
    if action == Action.ADD_CHAR and string[-1:] == trans[1]:
        # Pick the rightmost non-empty component.
        if comps[2]:
            i = 2
        elif comps[1]:
            i = 1
        else:
            i = 0
        comps[i] = comps[i][:-1]
    elif action == Action.ADD_ACCENT:
        comps = accent.add_accent(comps, Accent.NONE)
    elif action == Action.ADD_MARK:
        if factor == Mark.BAR:
            comps[0] = comps[0][:-1] + \
                mark.add_mark_char(comps[0][-1:], Mark.NONE)
        elif mark.is_valid_mark(comps, trans):
            comps[1] = u"".join(
                [mark.add_mark_char(c, Mark.NONE) for c in comps[1]])
    return comps
def reverse(components, trans):
    """
    Undo transformation `trans` on `components` and return the result.

    The input is copied; a new list is always returned. When `trans`
    does not affect the components, the copy is returned unchanged.

    - ADD_CHAR removes the trailing character if it equals trans[1].
    - ADD_ACCENT removes the accent via accent.add_accent(..., Accent.NONE).
    - ADD_MARK removes the bar from the last character of the first part
      (Mark.BAR), or all marks from the vowel part when
      mark.is_valid_mark() says `trans` is applicable.
    """
    action, factor = get_action(trans)
    comps = list(components)
    string = utils.join(comps)

    # string[-1:] (not string[-1]) keeps an all-empty word from raising
    # IndexError; an empty slice never equals the added character.
    if action == Action.ADD_CHAR and string[-1:] == trans[1]:
        # The trailing character lives in the rightmost non-empty part.
        i = 2 if comps[2] else (1 if comps[1] else 0)
        comps[i] = comps[i][:-1]
    elif action == Action.ADD_ACCENT:
        comps = accent.add_accent(comps, Accent.NONE)
    elif action == Action.ADD_MARK:
        if factor == Mark.BAR:
            unmarked_last = mark.add_mark_char(comps[0][-1:], Mark.NONE)
            comps[0] = comps[0][:-1] + unmarked_last
        elif mark.is_valid_mark(comps, trans):
            comps[1] = u"".join([mark.add_mark_char(c, Mark.NONE)
                                 for c in comps[1]])
    return comps
示例#5
0
def is_valid_mark(comps, mark_trans):
    """
    Tell whether the mark described by `mark_trans` can be added to `comps`.

    The first character of `mark_trans` names the letter the mark targets.
    Returns True when that letter is 'd' and the first component ends in
    d/đ (case-insensitive), or when the letter occurs in the vowel part
    once its accent and marks are stripped. Returns False otherwise.
    """
    parts = list(comps)
    has_vowel = parts[1] != u""
    if has_vowel:
        # Lower-cased vowel part without accent or marks.
        base = accent.add_accent(parts, Accent.NONE)[1].lower()
        base = utils.join([add_mark_char(ch, Mark.NONE) for ch in base])

    target = mark_trans[0]
    if target == 'd' and parts[0] and parts[0][-1].lower() in (u"d", u"đ"):
        return True
    # `base` is only bound when there is a vowel part; short-circuit
    # evaluation keeps it from being read otherwise.
    return has_vowel and base.find(target) != -1
示例#6
0
def is_valid_mark(comps, mark_trans):
    """
    Check whether the mark given by `mark_trans` may be applied to `comps`.

    mark_trans[0] is the target letter. The mark is valid if the target
    is 'd' and the first component ends with d/đ, or if the target letter
    appears in the accent-free, mark-free, lower-cased vowel part.
    """
    components = list(comps)

    vowel_present = components[1] != u""
    plain_vowel = u""
    if vowel_present:
        without_accent = accent.add_accent(components, Accent.NONE)[1].lower()
        plain_vowel = utils.join(
            [add_mark_char(ch, Mark.NONE) for ch in without_accent])

    letter = mark_trans[0]
    targets_d = (letter == 'd' and components[0]
                 and components[0][-1].lower() in (u"d", u"đ"))
    if targets_d:
        return True
    if not vowel_present:
        return False
    return plain_vowel.find(letter) != -1
def transform(comps, trans):
    """
    Apply transformation `trans` to `comps` and return the new components.

    `comps` is copied first. The first character of `trans` selects the
    kind of transformation:
    - u'<': conditionally append trans[1] to the vowel part (see below);
    - u'+': append trans[1] as a plain character;
    - anything else: resolved through get_action() into an accent or
      mark operation.
    """
    parts = list(comps)
    kind = trans[0]

    # NOTE: deciding whether a '<' transformation on something other than
    # ư/ơ should degrade to '+' is not done here (left to the caller).

    if kind == u'<' and not parts[2]:
        char = trans[1]
        vowel = parts[1]
        if vowel[-1:] == char:
            # Undo operation: the character is already there.
            return parts
        if not vowel or \
                (vowel.lower() == u'ư' and char.lower() == u'ơ'):
            # Only allow ư, ơ or ươ sitting alone in the middle part.
            parts[1] += char
        elif vowel.lower() == 'i' and parts[0].lower() == 'g':
            # Quite a hack. Typing gi[f = 'giờ' makes separate() produce
            # ['g', 'i', ''], so a vowel part of 'i' after 'g' has to be
            # accepted, and the result re-separated.
            parts[1] += char
            parts = separate(utils.join(parts))

    if kind == u'+':
        char = trans[1]
        # Choose the component that receives the character:
        #   transform([u'nn', '', ''],'+n') = [u'nnn', '', '']
        #   transform([u'c', '', ''],'+o') = [u'c', 'o', '']
        #   transform([u'c', 'o', ''],'+o') = [u'c', 'oo', '']
        #   transform([u'c', 'o', ''],'+n') = [u'c', 'o', 'n']
        if parts[1] == u'':
            slot = 1 if utils.is_vowel(char) else 0
        elif parts[2] == u'' and utils.is_vowel(char):
            slot = 1
        else:
            slot = 2
        parts[slot] += char

        # An existing accent is likely misplaced after the append, so
        # find it, strip it and put it back.
        found = accent.Accent.NONE
        for letter in parts[1]:
            found = accent.get_accent_char(letter)
            if found:
                break
        if found != accent.Accent.NONE:
            parts = accent.add_accent(parts, Accent.NONE)
            parts = accent.add_accent(parts, found)
        return parts

    action, factor = get_action(trans)
    if action == Action.ADD_ACCENT:
        return accent.add_accent(parts, factor)
    if action == Action.ADD_MARK and mark.is_valid_mark(parts, trans):
        return mark.add_mark(parts, factor)
    return parts
def transform(comps, trans):
    """
    Transform the given components with transformation `trans`.

    Works on a copy of `comps` and returns the transformed components.
    trans[0] is the operation selector: u'<' (guarded append to the vowel
    part), u'+' (plain append), otherwise an accent/mark operation looked
    up with get_action().
    """
    word = list(comps)
    op = trans[0]

    # Turning a '<' on a non-ư/ơ character into a '+' is intentionally
    # not handled in this function.

    if op == u'<':
        if not word[2]:
            added = trans[1]
            if word[1][-1:] == added:
                # Same character already at the end: this is an undo.
                return word

            middle = word[1].lower()
            alone = not word[1]
            completes_uo = middle == u'ư' and added.lower() == u'ơ'
            if alone or completes_uo:
                # ư, ơ or ươ may only sit alone in the middle part.
                word[1] += added
            elif middle == 'i' and word[0].lower() == 'g':
                # Hack for gi[f = 'giờ': separate() yields ['g', 'i', ''],
                # so 'i' after 'g' is allowed and the word is re-split.
                word[1] += added
                word = separate(utils.join(word))

    if op == u'+':
        added = trans[1]
        # Examples:
        #   transform([u'nn', '', ''],'+n') = [u'nnn', '', '']
        #   transform([u'c', '', ''],'+o') = [u'c', 'o', '']
        #   transform([u'c', 'o', ''],'+o') = [u'c', 'oo', '']
        #   transform([u'c', 'o', ''],'+n') = [u'c', 'o', 'n']
        if word[1] == u'':
            if utils.is_vowel(added):
                index = 1
            else:
                index = 0
        elif word[2] == u'' and utils.is_vowel(added):
            index = 1
        else:
            index = 2
        word[index] += added

        # If the vowel part carries an accent, remove and re-add it: the
        # earlier placement is probably wrong now that the word changed.
        current = accent.Accent.NONE
        for ch in word[1]:
            current = accent.get_accent_char(ch)
            if current:
                break
        if current != accent.Accent.NONE:
            stripped = accent.add_accent(word, Accent.NONE)
            word = accent.add_accent(stripped, current)
        return word

    action, factor = get_action(trans)
    if action == Action.ADD_ACCENT:
        word = accent.add_accent(word, factor)
    elif action == Action.ADD_MARK and mark.is_valid_mark(word, trans):
        word = mark.add_mark(word, factor)
    return word