示例#1
0
 def test_uyir_mei_split(self):
     """Check utf8.splitMeiUyir on three sample letters.

     The letters "ஃ" and "ல்" are expected back unchanged, while "லி" is
     expected to split into the pair ("ல்", "இ").
     """
     # Letters that splitMeiUyir must hand back as-is.
     for letter in (u"ஃ", u"ல்"):
         self.assertEqual(utf8.splitMeiUyir(letter), letter)

     # A compound letter unpacks into exactly two parts.
     mei, uyir = utf8.splitMeiUyir(u"லி")
     self.assertEqual((mei, uyir), (u"ல்", u"இ"))
示例#2
0
 def test_uyir_mei_split(self):
     """Exercise utf8.splitMeiUyir with pass-through and splitting inputs."""
     # "ஃ" comes back unchanged.
     aytham = u"ஃ"
     self.assertEqual(utf8.splitMeiUyir(aytham), aytham)

     # "ல்" comes back unchanged as well.
     pulli_letter = u"ல்"
     self.assertEqual(utf8.splitMeiUyir(pulli_letter), pulli_letter)

     # "லி" must unpack into two parts equal to ("ல்", "இ").
     expected_parts = (u"ல்", u"இ")
     first_part, second_part = utf8.splitMeiUyir(u"லி")
     self.assertEqual((first_part, second_part), expected_parts)
示例#3
0
def map_to_braille(tamil_string):
    """Translate ``tamil_string`` letter by letter into ``table`` entries.

    Every letter yielded by ``get_letters`` falls into one of three cases:

    * it is in ``grantha_mei_letters``: emit the entry of the
      ``grantha_agaram_letters`` letter at the same position, followed by
      the entry of ``pulli_symbols[0]``;
    * it is in ``uyir_letters`` or equals ``ayudha_letter``: emit its own
      entry;
    * otherwise: split it with ``splitMeiUyir``, emit the entry of the agaram
      letter matching the first part, and then the entry of the second part
      unless that part is "அ".

    Returns:
        The list of looked-up ``table`` values, in input order.
    """
    cells = []
    emit = cells.append

    for ch in get_letters(tamil_string):
        if ch in grantha_mei_letters:
            base = grantha_agaram_letters[grantha_mei_letters.index(ch)]
            emit(table[base])
            emit(table[pulli_symbols[0]])
            continue

        if ch in uyir_letters or ch == ayudha_letter:
            emit(table[ch])
            continue

        # Remaining letters are split into two parts; the first part must be
        # present in grantha_mei_letters for the index lookup to succeed.
        consonant, vowel = splitMeiUyir(ch)
        base = grantha_agaram_letters[grantha_mei_letters.index(consonant)]
        emit(table[base])
        if vowel != "அ":
            emit(table[vowel])

    return cells
示例#4
0
def joinWords(word_a, word_b):
    """Join two words using the conjunction rules stored in ``all_rules``.

    Both words are stripped first. A rule set is looked up in ``all_rules``
    by the uyir part of the last letter of ``word_a`` and its entries are
    tried in a fixed order; the first one that matches decides the result.
    When nothing applies (the first word ends in a mei letter, a word is
    empty, there is no rule set, or no entry matches) the two words are
    returned separated by a single space.

    Args:
        word_a: First word.
        word_b: Second word.

    Returns:
        The joined string.
    """
    word_a = word_a.strip()
    word_b = word_b.strip()
    # Fallback result used whenever no joining rule applies.
    unjoined = word_a + " " + word_b

    if not word_a:
        # An empty first word has no last letter to select a rule set from.
        return unjoined

    # get readable letters of first word
    first_word_letters = get_letters(word_a)
    first_word_last_letter = first_word_letters[-1]

    if first_word_last_letter in mei_letters:
        # first word's last letter is a mei letter, so return as it is.
        # TODO: apply special conditions also
        return unjoined

    # get mei & uyir parts of the first word's last letter; when the split
    # does not yield two parts, its first element is used for both
    last_parts = splitMeiUyir(first_word_last_letter)
    if len(last_parts) == 2:
        first_word_last_mei_char, first_word_last_uyir_char = last_parts
    else:
        first_word_last_mei_char = first_word_last_uyir_char = last_parts[0]

    # A missing rule set becomes an empty dict so that every lookup below
    # simply finds nothing instead of failing on None.
    rule = all_rules.get(first_word_last_uyir_char) or {}

    if word_a == word_b:
        # "same_words" is read as [words, conjunction]; a rule set without
        # that entry just skips this check.
        same_word_rule = rule.get("same_words", [])
        if same_word_rule and word_a in same_word_rule[0]:
            # first letter + conjunction + the whole second word
            return first_word_letters[0] + same_word_rule[1] + word_b

        if len(first_word_letters) == 3:
            # both words are the same and three letters long
            disappear_lastchar = rule.get("same_word_disappear_lastchar", [])
            if (disappear_lastchar
                    and first_word_last_uyir_char == disappear_lastchar[0]):
                # uyir part of the word's first letter (the second word is
                # identical, so this is its first letter too)
                first_word_first_uyir_char = splitMeiUyir(
                    first_word_letters[0])[-1]
                # conjunction letter: last mei joined with that uyir
                jn = joinMeiUyir(first_word_last_mei_char,
                                 first_word_first_uyir_char)
                # word minus its last letter + conjunction + word minus its
                # first letter
                first_word = u"".join(first_word_letters[:-1])
                second_word = u"".join(first_word_letters[1:])
                return first_word + jn + second_word

    if word_a in rule.get("first_solo_words", []):
        # TODO: fine tune this first solo word check (startswith, endswith, ...)
        return unjoined

    # each "diff_jn_words" entry is read as
    # (first words, second-word prefixes, conjunction)
    for diff_jn in rule.get("diff_jn_words", []):
        if word_a in diff_jn[0] and any(
                word_b.startswith(prefix) for prefix in diff_jn[1]):
            return word_a + diff_jn[2] + word_b

    # get readable letters of second word
    second_word_letters = get_letters(word_b)
    if not second_word_letters:
        # An empty second word has no first letter for the remaining rules.
        return unjoined

    # second word from its second letter to the end
    second_word_after_first_char = u"".join(second_word_letters[1:])
    # mei & uyir parts of the second word's first letter, with the same
    # fallback as for the first word
    first_parts = splitMeiUyir(second_word_letters[0])
    if len(first_parts) == 2:
        second_word_first_mei_char, second_word_first_uyir_char = first_parts
    else:
        second_word_first_mei_char = second_word_first_uyir_char = (
            first_parts[0])

    if second_word_first_mei_char in rule.get("secondword_first_chars", []):
        # major conjunction rule: append the mei letter to the first word
        return word_a + second_word_first_mei_char + " " + word_b

    # "firstword_double_special_secondword" is read as
    # [second words, conjunction]
    double_special = rule.get("firstword_double_special_secondword", None)
    if (double_special and len(first_word_letters) == 4
            and first_word_letters[:2] == first_word_letters[2:]):
        # the first word is a two-letter unit repeated twice
        sec_word = second_word_first_uyir_char + second_word_after_first_char
        if sec_word in double_special[0]:
            jn = joinMeiUyir(double_special[1], second_word_first_uyir_char)
            return word_a + jn + second_word_after_first_char

    # "special_secondword_first_chars" is read as [uyir letters, conjunction]
    special_first_chars = rule.get("special_secondword_first_chars", None)
    if (special_first_chars
            and second_word_first_uyir_char in special_first_chars[0]):
        # replace the second word's first letter by conjunction + its uyir
        second_word_first_schar = joinMeiUyir(special_first_chars[1],
                                              second_word_first_uyir_char)
        return word_a + second_word_first_schar + second_word_after_first_char

    # if none of the rules above applies, return the words as they are
    return unjoined
def joinWords(word_a, word_b):
    """Join two words using the conjunction rules stored in ``all_rules``.

    Both words are stripped first. A rule set is looked up in ``all_rules``
    by the uyir part of the last letter of ``word_a`` and its entries are
    tried in a fixed order; the first one that matches decides the result.
    When nothing applies (the first word ends in a mei letter, a word is
    empty, ``all_rules`` has no entry for that uyir, or no entry matches)
    the two words are returned separated by a single space.

    Args:
        word_a: First word.
        word_b: Second word.

    Returns:
        The joined string.
    """
    word_a = word_a.strip()
    word_b = word_b.strip()
    # Result returned whenever the words cannot be combined by a rule.
    plain = word_a + ' ' + word_b

    if not word_a:
        # Without any letter in the first word no rule set can be selected.
        return plain

    # get readable letters of first word
    first_word_letters = get_letters(word_a)

    if first_word_letters[-1] in mei_letters:
        # first word's last letter is a mei letter, so return as it is.
        # TODO: apply special conditions also
        return plain

    # get mei & uyir parts of the first word's last letter; when the split
    # does not yield two parts, its first element is used for both
    first_word_last_chars = splitMeiUyir(first_word_letters[-1])
    if len(first_word_last_chars) == 2:
        first_word_last_mei_char, first_word_last_uyir_char = first_word_last_chars
    else:
        first_word_last_mei_char = first_word_last_chars[0]
        first_word_last_uyir_char = first_word_last_chars[0]

    # Use .get with an empty-dict fallback: a uyir without a rule set then
    # matches nothing below instead of raising KeyError here.
    rule = all_rules.get(first_word_last_uyir_char) or {}

    if word_a == word_b:
        # both input words are the same; 'same_words' is read as
        # [words, conjunction] and may be absent from the rule set
        same_word_rule = rule.get('same_words', [])
        if same_word_rule and word_a in same_word_rule[0]:
            # first letter + conjunction + the whole second word
            jn = same_word_rule[1]
            return first_word_letters[0] + jn + word_b

        if len(first_word_letters) == 3:
            # both words are the same and three letters long
            disappear_lastchar = rule.get('same_word_disappear_lastchar', [])
            if disappear_lastchar and first_word_last_uyir_char == disappear_lastchar[0]:
                # uyir part of the word's first letter (the second word is
                # identical, so this is its first letter too)
                first_word_first_uyir_char = splitMeiUyir(first_word_letters[0])[-1]
                # conjunction letter: last mei joined with that uyir
                jn = joinMeiUyir(first_word_last_mei_char, first_word_first_uyir_char)
                # word minus its last letter, then word minus its first letter
                head = u''.join(first_word_letters[:-1])
                tail = u''.join(first_word_letters[1:])
                return head + jn + tail

    if word_a in rule.get('first_solo_words', []):
        # TODO: fine tune this first solo word check (startswith, endswith, ...)
        return plain

    # each 'diff_jn_words' entry is read as
    # (first words, second-word prefixes, conjunction)
    for diff_jn in rule.get('diff_jn_words', []):
        if word_a not in diff_jn[0]:
            continue
        for prefix in diff_jn[1]:
            if word_b.startswith(prefix):
                # apply the entry's own conjunction
                return word_a + diff_jn[2] + word_b

    # get readable letters of second word
    second_word_letters = get_letters(word_b)
    if not second_word_letters:
        # An empty second word has no first letter for the remaining rules.
        return plain

    # second word from its second letter to the end
    second_word_after_first_char = u''.join(second_word_letters[1:])
    # mei & uyir parts of the second word's first letter, with the same
    # fallback as for the first word
    second_word_first_chars = splitMeiUyir(second_word_letters[0])
    if len(second_word_first_chars) == 2:
        second_word_first_mei_char, second_word_first_uyir_char = second_word_first_chars
    else:
        second_word_first_mei_char = second_word_first_chars[0]
        second_word_first_uyir_char = second_word_first_chars[0]

    if second_word_first_mei_char in rule.get('secondword_first_chars', []):
        # major conjunction rule: append the mei letter to the first word
        return word_a + second_word_first_mei_char + ' ' + word_b

    # 'firstword_double_special_secondword' is read as
    # [second words, conjunction]
    firstword_double_special_secondword = rule.get('firstword_double_special_secondword', [])
    if (firstword_double_special_secondword
            and len(first_word_letters) == 4
            and first_word_letters[:2] == first_word_letters[2:]):
        # the first word is a two-letter unit repeated twice
        sec_word = second_word_first_uyir_char + second_word_after_first_char
        if sec_word in firstword_double_special_secondword[0]:
            jn = joinMeiUyir(firstword_double_special_secondword[1], second_word_first_uyir_char)
            return word_a + jn + second_word_after_first_char

    # 'special_secondword_first_chars' is read as [uyir letters, conjunction]
    special_secondword_first_chars = rule.get('special_secondword_first_chars', [])
    if (special_secondword_first_chars
            and second_word_first_uyir_char in special_secondword_first_chars[0]):
        # replace the second word's first letter by conjunction + its uyir
        jn = special_secondword_first_chars[1]
        second_word_first_schar = joinMeiUyir(jn, second_word_first_uyir_char)
        return word_a + second_word_first_schar + second_word_after_first_char

    # if none of the rules above applies, return the words as they are
    return plain
示例#6
0
文件: itrans.py 项目: nv-d/open-tamil
    table = OrderedDict()#{}

def _options(_ref,_sym):
    _v = []
    for _k,_v in _ref:
        if _k == _sym: break
    return _v

# Register the direct entries of the three maps in Transliteration.table:
# for every map entry, obj[1] supplies the key (or a list/tuple of keys) and
# obj[0] is stored as the value.
for ta_map in [_uyir, _mei, _aytham]:
    for obj in ta_map:
        ta, en = obj[0], obj[1]
        if not isinstance(en, (list, tuple)):
            # A scalar is a single key. Wrapping it keeps a multi-character
            # string whole; list(en) would split it into one key per
            # character.
            en = [en]
        for e in en:
            Transliteration.table[e] = ta

# mix of consonants and compound - uyirmei - letters
# Every compound letter is split into its two parts; each combination of a
# consonant option followed by a vowel option becomes a table key for that
# letter, unless the key already holds a value.
for vc in uyirmei_letters:
    c, v = splitMeiUyir(vc)
    vowel_options = _options(_uyir, v)
    consonant_options = _options(_mei, c)
    # A scalar option is one spelling; wrap it so that a multi-character
    # string is not iterated character by character.
    if not isinstance(vowel_options, (list, tuple)):
        vowel_options = [vowel_options]
    if not isinstance(consonant_options, (list, tuple)):
        consonant_options = [consonant_options]
    for vo in vowel_options:
        for co in consonant_options:
            # Existing entries win: a key is only filled while it is unset.
            if not Transliteration.table.get(co + vo, None):
                Transliteration.table[co + vo] = vc

#from pprint import pprint
#pprint(Transliteration.table)
#print(len(Transliteration.table))