def split_letters(word=u""):
    """
    Break a word into its graphemes (i.e. the Tamil characters).

    The word is first passed to ``TamilWord.validate``. Each codepoint is
    then either the start of a new grapheme (an அ combination, whitespace,
    a vowel or an aytham) or a modifier (a combination ending or the pulli)
    that is glued onto the grapheme before it.

    Returns a list of strings, one per grapheme, in word order.

    Raises ValueError if a modifier appears before any grapheme, or if a
    codepoint fits none of the categories above. Whatever
    ``TamilWord.validate`` raises is propagated unchanged.
    """
    # reject invalid words up front
    TamilWord.validate(word)

    # lookup collections, fetched once before the scan
    endings = TamilLetter.get_combination_endings()
    a_column = TamilLetter.get_combination_column(u"அ").values()

    graphemes = []

    for char in word:
        # codepoints that open a new grapheme on their own
        opens_grapheme = (
            char in a_column
            or TamilLetter.is_whitespace(char)
            or TamilLetter.is_vowel(char)
            or TamilLetter.is_aytham(char)
        )
        if opens_grapheme:
            graphemes.append(char)
            continue

        # anything else must be a combination ending or a pulli
        is_modifier = char in endings or char == TamilLetter.get_pulli()
        if not is_modifier:
            raise ValueError(
                """Unknown error: Codepoint \'%s\' in word %s is neither a vowel, consonant, combination or aytham"""
                % (char, word)
            )

        # a modifier needs a base grapheme to attach to; validation is
        # expected to have caught this case already
        if len(graphemes) == 0:
            raise ValueError(
                """Unknown error: Combination ending %s cannot be first character of a word"""
                % (char)
            )

        graphemes[-1] += char

    # TODO: Write extensive test cases for this
    return graphemes
def split_letters(word=''):
    """
    Split a word into a list of graphemes (i.e. the Tamil characters).

    After ``TamilWord.validate(word)`` has accepted the word, the
    codepoints are scanned left to right:

    * an அ combination, whitespace, a vowel or an aytham becomes a new
      list entry;
    * a combination ending or the pulli is appended to the last entry.

    Raises ValueError when a combination ending / pulli has nothing to
    attach to, or when a codepoint matches none of the categories.
    """
    TamilWord.validate(word)

    result = []

    # reference collections used for the membership tests below
    ending_marks = TamilLetter.get_combination_endings()
    a_forms = list(TamilLetter.get_combination_column('அ').values())

    for cp in word:
        if (cp in a_forms
                or TamilLetter.is_whitespace(cp)
                or TamilLetter.is_vowel(cp)
                or TamilLetter.is_aytham(cp)):
            # stands on its own: start a new grapheme
            result.append(cp)
        elif cp in ending_marks or cp == TamilLetter.get_pulli():
            # dependent mark: extend the previous grapheme, if there is one
            if not result:
                # validation should already have rejected such a word
                raise ValueError("""Unknown error: Combination ending %s cannot be first character of a word""" % (cp))
            result[-1] = result[-1] + cp
        else:
            # none of the known categories matched
            raise ValueError("""Unknown error: Codepoint \'%s\' in word %s is neither a vowel, consonant, combination or aytham""" % (cp, word))

    # TODO: Write extensive test cases for this
    return result
def split_syllables(letters=None):
    """
    Returns the syllables in a given word as a list.

    Generic algorithm: each vowel and each combination starts its own
    syllable; consonants and aytham are appended to the previous syllable.
    A consonant or aytham at the very start of the word (probably a
    loanword) becomes a syllable of its own.

    Args:
        letters: iterable of letters (strings) making up the word. ``None``
            (the default) is treated as an empty word. Any iterable is
            accepted, including one-shot iterators.

    Returns:
        A list of strings, one per syllable, in word order.

    Raises:
        ValueError: if a letter is neither a vowel, a combination, a
            consonant nor an aytham.
        Whatever ``TamilWord.validate`` raises for the joined word is
        propagated unchanged.
    """
    # Materialize once: avoids a shared mutable default and keeps one-shot
    # iterables from being exhausted by the join below before the loop runs.
    letters = [] if letters is None else list(letters)
    word = "".join(letters)

    # ensure that the word is a valid word
    TamilWord.validate(word)

    syllables = []

    for letter in letters:
        # a vowel or combination gets its own syllable
        if TamilLetter.is_combination(letter) or TamilLetter.is_vowel(letter):
            syllables.append(letter)

        # a consonant or aytham closes the previous syllable
        elif TamilLetter.is_consonant(letter) or TamilLetter.is_aytham(letter):
            if syllables:
                syllables[-1] += letter
            else:
                # word-initial consonant: nothing to attach to yet
                syllables.append(letter)

        # ValueError (a subclass of Exception) matches what split_letters
        # raises for the same situation
        else:
            raise ValueError(
                """Unknown error: \'%s\' in word %s is neither a vowel, consonant, combination or aytham"""
                % (letter, word)
            )

    return syllables
def split_syllables(letters=None):
    """
    Returns the syllables in a given word as a list.

    Generic algorithm: each vowel and each combination starts its own
    syllable; consonants and aytham are appended to the previous syllable.
    A consonant or aytham at the very start of the word (probably a
    loanword) becomes a syllable of its own.

    Args:
        letters: iterable of letters (strings) making up the word. ``None``
            (the default) is treated as an empty word. Any iterable is
            accepted, including one-shot iterators.

    Returns:
        A list of strings, one per syllable, in word order.

    Raises:
        ValueError: if a letter is neither a vowel, a combination, a
            consonant nor an aytham.
        Whatever ``TamilWord.validate`` raises for the joined word is
        propagated unchanged.
    """
    # Copy into a list: no shared mutable default, and a one-shot iterable
    # survives being joined for validation before it is looped over.
    letters = [] if letters is None else list(letters)
    word = ''.join(letters)

    # ensure that the word is a valid word
    TamilWord.validate(word)

    syllables = []

    for letter in letters:
        # a vowel or combination gets its own syllable
        if TamilLetter.is_combination(letter) or \
                TamilLetter.is_vowel(letter):
            syllables.append(letter)

        # a consonant or aytham is added to the end of the previous syllable
        elif TamilLetter.is_consonant(letter) or \
                TamilLetter.is_aytham(letter):
            if syllables:
                syllables[-1] += letter
            else:
                # word-initial consonant: nothing to attach to yet
                syllables.append(letter)

        # ValueError (a subclass of Exception) matches what split_letters
        # raises for the same situation
        else:
            raise ValueError("""Unknown error: \'%s\' in word %s is neither a vowel, consonant, combination or aytham""" % (letter, word))

    # the result must be handed back explicitly; without this the function
    # would return None
    return syllables