def cut(self, sentence):
    """Segment ``sentence`` and return the pieces as a single string.

    The input is first normalised with ``utils.toUnicode`` and broken into
    chunks by ``self._split``. Each chunk is then handled according to the
    code point of its first character:

    * inside ``_MYANMAR_CODES_START`` .. ``_MYANMAR_CODES_END`` (inclusive):
      the chunk is mapped to its category string and passed to
      ``self._syllableSegmentation``; element ``[1]`` of that result is
      appended to the output.
    * outside that range: the chunk is copied through unchanged, followed
      by ``self.separator`` (and preceded by one unless it is the very
      first chunk).

    :param sentence: text to segment; anything ``utils.toUnicode`` accepts.
    :return: the concatenation of all processed chunks.
    """
    text = utils.toUnicode(sentence)
    low = MyanmarTokenizer._MYANMAR_CODES_START
    high = MyanmarTokenizer._MYANMAR_CODES_END

    pieces = []
    for index, chunk in enumerate(self._split(text)):
        # NOTE(review): assumes _split never yields an empty chunk;
        # chunk[0] would raise IndexError otherwise -- confirm.
        lead = ord(chunk[0])
        if low <= lead <= high:
            categories = self.code2Category(chunk)
            # Presumably index 1 of the result holds the segmented text
            # -- verify against _syllableSegmentation.
            pieces.append(self._syllableSegmentation(categories, chunk)[1])
        else:
            # Pass-through chunk: fence it with separators, but never
            # start the whole output with one.
            if index != 0:
                pieces.append(self.separator)
            pieces.append(chunk + self.separator)
    return ''.join(pieces)
def code2Category(self, sentence):
    """Translate each character of ``sentence`` into its category symbol.

    The input is normalised with ``utils.toUnicode``. Every character that
    is a member of ``MyanmarTokenizer._MYANMAR_CODES`` is replaced by its
    entry in ``self.codeCategory``; any other character becomes ``'?'``.
    One symbol is looked up per input character.

    This method was previously defined twice in a row with identical
    bodies; the later definition silently replaced the earlier one, so a
    single definition is kept here.

    :param sentence: text to classify; anything ``utils.toUnicode`` accepts.
    :return: the per-character category symbols joined into one string.
    :raises KeyError: if a character is in ``_MYANMAR_CODES`` but has no
        entry in ``self.codeCategory``.
    """
    sentence = utils.toUnicode(sentence)
    return ''.join([
        self.codeCategory[c] if c in MyanmarTokenizer._MYANMAR_CODES else '?'
        for c in sentence
    ])