示例#1
0
 def cut(self, sentence):
     """Segment ``sentence`` and return the pieces as a single string.

     The input is normalised with ``utils.toUnicode`` and split into
     segments by ``self._split``. Each segment is classified by the code
     point of its first character:

     * Outside the ``_MYANMAR_CODES_START`` .. ``_MYANMAR_CODES_END``
       range: the segment is emitted unchanged, followed by
       ``self.separator`` (and preceded by it unless it is the very
       first segment).
     * Inside the range: the segment is converted to its category string
       with ``self.code2Category`` and the second element of the value
       returned by ``self._syllableSegmentation`` is emitted.
     """
     text = utils.toUnicode(sentence)
     range_start = MyanmarTokenizer._MYANMAR_CODES_START
     range_end = MyanmarTokenizer._MYANMAR_CODES_END

     pieces = []
     for index, segment in enumerate(self._split(text)):
         if range_start <= ord(segment[0]) <= range_end:
             categories = self.code2Category(segment)
             pieces.append(self._syllableSegmentation(categories, segment)[1])
         else:
             # Non-Myanmar segment: wrap it in separators, except that
             # no leading separator is added at the start of the output.
             if index != 0:
                 pieces.append(self.separator)
             pieces.append(segment + self.separator)
     return ''.join(pieces)
 def cut(self, sentence):
     """Segment ``sentence`` and return the pieces as a single string.

     The input is normalised with ``utils.toUnicode`` and split into
     segments by ``self._split``. Each segment is classified by the code
     point of its first character:

     * Outside the ``_MYANMAR_CODES_START`` .. ``_MYANMAR_CODES_END``
       range: the segment is emitted unchanged, followed by
       ``self.separator`` (and preceded by it unless it is the very
       first segment).
     * Inside the range: the segment is converted to its category string
       with ``self.code2Category`` and the second element of the value
       returned by ``self._syllableSegmentation`` is emitted.
     """
     text = utils.toUnicode(sentence)
     range_start = MyanmarTokenizer._MYANMAR_CODES_START
     range_end = MyanmarTokenizer._MYANMAR_CODES_END

     pieces = []
     for index, segment in enumerate(self._split(text)):
         if range_start <= ord(segment[0]) <= range_end:
             categories = self.code2Category(segment)
             pieces.append(self._syllableSegmentation(categories, segment)[1])
         else:
             # Non-Myanmar segment: wrap it in separators, except that
             # no leading separator is added at the start of the output.
             if index != 0:
                 pieces.append(self.separator)
             pieces.append(segment + self.separator)
     return ''.join(pieces)
示例#3
0
 def code2Category(self, sentence):
     """Translate ``sentence`` into a string of per-character categories.

     The input is normalised with ``utils.toUnicode``. Every character
     that is a member of ``MyanmarTokenizer._MYANMAR_CODES`` is replaced
     by its entry in ``self.codeCategory``; any other character is
     replaced by ``'?'``. The replacements are concatenated in order.
     """
     text = utils.toUnicode(sentence)
     categories = []
     for char in text:
         if char in MyanmarTokenizer._MYANMAR_CODES:
             categories.append(self.codeCategory[char])
         else:
             # Placeholder for characters outside the known code set.
             categories.append('?')
     return ''.join(categories)
 def code2Category(self, sentence):
     """Translate ``sentence`` into a string of per-character categories.

     The input is normalised with ``utils.toUnicode``. Every character
     that is a member of ``MyanmarTokenizer._MYANMAR_CODES`` is replaced
     by its entry in ``self.codeCategory``; any other character is
     replaced by ``'?'``. The replacements are concatenated in order.
     """
     text = utils.toUnicode(sentence)
     categories = []
     for char in text:
         if char in MyanmarTokenizer._MYANMAR_CODES:
             categories.append(self.codeCategory[char])
         else:
             # Placeholder for characters outside the known code set.
             categories.append('?')
     return ''.join(categories)