示例#1
0
文件: Chcut.py 项目: terasum/chBayes
 def cut(self,content):
     '''Segment ``content`` with jieba's default mode and drop stop words.

     ``jieba.cut`` is called with its default arguments (no ``cut_all``),
     which jieba documents as its accurate mode.

     :param content: text to segment; it must support ``.encode('utf8')``.
     :return: list of distinct tokens with every entry of
         ``self._stop_word_list`` and blank tokens removed. The result is
         built from set differences, so duplicates are collapsed and the
         order of the tokens is not preserved.
     '''
     # Round-trip through UTF-8; the 'ignore' handler drops any character
     # that cannot be encoded instead of raising.
     encoded = content.encode('utf8', 'ignore')
     # NOTE(review): strQ2B presumably converts full-width characters to
     # half-width (judging by its name) -- confirm against strQB.
     normalized = strQB.strQ2B(encoded.decode('utf8'))
     tokens = jieba.cut(normalized.decode("utf8"))
     # Remove stop words and whitespace-only tokens.
     words = list(set(tokens) - set(self._stop_word_list) - set([' ',' ']))
     return words
示例#2
0
文件: Chcut.py 项目: terasum/chBayes
 def cuta(self,content):
     '''Segment ``content`` with jieba's full mode and drop stop words.

     ``jieba.cut`` is called with ``cut_all=True``, which jieba documents
     as full mode: every word it can find in the text is emitted, so
     overlapping candidates may appear in the result.

     :param content: text to segment; it must support ``.encode('utf8')``.
     :return: list of distinct tokens with every entry of
         ``self._stop_word_list`` removed. The result is built from a set
         difference, so duplicates are collapsed and the order of the
         tokens is not preserved. Unlike ``cut``, blank tokens are not
         filtered out here.
     '''
     # Round-trip through UTF-8; the 'ignore' handler drops any character
     # that cannot be encoded instead of raising.
     encoded = content.encode('utf8','ignore')
     # Convert full-width characters to half-width.
     normalized = strQB.strQ2B(encoded.decode('utf8'))
     tokens = jieba.cut(normalized.decode("utf8"),cut_all=True)
     # Remove stop words.
     words = list(set(tokens) - set(self._stop_word_list))
     return words