def cut(self, content):
    '''Segment ``content`` with jieba's default mode and drop stop words.

    The original note on this method describes it as "forward fast
    matching".

    Args:
        content: Text to segment. It is round-tripped through UTF-8 and
            passed through ``strQB.strQ2B`` before segmentation.

    Returns:
        A list of the distinct tokens yielded by ``jieba.cut``, minus the
        entries of ``self._stop_word_list`` and the single-space token.
        The list is built from a set, so duplicates are collapsed and the
        order is unspecified.
    '''
    # Round-trip through UTF-8. 'ignore' drops characters that cannot be
    # encoded (e.g. lone surrogates); a misspelled handler name would
    # raise LookupError as soon as such a character is encountered.
    text = content.encode('utf8', 'ignore').decode('utf8')

    # Full-width -> half-width conversion.
    # NOTE(review): the result is decoded as UTF-8 below, so strQ2B
    # presumably returns a byte string -- confirm against strQB.
    normalized = strQB.strQ2B(text)

    tokens = jieba.cut(normalized.decode("utf8"))

    # Remove stop words and whitespace tokens.
    # NOTE(review): both literals in the last set read as the same plain
    # space here; one of them may originally have been a different
    # whitespace character -- confirm before simplifying.
    return list(set(tokens) - set(self._stop_word_list) - set([' ', ' ']))
def cuta(self, content):
    '''Segment ``content`` with ``jieba.cut(..., cut_all=True)`` and drop stop words.

    The original note on this method describes it as "full-word exact
    matching".

    Args:
        content: Text to segment. It is round-tripped through UTF-8 and
            passed through ``strQB.strQ2B`` before segmentation.

    Returns:
        A list of the distinct tokens yielded by ``jieba.cut`` with
        ``cut_all=True``, minus the entries of ``self._stop_word_list``.
        The list is built from a set, so duplicates are collapsed and the
        order is unspecified. Whitespace tokens are not filtered here.
    '''
    # Round-trip through UTF-8. 'ignore' drops characters that cannot be
    # encoded (e.g. lone surrogates); a misspelled handler name would
    # raise LookupError as soon as such a character is encountered.
    text = content.encode('utf8', 'ignore').decode('utf8')

    # Full-width -> half-width conversion.
    # NOTE(review): the result is decoded as UTF-8 below, so strQ2B
    # presumably returns a byte string -- confirm against strQB.
    normalized = strQB.strQ2B(text)

    tokens = jieba.cut(normalized.decode("utf8"), cut_all=True)

    # Remove stop words.
    return list(set(tokens) - set(self._stop_word_list))