# Stops that end a clause or sentence; the input is split at each of them.
punctuation = (",", "。", "!", "?", "、", ";", ":")
# Whitespace characters that are dropped before segmentation.
space = ("\n", "\t", " ")
# Marks that are NOT in `punctuation` and therefore are not split points:
# quotation marks (“ ” ‘ ’), brackets (〔 ( ) [ ] { }), dashes (── —),
# ellipsis (······), title marks (《 》 〈 〉), the middle dot (·) and
# underscores (____).

# Split `text` into (delimiter, segment) pairs, where `segment` is the run of
# characters that precedes `delimiter`.
tmp = ""
sentence = []
for c in text:
    # Membership test. The previous `c is space` compared the character with
    # the tuple object itself, was never true, and let whitespace leak into
    # the segments.
    if c in space:
        continue
    if c in punctuation:
        sentence.append((c, tmp))
        tmp = ""
    else:
        tmp += c
# Keep the trailing text when the input does not end with a stop; it is
# recorded with an empty delimiter instead of being silently dropped.
if tmp:
    sentence.append(("", tmp))
    tmp = ""

# Run every segment through the three segmenters and print the results side
# by side for comparison.
result = []
for item in sentence:
    segment = item[1]
    trie_cut = trie.cut(segment)
    markov_cut = markov.cut(segment)
    # list() materializes the result so the tokens are printed rather than
    # the object returned by jieba.cut.
    jieba_cut = list(jieba.cut(segment))
    print("----------------------------")
    print("trie:", trie_cut)
    print("markov:", markov_cut)
    print("jieba:", jieba_cut)
    print("----------------------------")

# TODO: known issue still to be fixed - "emit matrix key error"
# (presumably raised by the markov segmenter for unseen characters; confirm).