class Sentiment:
    """Two-label ('neg' / 'pos') sentiment scorer.

    A document is segmented with ``Seg``, stripped of stop words, and the
    remaining tokens are handed to a ``Bayes`` classifier.
    """

    def __init__(self):
        """Create an untrained classifier and load the segmenter model."""
        self.classifier = Bayes()
        self.seg = Seg()
        # NOTE(review): relative path, so it is resolved against the current
        # working directory. The file name suggests a pickle -- if so, only
        # load files from a trusted source.
        self.seg.load('seg.pickle')

    def save(self, fname):
        """Persist the classifier by delegating to ``classifier.save(fname)``."""
        self.classifier.save(fname)

    def load(self, fname):
        """Restore the classifier from *fname*.

        ``Bayes.load`` is not visible from here, so both plausible contracts
        are supported: when it returns a classifier object, that object
        replaces the current one; when it loads in place and returns ``None``,
        the current classifier is kept instead of being overwritten by
        ``None``.
        """
        loaded = self.classifier.load(fname)
        if loaded is not None:
            self.classifier = loaded

    def handle(self, doc):
        """Segment *doc* and return its tokens with stop words removed."""
        return self.filter_stop(self.seg.seg(doc))

    def train(self, neg_docs, pos_docs):
        """Train the classifier on labelled documents.

        Each document becomes a ``[tokens, label]`` pair; negative documents
        come first, followed by positive ones.
        """
        datas = [[self.handle(doc), 'neg'] for doc in neg_docs]
        datas.extend([self.handle(doc), 'pos'] for doc in pos_docs)
        self.classifier.train(datas)

    def classify(self, doc):
        """Return the score of *doc* being positive.

        The classifier reports its winning label together with that label's
        score, so a 'neg' verdict is converted to ``1 - prob``.
        """
        ret, prob = self.classifier.classify(self.handle(doc))
        return prob if ret == 'pos' else 1 - prob

    @staticmethod
    def filter_stop(words):
        """Return a list of the items of *words* that are not in ``stop_words``."""
        return [word for word in words if word not in stop_words]
def get_seg(self, fname='seg.pickle'):
    """Build a new ``Seg``, load *fname* into it, and return it.

    ``self`` is not used; every call constructs an independent segmenter.
    """
    segmenter = Seg()
    segmenter.load(fname)
    return segmenter
# NOTE(review): the next line is the tail of a generator whose header lies
# outside this view; it is kept exactly as found rather than re-indented.
yield tmp tmp = '' else: # middle tmp += c if tmp: yield tmp


def save():
    """Call ``save()`` with no arguments on ``segger``, a name defined outside this view."""
    segger.save()


def load():
    """Call ``load()`` with no arguments on ``segger``, a name defined outside this view."""
    segger.load()


# Script entry point: load the segmenter, segment one sample sentence with
# seg(), and print the resulting tokens joined by '/'.
if __name__ == '__main__':
    # Disabled training run, kept from the original (machine-specific path):
    # train(fname='/Users/LeonTao/PycharmProjects/snownlp/snownlp/seg/data.txt')
    #
    # segger.save()
    # NOTE(review): segger.load() is assumed to be live code following the
    # commented-out lines above -- confirm against the unmangled file.
    segger.load()
    sentence = '中华民族的伟大复兴'
    # list() drains the iterable returned by seg() before joining.
    words = list(seg(sentence))
    print('/'.join(words))