class Chatbot_port2:
    """Retrieval chatbot: map a question to a line of a pre-written answer file.

    The question is handed to a ``SentenceSimilarity`` model and the first
    entry of its result selects which answer line is returned.
    """

    def __init__(self, answer_path="dataset/answer.txt"):
        """Restore the similarity model and load the candidate answers.

        Args:
            answer_path: Path to a UTF-8 text file whose lines are the
                candidate answers. Defaults to the location that used to be
                hard-coded, so existing ``Chatbot_port2()`` calls behave the
                same.
        """
        # Tokenizer based on jieba word segmentation, with stop words removed.
        seg = Seg()
        self.ss = SentenceSimilarity(seg)
        # NOTE(review): presumably loads the model persisted by an earlier
        # training run via save_model() -- confirm against SentenceSimilarity.
        self.ss.restore_model()
        with open(answer_path, "r", encoding="utf-8") as file_answer:
            # Lines keep their trailing newline; chat() returns them as-is.
            self.line = file_answer.readlines()

    def chat(self, question):
        """Return the answer selected for ``question`` and its score.

        Args:
            question: The user's question; surrounding whitespace is stripped
                before it is passed to the similarity model.

        Returns:
            A ``(answer, score)`` tuple. ``answer`` is the raw line from the
            answer file at the index given by the first field of the top
            match; ``score`` is the second field of that same match
            (presumably its similarity value -- verify against
            ``SentenceSimilarity.similarity``).

        Raises:
            IndexError: If the model returns no matches, or the matched index
                is outside the loaded answer lines.
        """
        question = question.strip()
        top_10 = self.ss.similarity(question)
        # Only the first entry of the result is used.
        best_match = top_10[0]
        answer_index = best_match[0]
        return self.line[answer_index], best_match[1]
for sen in raw_test_sentences: test_sentences.append(sen.strip()) for sen in test_sentences: print(sen) # 分词工具,基于jieba分词,并去除停用词 seg = Seg() # 训练模型 ss = SentenceSimilarity(seg) if train: ss.set_sentences(train_sentences) ss.TfidfModel() # tfidf模型 ss.save_model() else: ss.restore_model() # 测试集 right_count = 0 print(os.getcwd()) file_result = open('dataset/test_output.txt', 'w',encoding='utf-8') with open("dataset/answer.txt", 'r', encoding='utf-8') as file_answer: line = file_answer.readlines() for i in range(0, len(test_sentences)): top_10 = ss.similarity(test_sentences[i]) answer_index = top_10[0][0] answer = line[answer_index] file_result.write(test_sentences[i]+'\t')