# Whatever is still in the corpus lists is used as the training split; these
# names alias the same list objects, no copies are made.
train_word_vects = word_vects
train_classes = classes
train_dataset = [
    get_doc_vector(doc_words, vocabulary) for doc_words in train_word_vects
]

# Train the Bayes model. `clf.train` returns a pair: `cond_probs` (iterated
# per class below) and `cls_probs` (indexed by class below).
cond_probs, cls_probs = clf.train(train_dataset, train_classes)

# Test the model: classify every held-out document, report each
# misclassification, then print the fraction that were wrong.
error = 0
for doc_words, actual_cls in zip(test_word_vects, test_classes):
    doc_vector = get_doc_vector(doc_words, vocabulary)
    predicted_cls = clf.classify(doc_vector, cond_probs, cls_probs)
    if predicted_cls == actual_cls:
        continue
    print('Predict: {} -- Actual: {}'.format(predicted_cls, actual_cls))
    error += 1

error_rate = error / len(test_classes)
print('Error Rate: {}'.format(error_rate))

# Plot the probability distribution curves for the different classes: one
# scatter series per class, with each entry of that class's `cond_probs`
# value scaled by the class's entry in `cls_probs` and drawn against its
# position index.
fig = plt.figure()
ax = fig.add_subplot(111)
for cls_name, cls_cond_probs in cond_probs.items():
    positions = np.arange(len(cls_cond_probs))
    weighted = cls_cond_probs * cls_probs[cls_name]
    ax.scatter(positions, weighted, label=cls_name, alpha=0.3)
ax.legend()
# NOTE(review): the two hold-out moves below use `idx`, which is not assigned
# in this fragment; presumably they form the body of a sampling loop whose
# header is outside the visible code -- re-indent to match it and confirm.
# Move the document at `idx`, and the class at the same position, out of the
# corpus lists and into the test split.
held_out_words = word_vects.pop(idx)
test_word_vects.append(held_out_words)
held_out_cls = classes.pop(idx)
test_classes.append(held_out_cls)

# The remaining corpus entries are the training split (aliases, not copies).
train_word_vects, train_classes = word_vects, classes
train_dataset = [
    get_doc_vector(sample_words, vocabulary)
    for sample_words in train_word_vects
]

# Train the Bayes model.
cond_probs, cls_probs = clf.train(train_dataset, train_classes)

# Test the model: print every wrong prediction and count it.
error = 0
for sample_words, expected_cls in zip(test_word_vects, test_classes):
    guessed_cls = clf.classify(
        get_doc_vector(sample_words, vocabulary), cond_probs, cls_probs
    )
    if expected_cls != guessed_cls:
        mismatch_report = 'Predict: {} -- Actual: {}'.format(
            guessed_cls, expected_cls
        )
        print(mismatch_report)
        error += 1
summary = 'Error Rate: {}'.format(error / len(test_classes))
print(summary)

# Plot the probability distribution curves for the different classes; each
# class gets its own semi-transparent scatter series so overlaps stay visible.
fig = plt.figure()
ax = fig.add_subplot(111)
for series_cls, series_probs in cond_probs.items():
    ax.scatter(
        np.arange(0, len(series_probs)),
        series_probs * cls_probs[series_cls],
        label=series_cls,
        alpha=0.3,
    )
ax.legend()
# Whatever is still in the corpus lists is used as the training split; these
# names alias the same list objects, no copies are made.
train_word_vecs = word_vecs
train_labels = labels
train_dataset = [
    get_doc_vector(words, vocabulary) for words in train_word_vecs
]

# Train the Bayes model. `clf.train` returns a pair that is unpacked into
# `cond_probs` (iterated per label below) and `labels_probs` (indexed by
# label below).
cond_probs, labels_probs = clf.train(train_dataset, train_labels)

# Test the model: classify every held-out document, report each
# misclassification, then print the fraction that were wrong.
error = 0
for test_word_vec, test_label in zip(test_word_vecs, test_labels):
    test_data = get_doc_vector(test_word_vec, vocabulary)
    # `labels_probs` is the only name the training step binds for the second
    # result, so it is the one that must be handed to the classifier.
    pred_label = clf.classify(test_data, cond_probs, labels_probs)
    if test_label != pred_label:
        # Format first, then print: print() returns None on Python 3, so
        # chaining .format() onto the print call would raise AttributeError.
        print("Predict:{} -- Actual:{}".format(pred_label, test_label))
        error += 1
print("Error Rate:{}".format(error / len(test_labels)))

# Plot the probability distribution curves for the different labels: one
# scatter series per label, with each entry of `probs` scaled by that label's
# entry in `labels_probs` and drawn against its position index.
fig = plt.figure()
ax = fig.add_subplot(111)
for label, probs in cond_probs.items():
    ax.scatter(
        np.arange(0, len(probs)),
        probs * labels_probs[label],
        label=label,
        alpha=0.3,
    )
ax.legend()
plt.show()