示例#1
0
    # Use word_vects / classes as the training split.
    # NOTE(review): presumably the test samples were removed from these lists
    # earlier in the function (not visible here) -- confirm.
    train_word_vects = word_vects
    train_classes = classes

    # Convert every training document into a vector via get_doc_vector.
    train_dataset = [
        get_doc_vector(words, vocabulary) for words in train_word_vects
    ]

    # Train the Bayes model
    cond_probs, cls_probs = clf.train(train_dataset, train_classes)

    # Test the model: count the test samples whose predicted class differs
    # from the actual class, printing each mismatch.
    error = 0
    for test_word_vect, test_cls in zip(test_word_vects, test_classes):
        test_data = get_doc_vector(test_word_vect, vocabulary)
        pred_cls = clf.classify(test_data, cond_probs, cls_probs)
        if test_cls != pred_cls:
            print('Predict: {} -- Actual: {}'.format(pred_cls, test_cls))
            error += 1

    # Fraction of misclassified test samples.
    print('Error Rate: {}'.format(error / len(test_classes)))

    # Plot the probability distribution for each class: one scatter series per
    # entry of cond_probs, with the values scaled by cls_probs[cls].
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for cls, probs in cond_probs.items():
        ax.scatter(np.arange(0, len(probs)),
                   probs * cls_probs[cls],
                   label=cls,
                   alpha=0.3)
        # The legend is refreshed on every iteration so it includes the series
        # that was just added.
        ax.legend()
示例#2
0
文件: sms.py 项目: MoherX/MLBox
        test_word_vects.append(word_vects.pop(idx))
        test_classes.append(classes.pop(idx))

    # The samples popped into the test lists above are no longer in
    # word_vects / classes, so what remains is the training split.
    train_word_vects = word_vects
    train_classes = classes

    # Convert every training document into a vector via get_doc_vector.
    train_dataset = [get_doc_vector(words, vocabulary) for words in train_word_vects]

    # Train the Bayes model
    cond_probs, cls_probs = clf.train(train_dataset, train_classes)

    # Test the model: count the test samples whose predicted class differs
    # from the actual class, printing each mismatch.
    error = 0
    for test_word_vect, test_cls in zip(test_word_vects, test_classes):
        test_data = get_doc_vector(test_word_vect, vocabulary)
        pred_cls = clf.classify(test_data, cond_probs, cls_probs)
        if test_cls != pred_cls:
            print('Predict: {} -- Actual: {}'.format(pred_cls, test_cls))
            error += 1

    # Fraction of misclassified test samples.
    print('Error Rate: {}'.format(error/len(test_classes)))

    # Plot the probability distribution for each class: one scatter series per
    # entry of cond_probs, with the values scaled by cls_probs[cls].
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for cls, probs in cond_probs.items():
        ax.scatter(np.arange(0, len(probs)),
                   probs*cls_probs[cls],
                   label=cls,
                   alpha=0.3)
        # The legend is refreshed on every iteration so it includes the series
        # that was just added.
        ax.legend()
示例#3
0
    # Use word_vecs / labels as the training split.
    # NOTE(review): presumably the test samples were removed from these lists
    # earlier in the function (not visible here) -- confirm.
    train_word_vecs = word_vecs
    train_labels = labels

    # Convert every training document into a vector via get_doc_vector.
    train_dataset = [
        get_doc_vector(words, vocabulary) for words in train_word_vecs
    ]

    # Train the Bayes model
    cond_probs, labels_probs = clf.train(train_dataset, train_labels)

    # Test the model: count the test samples whose predicted label differs
    # from the actual label, printing each mismatch.
    error = 0
    for test_word_vec, test_label in zip(test_word_vecs, test_labels):
        test_data = get_doc_vector(test_word_vec, vocabulary)
        # Pass the priors returned by clf.train above (labels_probs); the
        # previous name `cls_probs` is never bound in this fragment.
        pred_label = clf.classify(test_data, cond_probs, labels_probs)
        if test_label != pred_label:
            # Format the string *before* printing: print() returns None, so
            # calling .format on its result raised AttributeError.
            print("Predict:{} -- Actual:{}".format(pred_label, test_label))
            error += 1
    # Fraction of misclassified test samples.
    print("Error Rate:{}".format(error / len(test_labels)))

    # Plot the probability distribution for each label: one scatter series per
    # entry of cond_probs, with the values scaled by labels_probs[label].
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for label, probs in cond_probs.items():
        ax.scatter(np.arange(0, len(probs)),
                   probs * labels_probs[label],
                   label=label,
                   alpha=0.3)
        # The legend is refreshed on every iteration so it includes the series
        # that was just added.
        ax.legend()
    plt.show()