Example #1
 def test_train(self):
     print("Test Train:")
     data_mat, labels_arr = adb.load_simple_data()
     classifies_arr, est_agg = adb.train(data_mat, labels_arr, 9)
     self.assertEqual(len(classifies_arr), 3)
     self.assertEqual(0, classifies_arr[-1]['dim'])
     self.assertEqual('lt', classifies_arr[-1]['ineq'])
     self.assertEqual(0.9, classifies_arr[-1]['thresh'])
     self.assertEqual(0.8958797346, round(classifies_arr[-1]['alpha'], 10))
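The alpha asserted above is consistent with the standard AdaBoost classifier weight alpha = 0.5 * ln((1 - error) / error): a weighted error of 1/7 reproduces the value exactly. A quick sanity check (assuming adb.train uses this standard formula; the 1/7 error is inferred here, not read from adb):

import math

# Standard AdaBoost classifier weight; an error of 1/7 yields the asserted value.
error = 1.0 / 7.0
alpha = 0.5 * math.log((1 - error) / error)
print(round(alpha, 10))  # 0.8958797346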
Example #2
 def test_build_stump(self):
     print("Test Build Stump:")
     data_mat, labels_arr = adb.load_simple_data()
     w_data = np.mat(np.ones((5, 1)) / 5)
     best_stump, min_error, best_estimation = adb.build_stump(
         data_mat, labels_arr, w_data)
     self.assertEqual(best_stump, {'dim': 0, 'ineq': 'lt', 'thresh': 1.3})
     self.assertEqual(min_error, np.matrix([[0.2]]))
     res = np.array([[-1.], [1.], [-1.], [-1.], [1.]])
     self.assertEqual(True, (best_estimation == res).all())
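For context, a minimal sketch of what a decision-stump builder with this interface typically looks like. This is a hypothetical reimplementation, assuming adb.build_stump does an exhaustive grid search over feature, threshold and inequality, as its return values suggest; the real module may differ in details such as the number of threshold steps.

import numpy as np


def stump_classify(data_mat, dim, thresh, ineq):
    # Predict +1/-1 by thresholding a single feature column.
    ret = np.ones((np.shape(data_mat)[0], 1))
    if ineq == 'lt':
        ret[data_mat[:, dim] <= thresh] = -1.0
    else:
        ret[data_mat[:, dim] > thresh] = -1.0
    return ret


def build_stump(data_arr, class_labels, weights):
    # Exhaustively search (feature, threshold, inequality) for the lowest weighted error.
    data_mat = np.mat(data_arr)
    label_mat = np.mat(class_labels).T
    m, n = np.shape(data_mat)
    num_steps = 10.0
    best_stump, best_est, min_error = {}, None, np.inf
    for dim in range(n):
        rng_min, rng_max = data_mat[:, dim].min(), data_mat[:, dim].max()
        step = (rng_max - rng_min) / num_steps
        for j in range(-1, int(num_steps) + 1):
            for ineq in ('lt', 'gt'):
                thresh = rng_min + float(j) * step
                predicted = stump_classify(data_mat, dim, thresh, ineq)
                err = np.mat(np.ones((m, 1)))
                err[predicted == label_mat] = 0
                weighted_err = weights.T * err  # error under the current sample weighting
                if weighted_err < min_error:
                    min_error = weighted_err
                    best_est = predicted.copy()
                    best_stump = {'dim': dim, 'thresh': thresh, 'ineq': ineq}
    return best_stump, min_error, best_est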
Example #3
    def test_classify(self):
        print("Test Classify:")
        print("Classify Simple Data:")
        data_mat, labels_arr = adb.load_simple_data()
        classifies_arr, est_agg = adb.train(data_mat, labels_arr, 30)
        pred = adb.classify([[5, 5], [0, 0]], classifies_arr)
        res = np.matrix([[1.], [-1.]])
        self.assertEqual(True, (pred == res).all())

        print("Classify Loaded Data:")
        dat_arr, label_arr = adb.load_data_set('horseColicTraining2.txt')
        classifier_array, agg_class_est = adb.train(dat_arr, label_arr, 10)
        test_arr, test_label_arr = adb.load_data_set('horseColicTest2.txt')
        prediction10 = adb.classify(test_arr, classifier_array)
        err_arr = np.mat(np.ones((67, 1)))  # the horse colic test set has 67 samples
        err_rate = err_arr[prediction10 != np.mat(test_label_arr).T].sum() / 67
        self.assertEqual(16.0 / 67, err_rate)
        print("Test Error: %f%%" % (err_rate * 100))
        # Plot the ROC curve and compute the AUC
        val_auc = adb.plot_roc(agg_class_est, label_arr)
        self.assertLessEqual(0.8582969635, round(val_auc, 10))
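adb.plot_roc is checked only through its return value; one standard way to obtain that AUC from the aggregate class estimates is the threshold sweep sketched below (a hypothetical roc_auc helper; the real plot_roc also draws the curve and may differ in details such as tie handling).

import numpy as np


def roc_auc(pred_strengths, class_labels):
    # Sweep the decision threshold from the lowest score upward, starting at (1, 1)
    # on the ROC plane, and accumulate the area of the rectangles swept out.
    scores = np.asarray(pred_strengths).ravel()
    labels = np.asarray(class_labels).ravel()
    num_pos = np.sum(labels == 1.0)
    y_step = 1.0 / num_pos                  # height lost per true positive dropped
    x_step = 1.0 / (len(labels) - num_pos)  # width lost per false positive dropped
    cur_y, y_sum = 1.0, 0.0
    for idx in scores.argsort():            # ascending scores
        if labels[idx] == 1.0:
            cur_y -= y_step                 # a positive falls below the threshold
        else:
            y_sum += cur_y                  # record the height before stepping left
    return y_sum * x_step


Example #4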
        D = np.multiply(D, np.exp(expon))
        D = D / np.sum(D)
        agg_class_est += alpha * predict_value
        print("agg_class_est: ", agg_class_est.T)
        # np.sign(agg_class_est) != np.mat(class_labels).T marks misclassified samples
        # with 1 and correct ones with 0, which makes the errors easy to count
        agg_errors = np.multiply(
            np.sign(agg_class_est) != np.mat(class_labels).T, np.ones((m, 1)))
        error_rate = agg_errors.sum() / m
        print('total error:', error_rate)
        if error_rate == 0:
            break
    return weak_class_arr


if __name__ == '__main__':
    # Load the simple data set
    dat_arr, labels_arr = adaboost.load_simple_data()

    # show_plot(dat_arr, labels_arr)
    m = np.shape(dat_arr)[0]
    D = np.mat(np.ones((m, 1)) / m)

    # Best single-level decision stump
    # best_stump, error, predict_value, = build_stump(dat_arr, labels_arr, D)
    # print(best_stump, error)
    # print(predict_value)

    weak_class_arr = adaboost_train_ds(dat_arr, labels_arr)
    print('********' * 10)
    print(weak_class_arr)
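The snippet above shows only the tail of the boosting loop (the weight update, vote aggregation and early stop). Below is a sketch of the iteration head it presupposes, assuming a standard AdaBoost-with-stumps loop; build_stump is as in Example #2, and num_iter=40 plus the variable names are assumptions chosen to match the tail:

def adaboost_train_ds(data_arr, class_labels, num_iter=40):
    weak_class_arr = []
    m = np.shape(data_arr)[0]
    D = np.mat(np.ones((m, 1)) / m)          # start with uniform sample weights
    agg_class_est = np.mat(np.zeros((m, 1)))
    for _ in range(num_iter):
        best_stump, error, predict_value = build_stump(data_arr, class_labels, D)
        # alpha = 0.5 * ln((1 - error) / error); max() guards against a zero error
        alpha = float(0.5 * np.log((1.0 - error) / max(error, 1e-16)))
        best_stump['alpha'] = alpha
        weak_class_arr.append(best_stump)
        # -alpha for correctly classified samples, +alpha for mistakes, so the
        # weight update shown in the tail shrinks the former and boosts the latter
        expon = np.multiply(-1 * alpha * np.mat(class_labels).T, predict_value)
        # ... the D update, agg_class_est accumulation and early stop continue
        # exactly as in the tail above, and the loop ends with `return weak_class_arr`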
Example #5
        agg_error = np.multiply(np.sign(agg_class_est) != np.mat(class_labels).T, np.ones((m, 1)))
        error_rate = agg_error.sum()/m
        print("total error: ", error_rate, "\n")
        if error_rate == 0:
            break
    return weak_class_arr


def ada_classify(datto_class, classifier_arr):
    # Run every trained stump on the input data and accumulate its weighted vote.
    data_mat = np.matrix(datto_class)
    m = data_mat.shape[0]
    agg_class_est = np.zeros((m, 1))
    for i in range(len(classifier_arr)):
        class_est = stump_classify(data_mat, classifier_arr[i]["dim"], classifier_arr[i]["threshold"],
                                   classifier_arr[i]["inequality"])
        agg_class_est += classifier_arr[i]["alpha"] * class_est
        # print(agg_class_est)
    # print(np.sign(agg_class_est))
    return np.sign(agg_class_est)


if __name__ == '__main__':
    import adaboost
    data_mat, class_labels = adaboost.load_simple_data()
    a = adaboost.adaboost_train(data_mat, class_labels, 9)
    print(a)
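A hypothetical usage of ada_classify, which could be appended to the __main__ block above; it mirrors the classify test in Example #3 and assumes adaboost_train returns a list of stumps keyed by "dim", "threshold", "inequality" and "alpha", which is what ada_classify reads:

    classifier_arr = adaboost.adaboost_train(data_mat, class_labels, 30)
    # A point deep in the upper-right region should vote +1, the origin -1.
    print(ada_classify([[5, 5], [0, 0]], classifier_arr))  # expected: [[ 1.] [-1.]]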