s = 0
            for X, y in zip(X_data, y_data):
                if self._fx(X) != y:
                    s += 1
            if s / self.N <= self.epsilon:  # 分类错误数目占比小于等于epsilon, 停止训练
                print('the err ratio is {0}'.format(s / self.N))
                break
        return

    def predict(self, X):
        """Return the boosted ensemble's predicted label for one sample X."""
        label = self._fx(X)
        return label


if __name__ == '__main__':
    # Toy 1-D dataset: 100 evenly spaced points in [-50, 50], shuffled,
    # with label = sign(x). Features are reshaped to a column vector.
    X_data_raw = np.linspace(-50, 50, 100)
    np.random.shuffle(X_data_raw)
    y_data = np.sign(X_data_raw)
    X_data = np.transpose([X_data_raw])

    from machine_learning_algorithm.cross_validation import validate

    # Cross-validate: train on each fold, then report test accuracy.
    for X_train, y_train, X_test, y_test in validate(X_data, y_data):
        AB = AdaBoost(epsilon=0.02)
        AB.fit(X_train, y_train)
        n_correct = sum(
            1 for X, y in zip(X_test, y_test) if AB.predict(X) == y
        )
        print(n_correct / len(y_test))
# ---------------------------------------------------------------------------
# Example #2
# ---------------------------------------------------------------------------
        self.disp_helper(current_node.right)

        return


if __name__ == '__main__':

    from sklearn.datasets import load_iris

    X_data = load_iris().data

    y_data = load_iris().target

    from machine_learning_algorithm.cross_validation import validate

    g = validate(X_data, y_data, ratio=0.2)

    for item in g:

        X_data_train, y_data_train, X_data_test, y_data_test = item

        clf = CART_CLF()

        clf.fit(X_data_train, y_data_train)

        score = 0

        for X, y in zip(X_data_test, y_data_test):

            if clf.predict(X) == y: