            s = 0
            for X, y in zip(X_data, y_data):
                if self._fx(X) != y:
                    s += 1
            if s / self.N <= self.epsilon:  # stop training once the misclassification ratio is at most epsilon
                print('the err ratio is {0}'.format(s / self.N))
                break
        return

    def predict(self, X):  # predict the label of a single sample
        return self._fx(X)


if __name__ == '__main__':
    # 1-D toy problem: the label of each point is the sign of x
    X_data_raw = np.linspace(-50, 50, 100)
    np.random.shuffle(X_data_raw)
    y_data = np.sign(X_data_raw)
    X_data = np.transpose([X_data_raw])

    from machine_learning_algorithm.cross_validation import validate
    g = validate(X_data, y_data)
    for item in g:
        X_train, y_train, X_test, y_test = item
        AB = AdaBoost(epsilon=0.02)
        AB.fit(X_train, y_train)
        score = 0
        for X, y in zip(X_test, y_test):
            if AB.predict(X) == y:
                score += 1
        print(score / len(y_test))  # accuracy on the held-out split
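Both driver scripts (the AdaBoost one above and the CART one below) import a validate helper from machine_learning_algorithm.cross_validation and unpack each yielded item as (X_train, y_train, X_test, y_test). That module is not shown in this section; the snippet below is only a minimal sketch of such a generator, assuming random hold-out splits, where the number of splits k and the default ratio are assumptions rather than the module's actual values.

# Minimal sketch of the cross-validation generator used by the driver scripts.
# The real machine_learning_algorithm.cross_validation.validate may differ;
# k and the default ratio here are assumed values.
import numpy as np

def validate(X_data, y_data, ratio=0.15, k=5):
    """Yield k random (X_train, y_train, X_test, y_test) hold-out splits."""
    n = len(y_data)
    n_test = max(1, int(n * ratio))
    for _ in range(k):
        idx = np.random.permutation(n)  # shuffle sample indices
        test_idx, train_idx = idx[:n_test], idx[n_test:]
        yield (X_data[train_idx], y_data[train_idx],
               X_data[test_idx], y_data[test_idx])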
        self.disp_helper(current_node.right)
        return


if __name__ == '__main__':
    from sklearn.datasets import load_iris
    X_data = load_iris().data
    y_data = load_iris().target

    from machine_learning_algorithm.cross_validation import validate
    g = validate(X_data, y_data, ratio=0.2)
    for item in g:
        X_data_train, y_data_train, X_data_test, y_data_test = item
        clf = CART_CLF()
        clf.fit(X_data_train, y_data_train)
        score = 0
        for X, y in zip(X_data_test, y_data_test):
            if clf.predict(X) == y: