def __init__(self, n_estimator=10, learning_rate=0.01, min_sample=2, min_gain=0.1, max_depth=10):
    """Set up the loss object and one regression tree per boosting round.

    Args:
        n_estimator: Forwarded to the base-class constructor; the number of
            trees created here is read back from ``self._n_estimator``
            (presumably set by the base class from this value -- confirm).
        learning_rate: Forwarded to the base-class constructor.
        min_sample: First positional argument of every
            ``CARTRegressionScratch`` that is created.
        min_gain: Second positional argument of every tree.
        max_depth: Third positional argument of every tree.
    """
    super(GBDTRegressionScratch, self).__init__(n_estimator, learning_rate)

    # Regression trees are boosted under the squared-error loss.
    self._loss = SquareLoss()

    # Every tree is built with the same hyper-parameters.
    tree_args = (min_sample, min_gain, max_depth)
    for _ in range(self._n_estimator):
        self._trees.append(CARTRegressionScratch(*tree_args))
class GBDTRegressionScratch(GBDTScratch):
    """Gradient-boosted regression model built from CART regression trees.

    Each tree is trained on the gradient reported by a ``SquareLoss`` object
    for the current running prediction, and the running prediction is moved
    against that tree's output, scaled by the learning rate ``self._lr``.
    """

    def __init__(self, n_estimator=10, learning_rate=0.01, min_sample=2, min_gain=0.1, max_depth=10):
        """Set up the loss object and one regression tree per boosting round.

        Args:
            n_estimator: Forwarded to the base-class constructor; the number
                of trees created here is read back from ``self._n_estimator``
                (presumably set by the base class from this value -- confirm).
            learning_rate: Forwarded to the base-class constructor.
            min_sample: First positional argument of every
                ``CARTRegressionScratch`` that is created.
            min_gain: Second positional argument of every tree.
            max_depth: Third positional argument of every tree.
        """
        super(GBDTRegressionScratch, self).__init__(n_estimator, learning_rate)

        # Regression trees are boosted under the squared-error loss.
        self._loss = SquareLoss()

        # Every tree is built with the same hyper-parameters.
        tree_args = (min_sample, min_gain, max_depth)
        for _ in range(self._n_estimator):
            self._trees.append(CARTRegressionScratch(*tree_args))

    def fit(self, X, y):
        """Train the model.

        Args:
            X: Training samples; row ``j`` is read as ``X[j]``.
            y: Training targets; must expose ``shape`` (e.g. a NumPy array).
                ``y.shape[0]`` is taken as the number of samples.
        """
        sample_count = y.shape[0]
        # Running prediction, starting from zero for every sample.
        residual_pred = np.zeros(sample_count)

        for round_idx in range(self._n_estimator):
            current_tree = self._trees[round_idx]

            # Each tree is trained with the loss gradient as its target.
            target = self._loss.calc_gradient(y, residual_pred)
            current_tree.fit(X, target)

            # The tree is queried one sample at a time.
            tree_output = np.zeros(sample_count)
            for row in range(sample_count):
                tree_output[row] = current_tree.predict(X[row])

            # Step against the fitted gradient, scaled by the learning rate.
            residual_pred -= self._lr * tree_output

    def predict(self, x):
        """Predict the output for the given input sample.

        Args:
            x: Input passed unchanged to every tree's ``predict``.

        Returns:
            ``0`` minus the sum over all trees of ``self._lr`` times the
            tree's prediction for ``x``.
        """
        y_pred = 0
        for fitted_tree in self._trees:
            y_pred -= self._lr * fitted_tree.predict(x)
        return y_pred