def _tree_prune(self, tree, X, y):
    """Bottom-up reduced-error pruning of a fitted regression tree.

    Recursively prunes the left and right subtrees first, then for each
    child branch tentatively replaces it with its default (leaf) value and
    keeps the replacement only if the ensemble's mean error on (X, y)
    strictly improves.

    Args:
        tree: root TreeNode of the (sub)tree to prune; non-TreeNode values
            are treated as leaves and left untouched.
        X: feature matrix used to score pruning candidates.
        y: target vector aligned with X.
    """
    if not isinstance(tree, TreeNode):
        return  # leaf reached, nothing to prune
    # Depth-first: simplify the children before judging this node's branches.
    self._tree_prune(tree.L, X, y)
    self._tree_prune(tree.R, X, y)
    # The original code duplicated the identical pruning block for L and R;
    # dispatch both branches through one shared helper instead.
    for branch_attr in ('L', 'R'):
        self._try_prune_branch(tree, branch_attr, X, y)

def _try_prune_branch(self, tree, branch_attr, X, y):
    """Collapse tree.<branch_attr> to its default value if that lowers mean error.

    branch_attr is 'L' or 'R'. The branch is restored unchanged when the
    pruned tree does not strictly improve the error on (X, y).
    """
    branch = getattr(tree, branch_attr)
    if not isinstance(branch, TreeNode):
        return  # already a leaf, nothing to collapse
    perf_no_prune = mean_error(y, self.predict(X))
    # Tentatively replace the branch with its default prediction.
    setattr(tree, branch_attr, branch.Default)
    perf_with_prune = mean_error(y, self.predict(X))
    if perf_with_prune < perf_no_prune:
        improve = perf_no_prune - perf_with_prune
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.info('tree prune, mean error descent %f', improve)
    else:
        # Pruning did not help: roll the branch back.
        setattr(tree, branch_attr, branch)
else: is_valid = False nFeat = X.shape[1] if nFeat == self._nFeat: is_valid = True return is_valid def predict(self, X): models = self._parameter['trees'] pred = np.zeros(X.shape[0]) for model in models: pred += np.array(model.predict(X)) return pred if __name__ == '__main__': path = os.getcwd() + '/../dataset/winequality-white.csv' loader = DataLoader(path) dataset = loader.load(target_col_name='quality') trainset, testset = dataset.cross_split() gbdt = GradientBoostingDecisionTree(10) gbdt.fit(trainset[0], trainset[1]) predict = gbdt.predict(testset[0]) print 'GBDT mean error:', mean_error(testset[1], predict) dt = DecisionTreeRegressor() dt.fit(trainset[0], trainset[1]) predict = dt.predict(testset[0]) print 'DecisionTree mean error:', mean_error(testset[1], predict)
if nFeat == self._nFeat: is_valid = True return is_valid def feval(self, parameter, X, y): y = np.reshape(y, (y.shape[0], 1)) param_list = unroll_parameter(parameter, self._parameter_shape) W, b = param_list[0], param_list[1] nSize = X.shape[0] h = np.dot(X, W) + np.repeat(np.reshape(b, (1, b.shape[0])), X.shape[0], axis=0) loss = self._lossor.calculate(y, h) residual = h - y grad_W = 1. / nSize * np.dot(X.T, residual) grad_b = 1. / nSize * np.sum(residual) grad_parameter = roll_parameter([grad_W, grad_b]) return loss, grad_parameter if __name__ == '__main__': path = os.getcwd() + '/../dataset/winequality-white.csv' loader = DataLoader(path) dataset = loader.load(target_col_name='quality') trainset, testset = dataset.cross_split() linear = LinearRegressor(solve_type='numeric', normalize=True, max_iter=2000, batch_size=50, learning_rate=1e-3, is_plot_loss=True) linear.fit(trainset[0], trainset[1]) prediction = linear.predict(testset[0]) performance = mean_error(testset[1], prediction) print performance