示例#1
0
 def _tree_prune(self, tree, X, y):
     """Prune ``tree`` bottom-up, keeping a cut only if it lowers the error.

     Both children are pruned recursively first. Then each child that is
     still a ``TreeNode`` is temporarily swapped for its own ``Default``
     attribute (presumably that subtree's fallback prediction -- confirm
     against ``TreeNode``). The swap is kept only when ``mean_error`` of
     ``self.predict(X)`` against ``y`` becomes strictly smaller; on a tie
     or a worse score the original subtree is put back.

     :param tree: node to prune in place; anything that is not a
         ``TreeNode`` is ignored.
     :param X: samples handed to ``self.predict`` for scoring.
     :param y: targets the predictions are scored against.
     :return: None. ``tree`` is modified in place.
     """
     if not isinstance(tree, TreeNode):
         return

     left_child, right_child = tree.L, tree.R

     # Post-order walk: settle the subtrees before judging this node's branches.
     self._tree_prune(left_child, X, y)
     self._tree_prune(right_child, X, y)

     for side, child in (('L', left_child), ('R', right_child)):
         if not isinstance(child, TreeNode):
             continue

         # Score the model as it stands, then again with this branch collapsed.
         error_before = mean_error(y, self.predict(X))
         setattr(tree, side, child.Default)
         error_after = mean_error(y, self.predict(X))

         if error_after < error_before:
             gain = error_before - error_after
             logger.info('tree prune, mean error descent %f' % gain)
         else:
             # No strict improvement: restore the original subtree.
             setattr(tree, side, child)
示例#2
0
        else:
            is_valid = False
            nFeat = X.shape[1]
            if nFeat == self._nFeat:
                is_valid = True
            return is_valid

    def predict(self, X):
        """Return the element-wise sum of every stored tree's prediction.

        :param X: array-like exposing ``shape``; ``X.shape[0]`` sets the
            length of the returned vector.
        :return: 1-D float array holding, per sample, the sum of
            ``predict(X)`` over all models in ``self._parameter['trees']``
            (all zeros when that collection is empty).
        """
        total = np.zeros(X.shape[0])
        for tree in self._parameter['trees']:
            # Accumulate in place so the result keeps the zero vector's dtype.
            total += np.asarray(tree.predict(X))
        return total


if __name__ == '__main__':
    # Demo: fit a boosted ensemble and a single regression tree on the
    # white-wine quality data and report each model's mean error on the
    # held-out split.
    # The dataset is looked up relative to the current working directory.
    path = os.path.join(os.getcwd(), '..', 'dataset', 'winequality-white.csv')
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='quality')
    # Each split is indexed as [0] = features, [1] = targets below.
    trainset, testset = dataset.cross_split()

    gbdt = GradientBoostingDecisionTree(10)
    gbdt.fit(trainset[0], trainset[1])
    predict = gbdt.predict(testset[0])
    # Single-argument print(...) emits the same text on Python 2 and 3;
    # the bare print statement used before is a SyntaxError on Python 3.
    print('GBDT mean error: %s' % (mean_error(testset[1], predict),))

    dt = DecisionTreeRegressor()
    dt.fit(trainset[0], trainset[1])
    predict = dt.predict(testset[0])
    print('DecisionTree mean error: %s' % (mean_error(testset[1], predict),))

示例#3
0
            if nFeat == self._nFeat:
                is_valid = True
            return is_valid

    def feval(self, parameter, X, y):
        """Evaluate the loss and the rolled gradient for ``parameter``.

        ``parameter`` is unpacked with ``unroll_parameter`` using
        ``self._parameter_shape``; its first two entries are used as the
        weights ``W`` and the bias ``b``. The gradient formulas are the
        residual-based ones below -- presumably matching a squared-error
        ``self._lossor``; confirm against that class.

        :param parameter: rolled parameters accepted by ``unroll_parameter``.
        :param X: 2-D sample matrix, one row per sample.
        :param y: targets; reshaped to a column of ``y.shape[0]`` rows.
        :return: ``(loss, grad_parameter)`` where ``grad_parameter`` is
            ``roll_parameter([grad_W, grad_b])``.
        """
        targets = np.reshape(y, (y.shape[0], 1))

        unpacked = unroll_parameter(parameter, self._parameter_shape)
        W = unpacked[0]
        b = unpacked[1]

        n_samples = X.shape[0]

        # Linear response: stack the bias row once per sample, then add it.
        projection = np.dot(X, W)
        bias_row = np.reshape(b, (1, b.shape[0]))
        bias_block = np.repeat(bias_row, X.shape[0], axis=0)
        h = projection + bias_block

        loss = self._lossor.calculate(targets, h)

        residual = h - targets
        grad_W = 1. / n_samples * np.dot(X.T, residual)
        # The bias gradient sums over every residual entry (a scalar).
        grad_b = 1. / n_samples * np.sum(residual)

        return loss, roll_parameter([grad_W, grad_b])


if __name__ == '__main__':
    # Demo: fit the linear regressor on the white-wine quality data with
    # the numeric (iterative) solver and print its mean error on the
    # held-out split.
    # The dataset is looked up relative to the current working directory.
    path = os.path.join(os.getcwd(), '..', 'dataset', 'winequality-white.csv')
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='quality')
    # Each split is indexed as [0] = features, [1] = targets below.
    trainset, testset = dataset.cross_split()
    linear = LinearRegressor(solve_type='numeric', normalize=True, max_iter=2000, batch_size=50,
                             learning_rate=1e-3,
                             is_plot_loss=True)
    linear.fit(trainset[0], trainset[1])
    prediction = linear.predict(testset[0])
    performance = mean_error(testset[1], prediction)
    # Parenthesised single-argument form prints identically on Python 2
    # and 3; the bare print statement is a SyntaxError on Python 3.
    print(performance)