def fit(self, X, y):
    X = np.asarray(X)
    y = np.asarray(y)
    # Initial prediction is the mean of the training targets.
    self.mean = np.mean(y)
    for round in xrange(self.max_rounds):
        # Residuals of the current ensemble on the training data.
        residual = [(yn - yl) for yn, yl in zip(y, self.predict(X))]
        # Fit the next weak learner to the residuals and keep it.
        hypothesis = self.learner().fit(X, residual)
        self.hypotheses.append(hypothesis)
        self.local_error.append(hw4.compute_mse(residual, hypothesis.predict(X)))
        # Track the full ensemble's training MSE after this round.
        pred_round = self.predict(X)
        self.train_score = hw4.compute_mse(pred_round, y)
        self.training_error.append(self.train_score)
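# The fit() loop above calls self.predict(X), which is not part of this
# excerpt. Below is a minimal sketch of a predict() consistent with how
# fit() uses it: start from the stored mean and add each weak learner's
# output scaled by the learning rate. The attribute names
# (self.learning_rate, self.hypotheses, self.mean) are assumptions taken
# from the code above and the constructor call in q7(), not a copy of the
# original method.
def predict(self, X):
    X = np.asarray(X)
    # Base prediction: the mean of the training targets (set in fit()).
    pred = np.full(len(X), self.mean, dtype=float)
    for hypothesis in self.hypotheses:
        # Each boosting round adds its residual fit, shrunk by the learning rate.
        pred = pred + self.learning_rate * np.asarray(hypothesis.predict(X))
    return pred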
def q7():
    h_test, h_train = utils.load_and_normalize_housing_set()
    housingData_test = hw3.pandas_to_data(h_test)
    housingData_train = hw3.pandas_to_data(h_train)
    y, X = hw4.split_truth_from_data(housingData_train)
    y_test, X_test = hw4.split_truth_from_data(housingData_test)
    # scikit-learn equivalent, kept for reference:
    #gb = GradientBoostingRegressor(learning_rate=.1, n_estimators=1, max_depth=1)
    # Boost 100 depth-1 regression trees (stumps) with a learning rate of 0.1.
    gb = gradb.GradientBoostRegressor(learning_rate=.1, n_estimators=100, max_depth=1,
                                      learner=lambda: DecisionTreeRegressor(max_depth=1))
    gb.fit(X, y)
    gb.print_stats()
    yhat = gb.predict(X_test)
    # Spot-check the first few predictions, then report test MSE.
    print y_test[:10]
    print yhat[:10]
    print 'MSE: {}'.format(hw4.compute_mse(y_test, yhat))
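# Optional sanity check (not part of the original assignment code): the
# commented-out line in q7() points at scikit-learn's GradientBoostingRegressor.
# The sketch below runs that comparison with matching hyperparameters; it
# assumes scikit-learn is installed and reuses the same utils/hw3/hw4 helpers.
def q7_sklearn_check():
    from sklearn.ensemble import GradientBoostingRegressor
    h_test, h_train = utils.load_and_normalize_housing_set()
    y, X = hw4.split_truth_from_data(hw3.pandas_to_data(h_train))
    y_test, X_test = hw4.split_truth_from_data(hw3.pandas_to_data(h_test))
    # Same settings as the custom booster: 100 stumps, learning rate 0.1.
    skl_gb = GradientBoostingRegressor(learning_rate=.1, n_estimators=100, max_depth=1)
    skl_gb.fit(X, y)
    yhat = skl_gb.predict(X_test)
    print 'sklearn MSE: {}'.format(hw4.compute_mse(y_test, yhat))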