# Train a gradient-boosted regression model and evaluate it on the held-out split.
clf = ensemble.GradientBoostingRegressor(**params)
clf.fit(X_train, y_train)
mse = mean_squared_error(y_test, clf.predict(X_test))

###############################################################################
# Plot training deviance

# Record the test-set deviance after each boosting stage.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
for stage, staged_pred in enumerate(clf.staged_decision_function(X_test)):
    test_score[stage] = clf.loss_(y_test, staged_pred)

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
# x-axis: boosting iteration number, starting at 1.
iterations = np.arange(params['n_estimators']) + 1
plt.plot(iterations, clf.train_score_, 'b-',
         label='Training Set Deviance')
plt.plot(iterations, test_score, 'r-',
         label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')

###############################################################################
# Plot feature importance

# Rescale importances so the largest one is 100.
feature_importance = clf.feature_importances_
feature_importance = 100.0 * (feature_importance / feature_importance.max())
# Bottom edge of the marginal histogram axes: just above the scatter axes.
bottom_h = bottom + height + 0.03


def price_format(x, pos):
    """Axis tick formatter: render a tick value as '$N,000'; blank at zero."""
    # the two args are the value and tick position
    return ('$%s,000' % (int(x/1000))) if x else ''


# Lookup tables mapping plot coordinates back to category labels,
# one per categorical axis (cut / color / clarity).
c_dicts = [coord_to_cut, coord_to_color_label, coord_to_clar]


def c_format(c_dict):
    """Return a formatter that looks x up in c_dict, giving '' when absent."""
    def c(x):
        try:
            return c_dict[x]
        except KeyError:
            # Tick positions between category coordinates have no label.
            return ''
    return c


plt.figure(1, figsize = (4,8))
# One scatter column (with a histogram panel above it) per attribute in the list.
for i, label in enumerate(['Carat', 'Cut', 'Color', 'Clarity']):
    left_start = left + i*(width + 0.03)
    # [left, bottom, width, height] rectangles in figure coordinates.
    rect_main = [left_start, bottom, width, height]
    rect_hist = [left_start, bottom_h, width, 0.2]
    axScatter = plt.axes(rect_main)
    # NOTE(review): axHist appears to be populated later in the loop body,
    # beyond this chunk — confirm against the rest of the file.
    axHist = plt.axes(rect_hist)
    # Two overlaid scatters of price vs. data[i]; only the marker size
    # differs (area vs. clar_area), so the clarity-sized layer sits on top.
    axScatter.scatter(data[i], price, s = area, c = color, alpha = 0.3)
    axScatter.scatter(data[i], price, s = clar_area, c = color, alpha = 0.5)
    axScatter.set_ylim((0, 20000))
    axScatter.set_xlabel(label)
    axScatter.minorticks_on()
# Fit a gradient boosting regressor and measure its held-out squared error.
clf = ensemble.GradientBoostingRegressor(**params)
clf.fit(X_train, y_train)
mse = mean_squared_error(y_test, clf.predict(X_test))

###############################################################################
# Plot training deviance

# Deviance on the test set after each boosting stage.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
for stage, staged_pred in enumerate(clf.staged_decision_function(X_test)):
    test_score[stage] = clf.loss_(y_test, staged_pred)

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
# Boosting iterations are numbered from 1 on the x-axis.
iterations = np.arange(params['n_estimators']) + 1
plt.plot(iterations, clf.train_score_, 'b-',
         label='Training Set Deviance')
plt.plot(iterations, test_score, 'r-',
         label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')

###############################################################################
# Coordinate-to-label lookup tables, one per categorical axis
# (cut / color / clarity).
c_dicts = [coord_to_cut, coord_to_color_label, coord_to_clar]


def c_format(c_dict):
    """Return a formatter that looks x up in c_dict, giving '' when absent."""
    def c(x):
        try:
            return c_dict[x]
        except KeyError:
            # Positions between category coordinates have no label.
            return ''
    return c


plt.figure(1, figsize=(4, 8))
# One scatter column (with a histogram panel above it) per attribute in the list.
for i, label in enumerate(['Carat', 'Cut', 'Color', 'Clarity']):
    left_start = left + i * (width + 0.03)
    # [left, bottom, width, height] rectangles in figure coordinates;
    # bottom_h is defined elsewhere in the file.
    rect_main = [left_start, bottom, width, height]
    rect_hist = [left_start, bottom_h, width, 0.2]
    axScatter = plt.axes(rect_main)
    # NOTE(review): axHist appears to be populated later in the loop body,
    # beyond this chunk — confirm against the rest of the file.
    axHist = plt.axes(rect_hist)
    # Two overlaid scatters of price vs. data[i]; only the marker size
    # differs (area vs. clar_area), so the clarity-sized layer sits on top.
    axScatter.scatter(data[i], price, s=area, c=color, alpha=0.3)
    axScatter.scatter(data[i], price, s=clar_area, c=color, alpha=0.5)
    axScatter.set_ylim((0, 20000))
    axScatter.set_xlabel(label)
    axScatter.minorticks_on()