"""Plot train/test log-loss curves of a gradient-boosting classifier for
several learning rates, save the figure, and report the best iteration
for learning rate 0.2.

NOTE(review): depends on module-level objects defined elsewhere in the
file: ``coursera`` (grading helper), ``clf`` (gradient-boosting model),
``log_loss``, ``plt``, ``np``, and the ``X_train``/``X_test``/``y_train``/
``y_test`` splits — presumably created earlier in this script; verify.
"""

coursera.output('overfitting.txt', 'overfitting')

# Maps learning rate -> list of test log-loss values, one per boosting stage.
looses = {}


def plot_score(test_predictions, y_test, train_predictions, y_train, color, learning_rate):
    """Plot staged log-loss curves (test solid, train dashed) in the given
    color and record the per-stage test losses under ``learning_rate``.

    ``test_predictions`` / ``train_predictions`` are iterables of per-stage
    probability predictions (e.g. from ``staged_predict_proba``).
    """
    test_loss = [log_loss(y_test, stage_pred) for stage_pred in test_predictions]
    train_loss = [log_loss(y_train, stage_pred) for stage_pred in train_predictions]
    plt.plot(test_loss, color, linewidth=2)
    plt.plot(train_loss, color + '--', linewidth=2)
    looses[learning_rate] = test_loss


plt.figure()
colors = ['r', 'g', 'b', 'c', 'm']
learn_rates = [1, 0.5, 0.3, 0.2, 0.1]

# Refit the model once per learning rate and draw its loss curves.
for curve_color, rate in zip(colors, learn_rates):
    clf.learning_rate = rate
    clf.fit(X_train, y_train)
    plot_score(
        clf.staged_predict_proba(X_test),
        y_test,
        clf.staged_predict_proba(X_train),
        y_train,
        color=curve_color,
        learning_rate=rate,
    )

# One "Test"/"Train" legend entry per learning rate, in plotting order.
legends = []
for rate in learn_rates:
    legends.append("Test {}".format(rate))
    legends.append("Train {}".format(rate))
plt.legend(legends)
plt.savefig("coursera_out/gradient_boosting.png")

# Locate the boosting iteration with the lowest test log-loss at rate 0.2.
min_loss_on_iteration = np.argmin(looses[0.2])
min_loss = looses[0.2][min_loss_on_iteration]
print("on iteration {} was loose {}".format(min_loss_on_iteration, min_loss))
coursera.output("min_loose_on_0.2.txt", "{:.2f} {}".format(min_loss, min_loss_on_iteration))