def test_AIC_score(self):
    y_true = [1, 2, 3, 4, 5]
    y_pred = [1, 3, 2, 5, 4]
    self.assertAlmostEqual(
        AIC_score(y_true, y_pred, MockModel(coef_=[1] * 2)), 21.0736, places=3)
    self.assertAlmostEqual(
        AIC_score(y_true, y_pred, df=3),
        AIC_score(y_true, y_pred, MockModel(coef_=[1] * 2)), places=3)
    self.assertGreater(
        AIC_score(y_true, y_pred, df=3),
        AIC_score(y_true, y_pred, df=2))
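# For reference, a minimal sketch of the Gaussian-likelihood AIC that the
# assertions above imply (aic_gaussian is a hypothetical helper, not part
# of the library under test): with n = 5 and SSE = 4, df=3 reproduces the
# expected value of ~21.0737.
import math

def aic_gaussian(y_true, y_pred, df):
    # df counts the fitted coefficients including the intercept; the
    # estimated error variance contributes the extra +1 parameter
    n = len(y_true)
    sse = sum((yt - yp) ** 2 for yt, yp in zip(y_true, y_pred))
    return n * math.log(sse / n) + n * (1 + math.log(2 * math.pi)) + 2 * (df + 1)

print(round(aic_gaussian([1, 2, 3, 4, 5], [1, 3, 2, 5, 4], df=3), 4))  # 21.0737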
def display_logistic_regression(df, train_x, valid_x, train_y, valid_y):
    print('(8) Display logistic regression\n')
    # fit a logistic regression (set penalty='l2' and C=1e42 to avoid regularization)
    logit_reg = LogisticRegression(penalty='l2', C=1e42, solver='liblinear')
    logit_reg.fit(train_x, train_y)

    print('intercept ', logit_reg.intercept_[0])
    # keep coefficients aligned with their predictor names, then sort by magnitude
    # (sorting the raw values alone would detach them from the index)
    coeff = pd.DataFrame({'coeff': logit_reg.coef_[0]}, index=PREDICTORS)
    print(coeff.reindex(coeff['coeff'].abs().sort_values(ascending=False).index), '\n')
    print('AIC', AIC_score(valid_y, logit_reg.predict(valid_x),
                           df=len(train_x.columns) + 1))

    classificationSummary(train_y, logit_reg.predict(train_x))
    classificationSummary(valid_y, logit_reg.predict(valid_x))

    prediction_valid = logit_reg.predict(valid_x)
    prediction_train = logit_reg.predict(train_x)
    print('precision on validation is:', precision_score(valid_y, prediction_valid))
    print('recall on validation is:', recall_score(valid_y, prediction_valid))
    print('f1 on validation is:', f1_score(valid_y, prediction_valid))
    print('Logistic Regression: accuracy on train is:',
          accuracy_score(train_y, prediction_train))
    print('Logistic Regression: accuracy on validation is:',
          accuracy_score(valid_y, prediction_valid), '\n')
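# A minimal usage sketch for display_logistic_regression, assuming a
# DataFrame `df` whose columns include PREDICTORS plus a binary outcome
# column (OUTCOME is a hypothetical name, not confirmed by this code):
from sklearn.model_selection import train_test_split

X = df[PREDICTORS]
y = df[OUTCOME]
train_x, valid_x, train_y, valid_y = train_test_split(
    X, y, test_size=0.4, random_state=1)
display_logistic_regression(df, train_x, valid_x, train_y, valid_y)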
def score_model(model, variables):
    if len(variables) == 0:
        # null model: intercept only (predict the mean), so df=1
        return AIC_score(y, [y.mean()] * len(y), model, df=1)
    return AIC_score(y, model.predict(X[variables]), model)
def score_model(model: LinearRegression, variables: list):
    if len(variables) == 0:
        return AIC_score(y, [y.mean()] * len(y), model, df=1)
    return AIC_score(y, model.predict(x[variables]), model)
def score_model(model, variables):
    if len(variables) == 0:
        return AIC_score(train_y, [train_y.mean()] * len(train_y), model, df=1)
    return AIC_score(train_y, model.predict(train_X[variables]), model)
def score_model(model, variables):
    return AIC_score(train_Y, model.predict(train_X[variables]), model)
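# The score_model variants above are written to plug into dmba's feature
# selection utilities. A sketch under that assumption, using the
# train_X/train_y variant: forward_selection starts from the empty set,
# which is why score_model must handle len(variables) == 0 with the
# intercept-only null model (df=1).
from dmba import forward_selection
from sklearn.linear_model import LinearRegression

def train_model(variables):
    # refit on the current candidate subset; the empty subset is scored
    # directly by score_model's null-model branch, so no fit is needed
    if len(variables) == 0:
        return None
    model = LinearRegression()
    model.fit(train_X[variables], train_y)
    return model

best_model, best_variables = forward_selection(
    train_X.columns, train_model, score_model, verbose=True)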
axes[1].set_title('validation')
axes[2].set_title(' ')
axes[2].set_ylim(-30000, 30000)
plt.suptitle('Prediction errors')
plt.subplots_adjust(bottom=0.15, top=0.85, wspace=0.35)
plt.show()

# In[27]:

# Adjusted R2, AIC, and BIC on the training set
pred_Y = reg.predict(train_X)
print('adjusted r2 : ', adjusted_r2_score(train_Y, pred_Y, reg))
print('AIC : ', AIC_score(train_Y, pred_Y, reg))
print('BIC : ', BIC_score(train_Y, pred_Y, reg))

# Use predict() to make predictions on a new set
reg_lm_pred = reg.predict(valid_X)
result = pd.DataFrame({
    'Predicted': reg_lm_pred,
    'Actual': valid_Y,
    'Residual': valid_Y - reg_lm_pred
})
print(result.head(20))

# Compute common accuracy measures
regressionSummary(valid_Y, reg_lm_pred)
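# adjusted_r2_score, AIC_score, and BIC_score above come from the dmba
# package. For reference, a sketch of the standard adjusted R^2 the first
# print presumably computes (adjusted_r2 is a hypothetical helper):
from sklearn.metrics import r2_score

def adjusted_r2(y_true, y_pred, n_predictors):
    # penalize R^2 for the number of predictors so that adding an
    # uninformative variable no longer inflates the score
    n = len(y_true)
    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * (n - 1) / (n - n_predictors - 1)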