Пример #1
0
def display_logistic_regression(df, train_x, valid_x, train_y, valid_y):
    """Fit a near-unregularized logistic regression and print its fit report.

    Prints the intercept, the predictors' absolute coefficients sorted by
    magnitude, the AIC on the validation split, confusion matrices, and the
    main classification metrics for both partitions.

    Parameters
    ----------
    df : unused here; kept for interface compatibility with callers.
    train_x, valid_x : feature DataFrames for the train/validation splits.
    train_y, valid_y : target vectors aligned with train_x / valid_x.
    """
    print('(8) Display logistic regression\n')
    # fit a logistic regression (set penalty=l2 and C=1e42 to avoid regularization)
    logit_reg = LogisticRegression(penalty='l2', C=1e42, solver='liblinear')
    logit_reg.fit(train_x, train_y)

    print('intercept ', logit_reg.intercept_[0])
    # BUG FIX: the original sorted abs(coef_) *before* attaching the
    # PREDICTORS index, so every value was printed against the wrong
    # predictor name. Label first, then sort rows by magnitude.
    coeffs = pd.DataFrame({'coeff': abs(logit_reg.coef_[0])}, index=PREDICTORS)
    print(coeffs.sort_values('coeff', ascending=False), '\n')
    print(
        'AIC',
        AIC_score(valid_y,
                  logit_reg.predict(valid_x),
                  # degrees of freedom = number of predictors + intercept
                  df=len(train_x.columns) + 1))

    # compute predictions once and reuse them for every report below
    prediction_train = logit_reg.predict(train_x)
    prediction_valid = logit_reg.predict(valid_x)

    # confusion matrices for train and validation partitions
    classificationSummary(train_y, prediction_train)
    classificationSummary(valid_y, prediction_valid)

    print('precision on test is:', precision_score(valid_y, prediction_valid))
    print('recall on test is:', recall_score(valid_y, prediction_valid))
    print('f1 on test is:', f1_score(valid_y, prediction_valid))
    print('Logistic Regression:Accuracy on train is:',
          accuracy_score(train_y, prediction_train))
    print('Logistic Regression:Accuracy on test is:',
          accuracy_score(valid_y, prediction_valid), '\n')
Пример #2
0
    def test_classificationSummary(self):
        """Verify classificationSummary prints the expected confusion matrix."""
        actual = [1, 0, 0, 1, 1, 1]
        predicted = [1, 0, 1, 1, 0, 0]

        # capture what classificationSummary writes to stdout
        captured = StringIO()
        with redirect_stdout(captured):
            classificationSummary(actual, predicted, class_names=['a', 'b'])
        report = captured.getvalue()

        # header plus both confusion-matrix rows must appear in the output
        for fragment in ('Confusion Matrix', '       Prediction',
                         'a 1 1', 'b 2 2'):
            self.assertIn(fragment, report)
Пример #3
0
    def test_classificationSummary(self):
        """Check both the substrings and the exact lines of the printed summary."""
        truth = [1, 0, 0, 1, 1, 1]
        preds = [1, 0, 1, 1, 0, 0]

        # redirect stdout so the printed summary can be inspected as a string
        sink = StringIO()
        with redirect_stdout(sink):
            classificationSummary(truth, preds, class_names=['a', 'b'])
        output = sink.getvalue()

        self.assertIn('Confusion Matrix', output)
        self.assertIn('       Prediction', output)
        self.assertIn('a 1 1', output)
        self.assertIn('b 2 2', output)

        # 4 of 6 predictions disagree on class 1 -> accuracy 3/6 = 0.5
        rows = output.split('\n')
        self.assertEqual(rows[0], 'Confusion Matrix (Accuracy 0.5000)')
        self.assertEqual(rows[3], 'Actual a b')
        self.assertEqual(rows[4], '     a 1 1')
Пример #4
0
# Build the confusion matrix by hand from boolean masks, then compare with
# scikit-learn's confusion_matrix and dmba's classificationSummary.
# 'default' is treated as the positive class throughout.
true_y = y == 'default'
true_pos = true_y & pred_y        # predicted default, actually default
true_neg = ~true_y & ~pred_y      # predicted paid off, actually paid off
false_pos = ~true_y & pred_y      # predicted default, actually paid off
false_neg = true_y & ~pred_y      # predicted paid off, actually default

# rows = actual class, columns = predicted class
conf_mat = pd.DataFrame([[np.sum(true_pos), np.sum(false_neg)], [np.sum(false_pos), np.sum(true_neg)]],
                       index=['Y = default', 'Y = paid off'],
                       columns=['Yhat = default', 'Yhat = paid off'])
print(conf_mat)

print(confusion_matrix(y, logit_reg.predict(X)))

# The package _dmba_ contains the function `classificationSummary` that prints confusion matrix and accuracy for a classification model.

classificationSummary(y, logit_reg.predict(X), 
                      class_names=logit_reg.classes_)

### Precision, Recall, and Specificity
# The _scikit-learn_ function `precision_recall_fscore_support` returns
# precision, recall, fbeta_score and support.

# NOTE(review): these ratios assume row/column 0 of conf_mat is the positive
# ('default') class — confirm sklearn's label ordering matches that here.
conf_mat = confusion_matrix(y, logit_reg.predict(X))
print('Precision', conf_mat[0, 0] / sum(conf_mat[:, 0]))      # TP / (TP + FP)
print('Recall', conf_mat[0, 0] / sum(conf_mat[0, :]))         # TP / (TP + FN)
print('Specificity', conf_mat[1, 1] / sum(conf_mat[1, :]))    # TN / (TN + FP)

precision_recall_fscore_support(y, logit_reg.predict(X), 
                                labels=['default', 'paid off'])

### ROC Curve
# The function `roc_curve` in _Scikit-learn_ calculates all the information that is required for plotting a ROC curve.
Пример #5
0
# Fit an L2-penalized logistic regression, selecting the regularization
# strength by 10-fold cross-validation over 100 candidate C values.
# class_weight='balanced' reweights classes inversely to their frequency.
logit_reg = LogisticRegressionCV(penalty="l2",
                                 Cs=100,
                                 solver='liblinear',
                                 cv=10,
                                 class_weight='balanced',
                                 scoring='accuracy',
                                 max_iter=1000)

logit_reg.fit(train_X, train_y)

# In[20]:

# display confusion matrices for train and test data

classificationSummary(train_y, logit_reg.predict(train_X))
classificationSummary(test_y, logit_reg.predict(test_X))

# In[21]:

# display classification report for the test data

# NOTE(review): 'classes' holds *predicted labels*, not the class list —
# a name like 'test_pred' would be clearer; kept as-is for later cells.
classes = logit_reg.predict(test_X)

print(metrics.classification_report(test_y, classes))

# ### Build a default RandomForest classifier

# In[22]:

# Rerun the same train/test split as before
Пример #6
0
# Subset a specific set/ predicting for new data
# Subset a specific set/ predicting for new data
# Combine actual labels, predicted labels, and per-class predicted
# probabilities into one frame, aligned on the validation index.
df = pd.concat([
    pd.DataFrame({
        'actual': y_valid,
        'predicted': y_valid_pred
    }),
    pd.DataFrame(predProb_valid, index=y_valid.index)
],
               axis=1)
# restrict to records flagged for both one-hot indicator columns
mask = ((X_valid.inventorygrowthabovefive_YES == 1) &
        (X_valid.populationgrowthabove_YES == 1))

print(df[mask])

#Confusionmatrix
classificationSummary(y_train, y_train_pred, class_names=classes)

print()

classificationSummary(y_valid, y_valid_pred, class_names=classes)

# In[47]:

#Regressiontree

# render matplotlib figures inline in the notebook
get_ipython().run_line_magic('matplotlib', 'inline')

from pathlib import Path

import pandas as pd
import numpy as np