Example #1
import pickle

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

# X_test, y_test and the cm_analysis plotting helper are assumed to be defined
# elsewhere in the module.
def get_performance_report(dataset='credit card fraud', model_nr='rf', model_type='synthpop_so', nr_normal_training=[213224],
                           nr_fraud_training=[0], nr_synthetic_fraud_training=[0], test_size=0.25):
    model_dict = {}
    report_dict = {}  # report on recall, precision, etc.
    for i in range(len(nr_normal_training)):
        model_name = model_nr +'_'+ model_type +'_'+ str(nr_normal_training[i]) + '_'+str(nr_fraud_training[i]) + '_' + \
                     str(nr_synthetic_fraud_training[i]) +'_ts'+ str(test_size) + '.pkl'
        path = '4) performance improvement/' + dataset + '/synthetic only/models/' + model_name

        with open(path, 'rb') as file:
            model = pickle.load(file)
        model_dict[model_name] = model

        model_predictions = model_dict[model_name].predict(X_test)
        model_predictions = [int(round(x)) for x in model_predictions]  # round to be 0 or 1

        report = classification_report(y_test, model_predictions, labels=[0,1],
                                       target_names=['class0','class1'], digits=2, output_dict=True)
        report_dict[model_name] = report

        # plot confusion matrix
        cm_analysis(y_test, model_predictions,
                    filename='4) performance improvement/' + dataset + '/synthetic only/figures/confusion matrices/cm_' + model_name + '.png',
                    labels=[0, 1], ymap=['class0', 'class1'],
                    title='RF model trained on\n#class0: ' + str(nr_normal_training[i]) +
                          ' #class1: ' + str(nr_fraud_training[i]) +
                          '\n  #synthetic class1: ' + str(nr_synthetic_fraud_training[i]))
        plt.close()
    return report_dict
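
The cm_analysis helper used here (and in the examples below) is not defined in these snippets. A minimal sketch of what such a helper might look like, based only on how it is called (y_true, y_pred, filename, labels, ymap, title); this is an assumption, not the project's actual implementation:

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix


def cm_analysis(y_true, y_pred, filename, labels, ymap=None, title=None, figsize=(6, 5)):
    # confusion matrix in the given label order
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    # replace numeric labels with readable names when a ymap is given
    names = ymap if ymap is not None else [str(label) for label in labels]
    cm_df = pd.DataFrame(cm, index=names, columns=names)
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    if title:
        ax.set_title(title)
    fig.tight_layout()
    fig.savefig(filename)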
Example #2
get_model_performance(unbalanced_model, 'unbalanced', X_test_unbalanced,
                      y_test_unbalanced, 'RF', 'fraud dataset')
plt.savefig(
    '1) classification algorithms/assess model performance/credit card fraud/figures/PRcurve_rf_unbalanced_fraud.png'
)
plt.close()
get_model_performance(balanced_model, 'balanced', X_test_balanced,
                      y_test_balanced, 'RF', 'fraud dataset')
plt.savefig(
    '1) classification algorithms/assess model performance/credit card fraud/figures/PRcurve_rf_balanced_fraud.png'
)
plt.close()

cm_analysis(
    y_test_balanced,
    balanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/credit card fraud/figures/cm_rf_balanced_fraud.png',
    labels=[0, 1],
    ymap=['normal', 'fraud'],
    title='RF performance on balanced data\nfraud dataset')

cm_analysis(
    y_test_unbalanced,
    unbalanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/credit card fraud/figures/cm_rf_unbalanced_fraud.png',
    labels=[0, 1],
    ymap=['normal', 'fraud'],
    title='RF performance on unbalanced data\nfraud dataset')
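
The get_model_performance helper called above is not defined in these snippets either. A minimal sketch of what it might do, judging from its arguments and from the PR-curve figure the caller saves straight afterwards: print a classification report and draw a precision-recall curve on the current matplotlib figure. The implementation below is a guess, not the original code:

import matplotlib.pyplot as plt
from sklearn.metrics import auc, classification_report, precision_recall_curve


def get_model_performance(model, balance_label, X_test, y_test, model_name, dataset_name):
    # print precision, recall and F1 per class (assumed behaviour)
    y_pred = [int(round(x)) for x in model.predict(X_test)]
    print(classification_report(y_test, y_pred, digits=2))

    # draw a precision-recall curve on the current figure; the caller saves and
    # closes the figure itself with plt.savefig(...) / plt.close()
    if hasattr(model, 'predict_proba'):
        scores = model.predict_proba(X_test)[:, 1]
    else:
        scores = model.predict(X_test).ravel()
    precision, recall, _ = precision_recall_curve(y_test, scores)
    plt.plot(recall, precision,
             label=model_name + ' (' + balance_label + '), AUC=%.3f' % auc(recall, precision))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(model_name + ' precision-recall curve\n' + dataset_name + ' (' + balance_label + ')')
    plt.legend()

Example #3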
import pickle

import matplotlib.pyplot as plt

# get_model_performance, cm_analysis and the X_test_* / y_test_* variables are
# assumed to be defined or imported elsewhere in the project.

# unpack unbalanced model
path = '1) classification algorithms/random forest/customer churn/model_forest_unbalanced_churn.pkl'
with open(path, 'rb') as file:
    unbalanced_model = pickle.load(file)

# unpack balanced model
path = '1) classification algorithms/random forest/customer churn/model_forest_balanced_churn.pkl'
with open(path, 'rb') as file:
    balanced_model = pickle.load(file)

# predict labels
unbalanced_predictions = unbalanced_model.predict(X_test_unbalanced)
unbalanced_predictions = [int(round(x)) for x in unbalanced_predictions]
balanced_predictions = balanced_model.predict(X_test_balanced)
balanced_predictions = [int(round(x)) for x in balanced_predictions]

# print the confusion matrix, precision, recall, etc.
get_model_performance(unbalanced_model, 'unbalanced', X_test_unbalanced, y_test_unbalanced, 'RF', 'churn dataset')
plt.savefig('1) classification algorithms/assess model performance/customer churn/figures/PRcurve_rf_unbalanced_churn.png')
plt.close()
get_model_performance(balanced_model, 'balanced', X_test_balanced, y_test_balanced, 'RF', 'churn dataset')
plt.savefig('1) classification algorithms/assess model performance/customer churn/figures/PRcurve_rf_balanced_churn.png')
plt.close()


cm_analysis(y_test_balanced, balanced_predictions,
            filename='1) classification algorithms/assess model performance/customer churn/figures/cm_rf_balanced_churn.png',
            labels=[0, 1], ymap=['normal', 'churn'],
            title='RF performance on balanced data\nchurn dataset')

cm_analysis(y_test_unbalanced, unbalanced_predictions,
            filename='1) classification algorithms/assess model performance/customer churn/figures/cm_rf_unbalanced_churn.png',
            labels=[0, 1], ymap=['normal', 'churn'],
            title='RF performance on unbalanced data\nchurn dataset')
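
The X_test_unbalanced / y_test_unbalanced and X_test_balanced / y_test_balanced variables are assumed to be created elsewhere. A minimal sketch of how such test sets could be built, assuming a pandas DataFrame df with a binary 'churn' target and simple undersampling of the majority class for the balanced variant (function name and details are illustrative only):

import pandas as pd
from sklearn.model_selection import train_test_split


def make_test_sets(df, target='churn', test_size=0.25, random_state=0):
    # unbalanced: keep the original class ratio
    X, y = df.drop(columns=[target]), df[target]
    _, X_test_unbalanced, _, y_test_unbalanced = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state)

    # balanced: undersample the majority class before splitting
    minority = df[df[target] == 1]
    majority = df[df[target] == 0].sample(len(minority), random_state=random_state)
    balanced = pd.concat([minority, majority])
    Xb, yb = balanced.drop(columns=[target]), balanced[target]
    _, X_test_balanced, _, y_test_balanced = train_test_split(
        Xb, yb, test_size=test_size, stratify=yb, random_state=random_state)

    return X_test_unbalanced, y_test_unbalanced, X_test_balanced, y_test_balanced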
Example #4
get_model_performance(unbalanced_model, 'unbalanced', X_test_unbalanced,
                      y_test_unbalanced, 'RF', 'bioresponse dataset')
plt.savefig(
    '1) classification algorithms/assess model performance/bioresponse/figures/PRcurve_rf_unbalanced_bio.png'
)
plt.close()
get_model_performance(balanced_model, 'balanced', X_test_balanced,
                      y_test_balanced, 'RF', 'bioresponse dataset')
plt.savefig(
    '1) classification algorithms/assess model performance/bioresponse/figures/PRcurve_rf_balanced_bio.png'
)
plt.close()

cm_analysis(
    y_test_balanced,
    balanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/bioresponse/figures/cm_rf_balanced_bio.png',
    labels=[0, 1],
    ymap=['class0', 'class1'],
    title='RF performance on balanced data\nbioresponse dataset')

cm_analysis(
    y_test_unbalanced,
    unbalanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/bioresponse/figures/cm_rf_unbalanced_bio.png',
    labels=[0, 1],
    ymap=['class0', 'class1'],
    title='RF performance on unbalanced data\nbioresponse dataset')
Example #5
# the PR curve for the unbalanced NN model is assumed to have been drawn by a
# get_model_performance(...) call just before this snippet starts
plt.savefig(
    '1) classification algorithms/assess model performance/bioresponse/figures/PRcurve_nn_unbalanced_bio_short.png'
)
plt.close()
get_model_performance(balanced_model, 'balanced', X_test_balanced,
                      y_test_balanced, 'NN', 'bioresponse dataset')
plt.savefig(
    '1) classification algorithms/assess model performance/bioresponse/figures/PRcurve_nn_balanced_bio_short.png'
)
plt.close()
print('PR curve ready')

cm_analysis(
    y_test_balanced,
    balanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/bioresponse/figures/cm_nn_balanced_bio_short.png',
    labels=[0, 1],
    ymap=['response', 'no response'],
    title='NN performance on balanced data\nbioresponse dataset')

cm_analysis(
    y_test_unbalanced,
    unbalanced_predictions,
    filename=
    '1) classification algorithms/assess model performance/bioresponse/figures/cm_nn_unbalanced_bio_short.png',
    labels=[0, 1],
    ymap=['response', 'no response'],
    title='NN performance on unbalanced data\nbioresponse dataset')
print('confusion matrix ready')
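
The balanced_predictions / unbalanced_predictions used above are assumed to be produced the same way as in the random forest examples, except that a neural network's predict() typically returns class probabilities that must be thresholded to 0/1 before being compared with the integer labels:

# assumed prediction step for the NN models (not part of the original snippet)
unbalanced_predictions = [int(round(p)) for p in unbalanced_model.predict(X_test_unbalanced).ravel()]
balanced_predictions = [int(round(p)) for p in balanced_model.predict(X_test_balanced).ravel()]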