示例#1
0
def show_performance(X, y, clf, conf_matrix_cutoff=0.5, show_auc=True, show_accuracy=True, show_classification_report=True, show_confusion_matrix=True):
    """Print classification-quality metrics for a fitted binary classifier.

    Scores ``X`` with ``clf.predict_proba`` (column 1 taken as the positive
    class probability), thresholds at ``conf_matrix_cutoff`` to get hard
    labels, and prints the sections enabled by the ``show_*`` flags:
    AUC (on raw probabilities), accuracy, the classification report and the
    confusion matrix (rendered via ``showcm`` and printed with labels [0, 1]).
    """
    proba = clf.predict_proba(X)[:, 1]
    # Hard 0/1 labels derived from the probability threshold.
    hard_labels = [int(p >= conf_matrix_cutoff) for p in proba]

    if show_auc:
        # AUC is threshold-independent: use the raw probabilities.
        print ("AUC:{0:.3f}".format(__roc_auc_score(y, proba)),"\n")
    if show_accuracy:
        print ("Accuracy:{0:.3f}".format(__accuracy_score(y, hard_labels)),"\n")

    if show_classification_report:
        print ("Classification report")
        print (__classification_report(y, hard_labels))

    if show_confusion_matrix:
        print ("Confusion matrix")
        showcm(__confusion_matrix(y, hard_labels))
        print (__confusion_matrix(y, hard_labels, labels=[0,1]),"\n")
示例#2
0
def print_report_tt(Y_TRUE_TRAIN, Y_PREDICTED_TRAIN, Y_TRUE_TEST, Y_PREDICTED_TEST, FEAT_IMP='', dcl =[1,5,10,20,30,40,50,60,70,80,90,100], CNT_TO_PRINT=10, NAME_MODEL='PTB', NFLAG_PRINT_TABLES = True, TARGET_NAME='', DATA_SET='', METHOD='', TIME_PERIOD='', UNDER='', CALIBR='', PYTHON_SCRIPT='', ORACLE_TABLE='', CSV_FILE='', SQL_FILE=''):
    """Render a train/test model-quality report.

    Draws a vertical stack of diagnostic plots (accumulated lift, lift,
    ROC, decile-lift bars, decile-response bars, gain chart, KS chart and
    the ``_showcc`` chart) for the train and test samples, then optionally
    prints a model-info summary table and the feature-importance table.

    Parameters
    ----------
    Y_TRUE_TRAIN, Y_PREDICTED_TRAIN : array-like
        True binary labels and predicted scores for the train sample.
    Y_TRUE_TEST, Y_PREDICTED_TEST : array-like
        True binary labels and predicted scores for the test sample.
    FEAT_IMP : DataFrame-convertible, optional
        Feature-importance table printed when ``NFLAG_PRINT_TABLES``.
    dcl, CNT_TO_PRINT, NAME_MODEL :
        Unused in this function; kept for signature compatibility with the
        sibling report functions.
    NFLAG_PRINT_TABLES : bool
        When True, print the model-info and feature-importance tables.
    TARGET_NAME, DATA_SET, METHOD, TIME_PERIOD, UNDER, CALIBR,
    PYTHON_SCRIPT, ORACLE_TABLE, CSV_FILE, SQL_FILE : str
        Free-form metadata recorded in the model-info table.
    """
    gs = __GridSpec(9, 1, left=0, right=0.2, hspace=0.3, wspace=0.3)

    __plt.figure(figsize=(55,75))
    __plt.subplot(gs[0,0])

    _show_acclift([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train'), (Y_PREDICTED_TEST, Y_TRUE_TEST, 'Test')], NUM_SUBPLOT=gs[0, 0])

    __plt.subplot(gs[1, 0])

    _showlift([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train'), (Y_PREDICTED_TEST, Y_TRUE_TEST, 'Test')], NUM_SUBPLOT=gs[1, 0])

    __plt.subplot(gs[2, 0])

    _showroccurve([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train'), (Y_PREDICTED_TEST, Y_TRUE_TEST, 'Test')], NUM_SUBPLOT=gs[2, 0])

    # --- Decile lift bar chart (test sample) ---
    __plt.subplot(gs[3, 0])

    width = 0.35
    ind = __np.arange(10)
    x_test = get_lift_df(Y_PREDICTED_TEST, Y_TRUE_TEST)['lift']
    __plt.bar(ind, x_test, width, label='test', color='orange', alpha = 0.6)
    __plt.xlabel('Part')
    __plt.ylabel('Lift')
    __plt.xticks(ind + width / 2, ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'))
    # Fix: the original set the title twice in a row; only the last call takes
    # effect, so the overridden 'DECILE_LIFT' call was dead code and is removed.
    __plt.title(r'Decile lift')
    # Annotate each bar with its rounded value.
    for pos, val in zip(ind, x_test):
        __plt.text(pos, val, str(round(val,2)), horizontalalignment='center', verticalalignment='bottom', fontsize=13)

    __plt.legend(loc='best', fontsize=15)

    # --- Decile response bar chart (test sample) ---
    __plt.subplot(gs[4, 0])

    width = 0.35
    ind = __np.arange(10)
    z_test = get_lift_df(Y_PREDICTED_TEST, Y_TRUE_TEST)['pred_mean']
    __plt.bar(ind, z_test, width, label='test', color='orange', alpha = 0.6)
    __plt.xlabel('Part')
    # NOTE: 'Responce' spelling kept as-is — user-facing label, not normalized.
    __plt.ylabel('Responce')
    __plt.xticks(ind + width / 2, ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'))
    # Fix: removed the dead, immediately-overridden 'DECILE_RESPONCE' title call.
    __plt.title(r'Decile responce')
    for pos, val in zip(ind, z_test):
        __plt.text(pos, val, str(round(val,2)), horizontalalignment='center', verticalalignment='bottom', fontsize=13)

    __plt.legend(loc='best', fontsize=15)

    __plt.subplot(gs[5, 0])

    _gainchart([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train'), (Y_PREDICTED_TEST, Y_TRUE_TEST, 'Test')], NUM_SUBPLOT=gs[5, 0])

    __plt.subplot(gs[6, 0])

    _kschart([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train'), (Y_PREDICTED_TEST, Y_TRUE_TEST, 'Test')], NUM_SUBPLOT=gs[6, 0])

    __plt.subplot(gs[7, 0])

    _showcc([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train'), (Y_PREDICTED_TEST, Y_TRUE_TEST, 'Test')], NUM_SUBPLOT=gs[7, 0])

    # --- Summary tables: row 0 = train, row 1 = test (added via .loc enlargement) ---
    info = __pd.DataFrame(columns=['BASE', 'TARGET', 'TARGET, %', 'ROC_AUC', 'GINI', 'Target name', 'Data set name','Model method',
                                       'Time period','Undersampling usage', 'Calibrated', 'Python script location',
                                       'Data set oracle location', 'Data set csv location', 'SQL code location'], index=range(1))
    # Compute each AUC once instead of twice (it was recomputed for GINI).
    auc_train = __roc_auc_score(Y_TRUE_TRAIN, Y_PREDICTED_TRAIN)
    auc_test = __roc_auc_score(Y_TRUE_TEST, Y_PREDICTED_TEST)

    info.loc[0,'BASE'] = len(Y_TRUE_TRAIN)
    info.loc[0,'TARGET'] = int(sum(Y_TRUE_TRAIN))
    info.loc[0,'TARGET, %'] = float(sum(Y_TRUE_TRAIN)/len(Y_TRUE_TRAIN))*100
    info.loc[0,'ROC_AUC'] = auc_train
    info.loc[0,'GINI'] = 2*auc_train - 1

    info.loc[1,'BASE'] = len(Y_TRUE_TEST)
    info.loc[1,'TARGET'] = int(sum(Y_TRUE_TEST))
    info.loc[1,'TARGET, %'] = float(sum(Y_TRUE_TEST)/len(Y_TRUE_TEST))*100
    info.loc[1,'ROC_AUC'] = auc_test
    info.loc[1,'GINI'] = 2*auc_test - 1

    # Metadata is stored on the train row only.
    info.loc[0,'Target name'] = TARGET_NAME
    info.loc[0,'Data set name'] = DATA_SET
    info.loc[0,'Model method'] = METHOD
    info.loc[0,'Time period'] = TIME_PERIOD
    info.loc[0,'Undersampling usage'] = UNDER
    info.loc[0,'Calibrated'] = CALIBR
    info.loc[0,'Python script location'] = PYTHON_SCRIPT
    info.loc[0,'Data set oracle location'] = ORACLE_TABLE
    info.loc[0,'Data set csv location'] = CSV_FILE
    info.loc[0,'SQL code location'] = SQL_FILE

    # Transpose so metrics become rows and samples become columns.
    info_t = info.T
    info_t.rename(columns={0: 'Train', 1: 'Test'}, inplace=True)

    if NFLAG_PRINT_TABLES:
        print ('Model info')
        print (__pd.DataFrame(info_t))
        print ('________________________________________________________')
        print ('Feature importance')
        print (__pd.DataFrame(FEAT_IMP))
        print ('________________________________________________________')
示例#3
0
def print_report_t(Y_TRUE_TRAIN, Y_PREDICTED_TRAIN, FEAT_IMP='', dcl = [1,5,10,20,30,40,50,60,70,80,90,100], CNT_TO_PRINT=10, NAME_MODEL='PTB', NFLAG_PRINT_TABLES = True,TARGET_NAME='', DATA_SET='', METHOD='', TIME_PERIOD='', UNDER='', CALIBR='', PYTHON_SCRIPT='', ORACLE_TABLE='',CSV_FILE='', SQL_FILE=''):
    """Render a single-sample (train-only) model-quality report.

    Draws a vertical stack of diagnostic plots for the train sample
    (accumulated lift, lift, ROC, precision-recall, gain chart, KS chart
    and the ``_showcc`` chart), then optionally prints a model-info
    summary table and the feature-importance table.

    Parameters
    ----------
    Y_TRUE_TRAIN, Y_PREDICTED_TRAIN : array-like
        True binary labels and predicted scores for the train sample.
    FEAT_IMP : DataFrame-convertible, optional
        Feature-importance table printed when ``NFLAG_PRINT_TABLES``.
    dcl, CNT_TO_PRINT :
        Unused in this function; kept for signature compatibility with
        the sibling report functions.
    NAME_MODEL : str
        Legend label for the precision-recall curve.
    NFLAG_PRINT_TABLES : bool
        When True, print the model-info and feature-importance tables.
    TARGET_NAME, DATA_SET, METHOD, TIME_PERIOD, UNDER, CALIBR,
    PYTHON_SCRIPT, ORACLE_TABLE, CSV_FILE, SQL_FILE : str
        Free-form metadata recorded in the model-info table.
    """
    # 9-row, 1-column layout; each chart occupies one row.
    gs = __GridSpec(9, 1, left=0, right=0.2, hspace=0.3, wspace=0.3)

    __plt.figure(figsize=(55,75))
    __plt.subplot(gs[0,0])

    _show_acclift([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train')], NUM_SUBPLOT=gs[0, 0])
    
    __plt.subplot(gs[1, 0])
    
    _showlift([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train')], NUM_SUBPLOT=gs[1, 0])
    
    __plt.subplot(gs[2, 0])

    _showroccurve([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train')], NUM_SUBPLOT=gs[2, 0])
    
    # --- Precision-recall curve (drawn inline, not via a helper) ---
    __plt.subplot(gs[3, 0])
    
    average_precision = __average_precision_score(Y_TRUE_TRAIN,Y_PREDICTED_TRAIN)

    precision_x, recall_x, _ = __precision_recall_curve(Y_TRUE_TRAIN,Y_PREDICTED_TRAIN)

    # Step plot with a shaded area under the curve.
    __plt.step(recall_x, precision_x, alpha=0.2,where='post', label=NAME_MODEL)
    __plt.fill_between(recall_x, precision_x, step='post', alpha=0.2)

    __plt.xlabel('Recall')
    __plt.ylabel('Precision')
    __plt.ylim([0.0, 1.05])
    __plt.xlim([0.0, 1.0])
    __plt.title('2-class Precision-Recall curve: AUC={0:0.2f}'.format(average_precision))
    __plt.legend(loc='best', fontsize=11)
    
    __plt.subplot(gs[4, 0])
    
    _gainchart([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train')], NUM_SUBPLOT=gs[4, 0])
    
    __plt.subplot(gs[5, 0])
    
    _kschart([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train')], NUM_SUBPLOT=gs[5, 0])
    
    __plt.subplot(gs[6, 0])
    
    _showcc([(Y_PREDICTED_TRAIN, Y_TRUE_TRAIN, 'Train')], NUM_SUBPLOT=gs[6, 0])
    
    # --- Summary table: one row holding metrics and free-form metadata ---
    info = __pd.DataFrame(columns=['BASE', 'TARGET', 'TARGET, %', 'ROC_AUC', 'GINI', 'Target name', 'Data set name','Model method',
                                       'Time period','Undersampling usage', 'Calibrated', 'Python script location',
                                       'Data set oracle location', 'Data set csv location', 'SQL code location'], index=range(1))
    info.loc[0,'BASE'] = len(Y_TRUE_TRAIN)
    info.loc[0,'TARGET'] = int(sum(Y_TRUE_TRAIN))
    info.loc[0,'TARGET, %'] = float(sum(Y_TRUE_TRAIN)/len(Y_TRUE_TRAIN))*100
    info.loc[0,'ROC_AUC'] = __roc_auc_score(Y_TRUE_TRAIN,Y_PREDICTED_TRAIN)
    # GINI = 2*AUC - 1.
    info.loc[0,'GINI'] = 2*__roc_auc_score(Y_TRUE_TRAIN,Y_PREDICTED_TRAIN) - 1
   
    info.loc[0,'Target name'] = TARGET_NAME
    info.loc[0,'Data set name'] = DATA_SET
    info.loc[0,'Model method'] = METHOD
    info.loc[0,'Time period'] = TIME_PERIOD
    info.loc[0,'Undersampling usage'] = UNDER
    info.loc[0,'Calibrated'] = CALIBR
    info.loc[0,'Python script location'] = PYTHON_SCRIPT
    info.loc[0,'Data set oracle location'] = ORACLE_TABLE
    info.loc[0,'Data set csv location'] = CSV_FILE
    info.loc[0,'SQL code location'] = SQL_FILE
    
    # Transpose so metrics become rows; the single column is the train sample.
    info_t = info.T
    info_t.rename(columns={0: 'Train'}, inplace=True)
    
    if NFLAG_PRINT_TABLES==True:
        print ('Model info')
        print (__pd.DataFrame(info_t))
        print ('________________________________________________________')
        print ('Feature importance')
        print (__pd.DataFrame(FEAT_IMP))
        print ('________________________________________________________')
示例#4
0
def _showroccurve(arr, NUM_SUBPLOT=111):
    """Plot ROC curves for one or more (scores, targets, label) triples.

    Parameters
    ----------
    arr : list of tuples
        Each element is ``(predicted_scores, true_labels, label_text)``.
    NUM_SUBPLOT : int or SubplotSpec
        Unused here; kept for signature compatibility with the callers,
        which select the subplot themselves.
    """
    colors = __itertools.cycle(["royalblue", "darkorange", "forestgreen"])
    # Fix: the diagonal reference line and axis/title/legend decorations were
    # re-executed on every loop iteration; draw/apply them once instead.
    __plt.plot([0, 1], [0, 1], 'k--')
    for tr in arr:
        df_scores = __pd.DataFrame(list(zip(tr[0], tr[1])), columns=['score', 'target'])
        fpr_ptb, tpr_ptb, _ = __roc_curve(df_scores['target'], df_scores['score'])
        # Compute AUC once per curve (it was computed twice: for AUC and GINI).
        auc = __roc_auc_score(df_scores['target'], df_scores['score'])
        __plt.plot(fpr_ptb, tpr_ptb, label=str(tr[2])+' ROC AUC={} GINI={}'.format( round(auc,3), str(round((auc-0.5)*2.0,3)) ), linewidth= 2 ,linestyle='--', color=next(colors), alpha=0.7)
    __plt.xlabel('False positive rate')
    __plt.ylabel('True positive rate')
    __plt.title('ROC', fontsize=13)
    __plt.legend(loc='best', fontsize=12)
示例#5
0
def print_lift_roc_pr_plot(Y_TRUE, Y_PREDICTED, dcl = [1,5,10,20,30,40,50,60,70,80,90,100], CNT_TO_PRINT=10, NAME_MODEL='PTB', NFLAG_PRINT_TABLE = True, NUM_FIGURE=1, NUM_upper_text=0.05):
    """Plot a panel of lift / ROC / precision-recall diagnostics for one model.

    Subplots on figure ``NUM_FIGURE`` (3x2 grid): cumulative lift over the
    ``dcl`` percentiles, per-decile lift, ROC curve, precision-recall curve,
    and the ``_showcc`` chart. Optionally prints the lift table first.

    Parameters
    ----------
    Y_TRUE, Y_PREDICTED : array-like
        True binary labels and predicted scores.
    dcl : list of int
        Percentile cut points passed to ``__lift1`` for the cumulative-lift
        table (NOTE(review): mutable default — never mutated here, but
        confirm ``__lift1`` does not mutate it).
    CNT_TO_PRINT : int
        Number of leading points to mark and annotate on the lift curves.
    NAME_MODEL : str
        Label used in legends and in the printed table header.
    NFLAG_PRINT_TABLE : bool
        When True, print the cumulative-lift table before plotting.
    NUM_FIGURE : int
        Matplotlib figure number to draw on.
    NUM_upper_text : float
        Vertical offset for the value annotations above the lift points.
    """
    # Cumulative-lift table over the caller-supplied percentile grid.
    lift_df_ptb = __lift1(Y_TRUE,Y_PREDICTED,dcl)
    if NFLAG_PRINT_TABLE:
        print (NAME_MODEL+ '\n' +str(lift_df_ptb[['k', 'lift_cumulative', 'precision', 'tp', 'num_samples','threshold']])+ '\n')
    
    # Per-decile lift table over a fixed 10..100 grid (for the second subplot).
    s_lift_df_ptb = __lift1(Y_TRUE,Y_PREDICTED,[10,20,30,40,50,60,70,80,90,100])
    
    __plt.figure(NUM_FIGURE, figsize=(20,20))

    # --- Subplot 1: cumulative lift ---
    __plt.subplot(321)

    int_dcl = lift_df_ptb.k.astype(int)
    __plt.plot( int_dcl , lift_df_ptb['lift_cumulative'], label=NAME_MODEL,linewidth= 3 )
    __plt.xticks(int_dcl)
    # Mark the first CNT_TO_PRINT+1 points and annotate their values.
    __plt.plot(int_dcl[0:CNT_TO_PRINT+1], lift_df_ptb['lift_cumulative'][0:CNT_TO_PRINT+1],'o', label='Lift Value ' + NAME_MODEL)
    for i in zip(int_dcl[0:CNT_TO_PRINT+1], lift_df_ptb['lift_cumulative'][0:CNT_TO_PRINT+1]):
        __plt.text(i[0], i[1] + NUM_upper_text, str(round(i[1],2)), fontsize=11)

    __plt.legend(loc='upper right', fontsize=10, frameon=False)

    __plt.title("__LIFT_CUMULATIVE__") 
    __plt.ylabel('Lift')
    __plt.xlabel('Decile')
    
    # --- Subplot 2: per-decile lift ---
    __plt.subplot(322)
    int_dcl = s_lift_df_ptb.k.astype(int)
    __plt.plot( int_dcl , s_lift_df_ptb['lift_k'], label=NAME_MODEL,linewidth= 3 )
    __plt.xticks(int_dcl)
    __plt.plot(int_dcl[0:CNT_TO_PRINT+1], s_lift_df_ptb['lift_k'][0:CNT_TO_PRINT+1],'o', label='Lift Value ' + NAME_MODEL)
    for i in zip(int_dcl[0:CNT_TO_PRINT+1], s_lift_df_ptb['lift_k'][0:CNT_TO_PRINT+1]):
        __plt.text(i[0], i[1] + NUM_upper_text, str(round(i[1],2)), fontsize=11)

    __plt.legend(loc='upper right', fontsize=10, frameon=False)
    __plt.title("__LIFT__") 
    __plt.ylabel('Lift')
    __plt.xlabel('Decile')

    # --- Subplot 3: ROC curve with chance diagonal ---
    __plt.subplot(323)

    __plt.plot([0, 1], [0, 1], 'k--')

    fpr_ptb, tpr_ptb, _ = __roc_curve(Y_TRUE,Y_PREDICTED)

    # Legend carries both AUC and GINI (= 2*AUC - 1).
    __plt.plot(fpr_ptb, tpr_ptb
             , label=NAME_MODEL + ' ROC AUC={} GINI={}'.format( round(__roc_auc_score(Y_TRUE,Y_PREDICTED),3), str(round((__roc_auc_score(Y_TRUE,Y_PREDICTED)-0.5)*2.0,3)) )
             , linewidth= 2 ,linestyle='--')
    __plt.xlabel('False positive rate')
    __plt.ylabel('True positive rate')
    __plt.title('ROC', fontsize=15)
    __plt.legend(loc='best', fontsize=10)
    
    # --- Subplot 4: precision-recall curve with shaded area ---
    __plt.subplot(324)
    average_precision = __average_precision_score(Y_TRUE,Y_PREDICTED)

    precision_x, recall_x, _ = __precision_recall_curve(Y_TRUE,Y_PREDICTED)

    __plt.step(recall_x, precision_x, alpha=0.2,where='post', label=NAME_MODEL)
    __plt.fill_between(recall_x, precision_x, step='post', alpha=0.2)

    __plt.xlabel('Recall')
    __plt.ylabel('Precision')
    __plt.ylim([0.0, 1.05])
    __plt.xlim([0.0, 1.0])
    __plt.title('2-class Precision-Recall curve: AUC={0:0.2f}'.format(average_precision))
    __plt.legend(loc='best', fontsize=10)

    # --- Subplot 5: helper chart (semantics defined by _showcc elsewhere) ---
    _showcc([(Y_PREDICTED, Y_TRUE, NAME_MODEL)],NUM_SUBPLOT=325)