Пример #1
0
def leave_target_out():
    """Plot leave-one-target-out ROC curves for a logistic regression model.

    For each key of the module-level ``data`` object, the split returned by
    ``pd.leave_one_target_out(data, i)`` is used to fit a
    ``LogisticRegression`` on the training part and score the test part.
    One ROC curve per target is drawn, plus the chance diagonal and the
    mean ROC curve (per-target curves interpolated onto a shared 100-point
    false-positive-rate grid and averaged).  The figure is displayed with
    ``plt.show()``; nothing is returned.

    Raises:
        ValueError: if ``data`` has no targets, because no mean curve can
            be computed from zero curves.
    """
    targets = list(data.keys())
    if not targets:
        raise ValueError('leave_target_out() needs at least one target in data')

    classifier = LogisticRegression()

    # Shared FPR grid so the per-target curves can be averaged point-wise.
    mean_fpr = np.linspace(0, 1, 100)
    mean_tpr = np.zeros_like(mean_fpr)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    for i, target in enumerate(targets):
        test_x, test_y, train_x, train_y = pd.leave_one_target_out(data, i)
        probs = classifier.fit(train_x, train_y).predict_proba(test_x)
        # Column 1 is used as the score passed to roc_curve
        # (presumably the positive class -- confirm against the labels).
        fpr, tpr, _ = roc_curve(test_y, probs[:, 1])

        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        ax.plot(fpr, tpr, lw=1,
                label='%s (area = %0.2f)' % (target, auc(fpr, tpr)))

    ax.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= len(targets)
    # Pin the endpoints of the averaged curve to (0, 0) and (1, 1).
    mean_tpr[0] = 0.0
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    ax.plot(mean_fpr, mean_tpr, 'k--',
            label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    ax.set_xlim([-0.05, 1.05])
    ax.set_ylim([-0.05, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Leave one (target) out (SCOREDATA.vina.balanced)')

    # Shrink the axes to 80% of their width so the legend can be anchored
    # to the right of the plot area instead of covering the curves.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=4)

    plt.show()
Пример #2
0
def leave_target_out_dist():
    """Show a histogram of the per-target ROC AUC values.

    For each key of the module-level ``data`` object, the split returned by
    ``pd.leave_one_target_out(data, index)`` is used to fit a
    ``LogisticRegression`` on the training part and compute the ROC AUC on
    the test part.  The collected AUC values are drawn with ``plt.hist``
    and the figure is displayed with ``plt.show()``; nothing is returned.
    """
    model = LogisticRegression()
    target_names = list(data.keys())

    def split_auc(index):
        # AUC of the shared model refitted on the split for ``index``.
        test_x, test_y, train_x, train_y = pd.leave_one_target_out(data, index)
        fitted = model.fit(train_x, train_y)
        # Column 1 is used as the score passed to roc_curve
        # (presumably the positive class -- confirm against the labels).
        scores = fitted.predict_proba(test_x)[:, 1]
        fpr, tpr, _ = roc_curve(test_y, scores)
        return auc(fpr, tpr)

    auc_values = [split_auc(index) for index in range(len(target_names))]

    plt.hist(auc_values)
    plt.title("Target AUC distribution")
    plt.xlabel("AUC")
    plt.ylabel("Frequency")
    plt.show()
Пример #3
0
def plot(i):
    """Compute the ROC curve for the split belonging to target index ``i``.

    The module-level ``clf`` is fitted on the training part of the split
    returned by ``pd.leave_one_target_out(data, i)`` and scored on the
    test part.

    ``roc_curve`` needs continuous scores to trace a full curve; hard class
    labels from ``predict()`` collapse it to a single operating point.
    Scores are therefore taken from ``predict_proba`` (column 1) when the
    estimator offers it, then from ``decision_function``, and only as a
    last resort from ``predict``.

    Args:
        i: index into ``targets`` / the split selector passed to
            ``pd.leave_one_target_out``.

    Returns:
        dict with keys ``'name'`` (``targets[i]``), ``'fpr'``, ``'tpr'``
        (arrays from ``roc_curve``) and ``'auc'`` (area under that curve).
    """
    test_x, test_y, train_x, train_y = pd.leave_one_target_out(data, i)
    model = clf.fit(train_x, train_y)

    if hasattr(model, 'predict_proba'):
        # Column 1 is used as the score, matching the other ROC helpers
        # in this file (presumably the positive class -- confirm).
        scores = model.predict_proba(test_x)[:, 1]
    elif hasattr(model, 'decision_function'):
        scores = model.decision_function(test_x)
    else:
        scores = model.predict(test_x)

    fpr, tpr, _ = roc_curve(test_y, scores)
    return {'name': targets[i], 'fpr': fpr, 'tpr': tpr, 'auc': auc(fpr, tpr)}