def linreg_ccv_plot_roc(num_folds):
    """Plot per-fold and mean ROC curves for LinearRegression scores
    under clustered cross-validation (CCV).

    Parameters
    ----------
    num_folds : int
        Number of CCV folds built from the module-level ``data``.

    Side effects: reads the module-level ``data``, shows a matplotlib
    figure via ``plt.show()``.  Returns None.
    """
    global data
    folds = pd.create_folds(data, num_folds)
    classifier = LinearRegression()
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for i in range(num_folds):
        test_x, test_y, train_x, train_y = pd.split_into_sets(data, folds, i)
        # Regression outputs are used directly as ranking scores for the ROC.
        probs = classifier.fit(train_x, train_y).predict(test_x)
        fpr, tpr, thresholds = roc_curve(test_y, probs)  # takes y_true, y_score
        # np.interp replaces the scipy.interp alias, which was removed
        # from modern SciPy; behavior is identical for 1-D interpolation.
        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1,
                 label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    mean_tpr /= len(folds)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('%d-fold Clustered Cross-Validation' % num_folds)
    plt.legend(loc="lower right")
    plt.show()
def precision_recall_curve(num_folds):
    """Plot precision-recall curves per CCV fold for LogisticRegression.

    Parameters
    ----------
    num_folds : int
        Number of clustered cross-validation folds built from the
        module-level ``data``.

    Side effects: reads the module-level ``data``, prints the raw
    precision/recall arrays, and shows two matplotlib figures per fold.
    Returns None.

    NOTE(review): the original body called ``precision_recall_curve``
    unqualified, which resolves to THIS function (it shadows sklearn's
    metric of the same name) and recursed infinitely.  A locally aliased
    import restores the intended sklearn call without renaming the
    function for existing callers.
    """
    # Alias sklearn's metric locally: this function's own name shadows it
    # at module level, so an unqualified call would recurse into itself.
    from sklearn.metrics import precision_recall_curve as sk_pr_curve
    global data
    folds = pd.create_folds(data, num_folds)
    classifier = LogisticRegression()
    for j in range(num_folds):
        test_x, test_y, train_x, train_y = pd.split_into_sets(data, folds, j)
        probs = classifier.fit(train_x, train_y).predict_proba(test_x)
        precision, recall, _ = sk_pr_curve(test_y, probs[:, 1])
        print(precision)
        print(recall)
        precision = dict()
        recall = dict()
        average_precision = dict()
        # Binary problem: one curve slot per class.  Both slots currently
        # use the positive-class column, mirroring the original code.
        for i in range(2):
            precision[i], recall[i], _ = sk_pr_curve(test_y, probs[:, 1])
            average_precision[i] = average_precision_score(test_y, probs[:, 1])
        # Micro-averaged precision-recall over all samples.
        precision["micro"], recall["micro"], _ = sk_pr_curve(
            test_y.ravel(), probs[:, 1].ravel())
        average_precision["micro"] = average_precision_score(
            test_y, probs[:, 1], average="micro")
        # Plotting per fold (the flattened original did not make the loop
        # boundary explicit; plotting here uses each fold's own curves).
        plt.clf()
        plt.plot(recall[0], precision[0], label='Precision-Recall curve')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('Precision-Recall example: AUC={0:0.2f}'.format(
            average_precision[0]))
        plt.legend(loc="lower left")
        plt.show()
        # Second figure: micro-average plus one curve per class.
        plt.clf()
        plt.plot(recall["micro"], precision["micro"],
                 label='micro-average Precision-recall curve (area = {0:0.2f})'
                       ''.format(average_precision["micro"]))
        for i in range(2):
            plt.plot(recall[i], precision[i],
                     label='Precision-recall curve of class {0} (area = {1:0.2f})'
                           ''.format(i, average_precision[i]))
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Extension of Precision-Recall curve to multi-class')
        plt.legend(loc="lower right")
        plt.show()
def logreg_precision_recall_ccv(num_folds):
    """Print mean precision and recall of LogisticRegression over
    clustered cross-validation folds.

    Parameters
    ----------
    num_folds : int
        Number of CCV folds built from the module-level ``data``.

    Side effects: reads the module-level ``data`` and prints the mean
    precision and recall.  Returns None.
    """
    global data
    folds = pd.create_folds(data, num_folds)
    classifier = LogisticRegression()
    mean_recall = 0.0
    mean_precision = 0.0
    for i in range(num_folds):
        test_x, test_y, train_x, train_y = pd.split_into_sets(data, folds, i)
        probs = classifier.fit(train_x, train_y).predict_proba(test_x)
        # Threshold the positive-class probability at 0.5 to get labels.
        y_pred = [1 if p >= .5 else 0 for p in probs[:, 1]]
        mean_recall += recall_score(test_y, y_pred)        # y_true, y_pred
        mean_precision += precision_score(test_y, y_pred)  # y_true, y_pred
    mean_precision /= len(folds)
    mean_recall /= len(folds)
    # print() call form works under both Python 2 and 3 for single args.
    print("MEAN PRECISION")
    print(mean_precision)
    print("MEAN RECALL")
    print(mean_recall)