Пример #1
0
def AUC(targetVariable, allPredictions):
    """Return the area under the ROC curve, using only rows with a finite target.

    Entries of ``targetVariable`` that are not finite (NaN or infinite) are
    dropped, together with the matching entries of ``allPredictions``, before
    the ROC curve is computed.
    """
    hasTarget = numpy.isfinite(targetVariable)
    falsePosRate, truePosRate, _ = roc(targetVariable[hasTarget],
                                       allPredictions[hasTarget])
    return auc(falsePosRate, truePosRate)
Пример #2
0
def AUC(targetVariable, allPredictions):
    """Return the area under the ROC curve, using only rows with a finite target.

    Rows whose target is not finite (NaN or infinite) are dropped. The
    remaining predictions are rounded to three decimal places before the ROC
    curve is built (per the original note, to speed up the AUC calculation).
    """
    ROUND_DECIMALS = 3
    hasTarget = numpy.isfinite(targetVariable)
    knownTargets = targetVariable[hasTarget]
    roundedScores = numpy.round(allPredictions[hasTarget],
                                decimals=ROUND_DECIMALS)
    falsePosRate, truePosRate, _ = roc(knownTargets, roundedScores)
    return auc(falsePosRate, truePosRate)
Пример #3
0
def AUCkFoldLogisticRegression(regularization, inData, penalty, kFolds):
    print "\n\tCalculating AUC for regularization", regularization, "using", kFolds, "folds"
    sys.stdout.flush()
    xData, yData = getXYData(inData)
    nSamples, nFeatures = xData.shape
    if nSamples % kFolds != 0:
        raise UserWarning(
            "Uneven fold sizes! Must evenly divide 5922 (e.g. 2,3,7 or 9 folds"
        )
        # 2, 3, 7, and 9 are factors of 5922 (#data points) & yield equal fold sizes
    crossValFolds = KFold(nSamples, kFolds)
    yTestDataAllFolds = array([])
    probasTestDataAllFolds = array([])
    sumAUC = 0.0
    for foldNum, (train, test) in enumerate(crossValFolds):
        # fit a new LR model for each fold's data & evaluate using AUC
        LRclassifier = LogisticRegression(C=regularization, penalty=penalty)
        probas_ = LRclassifier.fit(xData[train],
                                   yData[train]).predict_proba(xData[test])
        numNon0Coefs = sum(
            [1 for coef in LRclassifier.coef_[:][0] if coef != 0])
        # probas_ contains 2 columns of probabilities, one for each of the 2 classes (0,1)
        # In the documentation, seems like col 1 is for class 1,
        # but tests show it seems like col 0 is for class 1, so we use that below.
        CLASS_1_COL = 0
        # Compute ROC curve and area under the curve
        FPR, TPR, thresholds = roc(yData[test], probas_[:, CLASS_1_COL])
        roc_auc = auc(FPR, TPR)
        print "\tFold:", foldNum, " AUC:", roc_auc, "Non0Coefs:", numNon0Coefs,
        print "Reg:", regularization,
        print localTimeString()
        sys.stdout.flush()
        sumAUC += roc_auc
        yTestDataAllFolds = numpy.concatenate((yTestDataAllFolds, yData[test]))
        probasTestDataAllFolds = \
                numpy.concatenate((probasTestDataAllFolds,probas_[:,CLASS_1_COL]) )
    FPRallFolds, TPRallFolds, thresholds = roc(yTestDataAllFolds,
                                               probasTestDataAllFolds)
    roc_auc_allFolds = auc(FPRallFolds, TPRallFolds)
    print "AUC_all_folds:", roc_auc_allFolds,
    print "Reg:", regularization, "Penalty:", penalty, "kFolds:", kFolds,
    print localTimeString()
    return roc_auc_allFolds
Пример #4
0
def test_roc():
    """Check that the ROC AUC on ``y[half:]`` is 0.80 to two decimals."""
    falsePosRate, truePosRate, _ = roc(y[half:], probas_[:,1])
    assert_array_almost_equal(auc(falsePosRate, truePosRate), 0.80, decimal=2)
Пример #5
0
# Shuffle the samples with a fixed seed, then split them at the midpoint:
# rows [:half] are used for fitting, rows [half:] for evaluation.
n_samples, n_features = X.shape
p = range(n_samples)
random.seed(0)  # fixed seed so the shuffle below is repeatable
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples/2)

# Add noisy features: append 200*n_features columns drawn by np.random.randn
X = np.c_[X,np.random.randn(n_samples, 200*n_features)]

# Run classifier: fit on the first half, predict probabilities for the second
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half],y[:half]).predict_proba(X[half:])

# Compute ROC curve and area under the curve, scoring with column 1 of probas_
fpr, tpr, thresholds = roc(y[half:], probas_[:,1])
roc_auc = auc(fpr, tpr)
print "Area under the ROC curve : %f" % roc_auc

# Plot ROC curve together with the diagonal from (0, 0) to (1, 1)
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlim([0.0,1.0])
pl.ylim([0.0,1.0])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.legend(loc="lower right")
pl.show()
Пример #6
0
################################################################################
# Classification and ROC analysis

# Run classifier with cross-validation (k=6) and plot one ROC curve per fold
cv = StratifiedKFold(y, k=6)
classifier = svm.SVC(kernel='linear', probability=True)

# Accumulator for the mean ROC curve: TPR values are summed on a common grid
# of 100 evenly spaced FPR points in [0, 1] and divided by the fold count below.
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []

for i, (train, test) in enumerate(cv):
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
    # Compute ROC curve and area under the curve for this fold
    fpr, tpr, thresholds = roc(y[test], probas_[:,1])
    # Resample this fold's curve onto the common FPR grid and add it to the sum
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0  # pin the first point of the accumulated curve to 0
    roc_auc = auc(fpr, tpr)
    pl.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

# Diagonal reference line, labelled 'Luck'
pl.plot([0, 1], [0, 1], '--', color=(0.6,0.6,0.6), label='Luck')

# Turn the accumulated sum into the mean curve and pin its last point to 1
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k--', 
        label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

# Leave a small margin around the unit square
pl.xlim([-0.05,1.05])
pl.ylim([-0.05,1.05])
Пример #7
0
def calcAUC(targetData):
    """Return the AUC obtained when ``targetData`` is scored with random numbers.

    One score per target is drawn with ``numpy.random.random`` and the ROC
    curve of those scores against ``targetData`` is integrated.
    """
    randomScores = numpy.random.random(len(targetData))
    falsePosRate, truePosRate, _ = roc(targetData, randomScores)
    return auc(falsePosRate, truePosRate)