Example No. 1
def LogisticGridSearch_OLD():  
    # C=1 is best
    cs = 10.0**np.arange(-1,2,0.25)   
    aucs = []
    for c in cs:
        clf = LogisticRegression(penalty='l1',C=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Logistic Regression ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = LogisticRegression(penalty='l1',C=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    roc_auc = auc(fpr,tpr)  # recompute the AUC for the best C before plotting
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best C = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Logistic Regression ROC Curve')
    myplt.show()
    return clf
Example No. 2
def MultinomialNaiveBayesGridSearch_OLD():  
    # alpha=1 is best
    cs = 10.0**np.arange(-9,2,0.5)
    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    roc_auc = auc(fpr,tpr)  # recompute the AUC for the best alpha before plotting
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
    myplt.show()
    return clf
Example No. 3
def svm_model(train_x_pca_df, train_y, validation_x_pca_df, validation_y):

    """
    This function is to build two kinds of svm models with and without setting class weight and compare their performances.
    """

    # Build svm model: uw for unweighted, w for weighted
    my_svm_uw = svm.SVC(C=0.001, kernel="linear", probability=True)
    my_svm_w = svm.SVC(C=0.001, kernel="linear", probability=True, class_weight="balanced")  # "auto" in older scikit-learn versions
    # calculate the predicted probability
    proba_svm_uw = my_svm_uw.fit(train_x_pca_df, train_y).predict_proba(validation_x_pca_df)
    proba_svm_w = my_svm_w.fit(train_x_pca_df, train_y).predict_proba(validation_x_pca_df)
    # calculate AUC
    auc_uw = roc_auc_score(validation_y, proba_svm_uw[:, 1])
    auc_w = roc_auc_score(validation_y, proba_svm_w[:, 1])
    # prepare to plot ROC curve
    fpr_svm_w, tpr_svm_w, thresholds_svm_w = roc_curve(validation_y, proba_svm_w[:, 1])
    fpr_svm_uw, tpr_svm_uw, thresholds_svm_uw = roc_curve(validation_y, proba_svm_uw[:, 1])
    # plot ROC curve
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    plt.title("ROC Curve for SVM with & without setting class weight")
    plt.plot(fpr_svm_w, tpr_svm_w, "grey", lw=2.0, label="SVM_w ({0:.3f})".format(auc_w))
    plt.plot(fpr_svm_uw, tpr_svm_uw, "g", lw=2.0, label="SVM_uw ({0:.3f})".format(auc_uw))
    plt.legend(loc=4)
    plt.show()
Example No. 4
def SGDGridSearch_OLD():  
    # alpha=1 is best
    cs = 10.0**np.arange(-9,9,1)   
    aucs = []
    for c in cs:
        clf = SGDClassifier(penalty='l1',alpha=c).fit(f_train, y_train)
        probs = clf.decision_function(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - SGD Classifier ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = SGDClassifier(penalty='l1',alpha=c).fit(f_train, y_train)
    probs = clf.decision_function(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
    roc_auc = auc(fpr,tpr)  # recompute the AUC for the best alpha before plotting
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - SGD Classifier ROC Curve')
    myplt.show()
    return clf, aucs
Example No. 5
def test_roc_curve_hard():
    # roc_curve for hard decisions
    y_true, pred, probas_pred = make_prediction(binary=True)

    # always predict one
    trivial_pred = np.ones(y_true.shape)
    fpr, tpr, thresholds = roc_curve(y_true, trivial_pred)
    roc_auc = auc(fpr, tpr)
    assert_array_almost_equal(roc_auc, 0.50, decimal=2)
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)

    # always predict zero
    trivial_pred = np.zeros(y_true.shape)
    fpr, tpr, thresholds = roc_curve(y_true, trivial_pred)
    roc_auc = auc(fpr, tpr)
    assert_array_almost_equal(roc_auc, 0.50, decimal=2)
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)

    # hard decisions
    fpr, tpr, thresholds = roc_curve(y_true, pred)
    roc_auc = auc(fpr, tpr)
    assert_array_almost_equal(roc_auc, 0.78, decimal=2)
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
def run_regression(X_train, Y_train, X_test, Y_test, lead, lag):
	num_crossval = 10
	start_time = time.time()
	logreg = Pipeline([('scale', StandardScaler()), ('logreg', linear_model.LogisticRegression())])

	#do cross-validation
	try:
		auc_crossval =  np.mean(cross_validation.cross_val_score(logreg, np.array(X_train), np.array(Y_train), scoring='roc_auc', cv=num_crossval))
	except:
		auc_crossval = 0.0

	#do training on train
	logreg.fit(X_train, Y_train)

	desired_label = 0 # want to predict if student will dropout
	desired_label_index = logreg.steps[-1][1].classes_.tolist().index(desired_label) 

	try:
		predicted_probs = logreg.predict_proba(X_train)
		fpr, tpr, thresholds = roc_curve(Y_train, predicted_probs[:, desired_label_index],  pos_label=desired_label)
		auc_train = auc(fpr, tpr)
	except:
		auc_train = 0.0

	try:
		predicted_probs = logreg.predict_proba(X_test)
		fpr, tpr, thresholds = roc_curve(Y_test, predicted_probs[:, desired_label_index],  pos_label=desired_label)
		auc_test = auc(fpr, tpr)
	except:
		auc_test = 0.0

	print "ran logistic regression for lead %s lag %s in %s seconds" % (lead, lag, time.time() - start_time)
	return (float(auc_train), float(auc_test), auc_crossval)
def eva(fff1, fff2, fff3, fff4, rocfile):
	truth = open(fff1)
	pred = open(fff2)

	y = [float(line.split(' ',1)[0]) for line in truth]
	p = [float(line) for line in pred]

	fpr, tpr, thresholds = roc_curve(y, p, pos_label=1)  
	print auc(fpr, tpr)

	plt.figure(figsize=(4, 4), dpi=80)
	x = [0.0, 1.0]
	plt.plot(x, x, linestyle='dashed', color='red', linewidth=2, label='random')

	plt.xlim(0.0, 1.0)
	plt.ylim(0.0, 1.0)
	plt.xlabel("FPR", fontsize=14)
	plt.ylabel("TPR", fontsize=14)
	plt.title("ROC Curve", fontsize=14)
	plt.plot(fpr, tpr, linewidth=2, label = "adaboost_fea1")

	truth = open(fff3)
	pred = open(fff4)

	y = [float(line.split(' ',1)[0]) for line in truth]
	p = [float(line) for line in pred]

	fpr, tpr, thresholds = roc_curve(y, p, pos_label=1)  
	print auc(fpr, tpr)
	plt.plot(fpr, tpr, linewidth=2, label = "adaboost_fea2")
	plt.legend(fontsize=10, loc='best')
	plt.tight_layout()

	plt.savefig(rocfile)
Example No. 8
def roc_calculation(y_pred, y_test, model, type = sys.argv[2]):
    plt.figure()
    if type == 'gender':
        fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=0)
        roc_auc = metrics.auc(fpr, tpr)
        plt.plot(fpr,tpr,label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(GENDER_CLASSES[0], roc_auc))
        fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=1)
        roc_auc = metrics.auc(fpr, tpr)
        plt.plot(fpr,tpr,label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(GENDER_CLASSES[1], roc_auc))
    else:
        for i in [0,1,2,3,4]:
            fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=i)
            roc_auc = metrics.auc(fpr, tpr)
            plt.plot(fpr,tpr,label='ROC curve of class {0} (area = {1:0.2f})'
                                   ''.format(AGE_CLASSES[i], roc_auc))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig('experiments/fensemble-roc-'+model+'.png')  # save before show(), otherwise the saved figure may be blank
    plt.show()
Example No. 9
def main():
    (X,y) = skd.make_classification()
    N = X.shape[0]
    X = np.append(X,np.ones((N,1)),axis=1)
    y = 2*y-1
        
    skf = StratifiedKFold(y,5)
    for train,test in skf:
        X_train = X[train,:]
        y_train = y[train]
        
        X_test = X[test,:]
        y_test = y[test]
        
        C = 0.01
        
        # dual co-ordinate descent SVM
        clf = SVMCD(C)
        clf.fit(X_train,y_train,w_prior=np.ones(21))
        pred = clf.decision_function(X_test)
        score = clf.score(X_test,y_test)
        fpr, tpr, thresholds = metrics.roc_curve(y_test, pred)
        print score, metrics.auc(fpr, tpr), "//",
        w1  = clf.w;
        
        # standard svm
        clf = SVC(C=C,kernel='linear')
        clf.fit(X_train, y_train) 
        pred = clf.decision_function(X_test)
        score = clf.score(X_test,y_test)
        fpr, tpr, thresholds = metrics.roc_curve(y_test, pred)
        print score, metrics.auc(fpr, tpr)
        w2 = clf.coef_
        w2.shape = (21,)
Example No. 10
def plotROC(y_score, labels, outpdf):
    n_classes = labels.shape[1]
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(labels[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    
    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(labels.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    
    # Plot of a ROC curve for a specific class
    plt.figure(figsize = (6,6))
    
    # Plot ROC curve
    for i in range(4):
        plt.plot(fpr[i], tpr[i], label='' + classifiers[i]+ ' AUC={1:0.2f}'
                                       ''.format(i, roc_auc[i]))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False positive rate(1-Specificity)')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(loc="lower right")
    savefig(outpdf)  
    plt.show()
Example No. 11
    def makeROCPlot(self, filename, title, labels, roc_data):
        y = np.array(self.create_binary_label_matrix(labels))
        n_classes = y.shape[1]
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y[:, i], roc_data[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        fpr["micro"], tpr["micro"], _ = roc_curve(y.ravel(), roc_data.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        # Plot ROC curve
        plt.figure()
        plt.plot(fpr["micro"], tpr["micro"],label='Average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"]))
        for i in range(n_classes):
            plt.plot(fpr[i], tpr[i], label='ROC curve of class {0} (area = {1:0.2f})'.format(i+1, roc_auc[i]))

        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(title)
        plt.legend(loc="lower right")
        plt.savefig("figs/"+filename+'.png',bbox_inches='tight')
        #plt.show()
        plt.clf()
        return roc_auc
def calculate_roc(truth, predictions):
    lb_truth = label_binarize(truth.iloc[:, -1].astype(int), np.arange(n_classes))
    lb_prediction = label_binarize(predictions.iloc[:, -1].astype(int), np.arange(n_classes))

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(letter_set)):
        fpr[i], tpr[i], _ = roc_curve(lb_truth[:, i], lb_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(lb_truth.ravel(), lb_prediction.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    
    return fpr, tpr, roc_auc
Example No. 13
    def compute_rocauc(self):
        """

        :return:
        """
        # Binarize the output
        y_test = label_binarize(self.y_test, classes=list(range(self.n_classes)))

        # Compute ROC curve and ROC area for each class
        y_score = self.clf.predict_proba(self.X_test)
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(self.n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        self.report["roc_auc"] = dict(
            fpr={str(k): v.tolist() for k, v in fpr.items()},
            tpr={str(k): v.tolist() for k, v in tpr.items()},
            roc_auc={str(k): v.tolist() for k, v in roc_auc.items()}
        )
def classification_metrics (targets, preds, probs=None):

    if probs is not None:
        fpr, tpr, thresholds = roc_curve(targets, probs[:, 1], 1)
        roc_auc = auc(fpr, tpr)
    else:
        fpr, tpr, thresholds = roc_curve(targets, preds, 1)
        roc_auc = auc(fpr, tpr)

    cm = confusion_matrix(targets, preds)

    #accuracy
    acc = accuracy_score(targets, preds)

    #recall? True Positive Rate or Sensitivity or Recall
    sens = recall_score(targets, preds)

    #precision
    prec = precision_score(targets, preds)

    #f1-score
    f1 = f1_score(targets, preds, np.unique(targets), 1)

    spec = 0.0
    #True Negative Rate or Specificity (tn / (tn+fp))
    if len(cm) == 2:
        spec = float(cm[0,0])/(cm[0,0] + cm[0,1])

    return acc, sens, spec, prec, f1, fpr, tpr, roc_auc
Example No. 15
def GetReport(model, PlotROC, X_test, y_test):

    #Results=pd.DataFrame(zip(RelFeats, np.transpose(model.coef_)))
    accur=model.score(X_test, y_test)
    predicted = model.predict(X_test)
    ConfMat=metrics.confusion_matrix(y_test, predicted)
    Rep=metrics.classification_report(y_test, predicted)
    if (y_test[0]=='0b0') | (y_test[0]=='0b1'):
        Bin_to_Int=np.vectorize(int)
        false_positive_rate, true_positive_rate, thresholds = roc_curve(Bin_to_Int(y_test,2), model.predict_proba(X_test)[:,1])
    else:
        false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, model.predict_proba(X_test)[:,1])
    roc_auc = auc(false_positive_rate, true_positive_rate)
    
    print Rep
    print 'Accuracy = '+str(accur)
    print 'AUC = '+str(roc_auc)

    if PlotROC:
        plt.figure(11)
        plt.clf()
        plt.title('Receiver Operating Characteristic')
        plt.plot(false_positive_rate, true_positive_rate, 'b',
        label='AUC = %0.2f'% roc_auc)
        plt.legend(loc='lower right', fontsize=20)
        plt.plot([0,1],[0,1],'r--')
        plt.xlim([-0.05,1.05])
        plt.ylim([-0.05,1.05])
        plt.ylabel('True Positive Rate', fontsize=20)
        plt.xlabel('False Positive Rate', fontsize=20)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        plt.show()
        
    return accur, ConfMat, Rep, roc_auc
Example No. 16
def CV(clf, X, y, n_folds=10):
    """
    returns gini values and classifier
    """
    from sklearn.cross_validation import StratifiedKFold, train_test_split
    from sklearn.metrics import roc_curve, auc
    import pandas as pd
    
    cv = StratifiedKFold(y, n_folds=n_folds)
    auccka = []
    try:
        for train_ix, test_ix in cv:
            clf.fit(X.ix[train_ix,:], y[train_ix])
            y_pred = clf.predict_proba(X.ix[test_ix,:])[:,1]
            y_true = y[test_ix]
            fpr, tpr, tresholds = roc_curve(y_true, y_pred)
            auccka.append(auc(fpr,tpr))
    except Exception:
        # needed when log(0) comes up in some fold
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
        # fix: train_test_split returns ndarrays, but a DataFrame is needed here:
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)
        X_train.columns = X.columns
        X_test.columns = X.columns
        # end of fix
        clf.fit(X_train, y_train)
        y_pred = clf.predict_proba(X_test)[:,1]
        y_true = y_test
        fpr, tpr, tresholds = roc_curve(y_true, y_pred)
        auccka.append(auc(fpr, tpr))
    gini = [2*a - 1 for a in auccka]  # Gini = 2*AUC - 1
    return gini, clf
def AUC(test_labels, predicted_labels, n_classes):
    y_test = testProbVector(n_classes, test_labels)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(0,n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:,i], predicted_labels[:,i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), predicted_labels.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Compute macro-average ROC curve and ROC area

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    return np.asarray(roc_auc)
Example No. 18
def experiment_anomaly_detection(train, test, comb, num_train, anom_prob, labels):
	phi = calc_feature_vecs(comb.X)
	print phi.size

	# bayes classifier
	(DIMS, N) = phi.size
	w_bayes = co.matrix(1.0, (DIMS, 1))
	pred = w_bayes.trans()*phi[:,num_train:]
	(fpr, tpr, thres) = metric.roc_curve(labels[num_train:], pred.trans())
	bayes_auc = metric.auc(fpr, tpr)

	# train one-class svm
	kern = Kernel.get_kernel(phi[:,0:num_train], phi[:,0:num_train])
	ocsvm = OCSVM(kern, C=1.0/(num_train*anom_prob))
	ocsvm.train_dual()
	kern = Kernel.get_kernel(phi, phi)
	(oc_as, foo) = ocsvm.apply_dual(kern[num_train:,ocsvm.get_support_dual()])
	(fpr, tpr, thres) = metric.roc_curve(labels[num_train:], oc_as)
	base_auc = metric.auc(fpr, tpr)
	if (base_auc<0.5):
	    base_auc = 1.0-base_auc

	# train structured anomaly detection
	#sad = StructuredOCSVM(train, C=1.0/(num_train*anom_prob))
	sad = StructuredOCSVM(train, C=1.0/(num_train*0.5))
	(lsol, lats, thres) = sad.train_dc(max_iter=50)
	(pred_vals, pred_lats) = sad.apply(test)
	(fpr, tpr, thres) = metric.roc_curve(labels[num_train:], pred_vals)
	auc = metric.auc(fpr, tpr)
	if (auc<0.5):
	    auc = 1.0-auc

	return (auc, base_auc, bayes_auc)
Example No. 19
def generate_roc_graph(data=[]):
    """
    generate ROC curve of detection True positive/False negative rate
    """
    from sklearn import metrics

    if not data:
        return

    shiva_score_probs = map(lambda a: a[0], data)
    spamass_score_probs = map(lambda a: a[1], data)
    derived_results = map(lambda a: a[2], data)

    fpr_shiva, tpr_shiva, _ = metrics.roc_curve(derived_results, shiva_score_probs, pos_label=1)
    fpr_spamass, tpr_spamass, _ = metrics.roc_curve(derived_results, spamass_score_probs, pos_label=1)

    roc_auc_shiva = metrics.auc(fpr_shiva, tpr_shiva)
    roc_auc_spamass = metrics.auc(fpr_spamass, tpr_spamass)

    plot.figure()
    plot.plot(fpr_shiva, tpr_shiva, label="ROC curve SHIVA (area = %0.2f)" % roc_auc_shiva)
    plot.plot(fpr_spamass, tpr_spamass, label="ROC curve spamassassin (area = %0.2f)" % roc_auc_spamass)
    plot.plot([0, 1], [0, 1], "k--")
    plot.xlim([0.0, 1.0])
    plot.ylim([0.0, 1.05])
    plot.xlabel("False Positive Rate")
    plot.ylabel("True Positive Rate")
    plot.title("Shiva honeypot classification ROC")
    plot.legend(loc="lower right")
    plot.savefig("../../../web/images/roc_graph.png", bbox_inches="tight")
    plot.close()
Example No. 20
def calc_auc(model, y_test, y_score, auctype = "ROC"):
    y_score = 1 / ( 1 + np.exp(-y_score) ) # sigmoid it!
    n_classes = y_test.shape[1] # 164
    if auctype == "ROC":
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        return roc_auc["micro"]
    elif auctype == "PR":
        prec = dict()
        rec = dict()
        pr_auc = dict()
        for i in range(n_classes):
            prec[i], rec[i], _ = precision_recall_curve(y_test[:,i], y_score[:,i])
            pr_auc[i] = auc(rec[i], prec[i])
        # Compute micro-average prec-rec curve and prec-rec AUC
        prec["micro"], rec["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
        pr_auc["micro"] = auc(rec["micro"], prec["micro"])
        return pr_auc["micro"]
Example No. 21
def roc_curve_metric(targets, predictions, **multi_optional_args):
    """
    For multi class classifiers, two different types of roc curves are made.

    TODO: explain what these are
    """
    assert targets.ndim == predictions.ndim == 2
    assert targets.shape == predictions.shape

    if targets.shape[1] == 1:
        predictions = predictions.flatten()
        targets = targets.flatten()

        roc_for_data = roc_curve(targets, predictions)
    else:
        roc_for_data = {}
        targets = np.argmax(targets, axis=1).flatten()
        for i in range(predictions.shape[1]):
            roc_for_data[i] = {}
            predictions_for_class = predictions[:, i]
            roc_for_data[i]['specific_classification'] = roc_curve(targets, predictions_for_class, pos_label=i)

            predictions_for_class = np.zeros(len(predictions))
            highest_class = np.argmax(predictions, axis=1)
            # keep the class-i score only for samples where class i is the top prediction
            predictions_for_class[highest_class == i] = predictions[highest_class == i, i]
            roc_for_data[i]['general_classification'] = roc_curve(targets, predictions_for_class, pos_label=i)

    return [roc_for_data], ['roc_curve']
Example No. 22
def make_roc(name, clf, ytest, xtest, ax=None, labe=5, proba=True, skip=0):
    initial=False
    if not ax:
        ax=plt.gca()
        initial=True
    if proba:
        fpr, tpr, thresholds=roc_curve(ytest, clf.predict_proba(xtest)[:,1])
    else:
        fpr, tpr, thresholds=roc_curve(ytest, clf.decision_function(xtest))
    roc_auc = auc(fpr, tpr)
    if skip:
        l=fpr.shape[0]
        ax.plot(fpr[0:l:skip], tpr[0:l:skip], '.-', alpha=0.3, label='ROC curve for %s (area = %0.2f)' % (name, roc_auc))
    else:
        ax.plot(fpr, tpr, '.-', alpha=0.3, label='ROC curve for %s (area = %0.2f)' % (name, roc_auc))
    label_kwargs = {}
    label_kwargs['bbox'] = dict(
        boxstyle='round,pad=0.3', alpha=0.2,
    )
    for k in range(0, fpr.shape[0],labe):
        #from https://gist.github.com/podshumok/c1d1c9394335d86255b8
        threshold = str(np.round(thresholds[k], 2))
        ax.annotate(threshold, (fpr[k], tpr[k]), **label_kwargs)
    if initial:
        ax.plot([0, 1], [0, 1], 'k--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('ROC')
    ax.legend(loc="lower right")
    return ax
Example No. 23
def plotROCCurve(y_test,y_score,fileStorePath,fileName):

	#print("Inside Plot ROC curve")
	fpr = dict()
	tpr = dict()
	roc_auc = dict()
	for i in range(1):
		fpr[i], tpr[i], _ = roc_curve(y_test[:], y_score[:])
		roc_auc[i] = auc(fpr[i], tpr[i])

	# Compute micro-average ROC curve and ROC area
	fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
	roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
	plt.figure()
	plt.plot(fpr[0], tpr[0], label='ROC curve (area = %0.2f)' % roc_auc[0])
	plt.plot([0, 1], [0, 1], 'k--')
	plt.xlim([0.0, 1.0])
	plt.ylim([0.0, 1.05])
	plt.xlabel('False Positive Rate')
	plt.ylabel('True Positive Rate')
	plt.title('Receiver operating characteristic example')
	plt.legend(loc="lower right")
	#plt.show()
	if not os.path.exists(fileStorePath):
		os.makedirs(fileStorePath)
	plt.savefig(fileStorePath+'/'+'ROC'+fileName+'.png')
	plt.clf()#clear the figure for next loop

	
Example No. 24
def write_score(name, gold_labels, pred_scores, classes, average_classes):
    classes, average_classes = np.array(classes), np.array(average_classes)
    gold_scores = LabelBinarizer().fit(classes).transform(gold_labels)
    pred_labels = classes[np.argmax(pred_scores, axis=1)]

    with closing(Tee('{}.txt'.format(name), 'w')):
        precision, recall, fscore, _ = precision_recall_fscore_support(gold_labels, pred_labels, labels=classes)
        for t in zip(classes, precision, recall, fscore):
            print('{}: P={:.2f}, R={:.2f}, F1={:.2f}'.format(*t))
        print('Accuracy: {:.4f}'.format(accuracy_score(gold_labels, pred_labels)))
        print('F1 average: {:.4f}'.format(np.mean(fscore[LabelEncoder().fit(classes).transform(average_classes)])))

    with PdfPages('{}.pdf'.format(name)) as pdf:
        fpr = {}
        tpr = {}
        roc_auc = {}
        for i in range(len(classes)):
            fpr[i], tpr[i], _ = roc_curve(gold_scores[:, i], pred_scores[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        fpr['micro'], tpr['micro'], _ = roc_curve(gold_scores.ravel(), pred_scores.ravel())
        roc_auc['micro'] = auc(fpr['micro'], tpr['micro'])
        plt.figure()
        plt.plot(fpr['micro'], tpr['micro'], label='micro-average (area = {:.2f})'.format(roc_auc['micro']))
        for i in range(len(classes)):
            plt.plot(fpr[i], tpr[i], label='{0} (area = {1:.2f})'.format(i, roc_auc[i]))
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves')
        plt.legend(loc='lower right')
        pdf.savefig()
Example No. 25
def plotAUC(yhatNN, yhatRF, yhatET, y):
    """
    Plot ROC curve. Compare Neural Net to Random Forests.

    :param yhatNN: Neural Net predictions
    :param yhatRF: Random Forest predictions
    :param yhatET: Extra-Trees predictions
    :param y: target labels

    :return: None
    """
    fprNN, tprNN, thresholdsNN = metrics.roc_curve(y, yhatNN)
    fprRF, tprRF, thresholdsRF = metrics.roc_curve(y, yhatRF)
    fprET, tprET, thresholdsET = metrics.roc_curve(y, yhatET)

    plt.figure()
    plt.plot(fprNN, tprNN, label='Neural Net')
    plt.plot(fprRF, tprRF, label='Random Forest')
    plt.plot(fprET, tprET, label='Extra-Trees')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC')
    plt.legend(loc="lower right")
    plt.savefig('ROC.png')
    plt.close()
Example No. 26
def multiclass_AUC(clf, X, Y):
    # Binarize the output
    X, Y = np.array(X), np.array(Y)
    Y = label_binarize(Y, classes=list(set(Y)))
    n_classes = Y.shape[1]

    # shuffle and split training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5,
                                                        random_state=0)
    # Learn to predict each class against the other
    classifier = OneVsRestClassifier(clf)
    Y_score = classifier.fit(X_train, Y_train).predict(X_test)

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(Y_test[:, i], Y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(Y_test.ravel(), Y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    print "AUC for multiclass {}: {}".format(clf.__class__.__name__, roc_auc["micro"])
Example No. 27
def main():
    '''Sorted data'''
    inputsorted='german-sorted.xlsx'
    datasorted=readxlsx(inputsorted)
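    # readxlsx (defined elsewhere) is assumed to return a 2-row array:
    # row 0 = predicted scores, row 1 = actual class labels.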
    score_sorted=datasorted[0,:]
    act_class_sorted=datasorted[1,:]
        
    '''calculating ROC AUC'''
    fpr_sorted,tpr_sorted,thresholds_sorted=metrics.roc_curve(act_class_sorted,score_sorted)
    aucvalue_sorted=metrics.auc(fpr_sorted,tpr_sorted)
    print 'AUC value of sorted data'
    print aucvalue_sorted
    #print 'Threshold'
    #print thresholds_sorted
    print ''
    
    '''Unsorted data'''
    inputunsorted='german-unsorted.xlsx'
    dataunsorted=readxlsx(inputunsorted)
    score_unsorted=dataunsorted[0,:]
    act_class_unsorted=dataunsorted[1,:]
        
    '''calculating ROC AUC'''
    fpr_unsorted,tpr_unsorted,thresholds_unsorted=metrics.roc_curve(act_class_unsorted,score_unsorted)
    aucvalue_unsorted=metrics.auc(fpr_unsorted,tpr_unsorted)
    print 'AUC value of unsorted data'
    print aucvalue_unsorted
Example No. 28
def calculate_draw_roc(classifier, data, features, label, cv_Flod, original_data, original_label):
	mean_tpr = 0.0
	mean_fpr = np.linspace(0, 1, 100)
	all_tpr = []

	my_test = original_data[:3000]
	my_label = original_label[:3000]

	features_importance = dict()

	for i, (train, test) in enumerate(cv_Flod):
	    
	    fitted_classifier = classifier.fit(data[train], label[train])
	    probas_ = fitted_classifier.predict_proba(data[test])
	    if i == 1:
	    	save_result(probas_, "predict_result.csv")
	    	save_result(label[test], "original_result.csv")

	    # Compute ROC curve and area the curve
	    fpr, tpr, thresholds = roc_curve(label[test], probas_[:, 1])
	    mean_tpr += interp(mean_fpr, fpr, tpr)
	    mean_tpr[0] = 0.0
	    roc_auc = auc(fpr, tpr)
	    plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))


	    importances = fitted_classifier.feature_importances_

	    indices = np.argsort(importances)[::-1]

	    print("Feature ranking: ")
	    for f in range(data.shape[1]):
	    	print("%s. %d (%f)" % (features[indices[f]], indices[f], importances[indices[f]]))
	    	features_importance[features[indices[f]]] = importances[indices[f]]

	test_probs = fitted_classifier.predict_proba(my_test)
	test_fpr, test_tpr, test_thresholds = roc_curve(my_label, test_probs[:, 1])
	roc_auc = auc(test_fpr, test_tpr)
	plt.plot(test_fpr, test_tpr, lw=1, label='ROC test (area = %0.2f)' % (roc_auc))



	plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

	mean_tpr /= len(cv_Flod)
	mean_tpr[-1] = 1.0
	mean_auc = auc(mean_fpr, mean_tpr)
	plt.plot(mean_fpr, mean_tpr, 'k--',
	         label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

	plt.xlim([-0.05, 1.05])
	plt.ylim([-0.05, 1.05])
	plt.xlabel('False Positive Rate')
	plt.ylabel('True Positive Rate')
	plt.title('Receiver operating characteristic example')
	plt.legend(loc="lower right")
	plt.savefig("ROC_GB_user_all_solved_lr(0.05).png")

	return features_importance
def train(train_data, test_data, fold):
    x_train = train_data[:, :-1]
    y_train = train_data[:, -1:]

    x_test = test_data[:, :-1]
    y_test = test_data[:, -1:]
    theta_vector = np.zeros(shape=(len(x_train[0]), 1))
    theta_vector = np.matrix(theta_vector)

    # update gradient descent each time

    m = len(x_train[0])
    learning_curve = {}
    learning_errors = {}
    learning_accuracy = {}
    ll_count = 1
    sigmod = lambda z: 1.0 / (1.0 + np.exp(-z))

    while ll_count < 2:
        learning_rate = 10**(-ll_count)
        alpha_by_m = learning_rate / m
        old_theta = theta_vector
        j = {}

        for i in range(0, 1000):
            h_x = np.dot(x_train, old_theta)
            diff_theta = np.dot(x_train.T, (sigmod(h_x) - y_train))
            n_theta = learning_rate * diff_theta
            old_theta = old_theta - n_theta
            #j[iterator] = compute_error(h_x, y_train, y_train, y_train)
        solution_vector = old_theta

        # #calculate mean squared error
        train_error = compute_error(sigmod(h_x), y_train)
        test_error = compute_error((sigmod(x_test.dot(solution_vector))),
                                   y_test)

        tpr, fpr, train_accuracy = compute_accuracy((sigmod(x_train.dot(solution_vector))),
                                          y_train)
        tpr, fpr, test_accuracy = compute_accuracy((sigmod(x_test.dot(solution_vector))),
                                         y_test)

        learning_accuracy[(train_accuracy, test_accuracy)] = ll_count
        learning_errors[(train_error, test_error)] = ll_count
        learning_curve[ll_count] = j

        ll_count += 1
    if fold == 4:
        roc_curve(y_train, (sigmod(x_train.dot(solution_vector))))
    # pl.plot(np.arange(iterator),
    #         learning_curve[learning_erros.get(min(learning_erros.keys()))].values())
    # pl.xlabel('Iterations')
    # pl.ylabel('Cost Function')
    #pl.show()

    print "Best Learning rate is ", 10**(- learning_errors.get(min(learning_errors.keys()))), \
         "with mse values:", min(learning_errors.keys())
    print "Accuracy is ", max(learning_accuracy.keys())
    return min(learning_errors.keys()), max(learning_accuracy.keys())
Example No. 30
def getROC(clf,probstype):
    # Return (fpr, tpr) for clf on the module-level test set (f_test, y_test);
    # probstype == 1 uses predict_proba, anything else uses decision_function.
    if probstype == 1:
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    else:
        probs = clf.decision_function(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
    return fpr,tpr
Example No. 31
    def explain(self, param, label='', auc_plot=False):
        print('------------ Explanation -------------')
        self._file.write('------------ Explanation -------------\n')
        phi = param[0]
        theta = param[1]
        psi = param[2]
        k = param[3]

        start1 = time()
        ex = Extractor(self._clf, phi, theta, psi)
        ex.extract_forest_paths()

        ex.rule_filter()

        print('max_rule', ex.max_rule, 'max_node', ex.max_node)
        print('min_rule', ex.min_rule, 'min_node', ex.min_node)
        end1 = time()
        print("EX Running time: %s seconds" % (end1 - start1))

        print("original path number: ", ex.n_original_leaves_num)
        print("original scale: ", ex.scale)
        print("path number after rule filter: ", len(ex._forest_values))
        self._file.write('original path number: {}\n'.format(
            ex.n_original_leaves_num))
        self._file.write('original scale: {}\n'.format(ex.scale))
        self._file.write('path number after rule filter: {}\n'.format(
            len(ex._forest_values)))

        start2 = time()
        sat = Z3Process(ex, k)
        sat.leaves_partition()
        if self._maxsat_on is True:
            sat.maxsat()
            print("path number after maxsat: ", sat.n_rules_after_max,
                  " after filter: ", sat.n_rules_after_filter, '\n')
            self._file.write(
                'path number after maxsat: {}\tafter filter: {}\n\nclasses:\t{}\n\n'
                .format(sat.n_rules_after_max, sat.n_rules_after_filter,
                        self._clf.classes_))
        else:
            print('no maxsat')
            self._file.write('/no MAX-SAT\n')
        sat.run_filter()
        end2 = time()

        print("SAT Running time: %s seconds" % (end2 - start2))

        print('classes:', self._clf.classes_)

        start3 = time()
        f = FormulaeEstimator(sat,
                              conjunction=self._conjunction,
                              classes=self._clf.classes_)
        f.get_formulae_text(self._file)
        print('\n------------ Performance -------------')
        self._file.write('\n------------ Performance -------------\n')
        c_ans = self._clf.predict(self._X_test)
        ans = f.classify_samples(self._X_test)
        end3 = time()
        print("ET Running time: %s seconds" % (end3 - start3))

        RF_accuracy = accuracy_score(self._y_test, c_ans)
        EX_accuracy = accuracy_score(self._y_test, ans)
        performance = accuracy_score(c_ans, ans)

        no_ans = 0
        overlap = 0
        for each in f.sat_group:
            if len(each) > 1:
                overlap += 1
            elif len(each) == 0:
                no_ans += 1

        if label == '':  # compute AUC
            label = self._clf.classes_[0]

        fpr, tpr, thresholds = roc_curve(self._y_test,
                                         self._clf.predict_proba(
                                             self._X_test)[:, 1],
                                         pos_label=label)
        ori_auc = auc(fpr, tpr)

        ex_test = f.classify_samples_values(self._X_test)
        efpr, etpr, ethresholds = roc_curve(self._y_test,
                                            ex_test[:, 1],
                                            pos_label=label)
        ex_auc = auc(efpr, etpr)

        print('sample size:\t', len(self._y_test))
        self._file.write('sample size:\t{}\n'.format(len(self._y_test)))

        print('RF accuracy:\t', RF_accuracy)
        self._file.write('RF accuracy:\t{}\n'.format(RF_accuracy))

        print('RF AUC:\t\t\t', ori_auc)
        self._file.write('RF AUC:\t\t\t{:.2f}\n'.format(ori_auc))

        # print('coverage of wrong results:', f_count)
        print('EX accuracy:\t', EX_accuracy)
        self._file.write('EX accuracy:\t{}\n'.format(EX_accuracy))

        print('EX AUC:\t\t\t', ex_auc)
        self._file.write('EX AUC:\t\t\t{:.2f}\n'.format(ex_auc))

        print('Coverage:\t\t',
              (len(self._y_test) - no_ans) / len(self._y_test))
        self._file.write('Coverage:\t\t{}\n'.format(
            (len(self._y_test) - no_ans) / len(self._y_test)))

        print('Overlap:\t\t', overlap / len(self._y_test))
        self._file.write('Overlap:\t\t{}\n'.format(overlap /
                                                   len(self._y_test)))

        print('*Performance:\t', performance)
        self._file.write('*Performance:\t{}\n'.format(performance))

        if auc_plot is True:
            plt.plot(fpr,
                     tpr,
                     linewidth=2,
                     label="RF ROC curve (area = {:.2f})".format(ori_auc))

            plt.plot(efpr,
                     etpr,
                     linewidth=2,
                     label="Explain ROC curve (area = {:.2f})".format(ex_auc))

            plt.xlabel("false positive rate")

            plt.ylabel("true positive rate")

            plt.ylim(0, 1.05)

            plt.xlim(0, 1.05)

            plt.legend(loc=4)  # legend position

            plt.show()
Example No. 32
print(
    classification_report(y_true=y_test,
                          y_pred=y_pred_rnd,
                          target_names=['normal', 'covid']))
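# `cm` below is assumed to be the confusion matrix computed earlier in the
# script (e.g. cm = confusion_matrix(y_test, y_pred_rnd)), which this excerpt omits.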

fig1 = plt.figure()
sns.heatmap(data=cm,
            cmap='Blues',
            annot=True,
            annot_kws={'size': 14},
            fmt='d',
            vmin=0,
            vmax=len(y_test) / 2.)
plt.title('annotated heatmap for confusion matrix')
plt.show()
# fig1.savefig('./checkpoints/densenet121/cm_heatmap.png')

fpr, tpr, _ = roc_curve(y_true=y_test, y_score=y_pred, pos_label=None)
roc_auc = auc(x=fpr, y=tpr)
fig2 = plt.figure()
plt.plot(fpr, tpr, 'b', label='AUC = %0.4f' % roc_auc)
plt.title('Receiver Operating Characteristic')
plt.legend()
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
# fig2.savefig('./checkpoints/densenet121/roc.png')
Example No. 33
true_class_rev = 1 - true_class
total_test_images = len(filenames)
pred_prob = model_use.predict_generator(data_test,
                                        steps=total_test_images,
                                        verbose=1)
pred_prob_rev = 1 - pred_prob

# Prediction probability for each image being a dog
results_dict = dict(zip(filenames, pred_prob_rev))

# Confusion matrix
conf_mat = confusion_matrix(true_class, pred_prob > 0.5)
print(conf_mat)

# Get ROC AUC score and arrays for building ROC curve
fpr, tpr, thresholds = roc_curve(true_class, pred_prob)
auc = roc_auc_score(true_class, pred_prob)  # score on probabilities rather than thresholded labels

# ROC curve
plt.figure()
plt.plot(np.linspace(0, 1, num=50), '--', color='gray')
plt.plot(fpr, tpr, '-', color='red')
plt.xlabel('FPR', fontsize=14)
plt.ylabel('TPR', fontsize=14)
plt.show()


# Get a random image in the test data and display it with its associated
# predicted probability.
def rand_check(index=None):
    # Get random image if no index is supplied
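    # [The body is truncated in the original excerpt. A minimal illustrative
    #  sketch, reusing only names defined above (filenames, results_dict,
    #  total_test_images); the original implementation may differ.]
    if index is None:
        index = np.random.randint(total_test_images)
    fname = filenames[index]
    print('{}: predicted probability of being a dog = {}'.format(
        fname, float(results_dict[fname])))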
Example No. 34
graphviz.Source(dot_graph).view()
##########################################################################
##########################################################################

# Finally, let's evaluate the tree's performance on the test data. The predict() function can be used for
# this purpose. We can then build a confusion matrix, as sketched below.

# (86+59)/200 = 0.725
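# A sketch of the evaluation step described above, assuming the `clf`, `X_test`
# and `y_test` objects created earlier in this script (not shown in the excerpt):
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))   # rows: true class, columns: predicted class
print(accuracy_score(y_test, y_pred))     # (86+59)/200 = 0.725 per the note above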

##########################################################################
############ Here we construct the ROC curve for the tree ################
##########################################################################
y_score = clf.predict_proba(X_test)

fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr,
         tpr,
         color='orange',
         label='ROC curve (area = {:0.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='blue', linestyle='--')
plt.xlim([-0.05, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve for our Decision Tree')
plt.legend(loc="lower right")
##########################################################################
Example No. 35
            allLTPC += 1
            if true_label == pred_label:
                corrctLTPC += 1
        elif true_label == 1:
            allTPC += 1
            if true_label == pred_label:
                corrctTPC += 1
    acc_lst.append(acc)
    class2_acc_lst.append(
        [corrctLTPC / float(allLTPC), corrctTPC / float(allTPC)])

    # auc roc
    true_class = np.array(test_label_set)  # true_class: the ground-truth labels of the data
    pred_scores = np.array([a[0] for a in result1])  # the classifier's predicted scores
    fpr, tpr, thresholds = metrics.roc_curve(true_class,
                                             pred_scores,
                                             pos_label=0)  # bcc
    AUC = auc(fpr, tpr)
    # tpr fpr (Youden's J statistic: find the threshold maximizing tpr - fpr)
    yuedeng = []
    for i in range(len(fpr)):
        yuedeng.append(tpr[i] - fpr[i])
    yuedeng_index = yuedeng.index(max(yuedeng))
    # print 'the best TPR FPR in subset-%d'%testIndex, tpr[yuedeng_index], fpr[yuedeng_index]

    auc_lst.append(AUC)
    trueAllLst += test_label_set
    scoreAllLst += [a[0] for a in result1]

    true_class = np.array(test_label_set)  # true_class: the ground-truth labels of the data
    pred_scores = np.array([a[1] for a in result1])  # the classifier's predicted scores
Example No. 36
    r'D:\Users\zcguo\PycharmProjects\credit_score\data\test.csv')

test_X = test_data.iloc[:, 2:]
test_y = test_data.iloc[:, 1]

test_X = trans_woe(test_X, x1_name, x1_woe, x1_cut)
test_X = trans_woe(test_X, x2_name, x2_woe, x2_cut)
test_X = trans_woe(test_X, x3_name, x3_woe, x3_cut)
test_X = trans_woe(test_X, x7_name, x7_woe, x7_cut)
test_X = trans_woe(test_X, x9_name, x9_woe, x9_cut)

test_X = test_X.iloc[:, -5:]

# gbdt model roc
X3 = sm.add_constant(test_X)
resuG = gbm.predict(X3)
recall1 = metrics.recall_score(test_y, resuG.round())
acc1 = metrics.accuracy_score(test_y, resuG.round())
print(recall1)
print(acc1)
fpr1, tpr1, threshold1 = metrics.roc_curve(test_y, resuG)
rocauc1 = metrics.auc(fpr1, tpr1)
plt.plot(fpr1, tpr1, 'b', label='AUC = %0.2f' % rocauc1)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('TPR')
plt.xlabel('FPR')
plt.show()
Example No. 37
def cal_auc_ks_iv(df, targets=[0, 1, 3, 7, 14, 30], text='', max_depth=2, plot=True, precision=3):
    '''
    Compute AUC, KS and IV values for each candidate variable
    and plot the corresponding ROC curves.
    '''
    ks = pd.DataFrame()
    ac = pd.DataFrame()
    iv = pd.DataFrame()

    dn = [f'{n}d' for n in targets]
    cols = set(df.columns) - set(dn)

    for n in targets:
        auc_value = []
        ks_value = []
        iv_value = []

        plt.figure(figsize=(6,4), dpi=100)
        for var in cols:
            y_true = df[df[var].notnull()][f'{n}d']
            y_pred = df[df[var].notnull()][var]

            # compute fpr, tpr and thresholds for each variable
            fpr, tpr, thr = roc_curve(y_true, y_pred, pos_label=1)

            # compute the AUC value (flip the score direction if AUC < 0.5)
            ac_single = auc(fpr, tpr)
            if ac_single < 0.5:
                fpr, tpr, thr = roc_curve(y_true, -y_pred, pos_label=1)
                ac_single = auc(fpr, tpr)
            auc_value.append(ac_single)

            # compute the K-S statistic
            ks_single = (tpr - fpr).max()
            ks_value.append(ks_single)

            # compute the IV value
            iv_single = cal_woe_iv(y_pred, y_true, max_depth=max_depth)[1]
            iv_value.append(iv_single)

            if plot:
                # ROC curve
                plt.plot(fpr, tpr, lw=1, label=f'{var}(auc=' + str(round(ac_single, precision)) + ')')

                # Labels
                plt.grid()
                plt.plot([0,1], [0,1], linestyle='--', color=(0.6, 0.6, 0.6))
                plt.plot([0, 0, 1], [0, 1, 1], lw=1, linestyle=':', color='black')
                plt.xlabel('false positive rate')
                plt.ylabel('true positive rate')
                plt.title(f'{text}ROC for {n}d')
                plt.legend(loc='best')

        auc_part = pd.DataFrame(auc_value, columns=[f'{n}d'], index=cols)
        ac = pd.concat([ac, auc_part], axis=1)

        ks_part  = pd.DataFrame(ks_value, columns=[f'{n}d'], index=cols)
        ks = pd.concat([ks, ks_part], axis=1)

        iv_part  = pd.DataFrame(iv_value, columns=[f'{n}d'], index=cols)
        iv = pd.concat([iv, iv_part], axis=1)

    iv = np.round(iv, precision)
    ac = np.round(ac, precision)
    ks = np.round(ks, precision)
    return ac, ks, iv
Example No. 38
min_samples_leaf=1, ## minimum number of samples required at a leaf node; a float means a fraction
max_features=None, ## number of features to consider for the best split: 'auto'/None = all, 'sqrt', 'log2', int = exact count, float = fraction
max_leaf_nodes=None, ## maximum number of leaf nodes; None means unlimited
min_impurity_decrease=1e-7, ## threshold below which a node is not split further
verbose=0,  ## verbosity; >1 prints the progress and performance of every tree
warm_start=False, ## True = keep the previous fit and train incrementally; False (default) = discard and retrain from scratch
random_state=0  ## random seed, for reproducibility
)## for multi-class problems a random forest is recommended

model.fit(X_train,y_train)
y_pred=model.predict(X_test)


grd_enc = OneHotEncoder()
grd_lm = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)
grd_enc.fit(model.apply(X_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(model.apply(X_test)[:, :, 0]), y_train_lr)
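# Note: in the usual GBDT + LR recipe the logistic regression is fit on the
# leaf encodings of a held-out *training* split (the one y_train_lr comes
# from), not on X_test as above; X_test is then used only for evaluation.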

y_pred_grd_lm = grd_lm.predict_proba(
    grd_enc.transform(model.apply(X_test)[:, :, 0]))[:, 1]
fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred_grd_lm)



# submission["Survived"]=y_pred
# submission.to_csv(pre_path+'GBDT_LR/GradientBoostingClassifier.csv',index=None)
# accuracy_score=metrics.accuracy_score(y_pred,y_test)
# print("GradientBoostingClassifier : ",accuracy_score)

print(submission.head(100))
Example No. 39
    if m_el_isprompt == 0:
        histo_tmva_bkg.Fill(bdtOutput)
    elif m_el_isprompt == 1:
        histo_tmva_sig.Fill(bdtOutput)
    else:
        print "Warning: m_mu_isprompt is not 0 or 1!!!"

file.Close()

X_test = np.array(_X_test)
y_test = np.array(_y_test)

# sklearn tpr and tpr
sk_y_predicted = bdt.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, sk_y_predicted)

sig_eff = array.array('f', [rate for rate in tpr])
bkg_rej = array.array('f', [(1 - rate) for rate in fpr])

# roc_curve_sk() - skTMVA version of roc_curve
from mva_tools.build_roc_simple import roc_curve_sk
fpr_comp, tpr_comp, _ = roc_curve_sk(y_test, sk_y_predicted)

sig_eff_comp = array.array('f', [rate for rate in tpr_comp])
bkg_rej_comp = array.array('f', [(1 - rate) for rate in fpr_comp])

# Stack for keeping plots
plots = []

# Getting ROC-curve for skTMVA
Example No. 40
def train_5_cross(df_pre, X,y, X_test_v1,y_test_v1, thresholds=0.45, id_1='id', csv_name=0):
    """
    功能: 五折训练并输出名单
    why: 5折一般是效果比较稳定的,用于线下做的。
    X: 训练数据X(无标签/df型)
    y: 训练数据y(标签/df型)
    X_test_v1: 预测数据X(无标签/df型)
    y_test_v1: 预测数据y(无标签/df型)
    thresholds: 阈值选择,默认0.45高精确率
    csv_name: 保存csv的名称,默认不保存
    returen:
        客户名单及情况
    """
    vali_auc_num=0  # validation-set AUC
    vali_recall_num=0  # validation-set recall
    vali_precision_num=0  # validation-set precision
    test_auc_num=0  # prediction-set AUC
    test_recall_num=0  # prediction-set recall
    test_precision_num=0  # prediction-set precision
    y_pred_input = np.zeros(len(X_test_v1))  # zero array of the matching size
    print("============= start training ================")
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=1234)  # stratified sampling, n_splits = number of folds
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(X, y)):
        print("第 {} 次训练...".format(fold_+1))
        train_x, trai_y = X.loc[trn_idx], y.loc[trn_idx]
        vali_x, vali_y = X.loc[val_idx], y.loc[val_idx]
        
        # the LightGBM model below uses already-tuned parameters
        clf = lgb.LGBMClassifier(max_depth=20, min_data_in_bin=5, max_bin=200,
                                min_child_samples=90, num_leaves=20, n_estimators=20000,
                                objective='binary', boosting_type='gbdt', learning_rate=0.02,
                                lambda_l2=5)
        clf.fit(train_x, trai_y, eval_set=[(train_x, trai_y), (vali_x, vali_y)], verbose=0,
               early_stopping_rounds=100, eval_metric='f1')
        
        # see the LightGBM documentation on GitHub for an explanation of these parameters
        
        # =============== validation-set AUC ===================
        y_prb = clf.predict_proba(vali_x)[:,1]  # predicted probabilities
        # tpr: fraction of actual positives correctly classified as positive;
        # fpr: fraction of actual negatives wrongly classified as positive; thres: the thresholds
        fpr, tpr, thres = roc_curve(vali_y, y_prb)
        vali_roc_auc = auc(fpr, tpr)  # validation-set AUC for this fold
        vali_auc_num += vali_roc_auc  # add this fold's AUC to the running total
        print("vali auc = {0:.4}".format(vali_roc_auc))  # this fold's AUC
        # =============== prediction-set AUC ===================
        y_prb_test = clf.predict_proba(X_test_v1)[:,1]  # predicted probabilities
        fpr, tpr, thres = roc_curve(y_test_v1, y_prb_test)
        test_roc_auc = auc(fpr, tpr)
        test_auc_num += test_roc_auc
        print("test auc = {0:.4}".format(test_roc_auc))
        
        # =============== validation-set metrics ===================
        y_pre_proba = clf.predict_proba(vali_x.values)
        y_predictions = y_pre_proba[:, 1]>thresholds  # True where the probability exceeds the threshold
        cnf_matrix = confusion_matrix(vali_y, y_predictions)  # confusion matrix
        np.set_printoptions(precision=2)  # print with two decimals
        vali_recall = '{0:.3f}'.format(cnf_matrix[1,1]/(cnf_matrix[1,0]+cnf_matrix[1,1]))  # recall
        vali_precision = '{0:.3f}'.format(cnf_matrix[1,1]/(cnf_matrix[0,1]+cnf_matrix[1,1]))  # precision
        print("vali_metric: ", vali_recall, vali_precision)
        vali_recall_num += float(vali_recall)  # add this fold's recall to the running total
        vali_precision_num += float(vali_precision)  # add this fold's precision to the running total
        # =============== prediction-set metrics ===================
        y_pre_proba_test = clf.predict_proba(X_test_v1.values)
        y_predictions_test = y_pre_proba_test[:, 1]>thresholds  # True where the probability exceeds the threshold
        cnf_matrix_test = confusion_matrix(y_test_v1, y_predictions_test)  # confusion matrix
        np.set_printoptions(precision=2)  # print with two decimals
        test_recall = '{0:.3f}'.format(cnf_matrix_test[1,1]/(cnf_matrix_test[1,0]+cnf_matrix_test[1,1]))  # recall
        test_precision = '{0:.3f}'.format(cnf_matrix_test[1,1]/(cnf_matrix_test[0,1]+cnf_matrix_test[1,1]))  # precision
        print("test_metric: ", test_recall, test_precision)
        test_recall_num += float(test_recall)  # add this fold's recall to the running total
        test_precision_num += float(test_precision)  # add this fold's precision to the running total
        y_pred_input += y_pre_proba_test[:, 1]  # accumulate this fold's predicted probabilities
        
    print("5折泛化,验证集AUC:{0:.3f}".format(vali_auc_num/5))  # 前面是做了5次相加,所以这次要除以5
    print("5折泛化,预测集AUC:{0:.3f}".format(test_auc_num/5))
    
    print("5折泛化,验证集recall:{0:.3f}".format(vali_recall_num/5))
    print("5折泛化,验证集precision:{0:.3f}".format(vali_recall_num/5))
    
    print("5折泛化,预测集recall:{0:.3f}".format(test_recall_num/5))
    print("5折泛化,预测集precision:{0:.3f}".format(test_recall_num/5))
    
    print("================开始输出名单==================")
    y_pred_input_end = y_pred_input / 5  # 前面是做了5次相加,所以这次要除以5
    y_pred_input_precision = y_pred_input_end > thresholds  # 获取高精确率的标签
    submission = pd.DataFrame({"id": df_pre[id_1],
                              "概率": y_pred_input_end,
                              "高精确": y_pred_input_precision})
    if csv_name != 0:
        submission.to_csv("%s预测名单.csv" % csv_name, index=False)  # 保存
    print("================输出名单名单==================")
    print(submission.head(5))
def simple_CV_evaluation(model, X, y, k, random_state):

    my_pred = []
    my_true = []

    auPRC_list = []
    auROC_list = []
    auROC_x_list = []
    auROC_y_list = []
    auPRC_x_list = []
    auPRC_y_list = []
    for train_index, test_index in random_sample_balance(y, random_state):
        # print ("train size: %s test size: %s"%(len(train_index),len(test_index)))
        # print ("train_index:",np.random.choice(train_index,3))
        # print ("test_index:",np.random.choice(test_index,3))
        # print ("total",len(train_index)+len(test_index))
        X_train, X_test = X.loc[train_index], X.loc[test_index]
        y_train, y_test = y.loc[train_index], y.loc[test_index]

        if "RF" in k:
            current_model = dp(model)
            current_model.fit(X_train, y_train)
            pred_y = current_model.predict_proba(X_test)
            pred_y = [x[1] for x in pred_y]
        elif k == "LS-GKM":

            pos_train_file, neg_train_file, pos_test_file, neg_test_file, addon_string = get_fasta_file(
                X_train, y_train, X_test, y_test)

            pred_y = gkm_SVM_fit_transform(pos_train_file, neg_train_file,
                                           pos_test_file, neg_test_file)
            pred_y = pred_y.loc[y_test.index.tolist()]['pred'].tolist()
            os.system("rm %s*" % (addon_string))

        else:

            pred_y = X_test.tolist()

        y_test = y_test.tolist()
        my_pred += pred_y
        my_true += y_test
        try:
            auROC = roc_auc_score(y_test, pred_y)
            auPRC = average_precision_score(y_test, pred_y)
            # print ("model %s auPRC: %s. auROC: %s"%(k,auPRC,auROC))
            auPRC_list.append(auPRC)
            auROC_list.append(auROC)
            x_predict, y_predict, _ = roc_curve(y_test, pred_y)
            x_predict, y_predict = piecewise_roc(x_predict, y_predict)

            auROC_x_list += list(x_predict)
            auROC_y_list += list(y_predict)
            y_predict, x_predict, _ = precision_recall_curve(y_test, pred_y)
            x_predict, y_predict = piecewise_prc(x_predict, y_predict)
            auPRC_x_list += list(x_predict)
            auPRC_y_list += list(y_predict)
        except Exception:
            # y_test has been converted to a plain list above, so count with list.count
            print("y_test pos :", y_test.count(1))
            print("y_test neg :", y_test.count(0))
            print("y_train pos :", y_train[y_train == 1].shape)
            print("y_train neg :", y_train[y_train == 0].shape)

            pass

    df = pd.DataFrame()

    df['true'] = my_true
    df['pred'] = my_pred
    df['label'] = k
    return df, auROC_list, auPRC_list, [auROC_x_list, auROC_y_list
                                        ], [auPRC_x_list, auPRC_y_list]
Exemplo n.º 42
0
def metrics(X, Y, A, B, N):
    incorrect = 0
    true_pos = 0
    false_pos = 0
    true_neg = 0
    false_neg = 0

    y_true = []
    y_pred = []

    i = 0
    for x in X:
        prediction = np.argmax(stable_softmax(x, A, B))
        true_label = np.argmax(Y[i])

        y_true.append(true_label)
        y_pred.append(prediction)

        if prediction != true_label:
            incorrect += 1

        if prediction == 1 and true_label == 1:
            true_pos += 1

        if prediction == 1 and true_label == 0:
            false_pos += 1

        if prediction == 0 and true_label == 0:
            true_neg += 1

        if prediction == 0 and true_label == 1:
            false_neg += 1

        i += 1

    print("confusion matrix: ")
    print("[ ", true_neg, false_pos, " ]")
    print("[ ", false_neg, true_pos, " ]")

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Compute fpr, tpr, thresholds and roc auc
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)

    print("AUC score: ", roc_auc)

    if true_pos == 0 and false_pos == 0:
        print("WARNING::True pos and False pos both zero")
        # no positive predictions: define precision, recall, and F1 as 0
        # (dividing by a tiny constant here would still make the F1 formula divide by zero)
        precision = 0.0
        recall = 0.0
        F1 = 0.0
        classification_error = incorrect / N
    else:
        precision = true_pos / (true_pos + false_pos)  # positive predictive value
        recall = true_pos / (true_pos + false_neg)  # true positive rate (sensitivity)
        F1 = 2 * ((precision * recall) / (precision + recall)) if (precision + recall) > 0 else 0.0
        classification_error = incorrect / N

    print()

    return classification_error, precision, recall, F1, roc_auc, fpr, tpr
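Note that roc_curve above is given the argmax'd class labels, so the resulting curve has only one interior point. A hedged sketch of the probability-based alternative, assuming a binary problem and that stable_softmax(x, A, B) returns the class-probability vector (as its use with np.argmax above suggests); X, Y, A, B are the same inputs passed to metrics():

# score each sample by its softmax probability for the positive class (class 1)
y_score = [stable_softmax(x, A, B)[1] for x in X]
y_true_bin = [np.argmax(label) for label in Y]

fpr, tpr, thresholds = roc_curve(y_true_bin, y_score)
print("AUC from probability scores: ", auc(fpr, tpr))
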
Exemplo n.º 43
0
for bestK in lbestK:

    # call([mathematica, '-script', str(predictPhenotype), str(dirShared), str(bestK)])

    fphenotypes = matchNamePattern(patternPhenotypes)
    dfphenotypes = pd.read_csv(fphenotypes, skiprows=1, header=None)
    dfpredicted = pd.read_csv(fpredicted, header=None)

    roc_auc = metrics.roc_auc_score(dfphenotypes, dfpredicted)

    fpr, tpr, thresholds = metrics.roc_curve(dfphenotypes, dfpredicted)

    # Plot ROC curve
    plt.plot(fpr, tpr, label='ROC curve (area = %0.3f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')  # random predictions curve
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate or (1 - Specificity)')
    plt.ylabel('True Positive Rate or (Sensitivity)')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    print("k: ", bestK) 
    print("auc =", roc_auc)
         label=u'Predicted value, $R^2$=%.3f' % lr2.score(X1_train, Y1_train))
plt.legend(loc='upper left')
plt.xlabel(u'Sample index', fontsize=18)
plt.ylabel(u'Wine quality', fontsize=18)
plt.title(u'Wine quality prediction (dimensionality reduced)', fontsize=20)
plt.show()

### Check the results from the AUC perspective ===> they look good
from sklearn.preprocessing import label_binarize
from sklearn import metrics

y_test_hot = label_binarize(Y_test, classes=(3, 4, 5, 6, 7, 8, 9)).ravel()

### Model on the original data
## get the decision scores for the predictions
lr_y_score = lr.decision_function(X_test).ravel()
## compute the ROC curve
lr_fpr, lr_tpr, lr_thresholds = metrics.roc_curve(y_test_hot, lr_y_score)
## compute the AUC
lr_auc = metrics.auc(lr_fpr, lr_tpr)

## Model on the dimensionality-reduced data
lr2_y_score = lr2.decision_function(X1_test).ravel()
## compute the ROC curve
lr2_fpr, lr2_tpr, lr2_thresholds = metrics.roc_curve(y_test_hot, lr2_y_score)
## compute the AUC
lr2_auc = metrics.auc(lr2_fpr, lr2_tpr)

print("AUC on the original data:", lr_auc)
print("AUC on the reduced data:", lr2_auc)
Exemplo n.º 45
0
# plt.show()
# np.argmax(): returns the index of the first occurrence of the maximum value
# threshold_90_precision = thresholds[np.argmax(precisions >= 0.90)]
# make predictions on the training set
# y_train_pred_90 = (y_scores >= threshold_90_precision)

# 0.9000345901072293
# 0.4799852425751706
# print(precision_score(y_train_5, y_train_pred_90))
# print(recall_score(y_train_5, y_train_pred_90))

# ROC curve

# fpr: false positive rate / tpr: true positive rate

fpr, tpr, thresholds = roc_curve(y_train_5, y_scores)


def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')


# ROC curve: plots sensitivity (recall) against 1 - specificity
# plot_roc_curve(fpr, tpr)
# plt.show()
# 0.9604938554008616
# print(roc_auc_score(y_train_5, y_scores))

forest_clf = RandomForestClassifier(random_state=42)
y_probas_forest = cross_val_predict(forest_clf,
#%%
#X_train : original features
prediction_tn, non_need = ml_algorithms(X_train, y_train, X_test, y_test)
print(y_test.value_counts() / y_test.shape[0])

#data: embeddings

#ml_algorithms(data,y_train,data_test,y_test)
#X_train2: concatenated = original + embeddings
prediction_tp, prediction_tpol_prob = ml_algorithms(X_train2, y_train, X_test2,
                                                    y_test)
#%%
from sklearn.metrics import roc_curve, auc

fpr_pol, tpr_pol, _ = roc_curve((y_test == True).apply(int),
                                prediction_tpol_prob[:, 1])

fpr, tpr, _ = roc_curve((y_test == True).apply(int), prediction_tp[:, 1])
fprn, tprn, _ = roc_curve((y_test == True).apply(int), prediction_tn[:, 1])

print('AUC for Node2Vec Logistic + Poly features + Normal Features : ',
      auc(fpr_pol, tpr_pol))
print('AUC for Node2Vec Logistic + Linear Features + Normal Features : ',
      auc(fpr, tpr))
print('AUC for Normal Features Logistic : ',
      auc(fprn, tprn))

plt.plot(fpr_pol,
         tpr_pol,
         'g',
         label='Node2Vec Logistic + Poly features + Normal Features')
Exemplo n.º 47
0
    def evaluate(self):
        print("Evaluating")
        na = ' '

        for task in self._tasks:
            print(task)
            for modelIndex, currModel in enumerate(self.model[task]):
                if not os.path.isfile(
                        self._calculateFileEvaluation(
                            task, modelIndex)) or not os.path.isfile(
                                self._calculateFilePrediction(
                                    task, modelIndex)) or not os.path.isfile(
                                        self._calculateFileTable(
                                            task, modelIndex)):
                    table = [[
                        "task", "average", "MAPs", "MAPc", "accur.", "kappa",
                        "prec.", "recall", "f1score"
                    ]]
                    table.append([" ", " ", " ", " ", " ", " ", " ", " "])
                    prediction = {}
                    yp = currModel.predict_proba(self.XTest[task])
                    yt = self.yTest[task]
                    prediction['yp'] = yp
                    prediction['yt'] = yt

                    ytn = self.lb[task].inverse_transform(yt)
                    yc = np.zeros(yt.shape, int)  # np.int was removed in recent NumPy versions
                    for i, p in enumerate(yp):
                        yc[i][np.argmax(p)] = 1
                    ycn = self.lb[task].inverse_transform(yc)

                    metrics = {}
                    metrics['MAPs'] = MAPScorer().samplesScore(yt, yp)
                    metrics['MAPc'] = MAPScorer().classesScore(yt, yp)
                    metrics['accuracy'] = accuracy_score(yt, yc)
                    metrics['kappa'] = cohen_kappa_score(ytn, ycn)

                    metrics['precision'] = {}
                    metrics['recall'] = {}
                    metrics['f1score'] = {}

                    table.append([
                        task, na, "{:.3f}".format(metrics['MAPs']),
                        "{:.3f}".format(metrics['MAPc']),
                        "{:.3f}".format(metrics['accuracy']),
                        "{:.3f}".format(metrics['kappa']), na, na, na
                    ])
                    for avg in ['micro', 'macro', 'weighted']:
                        metrics['precision'][avg], metrics['recall'][
                            avg], metrics['f1score'][
                                avg], _ = precision_recall_fscore_support(
                                    yt, yc, average=avg)
                        table.append([
                            task, avg, na, na, na, na,
                            "{:.3f}".format(metrics['precision'][avg]),
                            "{:.3f}".format(metrics['recall'][avg]),
                            "{:.3f}".format(metrics['f1score'][avg])
                        ])

                    metrics['pr-curve'] = {}
                    metrics['pr-curve']['x'], metrics['pr-curve'][
                        'y'], metrics['pr-curve'][
                            'auc'] = self._calculateMicroMacroCurve(
                                lambda y, s: (lambda t: (t[1], t[0]))
                                (precision_recall_curve(y, s)), yt, yp)

                    metrics['roc-curve'] = {}
                    metrics['roc-curve']['x'], metrics['roc-curve'][
                        'y'], metrics['roc-curve'][
                            'auc'] = self._calculateMicroMacroCurve(
                                lambda y, s: (lambda t: (t[0], t[1]))
                                (roc_curve(y, s)), yt, yp)

                    pickle.dump(
                        metrics,
                        open(self._calculateFileEvaluation(task, modelIndex),
                             "wb"))
                    pickle.dump(
                        prediction,
                        open(self._calculateFilePrediction(task, modelIndex),
                             "wb"))
                    tableString = tabulate(table)
                    print(tableString)
                    with open(self._calculateFileTable(task, modelIndex),
                              "w") as fid:
                        fid.write(tableString + "\n")
Exemplo n.º 48
0
tree_small = rf.estimators_[5]
export_graphviz(tree_small,
                out_file='rfsampletree.dot',
                feature_names=feature_list,
                rounded=True,
                precision=1)
(graph, ) = pydot.graph_from_dot_file('rfsampletree.dot')
graph.write_png('rfsampletree.png')

# Making ROC curve
from sklearn.metrics import roc_curve, auc

Y_score = rf.predict_proba(test_features)[:, 1]
fpr, tpr, _ = roc_curve(test_labels, Y_score)
roc_auc = auc(fpr, tpr)

# make the plot
plt.figure(figsize=(10, 10))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([-0.05, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
plt.plot(fpr, tpr, label='AUC = {0}'.format(roc_auc))
plt.legend(loc="lower right", shadow=True, fancybox=True)
plt.show()
Exemplo n.º 49
0
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(x_train,
                    y_train,
                    batch_size=40,
                    epochs=epochs,
                    validation_split=0.25,
                    verbose=1,
                    callbacks=[tensorboard])

# Prediction and ROC/ AUC curve plotting
y_pred = model.predict(x_test)
fpr_keras, tpr_keras, thresholds_keras = roc_curve(np.ravel(y_test),
                                                   np.ravel(y_pred))
auc_keras = auc(fpr_keras, tpr_keras)

plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_keras, tpr_keras, label='Keras (area = {:.3f})'.format(auc_keras))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()

test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=batch_size)

model.save("CNN.h5")
Exemplo n.º 50
0
#threshold curve
fig = plt.figure(figsize=(10, 7))
fig.subplots_adjust(hspace=0.35)
fig.add_subplot(221)
plt.plot(thresholds, recalls, 'r-', label='recall')
plt.plot(thresholds, precisions, 'b-', label='precision')
plt.xlabel("z (score)")
plt.legend()

#PR-curve
ppred = cross_val_predict(pl, X, y, cv=5, method="predict_proba")[:, 1]
precisions, recalls, thresholds = precision_recall_curve(y, ppred)
precisions, recalls = (a[:-1] for a in (precisions, recalls))

fig.add_subplot(222)
plt.plot(precisions, recalls, label="PR-curve")
plt.xlabel("precision")
plt.ylabel("recall")
plt.legend()

#ROC-curve
FPR, TPR, thresholds = roc_curve(y, ppred)
AUC = roc_auc_score(y, ppred)
fig.add_subplot(223)
plt.plot(FPR, TPR, label="ROC-curve")
plt.xlabel("False Positives Rate")
plt.ylabel("True Positives Rate")
plt.text(0.2, 0.7, f"AUC = {AUC.round(2)}")
plt.plot([0, 1], [0, 1], '--', color='gray')
plt.legend()
Exemplo n.º 51
0
def mc_cv(model, xFeat, y, testSize, s):
    """
    Evaluate the model using s samples from the
    Monte Carlo cross validation approach where
    for each sample you split xFeat into
    random train and test based on the testSize.
    Returns the model performance on the training and
    test datasets.

    Parameters
    ----------
    model : sktree.DecisionTreeClassifier
        Decision tree model
    xFeat : nd-array with shape n x d
        Features of the dataset 
    y : 1-array with shape n x 1
        Labels of the dataset
    testSize : float
        Portion of the dataset to serve as a holdout.
    s : int
        Number of Monte Carlo samples (random train/test splits).

    Returns
    -------
    trainAuc : float
        Average AUC of the model on the training dataset
    testAuc : float
        Average AUC of the model on the validation dataset
    timeElapsed: float
        Time it took to run this function
    """
    trainAuc = 0
    testAuc = 0
    timeElapsed = 0
    # TODO FILL IN

    timeElapsed = time.time()
    xFeat = np.asarray(xFeat)
    y = np.asarray(y)

    # Repeats the same process but uses the random shuffle
    ss = ShuffleSplit(n_splits=s, test_size=testSize, random_state=0)

    for train_index, test_index in ss.split(xFeat):
        xTrain, xTest = xFeat[train_index], xFeat[test_index]
        yTrain, yTest = y[train_index], y[test_index]

        trainModel = model.fit(xTrain, yTrain)

        predictTrain = trainModel.predict_proba(xTrain)
        predictTrain = predictTrain[:, 1]

        fpr1, tpr1, thresholds = metrics.roc_curve(yTrain, predictTrain)
        trainAuc += metrics.auc(fpr1, tpr1)

        predictTest = trainModel.predict_proba(xTest)

        fpr1, tpr1, thresholds = metrics.roc_curve(yTest, predictTest[:, 1])
        testAuc += metrics.auc(fpr1, tpr1)

    trainAuc /= ss.get_n_splits(xFeat)
    testAuc /= ss.get_n_splits(xFeat)

    timeElapsed = time.time() - timeElapsed

    return trainAuc, testAuc, timeElapsed
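A minimal usage sketch for mc_cv; the classifier tree_clf and the data xFeat, y below are assumed placeholders, not part of the original script:

from sklearn.tree import DecisionTreeClassifier

tree_clf = DecisionTreeClassifier(max_depth=5)
# 10 Monte Carlo samples, holding out 30% of the rows each time
trainAuc, testAuc, elapsed = mc_cv(tree_clf, xFeat, y, testSize=0.3, s=10)
print("MC-CV train AUC: %.3f, test AUC: %.3f (%.2fs)" % (trainAuc, testAuc, elapsed))
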
Exemplo n.º 52
0
def kfold_cv(model, xFeat, y, k):
    """
    Split xFeat into k different groups, and then use each of the
    k-folds as a validation set, with the model fitting on the remaining
    k-1 folds. Return the model performance on the training and
    validation (test) set. 


    Parameters
    ----------
    model : sktree.DecisionTreeClassifier
        Decision tree model
    xFeat : nd-array with shape n x d
        Features of the dataset 
    y : 1-array with shape n x 1
        Labels of the dataset
    k : int
        Number of folds or groups (approximately equal size)

    Returns
    -------
    trainAuc : float
        Average AUC of the model on the training dataset
    testAuc : float
        Average AUC of the model on the validation dataset
    timeElapsed: float
        Time it took to run this function
    """
    trainAuc = 0
    testAuc = 0
    timeElapsed = 0
    # TODO FILL IN
    timeElapsed = time.time()
    xFeat = np.asarray(xFeat)
    y = np.asarray(y)

    kf = KFold(n_splits=k)
    kf.get_n_splits(xFeat)

    # Loop through all splits and repeat the same process as above
    for train_index, test_index in kf.split(xFeat):
        xTrain, xTest = xFeat[train_index], xFeat[test_index]
        yTrain, yTest = y[train_index], y[test_index]

        trainModel = model.fit(xTrain, yTrain)

        predictTrain = trainModel.predict_proba(xTrain)
        predictTrain = predictTrain[:, 1]

        fpr1, tpr1, thresholds = metrics.roc_curve(yTrain, predictTrain)
        trainAuc += metrics.auc(fpr1, tpr1)

        predictTest = trainModel.predict_proba(xTest)
        predictTest = predictTest[:, 1]

        fpr1, tpr1, thresholds = metrics.roc_curve(yTest, predictTest)
        testAuc += metrics.auc(fpr1, tpr1)

    trainAuc /= kf.get_n_splits(xFeat)
    testAuc /= kf.get_n_splits(xFeat)

    timeElapsed = time.time() - timeElapsed

    return trainAuc, testAuc, timeElapsed
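kfold_cv returns the same (trainAuc, testAuc, timeElapsed) triple, so the two validation schemes can be compared directly; a sketch under the same placeholder assumptions (tree_clf, xFeat, y) as the mc_cv example above:

trainAuc_kf, testAuc_kf, elapsed_kf = kfold_cv(tree_clf, xFeat, y, k=5)
print("5-fold train AUC: %.3f, test AUC: %.3f (%.2fs)" % (trainAuc_kf, testAuc_kf, elapsed_kf))
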
dt_t = dt.iloc[:, [0]]
dt_p = dt.iloc[:, [1]]

knn_t = knn.iloc[:, [0]]
knn_p = knn.iloc[:, [1]]

lr_t = lr.iloc[:, [0]]
lr_p = lr.iloc[:, [1]]

rf_t = rf.iloc[:, [0]]
rf_p = rf.iloc[:, [1]]

import sklearn.metrics as metrics
# calculate the fpr and tpr for all thresholds of the classification
fpr1, tpr1, threshold1 = metrics.roc_curve(ann_t, ann_p)
roc_auc1 = metrics.auc(fpr1, tpr1)
fpr2, tpr2, threshold2 = metrics.roc_curve(dt_t, dt_p)
roc_auc2 = metrics.auc(fpr2, tpr2)
fpr3, tpr3, threshold3 = metrics.roc_curve(knn_t, knn_p)
roc_auc3 = metrics.auc(fpr3, tpr3)
fpr4, tpr4, threshold4 = metrics.roc_curve(lr_t, lr_p)
roc_auc4 = metrics.auc(fpr4, tpr4)
fpr5, tpr5, threshold5 = metrics.roc_curve(rf_t, rf_p)
roc_auc5 = metrics.auc(fpr5, tpr5)

# method I: plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr1, tpr1, 'r', label='ANN(AUC = %0.2f)' % roc_auc1)
plt.plot(fpr2, tpr2, 'g', label='DT(AUC = %0.2f)' % roc_auc2)
plt.plot(fpr3, tpr3, 'y', label='KNN(AUC = %0.2f)' % roc_auc3)
def avaliacao_PerformanceC(df_train_class, predicted_train,
                           predicted_prob_train, df_test_class, predicted_test,
                           predicted_prob_test, roc_y_n):
    ### Confusion Matrix
    confusion_matrix_train = confusion_matrix(df_train_class, predicted_train)
    confusion_matrix_test = confusion_matrix(df_test_class, predicted_test)
    print("\nTraining Confusion Matrix:\n ", confusion_matrix_train)
    print("\nTesting Confusion Matrix:\n ", confusion_matrix_test)

    ### Accuracy score
    score_train = accuracy_score(df_train_class, predicted_train)
    score_test = accuracy_score(df_test_class, predicted_test)
    print("\nTraining Accuracy Score: ", score_train)
    print("\nTesting Accuracy Score: ", score_test)

    ### Precision, Recall
    precision_train = precision_score(df_train_class, predicted_train)
    precision_test = precision_score(df_test_class, predicted_test)
    print("\nTraining Precision: ", precision_train)
    print("\nTesting Precision: ", precision_test)

    recall_train = recall_score(df_train_class, predicted_train)
    recall_test = recall_score(df_test_class, predicted_test)
    print("\nTraining Recall: ", recall_train)
    print("\nTesting Recall: ", recall_test)

    ### Classification Report
    print("\nTrain Classification Report: \n",
          classification_report(df_train_class, predicted_train))
    print("\nTest Classification Report: \n",
          classification_report(df_test_class, predicted_test))

    ### F1 Score
    f1score_train = f1_score(df_train_class,
                             predicted_train)  #, average='weighted')
    f1score_test = f1_score(df_test_class,
                            predicted_test)  #, average='weighted')
    print("\nTraining F1score: ", f1score_train)
    print("\nTesting F1score: ", f1score_test)

    f1score_train = f1_score(df_train_class,
                             predicted_train,
                             average='weighted')
    f1score_test = f1_score(df_test_class, predicted_test, average='weighted')
    print("\nTraining Weigted F1score: ", f1score_train)
    print("\nTesting Weighted F1score: ", f1score_test)

    ### ROC-AUC
    if roc_y_n == 'y':
        fpr, tpr, threshold = roc_curve(df_train_class,
                                        predicted_prob_train[:, 1])
        roc_auc_train = auc(fpr, tpr)
        print("\nTraining AUC for ROC: ", roc_auc_train)
        plt.figure()
        plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc_train)
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.legend(loc='lower right')
        plt.title('Training - Receiver Operating Characteristic')

        fpr, tpr, threshold = roc_curve(df_test_class, predicted_prob_test[:, 1])
        roc_auc_test = auc(fpr, tpr)
        print("\nTesting AUC for ROC: ", roc_auc_test)
        plt.figure()
        plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc_test)
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.legend(loc='lower right')
        plt.title('Testing - Receiver Operating Characteristic')
data_x['EMERGENCY'] = lbl.fit_transform(data_x['EMERGENCY'].astype(str))  # encode the column that was flagged for a bad data type
data_y = data.iloc[:,[0]]

# Prepare a train/test split to build the model.
x_train, x_test, y_train, y_test = train_test_split(data_x,
                                                    data_y, 
                                                    test_size=0.2, 
                                                    random_state=52,
                                                    )

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

feature_ami = ['gender','admission','ICD9_CODE','Age','GCS_min','Urine_max','Urine_min','Urine_mean','PaO2_mean','Abnormal_HR_P','WBC_max','WBC_min','Tep_max','Tep_min','Tep_range','Tep_var','USBP_max','USBP_min','USBP_range','USBP_var','HR_range','HR_max','HR_min','HR_var','Bil_max','Bil_min','K_max','K_min','Na_max','Na_min','urea_max','urea_min','SBL_max','SBL_min','SBP_max','SBP_min','SBP_range','SBP_var','creatinine']

gbm = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=5, max_depth=5, learning_rate=0.03, n_estimators=400,feature_fraction=0.9,min_data_in_leaf=4)
# gbm.fit(x_train, y_train, feature_name=feature_ami,categorical_feature=['gender','admission'])
gbm.fit(x_train, y_train, categorical_feature=[1,2])

y_pred_gbm = gbm.predict(x_test)


y_pred_gbm_pr = gbm.predict_proba(x_test)[:,1]
fpr_gbm,tpr_gbm,thresholds  = roc_curve(y_test,y_pred_gbm_pr)


# Evaluation metrics
print("AUC:", roc_auc_score(y_test, y_pred_gbm_pr))
print("Precision:", precision_score(y_test, y_pred_gbm))
print("Recall:", recall_score(y_test, y_pred_gbm))
print("Accuracy:", accuracy_score(y_test, y_pred_gbm))
print("F1 score:", f1_score(y_test, y_pred_gbm))
Exemplo n.º 56
0
File: SVM.py  Project: saizhou1/ML
tra_label = classifier.predict(train_data)  # predicted labels on the training set
tes_label = classifier.predict(test_data)  # predicted labels on the test set
print("Training set accuracy:", accuracy_score(train_label, tra_label))
print("Test set accuracy:", accuracy_score(test_label, tes_label))

matrix = confusion_matrix(train_label, tra_label, labels=[0, 1])
TP = matrix[1, 1]
TN = matrix[0, 0]
FP = matrix[0, 1]
FN = matrix[1, 0]
sn = TP / (TP + FN)
sp = TN / (TN + FP)

decision_score = classifier.predict_proba(test_data)
fprs, tprs, thresholds = roc_curve(test_label, decision_score[:, 1])

# plt.plot(fprs, tprs)
# plt.show()
roc_auc = auc(fprs, tprs)
plt.figure()
lw = 2
plt.plot(fprs,
         tprs,
         color='darkorange',
         lw=lw,
         label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
Exemplo n.º 57
0
    def sim_same_and_diff_category_samples(self,
                                           df,
                                           cat_index=1,
                                           dist_type='cosine',
                                           equal_var=False,
                                           plot_roc=True,
                                           precalc_dist=False,
                                           calc_roc=True):
        '''
      Calculate the similarity of samples from the same and different categories. The
      cat_index gives the index of the category, where 1 is the first category.
      '''

        cols = df.columns.tolist()

        if type(precalc_dist) == bool:
            # compute similarity (1 - distance) between columns (transpose so samples become rows)
            dist_arr = 1 - pdist(df.transpose(), metric=dist_type)
        else:
            dist_arr = precalc_dist

        # generate sample names with categories
        sample_combos = list(combinations(range(df.shape[1]), 2))

        sample_names = [
            str(ind) + '_same' if cols[x[0]][cat_index]
            == cols[x[1]][cat_index] else str(ind) + '_different'
            for ind, x in enumerate(sample_combos)
        ]

        ser_dist = pd.Series(data=dist_arr, index=sample_names)

        # find same-cat sample comparisons
        same_cat = [x for x in sample_names if x.split('_')[1] == 'same']

        # find diff-cat sample comparisons
        diff_cat = [x for x in sample_names if x.split('_')[1] == 'different']

        # make series of same and diff category sample comparisons
        ser_same = ser_dist[same_cat]
        ser_same.name = 'Same Category'
        ser_diff = ser_dist[diff_cat]
        ser_diff.name = 'Different Category'

        sim_dict = {}
        roc_data = {}
        sim_data = {}

        sim_dict['same'] = ser_same
        sim_dict['diff'] = ser_diff

        pval_dict = {}
        ttest_stat, pval_dict['ttest'] = ttest_ind(ser_diff,
                                                   ser_same,
                                                   equal_var=equal_var)

        ttest_stat, pval_dict['mannwhitney'] = mannwhitneyu(ser_diff, ser_same)

        if calc_roc:
            # calc AUC
            true_index = list(np.ones(sim_dict['same'].shape[0]))
            false_index = list(np.zeros(sim_dict['diff'].shape[0]))
            y_true = true_index + false_index

            # Series.get_values() was removed in newer pandas; use .values instead
            true_val = list(sim_dict['same'].values)
            false_val = list(sim_dict['diff'].values)
            y_score = true_val + false_val

            fpr, tpr, thresholds = roc_curve(y_true, y_score)

            inst_auc = auc(fpr, tpr)

            if plot_roc:
                plt.figure()
                plt.plot(fpr, tpr)
                plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
                plt.figure(figsize=(10, 10))

                print('AUC', inst_auc)

            roc_data['true'] = y_true
            roc_data['score'] = y_score
            roc_data['fpr'] = fpr
            roc_data['tpr'] = tpr
            roc_data['thresholds'] = thresholds
            roc_data['auc'] = inst_auc

        sim_data['sim_dict'] = sim_dict
        sim_data['pval_dict'] = pval_dict
        sim_data['roc_data'] = roc_data

        return sim_data
Exemplo n.º 58
0
def plot_roc_curve(y_test, y_pred, title=None, micro=False, macro=True, per_class=False):

    if y_test.ndim == 2:
        num_instances, num_classes = y_test.shape
    else:
        num_instances = y_test.shape[0]
        num_classes = 1
    if (num_classes != 2) and (y_test.ndim == 1):
        bi_y_test = label_binarize(y_test, classes=range(num_classes))
    else:
        bi_y_test = y_test
    
    fpr = {}
    tpr = {}
    roc_auc = {}
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(bi_y_test[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    
    fpr['micro'], tpr['micro'], _ = roc_curve(y_test.ravel(), y_pred.ravel())
    roc_auc['micro'] = auc(fpr['micro'], tpr['micro'])

    # Compute macro-average ROC curve and AUC
    # Aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(num_classes)]))
    # Interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(num_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    # Average and compute AUC
    mean_tpr /= num_classes

    fpr['macro'] = all_fpr
    tpr['macro'] = mean_tpr
    roc_auc['macro'] = auc(fpr['macro'], tpr['macro'])

    # Plot all ROC curves
    plt.figure(figsize=(10, 10))
    
    if per_class == True:
        for i in range(num_classes):
            plt.plot(fpr[i], tpr[i], alpha=0.2,
                     label='ROC curve of class {0} (area = {1:0.4f})'
                     ''.format(i+1, roc_auc[i]))
    if micro == True:
        plt.plot(fpr['micro'], tpr['micro'],
                 label='micro-average ROC curve (area = {0:0.4f})'
                       ''.format(roc_auc['micro']),
                 color='orangered', linestyle=':', linewidth=3)

    if macro == True:
        plt.plot(fpr['macro'], tpr['macro'],
                 label='macro-average ROC curve (area = {0:0.4f})'
                       ''.format(roc_auc['macro']),
                 color='navy', linestyle=':', linewidth=3)

    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xticks(fontsize=13)
    plt.yticks(fontsize=13)
    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)
    if type(title) == str:
        plt.title(title, fontsize=16)
    elif title != None:
        print('Title must be a string.')
        plt.title('ROC Curves', fontsize=16)
    else:
        plt.title('ROC Curves', fontsize=16)
    plt.legend(loc=4)
    plt.show()
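A hedged usage sketch for plot_roc_curve: it expects per-class scores alongside a binarized label matrix, so y_pred would typically come from predict_proba of a fitted multiclass classifier (fitted_model, X_test, and y_test_labels below are assumptions for illustration):

from sklearn.preprocessing import label_binarize

# class labels binarized to an (n_samples, n_classes) indicator matrix
y_test_bin = label_binarize(y_test_labels, classes=[0, 1, 2])
y_scores = fitted_model.predict_proba(X_test)  # same shape as y_test_bin
plot_roc_curve(y_test_bin, y_scores, title='Validation ROC curves',
               micro=True, macro=True, per_class=True)
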
Exemplo n.º 59
0
del Train_Final['Response']
Test_Response = test_numeric['Response']
del test_numeric['Response']
X = Train_Final
y = Train_Response
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.10,
                                                    random_state=42)
from sklearn.ensemble import RandomForestClassifier
clf1 = RandomForestClassifier(n_estimators=100)
clf1.fit(X_train, y_train)
Ans = pd.DataFrame(clf1.predict(X_test))
print(Ans)
clf1.score(X_test, y_test)
#clf1.score(test_numeric,Test_Response)
fpr, tpr, thresholds = metrics.roc_curve(y_test, Ans)
print(fpr)
print(tpr)
print(thresholds)
FPR1 = plt.plot(fpr, label="FPR")  #Blue
TPR1 = plt.plot(tpr, label="TPR")  #Green
#plt.legend(handles=[FPR1, TPR1],loc='best')
plt.show()
precision_score(y_test, Ans, average='macro')
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic Curve')
plt.legend(loc="lower right")
plt.show()
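Because Ans holds hard class predictions, the ROC curve above has only a single interior point; a sketch of the probability-based variant, reusing clf1, X_test, and y_test from this example:

# use the positive-class probability instead of the hard 0/1 prediction
y_score = clf1.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_score)
roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, label='RF (AUC = %0.3f)' % roc_auc)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()
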
Exemplo n.º 60
0
for run in range(n_runs):
    cur_results = np.load(path_to_results + 'results_{}_run.npz'.format(run))
    cur_data = np.load(path_to_data + 'train_test_data_{}.npz'.format(run))

    all_train_accuracy[run] = cur_results['train_accuracy']
    all_test_accuracy[run] = cur_results['test_accuracy']

    all_test_cm[run] = compute_confusion_matrix(
        np.argmax(cur_data['test_labels'], axis=1),
        np.argmax(cur_results['test_predicted_probs'], axis=1),
        normalise=True)

    # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(
        np.argmax(cur_data['test_labels'], axis=1),
        cur_results['test_predicted_probs'][:, 1])
    tprs.append(interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0
    all_test_auc[run] = auc(fpr, tpr)

mean_train_accuracy = np.mean(all_train_accuracy, axis=0)
mean_test_accuracy = np.mean(all_test_accuracy, axis=0)
mean_test_cm = np.mean(all_test_cm, axis=0)
mean_test_auc = np.mean(all_test_auc)

std_train_accuracy = np.std(all_train_accuracy, axis=0)
std_test_accuracy = np.std(all_test_accuracy, axis=0)
std_test_cm = np.std(all_test_cm, axis=0)
std_test_auc = np.std(all_test_auc)