Example #1
def LogisticGridSearch_OLD():  
    # C=1 is best
    cs = 10.0**np.arange(-1,2,0.25)   
    aucs = []
    for c in cs:
        clf = LogisticRegression(penalty='l1',C=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Logistic Regression ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = LogisticRegression(penalty='l1',C=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    roc_auc = auc(fpr,tpr)  # recompute AUC for the refit best-C model instead of reusing the last loop value
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best C = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Logistic Regression ROC Curve')
    myplt.show()
    return clf
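Note: the manual best-index loop used in these helpers is equivalent to np.argmax over the AUC list; a minimal sketch:

    # equivalent to the best-index loop above (ties resolve to the first, i.e. smallest, parameter)
    best = int(np.argmax(aucs))
    c = cs[best]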
Example #2
def MultinomialNaiveBayesGridSearch_OLD():  
    # grid search over the Naive Bayes smoothing parameter alpha
    cs = 10.0**np.arange(-9,2,0.5)
    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    roc_auc = auc(fpr,tpr)  # recompute AUC for the refit best-alpha model instead of reusing the last loop value
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
    myplt.show()
    return clf
Example #3
def GridSearch(data,params,classifier,classifier_name,paramname,probstype=1,clf_kwargs={}):
    f_train,f_test,y_train,y_test = data
    # evaluate each candidate parameter value by test-set AUC
    def getROC(clf,probstype):
        if probstype == 1:
            probs = clf.predict_proba(f_test)
            fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        else:
            probs = clf.decision_function(f_test)
            fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
        return fpr,tpr
    aucs = []
    mykwargs = clf_kwargs.copy()
    for c in params:
        mykwargs[paramname] = c
        clf = classifier(**mykwargs).fit(f_train, y_train)
        fpr,tpr = getROC(clf,probstype)
        roc_auc = auc(fpr,tpr)
        #cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search: '+classifier_name+' ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(params)):
        if aucs[i] > aucs[best]:
            best = i
    c = params[best]
    mykwargs[paramname] = c
    clf = classifier(**mykwargs).fit(f_train, y_train)
    fpr,tpr = getROC(clf,probstype)
    roc_auc = auc(fpr,tpr)  # recompute AUC for the refit best model instead of reusing the last loop value
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best '+paramname+' = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search: '+classifier_name+' ROC Curve')
    myplt.show()

    maxAUC = aucs[best]
    cs = params
    optC = params[best]
    
    plt.figure()
    maxauclabel = ("Max AUC = %0.2f, " % maxAUC)+paramname+(" = %s" % optC)
    plt.semilogx(cs,np.ones(len(cs))*maxAUC,'r',label=maxauclabel,linewidth=2,zorder=10)
    plt.semilogx(cs,aucs,zorder=1)
    plt.title('Grid Search: '+classifier_name+' AUC Scores')
    plt.xlabel(paramname)
    plt.ylabel('AUC Score')
    plt.legend(loc="lower right")
    #plt.legend(loc='lower left', bbox_to_anchor=(1, 0),
    #          ncol=1, fancybox=True, shadow=False)
    plt.show()
    
    return clf
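For reference, a usage sketch of the generic GridSearch helper that reproduces the two _OLD searches above; it assumes f_train, f_test, y_train, y_test and the scikit-learn imports from the surrounding script are in scope:

    data = (f_train, f_test, y_train, y_test)
    # Logistic regression: search over C, scores come from predict_proba (probstype=1)
    logreg_clf = GridSearch(data, 10.0**np.arange(-1, 2, 0.25),
                            LogisticRegression, 'Logistic Regression', 'C',
                            probstype=1, clf_kwargs={'penalty': 'l1'})
    # SGD classifier: search over alpha, scores come from decision_function (probstype=2)
    sgd_clf = GridSearch(data, 10.0**np.arange(-9, 9, 1.0),
                         SGDClassifier, 'SGD Classifier', 'alpha',
                         probstype=2, clf_kwargs={'penalty': 'l1'})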
Example #4
def SGDGridSearch_OLD():  
    # grid search over the SGD regularization strength alpha
    cs = 10.0**np.arange(-9,9,1)   
    aucs = []
    for c in cs:
        clf = SGDClassifier(penalty='l1',alpha=c).fit(f_train, y_train)
        probs = clf.decision_function(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - SGD Classifier ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = SGDClassifier(penalty='l1',alpha=c).fit(f_train, y_train)
    probs = clf.decision_function(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
    roc_auc = auc(fpr,tpr)  # recompute AUC for the refit best-alpha model instead of reusing the last loop value
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - SGD Classifier ROC Curve')
    myplt.show()
    return clf, aucs
Example #5
    sel_bool_test = train == 0
    sel_ind_train = np.where(sel_bool_train)[0]
    sel_ind_test = np.where(sel_bool_test)[0]
    
    f_train = features[sel_ind_train]
    f_test = features[sel_ind_test]
    
    # Labels: approved is the complement of rejected
    approved = 1-rejected
    y_train = np.array(approved[sel_bool_train]).astype(int)
    y_test = np.array(approved[sel_bool_test]).astype(int)
    
    return f_train,f_test,y_train,y_test
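The tuple returned here is exactly the data argument that the generic GridSearch helper above expects; a minimal wiring sketch, with make_split as a hypothetical stand-in for the split function whose definition is truncated in this excerpt:

    # hypothetical: make_split stands in for the (truncated) split function above
    data = make_split(features, rejected, train)  # -> (f_train, f_test, y_train, y_test)
    nb_clf = GridSearch(data, 10.0**np.arange(-9, 2, 0.5),
                        MultinomialNB, 'Multinomial Naive Bayes', 'alpha')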

# CLASSIFIERS
'''
clf1 = MultinomialNB().fit(f_train, y_train)
probs = clf1.predict_proba(f_test)
fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
roc_auc = auc(fpr,tpr)
st.plotROC(fpr,tpr,roc_auc,"MultinomialNB")


clf2 = LogisticRegression(penalty='l1').fit(f_train, y_train)
probs = clf2.predict_proba(f_test)
fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
roc_auc = auc(fpr,tpr)
st.plotROC(fpr,tpr,roc_auc,"LogReg")


clf3 = SGDClassifier(penalty='l1').fit(f_train, y_train)