Пример #1
0
def get_opt_model(x,y):
    """Build an RBF-kernel SVC, tune it with ``plib.grid_search``, refit it.

    The estimator is created with ``class_weight='auto'`` and has its
    ``probability`` attribute switched on before being handed to the grid
    search (10 folds, non-verbose).  The estimator returned by the search is
    then fitted once more on the full ``(x, y)`` and returned.

    Parameters
    ----------
    x : training samples, passed through to the grid search and ``fit``.
    y : training labels, passed through to the grid search and ``fit``.

    Returns
    -------
    The fitted estimator returned by ``plib.grid_search``.
    """
    base_svc = svm.SVC(class_weight='auto', kernel='rbf')
    # Set after construction, as the search receives the flag on the object.
    # NOTE(review): presumably needed so the model can emit class
    # probabilities later -- confirm against the callers.
    base_svc.probability = True

    # The search's score is not needed here; only the estimator is kept.
    model, _search_score = plib.grid_search(
        base_svc, x, y, n_folds=10, verbose=False)

    model.fit(x, y)
    return model
Пример #2
0
def get_opt_model(x, y):
    """Return a grid-searched RBF SVC fitted on all of ``(x, y)``.

    Steps, in order:

    1. create ``svm.SVC(kernel='rbf', class_weight='auto')``;
    2. set its ``probability`` attribute to ``True``;
    3. run ``plib.grid_search`` on it with 10 folds and ``verbose=False``;
    4. fit the estimator returned by the search on ``(x, y)``.

    Parameters
    ----------
    x : training samples forwarded to ``plib.grid_search`` and ``fit``.
    y : training labels forwarded to ``plib.grid_search`` and ``fit``.

    Returns
    -------
    The estimator produced by the grid search, after the final ``fit``.
    """
    candidate = svm.SVC(kernel='rbf', class_weight='auto')
    candidate.probability = True

    # grid_search hands back (estimator, score); the score is unused here.
    search_result = plib.grid_search(candidate, x, y,
                                     n_folds=10, verbose=False)
    tuned_svc, _unused_score = search_result

    tuned_svc.fit(x, y)
    return tuned_svc
Пример #3
0
def compute_acc_conf(x,y,confounds,verbose=False,balanced=True,loo=False,nfolds=10,gs_kfolds=5,optimize=True,C=.01):
    """Cross-validate a linear SVC on ``(x, y)``, handling confounds per fold.

    For every fold a fresh ``SVC(kernel='linear', C=C)`` is created,
    optionally tuned with ``plib.grid_search`` on the training split only,
    refit on the training split and scored on the held-out split.  When
    ``confounds`` is non-empty, a ``ConfoundsRm`` object is constructed from
    the training rows and its ``transform`` is applied to both splits.

    Parameters
    ----------
    x : array indexed as ``x[rows, :]``
        Feature matrix, one row per sample.
    y : array indexed as ``y[rows]``
        Labels; encoded to integers with ``LabelEncoder`` before use.
    confounds : array indexed as ``confounds[rows, :]`` or empty sequence
        Confound variables; pass something of length 0 to skip the
        confound step.
    verbose : bool
        Print per-fold diagnostics and forward the flag to the grid search.
    balanced : bool
        Use ``class_weight='auto'`` on the SVC when true.
    loo : bool
        Use ``cross_validation.LeaveOneOut`` instead of ``StratifiedKFold``.
    nfolds : int
        Number of stratified folds (ignored when ``loo`` is true).
    gs_kfolds : int
        Number of folds forwarded to ``plib.grid_search``.
    optimize : bool
        Run the grid search inside each fold when true.
    C : float
        ``C`` given to the SVC constructor.

    Returns
    -------
    tuple ``(1., 0., len(y))``
        When ``y`` holds a single distinct label (nothing is trained).
    list ``[mean_score, std, len(y), y_pred]``
        When ``loo`` is true; ``std`` comes from ``estimate_std`` applied to
        the pooled accuracy and ``len(y)``, ``y_pred`` is the 1-D array of
        per-sample encoded predictions.
    list ``[mean_score, std, len(y)]``
        Otherwise; ``std`` is ``np.std`` of the per-fold test scores.
    """
    # Handle the degenerate single-class input before building any encoder or
    # CV splitter, so the guard is reached even if the splitter would reject
    # this input.
    classes, codes = np.unique(y, return_inverse=True)
    if len(classes) == 1:
        # Every sample falls in class 0, so the printed ratio is always 1;
        # floor division keeps the output identical on Python 2 and 3.
        print('Unique class: 100%% %s' % (np.sum(codes == 0) // len(y),))
        return (1., 0., len(y))

    encoder = preprocessing.LabelEncoder()
    encoder.fit(y)

    if loo:
        cv = cross_validation.LeaveOneOut(len(y))
    else:
        cv = StratifiedKFold(y=encoder.transform(y), n_folds=nfolds)

    total_test_score = []
    y_pred = []
    prec = []
    recall = []

    for train, test in cv:
        if balanced:
            clf = SVC(kernel='linear', class_weight='auto', C=C)
        else:
            clf = SVC(kernel='linear', C=C)

        # x is indexed directly instead of copying the whole matrix on every
        # fold.  NOTE(review): this relies on train/test being index or mask
        # arrays, for which NumPy indexing already returns fresh copies.
        if len(confounds) == 0:
            xtrain = x[train, :]
            xtest = x[test, :]
        else:
            # Built from training rows only, then applied to both splits.
            crm = ConfoundsRm(confounds[train, :], x[train, :])
            xtrain = crm.transform(confounds[train, :], x[train, :])
            xtest = crm.transform(confounds[test, :], x[test, :])

        ytrain = encoder.transform(y[train])
        ytest = encoder.transform(y[test])

        if optimize:
            clf, _gs_score = plib.grid_search(clf, xtrain, ytrain,
                                              n_folds=gs_kfolds,
                                              verbose=verbose)

        clf.fit(xtrain, ytrain)

        # Predict once and reuse it for every metric below.
        fold_pred = clf.predict(xtest)
        fold_score = clf.score(xtest, ytest)
        total_test_score.append(fold_score)
        prec.append(metrics.precision_score(ytest, fold_pred))
        recall.append(metrics.recall_score(ytest, fold_pred))

        if loo:
            y_pred.append(fold_pred)
        if verbose:
            print('nSupport: %s' % (clf.n_support_,))
            print('Train: %s' % (clf.score(xtrain, ytrain),))
            print('Test : %s' % (fold_score,))
            print('Prediction : %s' % (fold_pred,))
            print('Real Labels: %s' % (ytest,))
            print('Precision: %s Recall: %s' % (prec[-1], recall[-1]))

    mean_score = np.mean(total_test_score)
    summary = 'Mean: %s Std: %s AvgPrecision: %s AvgRecall: %s'

    if loo:
        # Only the LOO path collects predictions.  Doing this column
        # extraction unconditionally raised IndexError for k-fold runs,
        # where y_pred stays empty.
        y_pred = np.array(y_pred)[:, 0]
        total_std_test_score = estimate_std(
            metrics.accuracy_score(encoder.transform(y), y_pred), len(y))
        print(summary % (mean_score, total_std_test_score,
                         np.mean(prec), np.mean(recall)))
        return [mean_score, total_std_test_score, len(y), y_pred]

    std_score = np.std(total_test_score)
    print(summary % (mean_score, std_score, np.mean(prec), np.mean(recall)))
    return [mean_score, std_score, len(y)]
Пример #4
0
def compute_acc_conf(x,
                     y,
                     confounds,
                     verbose=False,
                     balanced=True,
                     loo=False,
                     optimize=True,
                     C=.01,
                     nfolds=10,
                     gs_kfolds=10):
    """Cross-validate a linear SVC on ``(x, y)``, handling confounds per fold.

    Each fold creates a fresh ``SVC(kernel='linear', C=C)``, optionally tunes
    it with ``plib.grid_search`` on the training split, refits it on the
    training split and scores it on the held-out split.  When ``confounds``
    is non-empty, a ``ConfoundsRm`` object is constructed from the training
    rows and its ``transform`` is applied to both splits.

    Parameters
    ----------
    x : array indexed as ``x[rows, :]``
        Feature matrix, one row per sample.
    y : array indexed as ``y[rows]``
        Labels; encoded to integers with ``LabelEncoder`` before use.
    confounds : array indexed as ``confounds[rows, :]`` or empty sequence
        Confound variables; pass something of length 0 to skip the
        confound step.
    verbose : bool
        Print per-fold diagnostics and forward the flag to the grid search.
    balanced : bool
        Use ``class_weight='auto'`` on the SVC when true.
    loo : bool
        Use ``cross_validation.LeaveOneOut`` instead of ``StratifiedKFold``.
    optimize : bool
        Run the grid search inside each fold when true.
    C : float
        ``C`` given to the SVC constructor.
    nfolds : int
        Number of stratified folds (ignored when ``loo`` is true).  Defaults
        to the previously hard-coded 10.
    gs_kfolds : int
        Number of folds forwarded to ``plib.grid_search``.  Defaults to the
        previously hard-coded 10.

    Returns
    -------
    tuple ``(1., 0., len(y))``
        When ``y`` holds a single distinct label (nothing is trained).
    tuple ``(mean_score, std, len(y))``
        When ``loo`` is true; ``std`` comes from ``estimate_std`` applied to
        the pooled accuracy and ``len(y)``.
    tuple ``(mean_score, std)``
        Otherwise; ``std`` is ``np.std`` of the per-fold test scores.
        NOTE(review): this branch returns two values while the others return
        three; kept as-is because callers may unpack it that way.
    """
    # Handle the degenerate single-class input before building any encoder or
    # CV splitter, so the guard is reached even if the splitter would reject
    # this input.
    classes, codes = np.unique(y, return_inverse=True)
    if len(classes) == 1:
        # Every sample falls in class 0, so the printed ratio is always 1;
        # floor division keeps the output identical on Python 2 and 3.
        print('Unique class: 100%% %s' % (np.sum(codes == 0) // len(y),))
        return (1., 0., len(y))

    encoder = preprocessing.LabelEncoder()
    encoder.fit(y)

    if loo:
        cv = cross_validation.LeaveOneOut(len(y))
    else:
        cv = StratifiedKFold(y=encoder.transform(y), n_folds=nfolds)

    total_test_score = []
    y_pred = []
    prec = []
    recall = []

    for train, test in cv:
        if balanced:
            clf = SVC(kernel='linear', class_weight='auto', C=C)
        else:
            clf = SVC(kernel='linear', C=C)

        # x is indexed directly instead of copying the whole matrix on every
        # fold.  NOTE(review): this relies on train/test being index or mask
        # arrays, for which NumPy indexing already returns fresh copies.
        if len(confounds) == 0:
            xtrain = x[train, :]
            xtest = x[test, :]
        else:
            # Built from training rows only, then applied to both splits.
            crm = ConfoundsRm(confounds[train, :], x[train, :])
            xtrain = crm.transform(confounds[train, :], x[train, :])
            xtest = crm.transform(confounds[test, :], x[test, :])

        ytrain = encoder.transform(y[train])
        ytest = encoder.transform(y[test])

        if optimize:
            clf, _gs_score = plib.grid_search(clf,
                                              xtrain,
                                              ytrain,
                                              n_folds=gs_kfolds,
                                              verbose=verbose)

        clf.fit(xtrain, ytrain)

        # Predict once and reuse it for every metric below.
        fold_pred = clf.predict(xtest)
        fold_score = clf.score(xtest, ytest)
        total_test_score.append(fold_score)
        prec.append(metrics.precision_score(ytest, fold_pred))
        recall.append(metrics.recall_score(ytest, fold_pred))

        if loo:
            y_pred.append(fold_pred)
        if verbose:
            print('nSupport: %s' % (clf.n_support_,))
            print('Train: %s' % (clf.score(xtrain, ytrain),))
            print('Test : %s' % (fold_score,))
            print('Prediction : %s' % (fold_pred,))
            print('Real Labels: %s' % (ytest,))
            print('Precision: %s Recall: %s' % (prec[-1], recall[-1]))

    mean_score = np.mean(total_test_score)
    summary = 'Mean: %s Std: %s AvgPrecision: %s AvgRecall: %s'

    if loo:
        # Each fold contributes one prediction array; flatten them so the
        # pooled predictions line up with the 1-D encoded labels.
        pooled_pred = np.array(y_pred).ravel()
        total_std_test_score = estimate_std(
            metrics.accuracy_score(encoder.transform(y), pooled_pred),
            len(y))
        print(summary % (mean_score, total_std_test_score,
                         np.mean(prec), np.mean(recall)))
        return (mean_score, total_std_test_score, len(y))

    std_score = np.std(total_test_score)
    print(summary % (mean_score, std_score, np.mean(prec), np.mean(recall)))
    return (mean_score, std_score)