def get_opt_model(x, y):
    """Return an RBF-kernel SVM tuned by grid search and refit on all of (x, y).

    Starts from a class-weight-balanced SVC with probability estimates
    enabled, lets plib.grid_search pick the best hyper-parameters over
    10 folds, then refits the selected model on the full data set.
    """
    base = svm.SVC(kernel='rbf', class_weight='auto')
    base.probability = True
    model, _best_score = plib.grid_search(base, x, y, n_folds=10, verbose=False)
    model.fit(x, y)
    return model
def get_opt_model(x, y):
    """Grid-search an RBF SVC (balanced class weights, probability estimates
    on) and return it refit on the whole of (x, y)."""
    candidate = svm.SVC(kernel='rbf', class_weight='auto')
    candidate.probability = True
    # 10-fold inner CV chooses the hyper-parameters; discard the score.
    candidate, _ = plib.grid_search(candidate, x, y, n_folds=10, verbose=False)
    candidate.fit(x, y)
    return candidate
def compute_acc_conf(x,y,confounds,verbose=False,balanced=True,loo=False,nfolds=10,gs_kfolds=5,optimize=True,C=.01): encoder = preprocessing.LabelEncoder() encoder.fit(y) # remove intra matrix mean and var #x = ts.normalize_data(x) #cv = cross_validation.KFold(len(y),n_folds=10) if loo: cv = cross_validation.LeaveOneOut(len(y)) else: cv = StratifiedKFold(y=encoder.transform(y), n_folds=nfolds) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) all_tpr = [] total_test_score=[] y_pred=[] #clf_array = [] bc_all = [] prec = [] recall = [] if len(np.unique(y))==1: print 'Unique class: 100%', np.sum(encoder.transform(y)==0)/len(y) return (1., 0.,len(y)) for i, (train, test) in enumerate(cv): select_x = x.copy() #betacluster = bc.BetaCluster(crm.transform(confounds[train,:],select_x[train,:]),encoder.transform(y[train]),100,k_feature=200) #bc_all.append(betacluster) if balanced: clf = SVC(kernel='linear', class_weight='auto', C=C) else: clf = SVC(kernel='linear',C=C) if len(confounds) == 0: xtrain = select_x[train,:] xtest = select_x[test,:] else: crm = ConfoundsRm(confounds[train,:],select_x[train,:]) xtrain = crm.transform(confounds[train,:],select_x[train,:]) xtest = crm.transform(confounds[test,:],select_x[test,:]) ytrain = encoder.transform(y[train]) ytest = encoder.transform(y[test]) #clf.probability = True if optimize: clf, score = plib.grid_search(clf, xtrain,ytrain, n_folds=gs_kfolds, verbose=verbose) clf.fit(xtrain,ytrain) total_test_score.append( clf.score(xtest,ytest)) #clf_array.append(clf) prec.append(metrics.precision_score(ytest, clf.predict(xtest))) recall.append(metrics.recall_score(ytest, clf.predict(xtest))) if loo: y_pred.append(clf.predict(xtest)) if verbose: print('nSupport: ',clf.n_support_) print "Train:",clf.score(xtrain,ytrain) print "Test :",clf.score(xtest,ytest) print "Prediction :",clf.predict(xtest) print "Real Labels:",ytest print('Precision:',prec[-1],'Recall:',recall[-1]) y_pred = np.array(y_pred)[:,0] if loo: total_std_test_score = 
estimate_std(metrics.accuracy_score(encoder.transform(y), np.array(y_pred)),len(y)) print('Mean:', np.mean(total_test_score),'Std:', total_std_test_score,'AvgPrecision:',np.mean(prec),'AvgRecall:',np.mean(recall) ) return [np.mean(total_test_score), total_std_test_score, len(y),y_pred] else: print('Mean:', np.mean(total_test_score),'Std:', np.std(total_test_score),'AvgPrecision:',np.mean(prec),'AvgRecall:',np.mean(recall) ) return [np.mean(total_test_score), np.std(total_test_score),len(y)]
def compute_acc_conf(x, y, confounds, verbose=False, balanced=True, loo=False, optimize=True, C=.01): encoder = preprocessing.LabelEncoder() encoder.fit(y) # remove intra matrix mean and var #x = ts.normalize_data(x) #cv = cross_validation.KFold(len(y),n_folds=10) if loo: cv = cross_validation.LeaveOneOut(len(y)) else: cv = StratifiedKFold(y=encoder.transform(y), n_folds=10) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) all_tpr = [] total_test_score = [] y_pred = [] #clf_array = [] bc_all = [] prec = [] recall = [] if len(np.unique(y)) == 1: print 'Unique class: 100%', np.sum(encoder.transform(y) == 0) / len(y) return (1., 0., len(y)) for i, (train, test) in enumerate(cv): select_x = x.copy() #betacluster = bc.BetaCluster(crm.transform(confounds[train,:],select_x[train,:]),encoder.transform(y[train]),100,k_feature=200) #bc_all.append(betacluster) if balanced: clf = SVC(kernel='linear', class_weight='auto', C=C) else: clf = SVC(kernel='linear', C=C) if len(confounds) == 0: xtrain = select_x[train, :] xtest = select_x[test, :] else: crm = ConfoundsRm(confounds[train, :], select_x[train, :]) xtrain = crm.transform(confounds[train, :], select_x[train, :]) xtest = crm.transform(confounds[test, :], select_x[test, :]) ytrain = encoder.transform(y[train]) ytest = encoder.transform(y[test]) #clf.probability = True if optimize: clf, score = plib.grid_search(clf, xtrain, ytrain, n_folds=10, verbose=verbose) clf.fit(xtrain, ytrain) total_test_score.append(clf.score(xtest, ytest)) #clf_array.append(clf) prec.append(metrics.precision_score(ytest, clf.predict(xtest))) recall.append(metrics.recall_score(ytest, clf.predict(xtest))) if loo: y_pred.append(clf.predict(xtest)) if verbose: print('nSupport: ', clf.n_support_) print "Train:", clf.score(xtrain, ytrain) print "Test :", clf.score(xtest, ytest) print "Prediction :", clf.predict(xtest) print "Real Labels:", ytest print('Precision:', prec[-1], 'Recall:', recall[-1]) if loo: total_std_test_score = estimate_std( 
metrics.accuracy_score(encoder.transform(y), np.array(y_pred)), len(y)) print('Mean:', np.mean(total_test_score), 'Std:', total_std_test_score, 'AvgPrecision:', np.mean(prec), 'AvgRecall:', np.mean(recall)) return (np.mean(total_test_score), total_std_test_score, len(y)) else: print('Mean:', np.mean(total_test_score), 'Std:', np.std(total_test_score), 'AvgPrecision:', np.mean(prec), 'AvgRecall:', np.mean(recall)) return (np.mean(total_test_score), np.std(total_test_score))