def test_permutation_test_score_allow_nans():
    """Check that permutation_test_score allows input data with NaNs.

    A 10-row matrix gets one row overwritten with NaN and is fed, together
    with a balanced 0/1 target, through an imputer + mock-classifier pipeline.
    The test passes if the call completes without raising.
    """
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    # Floor division keeps the repeat count an integer; under true division
    # ``X.shape[0] / 2`` is a float, which np.repeat cannot use as a count.
    y = np.repeat([0, 1], X.shape[0] // 2)
    p = Pipeline([
        ("imputer", Imputer(strategy="mean", missing_values="NaN")),
        ("classifier", MockClassifier()),
    ])
    cval.permutation_test_score(p, X, y, cv=5)
def test_permutation_test_score_allow_nans():
    """Check that permutation_test_score allows input data with NaNs.

    Builds a 10-row float matrix, blanks one row with NaN, and runs the
    permutation test on an imputer + mock-classifier pipeline. No value is
    asserted: the call simply must not raise.
    """
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    # ``//`` (not ``/``) so the repeat count stays an int on every Python
    # version; np.repeat rejects a float count.
    y = np.repeat([0, 1], X.shape[0] // 2)
    p = Pipeline([
        ('imputer', Imputer(strategy='mean', missing_values='NaN')),
        ('classifier', MockClassifier()),
    ])
    cval.permutation_test_score(p, X, y, cv=5)
def test_permutation_score():
    """Run permutation_test_score on iris in several configurations.

    Covers: plain accuracy scoring, the same run with constant ``labels``,
    a sparse copy of the features, a custom scorer built with make_scorer,
    and finally a target replaced by ``index mod 3``.
    """
    dataset = load_iris()
    X = dataset.data
    X_sparse = coo_matrix(X)
    y = dataset.target
    clf = SVC(kernel="linear")
    folds = cval.StratifiedKFold(y, 2)
    # Keyword arguments shared by every accuracy-scored run below.
    accuracy_run = dict(n_permutations=30, scoring="accuracy")

    base_score, _, base_pvalue = cval.permutation_test_score(
        clf, X, y, cv=folds, **accuracy_run)
    assert_greater(base_score, 0.9)
    assert_almost_equal(base_pvalue, 0.0, 1)

    # Constant labels must reproduce the unlabelled result exactly.
    labelled_score, _, labelled_pvalue = cval.permutation_test_score(
        clf, X, y, cv=folds, labels=np.ones(y.size), random_state=0,
        **accuracy_run)
    assert_true(labelled_score == base_score)
    assert_true(labelled_pvalue == base_pvalue)

    # check that we obtain the same results with a sparse representation
    sparse_clf = SVC(kernel="linear")
    sparse_folds = cval.StratifiedKFold(y, 2)
    sparse_score, _, sparse_pvalue = cval.permutation_test_score(
        sparse_clf, X_sparse, y, cv=sparse_folds, labels=np.ones(y.size),
        random_state=0, **accuracy_run)
    assert_true(sparse_score == base_score)
    assert_true(sparse_pvalue == base_pvalue)

    # test with custom scoring object
    def custom_score(y_true, y_pred):
        hits = (y_true == y_pred).sum()
        misses = (y_true != y_pred).sum()
        return (hits - misses) / y_true.shape[0]

    custom_scorer = make_scorer(custom_score)
    custom_result, _, custom_pvalue = cval.permutation_test_score(
        clf, X, y, n_permutations=100, scoring=custom_scorer, cv=folds,
        random_state=0)
    assert_almost_equal(custom_result, 0.93, 2)
    assert_almost_equal(custom_pvalue, 0.01, 3)

    # set random y (the folds object built from the original target is reused)
    y = np.mod(np.arange(len(y)), 3)
    noise_score, _, noise_pvalue = cval.permutation_test_score(
        clf, X, y, cv=folds, **accuracy_run)
    assert_less(noise_score, 0.5)
    assert_greater(noise_pvalue, 0.2)
def test_permutation_score():
    """Run permutation_test_score on iris with the positional score function.

    Checks the dense run, the same run with constant ``labels``, a sparse
    run using SparseSVC, and a run whose target is ``index mod 3``.
    """
    bunch = load_iris()
    dense_X = bunch.data
    sparse_X = coo_matrix(dense_X)
    target = bunch.target
    dense_clf = SVC(kernel='linear')
    dense_cv = cval.StratifiedKFold(target, 2)

    ref_score, _, ref_pvalue = cval.permutation_test_score(
        dense_clf, dense_X, target, zero_one_score, dense_cv)
    assert_greater(ref_score, 0.9)
    np.testing.assert_almost_equal(ref_pvalue, 0.0, 1)

    # Constant labels must not change the outcome.
    same_score, _, same_pvalue = cval.permutation_test_score(
        dense_clf, dense_X, target, zero_one_score, dense_cv,
        labels=np.ones(target.size), random_state=0)
    assert_true(same_score == ref_score)
    assert_true(same_pvalue == ref_pvalue)

    # check that we obtain the same results with a sparse representation
    sparse_clf = SparseSVC(kernel='linear')
    sparse_cv = cval.StratifiedKFold(target, 2, indices=True)
    same_score, _, same_pvalue = cval.permutation_test_score(
        sparse_clf, sparse_X, target, zero_one_score, sparse_cv,
        labels=np.ones(target.size), random_state=0)
    assert_true(same_score == ref_score)
    assert_true(same_pvalue == ref_pvalue)

    # set random y (dense_cv, built from the original target, is reused)
    cyclic_target = np.mod(np.arange(len(target)), 3)
    noise_score, _, noise_pvalue = cval.permutation_test_score(
        dense_clf, dense_X, cyclic_target, zero_one_score, dense_cv)
    assert_less(noise_score, 0.5)
    assert_greater(noise_pvalue, 0.4)
def test_permutation_score():
    """Exercise permutation_test_score on iris.

    Five runs are checked in turn: accuracy scoring, accuracy scoring with
    constant ``labels``, a sparse feature matrix, a make_scorer-wrapped custom
    metric, and a target overwritten with ``index mod 3``.
    """
    iris_data = load_iris()
    features = iris_data.data
    features_sparse = coo_matrix(features)
    classes = iris_data.target
    linear_svm = SVC(kernel='linear')
    splitter = cval.StratifiedKFold(classes, 2)

    def run_accuracy(estimator, X, y, cv, **extra):
        # Every accuracy-scored run uses 30 permutations.
        return cval.permutation_test_score(
            estimator, X, y, n_permutations=30, cv=cv, scoring="accuracy",
            **extra)

    ref_score, _, ref_pvalue = run_accuracy(
        linear_svm, features, classes, splitter)
    assert_greater(ref_score, 0.9)
    assert_almost_equal(ref_pvalue, 0.0, 1)

    got_score, _, got_pvalue = run_accuracy(
        linear_svm, features, classes, splitter,
        labels=np.ones(classes.size), random_state=0)
    assert_true(got_score == ref_score)
    assert_true(got_pvalue == ref_pvalue)

    # check that we obtain the same results with a sparse representation
    linear_svm_sparse = SVC(kernel='linear')
    splitter_sparse = cval.StratifiedKFold(classes, 2)
    got_score, _, got_pvalue = run_accuracy(
        linear_svm_sparse, features_sparse, classes, splitter_sparse,
        labels=np.ones(classes.size), random_state=0)
    assert_true(got_score == ref_score)
    assert_true(got_pvalue == ref_pvalue)

    # test with custom scoring object
    def custom_score(y_true, y_pred):
        n_equal = (y_true == y_pred).sum()
        n_different = (y_true != y_pred).sum()
        return (n_equal - n_different) / y_true.shape[0]

    scorer = make_scorer(custom_score)
    scorer_score, _, scorer_pvalue = cval.permutation_test_score(
        linear_svm, features, classes, n_permutations=100, scoring=scorer,
        cv=splitter, random_state=0)
    assert_almost_equal(scorer_score, .93, 2)
    assert_almost_equal(scorer_pvalue, 0.01, 3)

    # set random y (the splitter built from the original classes is reused)
    classes = np.mod(np.arange(len(classes)), 3)
    rand_score, _, rand_pvalue = run_accuracy(
        linear_svm, features, classes, splitter)
    assert_less(rand_score, 0.5)
    assert_greater(rand_pvalue, 0.2)
def permutation_cross_validation(estimator, X, y, n_fold=3, isshuffle=True,
                                 cvmeth='shufflesplit', score_type='r2',
                                 n_perm=1000):
    """
    An easy way to evaluate the significance of a cross-validated score by permutations
    -------------------------------------------------
    Parameters:
        estimator: linear model estimator
        X: IV; a 1-D array is expanded to a single column
        y: DV; a 1-D array is expanded to a single column
        n_fold: fold number cross validation. For 'shufflesplit' the test
                size is 1.0/n_fold
        isshuffle: passed as ``shuffle`` to KFold; only used when cvmeth
                   is 'kfold'
        cvmeth: kfold or shufflesplit.
                shufflesplit is the random permutation cross-validation iterator
                (100 iterations, random_state 0)
        score_type: scoring type, 'r2' as default
        n_perm: permutation numbers
    Return:
        score: model scores
        permutation_scores: model scores when permutation labels
        pvalues: p value of permutation scores
    Raises:
        ValueError: if cvmeth is neither 'kfold' nor 'shufflesplit'
    """
    # Reject an unknown method before any work is done; otherwise the
    # cross-validation iterator would be left undefined further down.
    if cvmeth not in ('kfold', 'shufflesplit'):
        raise ValueError(
            "cvmeth must be 'kfold' or 'shufflesplit', got %r" % (cvmeth,))

    if X.ndim == 1:
        X = np.expand_dims(X, axis=1)
    if y.ndim == 1:
        y = np.expand_dims(y, axis=1)
    # Both the predictors and the target are standardised before fitting.
    X = preprocessing.scale(X)
    y = preprocessing.scale(y)

    n_samples = y.shape[0]
    if cvmeth == 'kfold':
        cvmethod = cross_validation.KFold(n_samples, n_fold, shuffle=isshuffle)
    else:
        testsize = 1.0 / n_fold
        cvmethod = cross_validation.ShuffleSplit(
            n_samples, n_iter=100, test_size=testsize, random_state=0)

    score, permutation_scores, pvalues = cross_validation.permutation_test_score(
        estimator, X, y, scoring=score_type, cv=cvmethod,
        n_permutations=n_perm)
    return score, permutation_scores, pvalues
def permutation_cross_validation(estimator, X, y, n_fold=3, isshuffle=True,
                                 cvmeth='shufflesplit', score_type='r2',
                                 n_perm=1000):
    """
    An easy way to evaluate the significance of a cross-validated score by permutations
    -------------------------------------------------
    Parameters:
        estimator: linear model estimator
        X: IV; a 1-D array is expanded to a single column
        y: DV; a 1-D array is expanded to a single column
        n_fold: fold number cross validation. For 'shufflesplit' the test
                size is 1.0/n_fold
        isshuffle: passed as ``shuffle`` to KFold; only used when cvmeth
                   is 'kfold'
        cvmeth: kfold or shufflesplit.
                shufflesplit is the random permutation cross-validation iterator
                (100 iterations, random_state 0)
        score_type: scoring type, 'r2' as default
        n_perm: permutation numbers
    Return:
        score: model scores
        permutation_scores: model scores when permutation labels
        pvalues: p value of permutation scores
    Raises:
        ValueError: if cvmeth is neither 'kfold' nor 'shufflesplit'
        Exception: if sklearn cannot be imported
    """
    # Reject an unknown method up front; otherwise the cross-validation
    # iterator would be left undefined further down.
    if cvmeth not in ('kfold', 'shufflesplit'):
        raise ValueError(
            "cvmeth must be 'kfold' or 'shufflesplit', got %r" % (cvmeth,))

    # sklearn is imported lazily so the module loads without it.
    try:
        from sklearn import cross_validation, preprocessing
    except ImportError:
        raise Exception('To call this function, please install sklearn')

    if X.ndim == 1:
        X = np.expand_dims(X, axis=1)
    if y.ndim == 1:
        y = np.expand_dims(y, axis=1)
    # Both the predictors and the target are standardised before fitting.
    X = preprocessing.scale(X)
    y = preprocessing.scale(y)

    n_samples = y.shape[0]
    if cvmeth == 'kfold':
        cvmethod = cross_validation.KFold(n_samples, n_fold, shuffle=isshuffle)
    else:
        testsize = 1.0 / n_fold
        cvmethod = cross_validation.ShuffleSplit(
            n_samples, n_iter=100, test_size=testsize, random_state=0)

    score, permutation_scores, pvalues = cross_validation.permutation_test_score(
        estimator, X, y, scoring=score_type, cv=cvmethod,
        n_permutations=n_perm)
    return score, permutation_scores, pvalues
def automatic_bernulli():
    """Score an ExtraTrees classifier on shuffled 'fight' labels.

    Reads the signs CSV, shuffles the target in place, and for every feature
    count from 3 to 15 keeps the k best features (f_classif), averages the
    accuracy of 1000 shuffled 6-fold cross-validations, and runs a
    permutation test. Results are printed; nothing is returned.
    """
    data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    # np.array copies, so the in-place shuffle below cannot touch the frame.
    Y = np.array(data['fight'].values)
    # The labels are shuffled before any fitting, so every score below is
    # computed against permuted targets.
    np.random.shuffle(Y)
    data.drop(['match', 'city', 'date', 'fight'], axis=1, inplace=True)
    X = data.values

    for features_number in range(3, 16):
        X_new = SelectKBest(f_classif, k=features_number).fit_transform(X, Y)
        classifier = ExtraTreesClassifier()
        super_means = []
        for _ in range(1000):
            kf = KFold(len(X_new), n_folds=6, shuffle=True)
            means = []
            for training, testing in kf:
                classifier.fit(X_new[training], Y[training])
                prediction = classifier.predict(X_new[testing])
                means.append(np.mean(prediction == Y[testing]))
            super_means.append(np.mean(means))
        # Single formatted string so the line prints the same text whether
        # print is a statement or a function.
        print('features_number= {} Mean accuracy: {:.1%} '.format(
            features_number, np.mean(super_means)))
        # The permutation test reuses the last fold object built above.
        score, permutation_scores, pvalue = permutation_test_score(
            classifier, X_new, Y, scoring="accuracy", cv=kf,
            n_permutations=len(Y), n_jobs=1)
        print("Classification score %s (pvalue : %s)" % (score, pvalue))
def classify(x, y, classifier='lda', kern='rbf', n_folds=10, rep=10, kind='sf',
             n_jobs=1, n_knn=3, n_perm=0, n_tree=100, cvkind='skfold'):
    """Cross-validated classification of ``y`` from ``x``, optionally with a
    permutation test.

    Parameters:
        x, y: features and labels; ``x`` is first passed through checkfeat
        classifier: an int or str is handed to classifier_choice to build the
            estimator; any other object is used directly as the estimator
        kern, n_knn, n_tree: forwarded to classifier_choice
        n_folds, rep, cvkind: cross-validation settings forwarded to
            classify_fcn (n_perm == 0) or crossval_choice (n_perm > 0)
        kind: 'mf' classifies all features together, 'sf' classifies each
            column of ``x`` separately
        n_jobs: forwarded to the cross-validation / permutation routines
        n_perm: number of permutations; 0 skips the permutation test
    Return:
        da, all_scores, permutation_scores, pvalue
        ``da`` and ``all_scores`` are multiplied by 100. ``all_scores`` is 0
        when a permutation test is run; ``permutation_scores`` is 0 and
        ``pvalue`` is [0] when it is not. ``pvalue`` is always a list.
    Raises:
        ValueError: if kind is neither 'mf' nor 'sf'
    """
    # An unknown kind would otherwise leave the results undefined.
    if kind not in ('mf', 'sf'):
        raise ValueError("kind must be 'mf' or 'sf', got %r" % (kind,))

    # Check format :
    x = checkfeat(x, y)
    _, n_feat = x.shape
    # Uniform class priors; 1.0 keeps this a float division on Python 2 too.
    n_classes = len(n.unique(y))
    priors = n.array([1.0 / n_classes] * n_classes)

    # - Classifier's choice :
    if isinstance(classifier, (int, str)):
        clf = classifier_choice(classifier, kern=kern, n_knn=n_knn,
                                n_tree=n_tree, priors=priors)
    else:
        clf = classifier

    if n_perm == 0:
        # - Plain cross validation, no statistical evaluation :
        permutation_scores, pvalue = 0, [[0]]
        if kind == 'mf':
            # Multi feature classification
            da, all_scores, _ = classify_fcn(
                x, y, clf, n_folds=n_folds, rep=rep, n_jobs=n_jobs,
                cvkind=cvkind)
        else:
            # Single features classification
            da = n.zeros((1, n_feat))
            all_scores = n.zeros((rep, n_folds, n_feat))
            for k in range(n_feat):
                da[:, k], all_scores[:, :, k], _ = classify_fcn(
                    x[:, k], y, clf, n_folds=n_folds, rep=rep, n_jobs=n_jobs,
                    cvkind=cvkind)
    else:
        # - Statistical evaluation :
        all_scores = 0
        cv_model = crossval_choice(y, cvkind=cvkind, n_folds=n_folds,
                                   rndstate=0)
        if kind == 'mf':
            # Multi feature classification
            da, permutation_scores, mf_pvalue = cross_validation.permutation_test_score(
                clf, x, y, scoring="accuracy", cv=cv_model,
                n_permutations=n_perm, n_jobs=n_jobs)
            # Wrap the scalar so the return statement can index it like the
            # other branches.
            pvalue = [[mf_pvalue]]
        else:
            # Single features classification
            permutation_scores = n.zeros((n_perm, n_feat))
            da = n.zeros((1, n_feat))
            pvalue = n.zeros((1, n_feat))
            for k in range(n_feat):
                da[0, k], permutation_scores[:, k], pvalue[0, k] = cross_validation.permutation_test_score(
                    clf, x[:, k], y, scoring="accuracy", cv=cv_model,
                    n_permutations=n_perm, n_jobs=n_jobs)

    return 100*da, 100*all_scores, permutation_scores, list(pvalue[0])
def computeScore(svm, X, y, cv):
    """Run a 100-permutation accuracy test for ``svm`` and report it.

    Prints the classification score with its p-value and returns
    ``(score, permutation_scores, pvalue)``.
    """
    test_options = dict(
        scoring='accuracy',
        cv=cv,
        n_permutations=100,
        n_jobs=1,
    )
    outcome = permutation_test_score(svm, X, y, **test_options)
    score, permutation_scores, pvalue = outcome
    message = "Classification score %s (pvalue: %s)" % (score, pvalue)
    print(message)
    return score, permutation_scores, pvalue
def check_trop_score(X_data, trop_clusters):
    """Permutation-test a KMeans clustering against ``trop_clusters``.

    KMeans is given one cluster per distinct value in ``trop_clusters`` and is
    scored with ``rand_linker`` over a 3-iteration bootstrap (70% training).
    Returns ``(score, permutation_scores, pvalue)`` from 100 permutations.
    """
    n_rows = X_data.shape[0]
    resampler = Bootstrap(n_rows, n_iter=3, train_size=0.7)
    n_distinct = len(set(trop_clusters))
    clusterer = KMeans(n_clusters=n_distinct)
    true_score, permuted_scores, p_value = permutation_test_score(
        clusterer,
        X_data,
        y=trop_clusters,
        n_permutations=100,
        n_jobs=20,
        scoring=rand_linker,
        cv=resampler,
    )
    return true_score, permuted_scores, p_value
def test_permutation_test_score(self):
    """The ModelFrame accessor must agree with the direct permutation_test_score call."""
    from sklearn.svm import SVC

    iris = datasets.load_iris()
    frame = pdml.ModelFrame(iris)
    estimator = SVC(kernel=str('linear'), C=1)

    via_frame = frame.cross_validation.permutation_test_score(estimator, cv=5)
    direct = cv.permutation_test_score(
        estimator, iris.data, y=iris.target, cv=5)

    # Result is (score, permutation_scores, pvalue).
    self.assertEqual(len(via_frame), 3)
    self.assertEqual(via_frame[0], direct[0])
    self.assert_numpy_array_almost_equal(via_frame[1], direct[1])
    self.assertEqual(via_frame[2], direct[2])
def test_permutation_score():
    """Run permutation_test_score on iris with the positional score function.

    Checks the dense run, the same run with constant ``labels``, a sparse
    feature matrix, and a run whose target is ``index mod 3``.
    """
    bunch = load_iris()
    dense_X = bunch.data
    sparse_X = coo_matrix(dense_X)
    target = bunch.target
    dense_clf = SVC(kernel='linear')
    dense_cv = cval.StratifiedKFold(target, 2)

    ref_score, _, ref_pvalue = cval.permutation_test_score(
        dense_clf, dense_X, target, zero_one_score, dense_cv)
    assert_greater(ref_score, 0.9)
    np.testing.assert_almost_equal(ref_pvalue, 0.0, 1)

    # Constant labels must not change the outcome.
    same_score, _, same_pvalue = cval.permutation_test_score(
        dense_clf, dense_X, target, zero_one_score, dense_cv,
        labels=np.ones(target.size), random_state=0)
    assert_true(same_score == ref_score)
    assert_true(same_pvalue == ref_pvalue)

    # check that we obtain the same results with a sparse representation
    sparse_clf = SVC(kernel='linear')
    sparse_cv = cval.StratifiedKFold(target, 2, indices=True)
    same_score, _, same_pvalue = cval.permutation_test_score(
        sparse_clf, sparse_X, target, zero_one_score, sparse_cv,
        labels=np.ones(target.size), random_state=0)
    assert_true(same_score == ref_score)
    assert_true(same_pvalue == ref_pvalue)

    # set random y (dense_cv, built from the original target, is reused)
    cyclic_target = np.mod(np.arange(len(target)), 3)
    noise_score, _, noise_pvalue = cval.permutation_test_score(
        dense_clf, dense_X, cyclic_target, zero_one_score, dense_cv)
    assert_less(noise_score, 0.5)
    assert_greater(noise_pvalue, 0.4)
def permutation():
    """Permutation-test six classifier / feature-set combinations and plot them.

    Loads the recording, builds four feature matrices (all channels,
    univariate selection, heuristic channel selection, PCA), runs a
    100-permutation test for each configuration, shows a bar chart of the
    scores with the mean permutation score overlaid, and prints the figures
    of the last configuration evaluated.
    """
    mat_path = 'data/n228_bcdefgh.mat'
    recording = data.load(mat_path)
    X, y = data.build(recording, range(0, 96), 'fr1', 17)

    # Univariate Feature Selection
    selector = SelectKBest(f_classif, k=27).fit(X, y)
    Xa = selector.transform(X)

    # Select good cell with heuristic
    channel = data.goodCell(recording)
    # NOTE: y is rebound here and this later value is used for every test.
    Xb, y = data.build(recording, channel, 'fr1', 17)

    # PCA Dimentionnality Reduction
    pca = PCA(n_components=38)
    Xc = pca.fit_transform(X)

    pNB = PoissonNB()
    gNB = GaussianNB()
    # (label, classifier, features) for each configuration; the labels are
    # kept for reference only and are not drawn on the plot.
    configurations = [
        ('Poisson Unreduced', pNB, X),
        ('Poisson Univariate Reduction', pNB, Xa),
        ('Poisson Heuristic Reduction', pNB, Xb),
        ('Gaussion No reduction', gNB, X),
        ('Gaussian PCA reduction', gNB, Xc),
        ('Gaussian Univariate Reduction', gNB, Xa),
    ]

    # Identical for every configuration, so built once.
    folds = StratifiedKFold(y, n_folds=3, shuffle=True, random_state=42)
    scorer = make_scorer(error_distance, greater_is_better=False)

    scores = []
    perm_scores = []
    p_value = []
    for _label, classifier, features in configurations:
        score, permutation_score, pvalue = permutation_test_score(
            classifier, features, y, cv=folds, n_permutations=100, n_jobs=-1,
            random_state=42, scoring=scorer)
        scores.append(score)
        perm_scores.append(np.mean(permutation_score))
        p_value.append(pvalue)

    ind = np.arange(len(scores))
    plt.bar(ind, scores)
    plt.plot(ind, perm_scores)
    plt.show()

    # These report the last configuration only (score, permutation_score and
    # pvalue still hold the values from the final loop iteration).
    print("Average Distance between real location and predicted location")
    print(score)
    print("Chance Performance, from permutation")
    print(np.mean(permutation_score))
    print("p-value")
    print(pvalue)
def handle_bayes():
    """Permutation-test a GaussianNB classifier on six hand-picked columns.

    Reads the signs CSV, selects the columns listed in ``signs`` as features
    and 'fight' as target, and prints the accuracy score and p-value of a
    permutation test over a shuffled 6-fold split. Nothing is returned.
    """
    input_data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    signs = ['ms_median_sadness', 'likes', 'ms_min_anger', 'ms_min_disgust',
             'ms_min_fear', 'ms_avg_anger']
    X = input_data[signs].values
    Y = np.array(input_data['fight'].values)
    classifier = GaussianNB()
    # Folds are sized by the number of samples (rows of X), not by the
    # number of selected columns.
    kf = KFold(len(X), n_folds=6, shuffle=True)
    score, permutation_scores, pvalue = permutation_test_score(
        classifier, X, Y, scoring="accuracy", cv=kf, n_permutations=len(Y),
        n_jobs=1)
    print("Classification score %s (pvalue : %s)" % (score, pvalue))
def test_permutation_score():
    """Exercise permutation_test_score on iris.

    Covers accuracy scoring, constant ``labels``, an fbeta scorer built with
    make_scorer, a sparse feature matrix, a target replaced by
    ``index mod 3``, and the ``score_func`` keyword.
    """
    iris_data = load_iris()
    features = iris_data.data
    features_sparse = coo_matrix(features)
    classes = iris_data.target
    linear_svm = SVC(kernel='linear')
    splitter = cval.StratifiedKFold(classes, 2)

    ref_score, _, ref_pvalue = cval.permutation_test_score(
        linear_svm, features, classes, cv=splitter, scoring="accuracy")
    assert_greater(ref_score, 0.9)
    assert_almost_equal(ref_pvalue, 0.0, 1)

    # Constant labels must reproduce the reference result exactly.
    got_score, _, got_pvalue = cval.permutation_test_score(
        linear_svm, features, classes, cv=splitter, scoring="accuracy",
        labels=np.ones(classes.size), random_state=0)
    assert_true(got_score == ref_score)
    assert_true(got_pvalue == ref_pvalue)

    # test with custom scoring object
    f2_scorer = make_scorer(fbeta_score, beta=2)
    f2_score, _, f2_pvalue = cval.permutation_test_score(
        linear_svm, features, classes, scoring=f2_scorer, cv=splitter,
        labels=np.ones(classes.size), random_state=0)
    assert_almost_equal(f2_score, .97, 2)
    assert_almost_equal(f2_pvalue, 0.01, 3)

    # check that we obtain the same results with a sparse representation
    linear_svm_sparse = SVC(kernel='linear')
    splitter_sparse = cval.StratifiedKFold(classes, 2)
    got_score, _, got_pvalue = cval.permutation_test_score(
        linear_svm_sparse, features_sparse, classes, cv=splitter_sparse,
        scoring="accuracy", labels=np.ones(classes.size), random_state=0)
    assert_true(got_score == ref_score)
    assert_true(got_pvalue == ref_pvalue)

    # set random y (the splitter built from the original classes is reused)
    classes = np.mod(np.arange(len(classes)), 3)
    rand_score, _, rand_pvalue = cval.permutation_test_score(
        linear_svm, features, classes, cv=splitter, scoring="accuracy")
    assert_less(rand_score, 0.5)
    assert_greater(rand_pvalue, 0.2)

    # test with deprecated interface; warnings raised by the call are
    # recorded rather than shown
    with warnings.catch_warnings(record=True):
        old_score, _, old_pvalue = cval.permutation_test_score(
            linear_svm, features, classes, score_func=accuracy_score,
            cv=splitter)
    assert_less(old_score, 0.5)
    assert_greater(old_pvalue, 0.2)
def regr_one(self, train_x, train_y, test_size, predict_ornot):
    """Fit a logistic regression and report accuracy plus a permutation test.

    Parameters:
        train_x, train_y: features and labels
        test_size: forwarded to train_test_split when ``predict_ornot`` is true
        predict_ornot: if true, hold out a test split (random_state=10) and
            evaluate on it; otherwise evaluate on the training data itself
    Returns:
        (regr, predict_result, test_y): the fitted model, its predictions on
        the evaluation set, and the matching true labels.
    """
    if predict_ornot:
        train_x, test_x, train_y, test_y = train_test_split(
            train_x, train_y, test_size=test_size, random_state=10)
    else:
        # No hold-out requested: evaluate on the data used for fitting.
        test_x, test_y = train_x, train_y

    regr = linear_model.LogisticRegression()
    regr.fit(X=train_x, y=train_y)
    predict_result = regr.predict(X=test_x)

    pairs = list(zip(predict_result, test_y))
    n_total = len(pairs)
    n_wrong = sum(1 for predicted, actual in pairs if predicted != actual)
    # Guard the empty case instead of dividing by zero.
    accuracy = (n_total - n_wrong) / (n_total * 1.0) if n_total else float('nan')
    print('accuracy: %s' % accuracy)

    score, permutation_scores, pvalue = permutation_test_score(
        regr, train_x, train_y, scoring="accuracy")
    print('score, pvalue =  %s %s' % (score, pvalue))
    return regr, predict_result, test_y
def bayes_classification(permutation, test):
    """Mean 6-fold GaussianNB accuracy for 3 to 29 selected features.

    Parameters:
        permutation: if truthy, the 'fight' labels are shuffled in place
            before any fitting
        test: if truthy, a permutation test is also run for every feature
            count and its score and p-value are printed
    Returns:
        list of mean cross-validated accuracies, one per feature count.
    """
    input_data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    output_data = []
    # np.array copies, so the optional shuffle cannot touch the frame.
    Y = np.array(input_data['fight'].values)
    if permutation:
        np.random.shuffle(Y)
    input_data = input_data.drop(['match', 'city', 'date', 'fight'], axis=1)
    data_array = input_data.values

    for features_number in range(3, 30):
        X_new = SelectKBest(f_classif, k=features_number).fit_transform(data_array, Y)
        classifier = GaussianNB()
        kf = KFold(len(X_new), n_folds=6, shuffle=True)
        means = []
        for training, testing in kf:
            classifier.fit(X_new[training], Y[training])
            means.append(classifier.score(X_new[testing], Y[testing]))
        output_data.append(np.mean(means))

        # The permutation test only feeds the printout, so it is skipped
        # entirely when no printout was requested.
        if test:
            score, permutation_scores, pvalue = permutation_test_score(
                classifier, X_new, Y, scoring="accuracy", cv=kf,
                n_permutations=len(Y), n_jobs=1)
            print("Classification score %s (pvalue : %s)" % (score, pvalue))
    return output_data
#cv_scores = cross_val_score(svc, fmri_masked, target, cv=cv, n_jobs=-1, verbose=10) session_label = labels['chunks'] session_label = session_label[condition_mask] cv = LeaveOneLabelOut(labels=session_label) cv_scores_one = cross_val_score(svc, fmri_masked, target, cv=cv) #使用F1评分 #cv_scores = cross_val_score(svc, fmri_masked, target, cv=cv, scoring='f1') #计算平均分类准确率 classification_accuracy = np.mean(cv_scores) classification_accuracy_one = np.mean(cv_scores_one) #计算随机分类器的交叉验证得分 null_cv_scores = cross_val_score(DummyClassifier(), fmri_masked, target, cv=cv) #置换检验 null_cv_scores_2 = permutation_test_score(svc, fmri_masked, target, cv=cv) # Retrieve the SVC discriminating weights coef_ = svc.coef_ # Reverse masking thanks to the Nifti Masker coef_img = nifti_masker.inverse_transform(coef_) # Save the coefficients as a Nifti image coef_img.to_filename('haxby_svc_weights.nii') from nilearn import image from nilearn.plotting import plot_stat_map, show import nibabel as nib
tmp = cls_all[j][band] data_cls.append( np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0)) data_pln = [] for j in range(len(pln_all)): tmp = pln_all[j][band] data_pln.append( np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0)) data_cls = np.asarray(data_cls) data_pln = np.asarray(data_pln) X = np.vstack([data_cls, data_pln]) y = np.concatenate([np.zeros(len(data_cls)), np.ones(len(data_pln))]) cv = StratifiedKFold(y, n_folds=6, shuffle=True) model = joblib.load(source_folder + "graph_data/sk_models/eigen_ada_pln_%s.plk" % band) score, perm_scores, pval = permutation_test_score(model, X, y, cv=cv, n_permutations=5000, n_jobs=1) result = {"score": score, "perm_scores": perm_scores, "pval": pval} results_all[band] = result np.save(source_folder + "graph_data/perm_test_eigen_pln.npy", results_all)
# Script fragment: permutation test of a linear SVC on iris with 2200 added
# noise features, followed by a histogram of the permutation scores.
X = iris.data
y = iris.target
n_classes = np.unique(y).size
# Some noisy data not correlated with the target (fixed seed)
random = np.random.RandomState(seed=0)
E = random.normal(size=(len(X), 2200))
# Add the noisy columns to the informative features to make the task harder
X = np.c_[X, E]
svm = SVC(kernel="linear")
cv = StratifiedKFold(y, 2)
score, permutation_scores, pvalue = permutation_test_score(
    svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1
)
print("Classification score %s (pvalue : %s)" % (score, pvalue))
###############################################################################
# View histogram of permutation scores
pl.hist(permutation_scores, 20, label="Permutation scores")
ylim = pl.ylim()
# BUG: vlines(..., linestyle='--') fails on older versions of matplotlib
# pl.vlines(score, ylim[0], ylim[1], linestyle='--',
# color='g', linewidth=3, label='Classification Score'
# ' (pvalue %s)' % pvalue)
# pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
# color='k', linewidth=3, label='Luck')
# Draw the real score as a dashed green vertical line spanning the current
# y-limits (plot is used in place of the vlines call above).
pl.plot(2 * [score], ylim, "--g", linewidth=3, label="Classification Score" " (pvalue %s)" % pvalue)
# Script fragment: build per-entry mean node strengths for the "pln" data,
# stack them with the "cls" data, and permutation-test a saved model.
# NOTE(review): `band`, `data_cls`, `results_all` and `source_folder` come
# from code outside this fragment; it may sit inside a loop over bands - confirm.
data_pln = []
for j in range(len(pln_all)):
    tmp = pln_all[j][band]
    # One strengths_und result per element of tmp, averaged over axis 0.
    data_pln.append(
        np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
data_cls = np.asarray(data_cls)
data_pln = np.asarray(data_pln)
# Rows from data_cls are labelled 0, rows from data_pln are labelled 1.
X = np.vstack([data_cls, data_pln])
y = np.concatenate([np.zeros(len(data_cls)), np.ones(len(data_pln))])
cv = StratifiedKFold(y, n_folds=6, shuffle=True)
# Load the previously saved model for this band.
model = joblib.load(source_folder + "graph_data/sk_models/path-strength_ada_%s_pln.plk" % band)
score, perm_scores, pval = permutation_test_score(model, X, y, cv=cv, n_permutations=10000, n_jobs=1, verbose=2)
result = {"score": score, "perm_scores": perm_scores, "pval": pval}
results_all[band] = result
# Save the results collected so far.
np.save(source_folder + "graph_data/perm_test_path-strength_pln.npy", results_all)
def search_all(
        log_dir="data/step4/left_hemi_select_rois",
        conn_filter_fn=lambda conn: np.all([
            i['name'] in get_jhu_names("data/jhu_rois_left_adjusted.csv")
            for i in all_jhu_coordinates()[conn].itervalues()
        ])):
    """Run the model search for every data type / target and log the results.

    For each target column the search is run with and without normalisation,
    the better of the two is kept, and its coefficients, permutation-test
    scores and learning curve are written as CSV files into ``log_dir``.
    Progress is logged to ``search_results.log`` in the same directory.

    Parameters:
        log_dir: directory receiving the log file and all CSV outputs
        conn_filter_fn: predicate handed to filter_roi_conns; pass None to
            skip filtering
    """
    def _log_dir(f_name):
        import os
        return os.path.join(log_dir, f_name)

    import logging
    logging.basicConfig(filename=_log_dir('search_results.log'),
                        level=logging.DEBUG, filemode='w+')

    data_types = {'atw': ['z'], 'adw': ['z']}
    for data_type in data_types:
        full = pd.read_csv("data/step3/full_%s.csv" % data_type)
        if conn_filter_fn is not None:
            full = filter_roi_conns(full, conn_filter_fn)

        for target_col in data_types[data_type]:
            target_col = data_type + '_' + target_col
            print("about to process: %s" % target_col)
            logger = logging.getLogger(target_col)
            logger.info("results for %s", target_col)

            # Keep whichever search scored better; ties go to the
            # normalised one.
            search = run(full, target_col)
            search_normalize = run(full, target_col, normalize=True)
            if search.best_score_ > search_normalize.best_score_:
                normalized = "no"
            else:
                search, normalized = search_normalize, "yes"
            logger.info("normalized: %s", normalized)
            logger.info("best score: %s", search.best_score_)
            logger.info("best params: %s", search.best_params_)

            data, target = separate(full, target_col)
            best_svr = search.best_estimator_.named_steps['svr']
            best_svr.reset_perm_coefs()

            def save_csv(desc, arr):
                f_name = _log_dir('%s_%s.csv' % (target_col, desc))
                np.savetxt(f_name, arr, delimiter=',')

            save_csv('best_coefs', best_svr.coef_)

            score, permutation_pred_scores, p_value = permutation_test_score(
                search.best_estimator_, data.get_values(),
                target.get_values(), scoring=search.scoring, cv=search.cv,
                n_permutations=100)
            logger.info("best score perms: %s", score)
            save_csv('permute_pred_scores', permutation_pred_scores)
            save_csv('permute_max_coefs', best_svr.permute_max_coefs())
            save_csv('permute_min_coefs', best_svr.permute_min_coefs())
            logger.info("p-value: %s", p_value)
            if p_value >= .05:
                # Pass the value so the placeholder is actually filled in.
                logger.warning("p_value of %s >= .05", p_value)

            train_sizes, train_scores, test_scores = learning_curve(
                search.best_estimator_, data.get_values(),
                target.get_values(), cv=search.cv,
                train_sizes=np.linspace(.1, 1.0, 5))
            save_csv("learning_curve_train_sizes", train_sizes)
            save_csv("learning_curve_train_scores", train_scores)
            save_csv("learning_curve_test_scores", test_scores)
clf = RandomForestClassifier(n_estimators=500, max_features=None) elif args.clf == "GradientBoostingClassifier": clf = GradientBoostingClassifier( n_estimators=100, learning_rate=1.0, max_depth=1, random_state=prng ) else: raise ValueError("--clf not understood") # Go acc, perm, p = permutation_test_score( clf, X, y, score_func=None, cv=cv, n_permutations=args.null, n_jobs=5, labels=None, random_state=prng, verbose=0, scoring="accuracy" ) # Save f = open(args.o[0], "a") f.write("{0},{1},{2},{3}\n".format( np.round(acc, decimals=3), np.round(np.mean(perm), decimals=3), np.round(p, decimals=4), args.name) ) f.close()
def do_session(
    ds,
    clf=SVC(kernel="linear", probability=True),
    scoring=score,
    targets="quantized_distance",
    n_jobs=1,
    learning_curve=False,
    permutation_test=False,
):
    """Cross-validate ``clf`` on one session's dataset and collect the results.

    The dataset is detrended per chunk (session id + run), samples flagged as
    ``move`` or ``cue`` are dropped, only samples with ``search > 0`` are
    kept, and when there are more than 3000 features the 3000 best are
    selected with SelectKBest.

    Parameters:
        ds: dataset exposing ``sa``, ``samples``, ``targets``, ``nfeatures``
            and ``get_mapped``
        clf: estimator to evaluate. NOTE: the default instance is created
            once at definition time and shared by every call relying on it
        scoring: scoring argument passed to cross_val
        targets: name of the sample attribute copied into ``ds.sa["targets"]``
        n_jobs: parallelism for the learning curve and permutation test
        learning_curve: if true, also compute a learning curve
        permutation_test: if true, also run a permutation test
    Returns:
        dict with keys "datetime", "mapper", "scores", "learning_curve"
        (tuple or None), "pvalue" (value or None) and, when feature selection
        was applied, "fs" (the fitted selector).
    """
    ds.sa["chunks"] = [
        "{}:{}".format(sid, scan)
        for sid, scan in zip(ds.sa["session_id"], ds.sa["run"])
    ]
    ds.sa["targets"] = ds.sa[targets]

    # fixme: do wiener filter here
    from mvpa2.mappers.detrend import PolyDetrendMapper

    detrender = PolyDetrendMapper(polyord=1, chunks_attr="chunks")
    ds = ds.get_mapped(detrender)
    ds = ds[numpy.logical_not(numpy.logical_or(ds.sa.move, ds.sa.cue)), :]

    # The selector is fitted before the search-only row filter and applied
    # after it. It is tracked explicitly so it can be reported: once the
    # columns are reduced, a feature-count test can no longer tell whether
    # selection took place.
    fs = None
    if ds.nfeatures > 3000:
        fs = SelectKBest(k=3000)
        fs.fit(ds.samples, ds.sa.search > 0)
    ds = ds[ds.sa.search > 0, :]
    if fs is not None:
        ds = ds[:, fs.get_support()]

    logger.info("Configuring cross validation")
    cv = StratifiedKFold(ds.sa.quantized_distance, n_folds=6)  # FIXME: make this a function parameter

    logger.info("Beginning cross validation")
    scores = cross_val(clf, ds.samples, ds.targets, cv, scoring)

    curve = None
    if learning_curve:
        # Imported under an alias so the boolean parameter is not rebound.
        from sklearn.learning_curve import learning_curve as compute_learning_curve

        logger.info("Beginning learning curve analysis")
        train_sizes_abs, train_scores, test_scores = compute_learning_curve(
            clf, ds.samples, ds.targets, n_jobs=n_jobs, verbose=50,
            scoring="accuracy"
        )
        curve = (train_sizes_abs, train_scores, test_scores)

    pvalue = None
    if permutation_test:
        logger.info("Beginning permutation test")
        _, _, pvalue = permutation_test_score(
            clf, ds.samples, ds.targets, cv=cv, n_jobs=n_jobs, verbose=50,
            scoring="accuracy"
        )

    result = {}
    result["datetime"] = datetime.datetime.now()
    if fs is not None:
        result["fs"] = fs
    result["mapper"] = ds.mapper
    result["scores"] = scores
    result["learning_curve"] = curve
    result["pvalue"] = pvalue
    return result
def search_all(log_dir="data/step4/left_hemi_select_rois",
               conn_filter_fn=lambda conn: np.all(
                   [i['name'] in get_jhu_names("data/jhu_rois_left_adjusted.csv")
                    for i in all_jhu_coordinates()[conn].itervalues()])
               ):
    """Run the model search for every data type / target and log the results.

    For each target column the search is run with and without normalisation,
    the better of the two is kept, and its coefficients, permutation-test
    scores and learning curve are written as CSV files into ``log_dir``.
    Progress is logged to ``search_results.log`` in the same directory.

    Parameters:
        log_dir: directory receiving the log file and all CSV outputs
        conn_filter_fn: predicate handed to filter_roi_conns; pass None to
            skip filtering
    """
    def _log_dir(f_name):
        import os
        return os.path.join(log_dir, f_name)

    import logging
    logging.basicConfig(filename=_log_dir('search_results.log'),
                        level=logging.DEBUG, filemode='w+')

    data_types = {'atw': ['z'], 'adw': ['z']}
    for data_type in data_types:
        full = pd.read_csv("data/step3/full_%s.csv" % data_type)
        if conn_filter_fn is not None:
            full = filter_roi_conns(full, conn_filter_fn)

        for target_col in data_types[data_type]:
            target_col = data_type + '_' + target_col
            print("about to process: %s" % target_col)
            logger = logging.getLogger(target_col)
            logger.info("results for %s", target_col)

            # Keep whichever search scored better; ties go to the
            # normalised one.
            search = run(full, target_col)
            search_normalize = run(full, target_col, normalize=True)
            if search.best_score_ > search_normalize.best_score_:
                normalized = "no"
            else:
                search, normalized = search_normalize, "yes"
            logger.info("normalized: %s", normalized)
            logger.info("best score: %s", search.best_score_)
            logger.info("best params: %s", search.best_params_)

            data, target = separate(full, target_col)
            best_svr = search.best_estimator_.named_steps['svr']
            best_svr.reset_perm_coefs()

            def save_csv(desc, arr):
                f_name = _log_dir('%s_%s.csv' % (target_col, desc))
                np.savetxt(f_name, arr, delimiter=',')

            save_csv('best_coefs', best_svr.coef_)

            score, permutation_pred_scores, p_value = permutation_test_score(
                search.best_estimator_, data.get_values(),
                target.get_values(), scoring=search.scoring, cv=search.cv,
                n_permutations=100
            )
            logger.info("best score perms: %s", score)
            save_csv('permute_pred_scores', permutation_pred_scores)
            save_csv('permute_max_coefs', best_svr.permute_max_coefs())
            save_csv('permute_min_coefs', best_svr.permute_min_coefs())
            logger.info("p-value: %s", p_value)
            if p_value >= .05:
                # Pass the value so the placeholder is actually filled in.
                logger.warning("p_value of %s >= .05", p_value)

            train_sizes, train_scores, test_scores = learning_curve(
                search.best_estimator_, data.get_values(),
                target.get_values(), cv=search.cv,
                train_sizes=np.linspace(.1, 1.0, 5))
            save_csv("learning_curve_train_sizes", train_sizes)
            save_csv("learning_curve_train_scores", train_scores)
            save_csv("learning_curve_test_scores", test_scores)
# Script fragment: build the ANOVA / graph pipelines and their grid searches,
# then cross-validate and permutation-test the ANOVA pipeline.
# NOTE(review): `scaler`, `svm`, `rest`, `coords`, `block`, `cond` and `y`
# are defined outside this fragment.
k=60
feature_selection = SelectKBest(f_classif, k=k)
# Pipelines with and without the ANOVA feature-selection step.
pipeline_anova = Pipeline([('anova', feature_selection), ('scale', scaler),('classif_name', svm)])
pipeline = Pipeline([('scale', scaler),('classif_name', svm)])
grid = GridSearchCV(pipeline_anova, param_grid={'anova__k':[20,60,100,200]}, verbose=1)
gr=GraphTransformer(rest=rest, coords=coords, kind='mixed', method='correlation',spars=0.5,geo_alpha=0.00015)
# One parameter grid per graph kind; only the 'functional' grid also varies
# the number of ANOVA features.
param = [
    {'graph__kind': ['geometric'], 'graph__method':['distance'],'graph__spars':[0.,0.5],'graph__geo_alpha':[0.00015]},
    {'graph__kind': ['functional'], 'graph__method':['covariance','correlation'],'graph__spars':[0.1,0.3,0.5,0.7],'anova__k':[10,30,60,100,200]},
    {'graph__kind': ['mixed'], 'graph__method':['covariance','correlation'],'graph__spars':[0.3,0.5,0.7]},
    ]
pipeline_graph_anova = Pipeline([('graph',gr),('anova', feature_selection), ('scale', scaler),('classif_name', svm)])
grid_graph = GridSearchCV(pipeline_graph_anova, param_grid=param, verbose=1)
#nested_cv_scores = cross_val_score(grid, cond, y,cv=cv)
#print("Nested CV score: %.4f" % np.mean(nested_cv_scores))
########################
# Cat IMP/DES CROSS VALIDATION STIM
# Leave-one-label-out split over `block`, used for both the plain
# cross-validation and the permutation test of the ANOVA pipeline.
cv = LeaveOneLabelOut(block)
score_cv = cross_val_score(pipeline_anova, cond, y,cv=cv)
null_score_cv= permutation_test_score(pipeline_anova, cond, y,cv=cv)
#weights=pipeline_anova.named_steps['classif_name'].coef_
#plot_selectedregions(pipeline_anova,masker,weights=weights,anova_name='anova')
# Notebook-export fragment (Python 2): out-of-fold linear-regression
# predictions, a permutation test of the same model, and a histogram of the
# permutation distribution.
# NOTE(review): `result`, `Xnew`, `y` and `pdata` are defined outside this
# fragment.
for train, test in cv.StratifiedKFold(pdata.classtype, 18):
    model = LinearRegression()
    model.fit(Xnew[train], y[train])
    # Store (true values, predictions) for the held-out fold.
    result.append([y[test], model.predict(Xnew[test])])
result_lsas = result
# Flatten the per-fold pairs into two parallel lists.
y_true = []; y_pred = []
for a,b in result:
    y_true.extend(a.tolist())
    y_pred.extend(b.tolist())
# One row per sample: column 0 is the true value, column 1 the prediction.
result = np.array(np.vstack((y_true, y_pred))).T
# <codecell>
value, distribution, pvalue = cv.permutation_test_score(LinearRegression(), Xnew, y, score_func=skm.mean_square_error, cv=cv.StratifiedKFold(pdata.classtype, 18), n_permutations=2000, )
# <codecell>
# Histogram of the permutation scores with the real score as a red line.
hist(distribution, 32, alpha=0.5, color='gray')
plot([value, value], [0,200], 'r')
# NOTE(review): the title shows 1 - pvalue, presumably because a lower error
# is better - confirm.
title('p=%.2f' % (1-pvalue))
xlabel('Mean square error')
# <codecell>
# Correlation between true values and out-of-fold predictions.
print np.corrcoef(result.T)
Rmodel(result.T[0], result.T[1])
iris = datasets.load_iris() X = iris.data y = iris.target n_classes = np.unique(y).size # Some noisy data not correlated random = np.random.RandomState(seed=0) E = random.normal(size=(len(X), 2200)) # Add noisy data to the informative features for make the task harder X = np.c_[X, E] svm = SVC(kernel='linear') cv = StratifiedKFold(y, 2) score, permutation_scores, pvalue = permutation_test_score( svm, X, y, zero_one_score, cv=cv, n_permutations=100, n_jobs=1) print "Classification score %s (pvalue : %s)" % (score, pvalue) ############################################################################### # View histogram of permutation scores pl.hist(permutation_scores, 20, label='Permutation scores') ylim = pl.ylim() # BUG: vlines(..., linestyle='--') fails on older versions of matplotlib #pl.vlines(score, ylim[0], ylim[1], linestyle='--', # color='g', linewidth=3, label='Classification Score' # ' (pvalue %s)' % pvalue) #pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--', # color='k', linewidth=3, label='Luck') pl.plot(2 * [score], ylim, '--g', linewidth=3, label='Classification Score'
# Script fragment (Python 2): permutation-test BrainReg on the change in LSAS
# score (pre minus post) and save a histogram of the permutation scores.
# NOTE(review): `outdir` and the incoming `X` are defined outside this fragment.
_, pdata = get_subject_data(X)
X = pdata.subject
y = pdata.lsas_pre - pdata.lsas_post
n_subjects, = X.shape
# The bare string below is disabled code kept as a literal; it has no effect.
""" result = [] for train, test in cv.StratifiedKFold(pdata.classtype, 18): model = BrainReg().fit(X[train], y[train]) result.append((y[test], model.predict(X[test]))) """
value, distribution, pvalue = cv.permutation_test_score(BrainReg(), X, y, skm.mean_square_error, cv=cv.StratifiedKFold( pdata.classtype, 18), n_permutations=200, n_jobs=4)
print distribution
print value
print pvalue
# Histogram of the permutation scores with the real score as a red line.
plt.figure()
plt.hist(distribution, 128)
plt.plot([value, value], [0, 50], color='r')
plt.title('p = %.3f' % pvalue)
plt.savefig(os.path.join(outdir,"permtest_hist.png"),dpi=100,format="png")
#model, varidx, labels, nlabels = _fit(X, y, pdata.lsas_pre[:,None])
#cv_scores = cross_val_score(svc, fmri_masked, target, cv=cv, n_jobs=-1, verbose=10) session_label = labels['chunks'] session_label = session_label[condition_mask] cv = LeaveOneLabelOut(labels=session_label) cv_scores_one = cross_val_score(svc, fmri_masked, target, cv=cv) #使用F1评分 #cv_scores = cross_val_score(svc, fmri_masked, target, cv=cv, scoring='f1') #计算平均分类准确率 classification_accuracy = np.mean(cv_scores) classification_accuracy_one = np.mean(cv_scores_one) #计算随机分类器的交叉验证得分 null_cv_scores = cross_val_score(DummyClassifier(), fmri_masked, target, cv=cv) #置换检验 null_cv_scores_2 = permutation_test_score(svc, fmri_masked, target, cv=cv) # Retrieve the SVC discriminating weights coef_ = svc.coef_ # Reverse masking thanks to the Nifti Masker coef_img = nifti_masker.inverse_transform(coef_) # Save the coefficients as a Nifti image coef_img.to_filename('haxby_svc_weights.nii') from nilearn import image from nilearn.plotting import plot_stat_map, show import nibabel as nib # Plot the mean image because we have no anatomic data
X = np.array(X)
y = np.array(y)


def _make_sax_pipeline():
    """Return a fresh SAX -> character n-gram features -> LinearSVC pipeline."""
    return Pipeline([
        ('saxizer', SAXTransformer(points_per_symbol=1)),
        ('features', FeatureUnion([
            ('countvect', CountVectorizer(min_df=1, analyzer='char',
                                          ngram_range=(1, 10))),
            ('tfidfvect', TfidfVectorizer(min_df=1, analyzer='char',
                                          ngram_range=(1, 2))),
        ])),
        ('svc', svm.LinearSVC()),
    ])


# Both pipelines are configured identically; each gets its own instances.
base_pipe = _make_sax_pipeline()
bop_pipe = _make_sax_pipeline()

# Permutation test for each pipeline: 2 stratified folds, 5 permutations,
# accuracy scoring, 4 parallel jobs.
for i in (bop_pipe, base_pipe):
    score, permutation_scores, pvalue = permutation_test_score(
        i, X, y,
        scoring="accuracy",
        cv=StratifiedKFold(y, 2),
        n_permutations=5,
        n_jobs=4)
    print("Score %s (pvalue : %s)" % (score, pvalue))

# svm_pipe = Pipeline([('svc', clf)])
# score, permutation_scores, pvalue = permutation_test_score(
#     svm_pipe, X, y, scoring="accuracy", cv=StratifiedKFold(y, 2), n_permutations=100, n_jobs=4)
# print("Baseline Classification score %s (pvalue : %s)" % (score, pvalue))

# X2 = np.array([SAX(i).sax() for i in X])
# svm_pipe = Pipeline([('svc', clf)])
# score, permutation_scores, pvalue = permutation_test_score(
#     svm_pipe, X2, y, scoring="accuracy", cv=StratifiedKFold(y, 2), n_permutations=100, n_jobs=1)
# print("Baseline Classification score %s (pvalue : %s)" % (score, pvalue))
def do_session(ds, clf=SVC(kernel='linear', probability=True), scoring=score,
               targets='quantized_distance', n_jobs=1, n_features=3000,
               learning_curve=False, permutation_test=False):
    """Run the cross-validated analysis for one session's dataset.

    The dataset is detrended per chunk, samples flagged as ``move`` or ``cue``
    are dropped, an optional univariate feature selection is applied, and the
    classifier is cross-validated. Learning-curve and permutation analyses
    are run on request.

    Parameters
    ----------
    ds : dataset
        Object exposing ``sa``, ``samples``, ``targets``, ``nfeatures``,
        ``mapper`` and ``get_mapped`` (presumably a PyMVPA Dataset -- confirm).
    clf : estimator
        Classifier passed to the cross-validation helpers.
        NOTE(review): the default instance is created once at definition time
        and shared by every call that relies on it.
    scoring : object
        Passed through to ``cross_val``.
    targets : str
        Name of the sample attribute copied into ``ds.sa['targets']``.
    n_jobs : int
        Parallelism for the learning-curve and permutation analyses.
    n_features : int
        Maximum number of features kept; selection only runs when the dataset
        has more features than this.
    learning_curve : bool
        When true, also compute a learning curve (accuracy scoring).
    permutation_test : bool
        When true, also run a permutation test (accuracy scoring).

    Returns
    -------
    dict
        Keys: ``datetime``, ``mapper``, ``scores``, ``learning_curve`` (tuple
        or None), ``pvalue`` (float or None) and, when feature selection was
        applied, ``fs`` (the fitted selector).
    """
    # One chunk per (session, run) pair; used as the detrending unit below.
    ds.sa['chunks'] = [
        '{}:{}'.format(sid, scan)
        for sid, scan in zip(ds.sa['session_id'], ds.sa['run'])
    ]
    ds.sa['targets'] = ds.sa[targets]

    #fixme: do wiener filter here
    from mvpa2.mappers.detrend import PolyDetrendMapper
    detrender = PolyDetrendMapper(polyord=1, chunks_attr='chunks')
    ds = ds.get_mapped(detrender)

    # Drop samples flagged as movement or cue.
    ds = ds[numpy.logical_not(numpy.logical_or(ds.sa.move, ds.sa.cue)), :]

    # Fit the selector before the search-only row filter so it sees both
    # search and non-search samples (labels are `search > 0`).
    fs = None
    if ds.nfeatures > n_features:
        fs = SelectKBest(k=n_features)
        fs.fit(ds.samples, ds.sa.search > 0)

    ds = ds[ds.sa.search > 0, :]

    # Row filtering does not change the feature count, so "a selector was
    # fitted" is the same condition the original re-tested here.
    if fs is not None:
        ds = ds[:, fs.get_support()]

    logger.info('Configuring cross validation')
    cv = StratifiedKFold(ds.sa.quantized_distance, n_folds=6)  #FIXME: make this a function parameter

    logger.info('Beginning cross validation')
    scores = cross_val(clf, ds.samples, ds.targets, cv, scoring)

    curve = None
    if learning_curve:
        # Aliased so the import does not rebind the `learning_curve` flag.
        from sklearn.learning_curve import learning_curve as compute_learning_curve
        logger.info('Beginning learning curve analysis')
        curve = compute_learning_curve(
            clf, ds.samples, ds.targets,
            n_jobs=n_jobs, verbose=50, scoring='accuracy')

    pvalue = None
    if permutation_test:
        logger.info('Beginning permutation test')
        # Only the p-value is reported; the other two values are unused.
        _perm_score, _perm_scores, pvalue = permutation_test_score(
            clf, ds.samples, ds.targets,
            cv=cv, n_jobs=n_jobs, verbose=50, scoring='accuracy')

    result = {}
    result['datetime'] = datetime.datetime.now()
    # The original tested `ds.nfeatures > n_features` here, which is never
    # true once the columns have been reduced to `n_features`, so the fitted
    # selector was silently left out of the result.
    if fs is not None:
        result['fs'] = fs
    result['mapper'] = ds.mapper
    #result['clf'] = clf
    #result['cv'] = cv
    #result['scoring'] = scoring
    result['scores'] = scores
    # (train_sizes_abs, train_scores, test_scores) or None
    result['learning_curve'] = tuple(curve) if curve is not None else None
    result['pvalue'] = pvalue
    return result
iris = datasets.load_iris() X = iris.data y = iris.target n_classes = np.unique(y).size # Some noisy data not correlated random = np.random.RandomState(seed=0) E = random.normal(size=(len(X), 2200)) # Add noisy data to the informative features for make the task harder X = np.c_[X, E] svm = SVC(kernel='linear') cv = StratifiedKFold(y, 2) score, permutation_scores, pvalue = permutation_test_score( svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1) print("Classification score %s (pvalue : %s)" % (score, pvalue)) ############################################################################### # View histogram of permutation scores pl.hist(permutation_scores, 20, label='Permutation scores') ylim = pl.ylim() # BUG: vlines(..., linestyle='--') fails on older versions of matplotlib #pl.vlines(score, ylim[0], ylim[1], linestyle='--', # color='g', linewidth=3, label='Classification Score' # ' (pvalue %s)' % pvalue) #pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--', # color='k', linewidth=3, label='Luck') pl.plot(2 * [score], ylim, '--g', linewidth=3, label='Classification Score'
def complex_networks_mapping_uri_data(directory):
    """Classify 'pandit' vs 'ctrl' graphs from their complex-network encoding.

    Reads 100 gzipped edge lists per group from ``directory``, maps each graph
    to a feature vector with ``complex_network_mapping``, interleaves the two
    groups, standardises the features, and prints the outcome of several
    cross-validated classifiers, a dummy baseline and a permutation test.
    Nothing is returned.

    Parameters
    ----------
    directory: string
        The path of the directory containing all data files.
    """
    # Computing the graph encoding
    graphs = []
    classes = []
    subjects = []
    vects = []

    # have 100 graphs already built
    niter = 100
    for subjid in ['pandit', 'ctrl']:
        thresh_dens = '0.1'
        for n in range(niter):
            subj_name = '%s_%d' % (subjid, n)
            g_name = 'iter%d.a.%s.dens_%s.edgelist.gz' % \
                (n, subjid, thresh_dens)
            el = nx.read_edgelist(os.path.join(directory, g_name),
                                  nodetype=int)
            g = nx.Graph()
            # there are 148 regions, or nodes; adding them first keeps nodes
            # that have no edge in the list
            g.add_nodes_from(range(148))
            g.add_edges_from(el.edges())
            graphs.append(g)
            subjects.append(subj_name)
            classes.append(subjid)
            vects.append(complex_network_mapping(graphs[-1]))
            print("Graph built for subject %s and class %s." %
                  (subj_name, subjid))

    # Reordering data for the leave-one-subject-out cross-validation:
    # positions 2*i and 2*i + 1 hold the i-th item of the first and of the
    # second group. Floor division keeps the indices integral on Python 3.
    half = len(graphs) // 2
    nm_graphs = [None] * len(graphs)
    nm_classes = [None] * len(classes)
    nm_subjects = [None] * len(subjects)
    nm_vects = [None] * len(vects)
    for i in range(half):
        nm_graphs[i*2] = graphs[i]
        nm_graphs[i*2 + 1] = graphs[half + i]
        nm_classes[i*2] = classes[i]
        nm_classes[i*2 + 1] = classes[half + i]
        nm_subjects[i*2] = subjects[i]
        nm_subjects[i*2 + 1] = subjects[half + i]
        nm_vects[i*2] = vects[i]
        nm_vects[i*2 + 1] = vects[half + i]

    print(nm_subjects)
    print(nm_classes)

    nm_vects = np.array(nm_vects)
    # nm_vects = np.where(nm_vects == inf, 10, nm_vects)
    # nm_vects = np.where(nm_vects == nan, 10, nm_vects)

    ss = StandardScaler()
    X = ss.fit_transform(nm_vects)
    print(X)
    print(np.mean(X))
    print(np.max(X))

    labels = np.array(nm_classes)

    def pair_folds():
        # `niter` unshuffled folds over the interleaved samples, so every
        # test fold is one consecutive pair.
        return KFold(len(nm_classes), niter, shuffle=False)

    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

    tuned_parameters2 = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                          'C': [1, 10, 100, 1000]},
                         {'kernel': ['sigmoid'], 'gamma': [1e-3, 1e-4],
                          'C': [1, 10, 100, 1000]},
                         {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

    from sklearn.grid_search import GridSearchCV

    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=pair_folds())
    # NOTE(review): clf2 is constructed but never fitted or used below.
    clf2 = GridSearchCV(SVC(C=1), tuned_parameters2, cv=pair_folds())

    clf.fit(X, labels)
    clf.best_params_  # value is discarded; the estimators below are hard-coded

    clf = SVC(C=100, kernel='linear')

    print("Now getting cross validation ")

    cvr = SVC(C=1000, gamma=.001, kernel='rbf')
    # NOTE(review): the next two results are overwritten before being read;
    # only the SGD scores are printed. (An exact duplicate of the first call
    # was removed.)
    cv_scores = cross_val_score(cvr, X, labels, cv=pair_folds())
    cv_scores = cross_val_score(clf, X, labels, cv=pair_folds())

    from sklearn.linear_model import SGDClassifier
    clfGD = SGDClassifier(loss='log')
    clfGD.fit(X, labels)
    cv_scores = cross_val_score(clfGD, X, labels, cv=pair_folds())

    print(cv_scores)
    print(np.mean(cv_scores))
    print("Accuracy: %0.2f (+/- %0.2f)" %
          (cv_scores.mean(), cv_scores.std() * 2))

    from sklearn.dummy import DummyClassifier
    null_scores = cross_val_score(DummyClassifier(), X, labels,
                                  cv=pair_folds())
    print(null_scores.mean())

    from sklearn.cross_validation import permutation_test_score
    # permutation_test_score returns (score, permutation_scores, pvalue);
    # calling .mean() on that tuple raised AttributeError. The mean of the
    # permutation (null) scores is what is reported.
    _perm_score, null_scores_perm, _perm_pvalue = permutation_test_score(
        cvr, X, labels, cv=pair_folds())
    print(null_scores_perm.mean())
# NOTE(review): `band` and `results_all` are not defined in this fragment --
# presumably this is the body of a loop over frequency bands; confirm.

# For every entry of cls_all: node strengths (bct.strengths_und) of each graph
# stored under `band`, averaged across those graphs (axis 0).
data_cls = []
for j in range(len(cls_all)):
    tmp = cls_all[j][band]
    data_cls.append(
        np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
# Same feature extraction for pln_all.
data_pln = []
for j in range(len(pln_all)):
    tmp = pln_all[j][band]
    data_pln.append(
        np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
data_cls = np.asarray(data_cls)
data_pln = np.asarray(data_pln)
# Stack both groups into one design matrix; label 0 = cls rows, 1 = pln rows.
X = np.vstack([data_cls, data_pln])
y = np.concatenate([np.zeros(len(data_cls)), np.ones(len(data_pln))])
cv = StratifiedKFold(y, n_folds=6, shuffle=True)
# NOTE(review): joblib.load unpickles the file -- only load trusted models.
model = joblib.load(source_folder + "graph_data/sk_models/path-strength_ada_%s_pln.plk" % band)
# Permutation test of the stored model: 10000 permutations, single job.
score, perm_scores, pval = permutation_test_score(
    model, X, y, cv=cv, n_permutations=10000, n_jobs=1, verbose=2)
result = {"score": score, "perm_scores": perm_scores, "pval": pval}
results_all[band] = result
# results_all is indexed by band above; it is saved as a whole here.
np.save(source_folder + "graph_data/perm_test_path-strength_pln.npy", results_all)
"""Permutation test of a logistic regression on a synthetic data set."""
import numpy as np
from sklearn import linear_model
from sklearn.cross_validation import StratifiedKFold, permutation_test_score
from sklearn import datasets

# Synthetic classification problem: 100 samples, 5 features.
X, y = datasets.make_classification(
    n_samples=100,
    n_features=5,
)
n_classes = np.unique(y).size

cls = linear_model.LogisticRegression()
cv = StratifiedKFold(y, 2)

# F1-scored permutation test: 2 stratified folds, 10 permutations, one job.
permutation_result = permutation_test_score(
    cls,
    X,
    y,
    scoring="f1",
    cv=cv,
    n_permutations=10,
    n_jobs=1,
)
score, permutation_scores, pvalue = permutation_result

print("Classification score %s (pvalue : %s)" % (score, pvalue))
# The one-element tuple makes the single %s argument explicit.
print("Permutation scores %s" % (permutation_scores,))
# For every frequency band: build one feature vector per entry of cls_all and
# pln_all, then run a permutation test of the stored model for that band.
for k, band in enumerate(bands.keys()):
    # Node strengths (bct.strengths_und) of each graph stored under `band`,
    # averaged across those graphs (axis 0).
    data_cls = np.asarray([
        np.asarray(
            [bct.strengths_und(g) for g in cls_all[j][band]]).mean(axis=0)
        for j in range(len(cls_all))
    ])
    data_pln = np.asarray([
        np.asarray(
            [bct.strengths_und(g) for g in pln_all[j][band]]).mean(axis=0)
        for j in range(len(pln_all))
    ])

    # Label 0 = cls rows, label 1 = pln rows.
    X = np.vstack([data_cls, data_pln])
    y = np.concatenate([np.zeros(len(data_cls)),
                        np.ones(len(data_pln))])

    cv = StratifiedKFold(y, n_folds=6, shuffle=True)

    model_path = source_folder + (
        "graph_data/sk_models/eigen_ada_pln_%s.plk" % band)
    model = joblib.load(model_path)

    score, perm_scores, pval = permutation_test_score(
        model, X, y,
        cv=cv,
        n_permutations=5000,
        n_jobs=1)

    result = dict(score=score, perm_scores=perm_scores, pval=pval)
    results_all[band] = result

# NOTE(review): indentation was lost in this view; the save is placed after
# the loop, which yields the same final file -- confirm against the original
# layout if per-band checkpointing was intended.
np.save(source_folder + "graph_data/perm_test_eigen_pln.npy", results_all)
#print "The different cross_scores: ", cross_score_LDA #### Naive bayes #### from sklearn.naive_bayes import GaussianNB ngb = GaussianNB() cross_score_NB = cross_val_score(ngb, X_scl, y, scoring="accuracy", cv = loo, n_jobs = 8, verbose = True) print "Cross val score: ", cross_score_NB.mean() print "The different cross_scores: ", cross_score_NB score_NB, permutation_score_NB, pvalue_NB = permutation_test_score(ngb, X_scl, y, scoring="accuracy", cv = cv, n_permutations = 2000, n_jobs = n_jobs, verbose = True) print 'Classification score:', score_NB, 'p-value:', pvalue_NB #### SVM #### from sklearn.svm import LinearSVC svc = LinearSVC() cross_score_SVM = cross_val_score(svc, X_scl, y, scoring="accuracy", cv = loo, n_jobs = 8, verbose = True) print "Cross val score: ", cross_score_SVM.mean() print "The different cross_scores: ", cross_score_SVM score_SVM, permutation_score_SVM, pvalue_SVM = permutation_test_score(svc, X, y,
n_classes = np.unique(y).size # Some noisy data not correlated random = np.random.RandomState(seed=0) E = random.normal(size=(len(X), 2200)) # Add noisy data to the informative features for make the task harder X = np.c_[X, E] svm = SVC(kernel='linear') cv = StratifiedKFold(y, 2) score, permutation_scores, pvalue = permutation_test_score(svm, X, y, zero_one_score, cv=cv, n_permutations=100, n_jobs=1) print "Classification score %s (pvalue : %s)" % (score, pvalue) ############################################################################### # View histogram of permutation scores pl.hist(permutation_scores, 20, label='Permutation scores') ylim = pl.ylim() # BUG: vlines(..., linestyle='--') fails on older versions of matplotlib #pl.vlines(score, ylim[0], ylim[1], linestyle='--', # color='g', linewidth=3, label='Classification Score' # ' (pvalue %s)' % pvalue) #pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',