### Dimension reduction.
# Univariate feature selection scored with an ANOVA F-test (f_classif);
# only the 500 best-ranked features are kept.
feature_selection = SelectKBest(f_classif, k=500)

### Chain the dimension reduction with the prediction function `clf`.
pipeline_steps = [
    ('anova', feature_selection),
    ('svc', clf),
]
anova_svc = Pipeline(pipeline_steps)

### Cross-validation scheme used for validation.
# LeaveOneLabelOut on `session`, i.e. a leave-one-session-out scheme.
cv = LeaveOneLabelOut(session)

### Prediction accuracy of each fold (one fold per session), then the mean
### accuracy over all folds.
cv_scores = cross_val_score(anova_svc, X, y,
                            cv=cv, n_jobs=-1, verbose=1)
classification_accuracy = np.mean(cv_scores)

#### Same test using the supervised clustering (kept disabled)
#estimator = SVC(kernel='linear', C=1.)
#A = grid_to_graph(n_x=img_shape[0], n_y=img_shape[1], n_z=img_shape[2], mask=mask)
#print "computed connectivity matrix"
#sc = SupervisedClusteringClassifier(estimator=estimator, connectivity=A, n_jobs=1,
#        cv=5, n_iterations=50, verbose=1)
#cv_scores = cross_val_score(sc, X, y, cv=cv, n_jobs=4, verbose=1)
#
#sc.fit(X, y)
#computed_coefs = sc.inverse_transform()
# Compute a cross-validated classification score for each of the 6 patients,
# appending every per-patient mean score to `scores`, then report the average.
for i in range(6):
    # Data corresponding to the current patient.
    X = data[i].data
    y = data[i].target
    mask = data[i].mask
    img_shape = mask.shape
    # Keep only the features located inside the mask.
    # NOTE(review): assumes the trailing axes of X match mask.shape -- confirm.
    X = X[:, mask != 0]

    # Binarize y to perform classification.  The builtin `bool` is used
    # because the `np.bool` alias is deprecated/removed in recent NumPy.
    y = y.astype(bool)

    # Connectivity matrix of the masked grid, handed to the clustering step.
    A = grid_to_graph(n_x=img_shape[0], n_y=img_shape[1],
                      n_z=img_shape[2], mask=mask)
    estimator = SVC(kernel='linear', C=1.)
    sc = SupervisedClusteringClassifier(estimator=estimator, n_jobs=1,
                                        n_iterations=150, cv=6,
                                        connectivity=A, verbose=0)
    cv = StratifiedKFold(y, 10)

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Computing score for the patient %d on 6" % i)
    cv_scores = cross_val_score(sc, X, y, cv=cv, n_jobs=8, verbose=0)
    mean_score = np.mean(cv_scores)

    # Fit on the patient's full data; `sc.labels_` is read just below to
    # report the number of parcels.
    sc.fit(X, y)
    print(". Classification score for patient %d : %f" % (i, mean_score))
    print(". Number of parcels : %d" % len(np.unique(sc.labels_)))
    scores.append(mean_score)

print("====================================")
# The value must be interpolated with `%`; a comma here would print the
# literal "%f" followed by the number.
print("Average score for the whole dataset : %f" % np.mean(scores))
# NOTE(review): these statements repeat the tail of the per-patient scoring
# code and rely on `i`, `X`, `y`, `data` and `scores` being set beforehand --
# confirm whether this duplicate is intended.
mask = data[i].mask
img_shape = mask.shape
# Keep only the features located inside the mask.
X = X[:, mask != 0]

# Binarize y to perform classification.  The builtin `bool` is used because
# the `np.bool` alias is deprecated/removed in recent NumPy.
y = y.astype(bool)

# Connectivity matrix of the masked grid, handed to the clustering step.
A = grid_to_graph(n_x=img_shape[0], n_y=img_shape[1],
                  n_z=img_shape[2], mask=mask)
estimator = SVC(kernel='linear', C=1.)
sc = SupervisedClusteringClassifier(estimator=estimator, n_jobs=1,
                                    n_iterations=150, cv=6,
                                    connectivity=A, verbose=0)
cv = StratifiedKFold(y, 10)

# Single-argument print(...) gives the same output on Python 2 and 3.
print("Computing score for the patient %d on 6" % i)
cv_scores = cross_val_score(sc, X, y, cv=cv, n_jobs=8, verbose=0)
mean_score = np.mean(cv_scores)

# Fit on the full data; `sc.labels_` is read just below to report the
# number of parcels.
sc.fit(X, y)
print(". Classification score for patient %d : %f" % (i, mean_score))
print(". Number of parcels : %d" % len(np.unique(sc.labels_)))
scores.append(mean_score)

print("====================================")
# The value must be interpolated with `%`; a comma here would print the
# literal "%f" followed by the number.
print("Average score for the whole dataset : %f" % np.mean(scores))