# NOTE(review): this chunk is truncated — it opens inside the `else:` branch
# of a conditional (presumably `if TEST_MODE:`, which lies before this view)
# and ends mid-expression inside `epac.Methods(...)`. Tokens below are
# unchanged from the original; only formatting and comments were added.
    N_FOLDS_EVAL = 3
else:
    # Full (non-test) settings: deeper nested CV and 10-fold outer evaluation.
    N_FOLDS_NESTED = 5
    N_FOLDS_EVAL = 10

# Results are written under <DB_PATH>/results/svm_feature_selection/.
OUT_DIR = os.path.join(DB_PATH, 'results', 'svm_feature_selection/')
if not os.path.exists(OUT_DIR):
    os.makedirs(OUT_DIR)
# Template used to name a workflow per image set.
WF_NAME_PATTERN = "svm_feature_selection_{images}"

#########################
# 0th step: access data #
#########################
# Load the clinical CSV into a DataFrame via the project's data_api helper.
csv_file_name = data_api.get_clinic_file_path(DB_PATH)
df = data_api.read_clinic_file(csv_file_name)
# Load the brain-mask volume; voxels where mask != 0 are retained.
# NOTE(review): `get_data()` is the legacy nibabel accessor (deprecated in
# nibabel >= 3); this file appears to target the older API.
babel_mask = nibabel.load(data_api.get_mask_file_path(DB_PATH))
mask = babel_mask.get_data()
binary_mask = mask != 0
# Cached image data (HDF5); `tables.openFile` is the PyTables 2.x API
# (renamed `open_file` in PyTables 3).
h5file = tables.openFile(LOCAL_PATH)

####################
# Create workflows #
####################
# Base workflow: SVM + feature selection
# NOTE(review): expression truncated at the end of this chunk — the
# SelectKBest(...) call (and the rest of the Methods list) continues
# beyond this view.
svms = pipelines = epac.Methods(*[
    epac.Pipe(
        sklearn.feature_selection.SelectKBest(
# NOTE(review): this chunk opens mid-call — the lines below are the trailing
# keyword arguments of a `parser.add_argument(...)` whose opening lies before
# this view. Tokens are unchanged from the original; only formatting and
# comments were added.
default=DEFAULT_WF_NAME,
    help='Name of the workflow (default: %s)' % (DEFAULT_WF_NAME))
args = parser.parse_args()

# Select database locations depending on run mode: a small local copy for
# TEST_MODE, the full NeuroSpin dataset otherwise.
if TEST_MODE:
    DB_PATH = '/volatile/DB/micro_subdepression/'
    LOCAL_PATH = '/volatile/DB/cache/micro_subdepression.hdf5'
else:
    DB_PATH = '/neurospin/brainomics/2013_imagen_subdepression'
    LOCAL_PATH = '/volatile/DB/cache/imagen_subdepression.hdf5'

# Compute a priori probabilities
# Class priors are estimated from the 'group_sub_ctl' column of the clinical
# table: P(class) = count(class) / N_SUBJECTS.
clinic_file_path = data_api.get_clinic_file_path(DB_PATH)
df = data_api.read_clinic_file(clinic_file_path)
N_SUBJECTS = float(df.shape[0])
counts = df['group_sub_ctl'].value_counts()
N_CONTROL = float(counts['control'])
P_CONTROL = N_CONTROL / N_SUBJECTS
N_SUBDEP = float(counts['sub'])
P_SUBDEP = N_SUBDEP / N_SUBJECTS

# Reload a previously created workflow from disk; fail fast if the expected
# workflow directory does not exist.
OUT_DIR = os.path.join(DB_PATH, 'results', 'svm')
WORKFLOW_PATH = os.path.join(OUT_DIR, args.wf_name)
if not os.path.exists(WORKFLOW_PATH):
    raise Exception('{path} not found'.format(path=WORKFLOW_PATH))
svms_auto_cv = epac.map_reduce.engine.SomaWorkflowEngine.load_from_gui(
    WORKFLOW_PATH)
# Python 2 print statement — this file predates `from __future__ import
# print_function`.
print "Workflow loaded"