plot_decision_function(X, y, clf, ax[0], '(a)') clf = make_pipeline(SMOTE(random_state=0), LinearSVC(random_state=0)) clf.fit(X, y) plot_decision_function(X, y, clf, ax[1], '(b)') fig.tight_layout() plt.savefig( join(analysis_path, 'resampling_decision_function.pdf'), bbox_inches='tight', pad_inches = 0 ) if __name__=='__main__': data_path, results_path, analysis_path = generate_paths() # load datasets datasets = load_datasets(data_dir=data_path) # load results results = [] for name in RESULTS_NAMES: file_path = join(results_path, f'{name}.pkl') results.append(pd.read_pickle(file_path)) # combine and select results results = combine_results(*results) results = select_results(results, oversamplers_names=OVRS_NAMES, classifiers_names=CLFS_NAMES) # datasets description
('KNN', KNeighborsClassifier(), {'n_neighbors': [3, 5, 8]}), ('RF', RandomForestClassifier(), {'max_depth': [None, 3, 6], 'n_estimators': [50, 100, 200]}) ], 'scoring': ['accuracy', 'f1_macro', 'geometric_mean_score_macro'], 'n_splits': 5, 'n_runs': 3, 'rnd_seed': 0, 'n_jobs': -1 } if __name__ == '__main__': # Extract paths data_dir, results_dir, _ = generate_paths() # Load datasets datasets = load_datasets(data_dir=data_dir) # Extract oversamplers oversamplers = CONFIG['oversamplers'] # Generate oversamplers for oversampler in oversamplers: # Define and fit experiment experiment = ImbalancedExperiment( oversamplers=[oversampler], classifiers=CONFIG['classifiers'], scoring=CONFIG['scoring'],
], 'scoring': ['accuracy', 'f1_macro', 'geometric_mean_score_macro'], 'n_splits': 5, 'n_runs': 3, 'rnd_seed': 0, 'n_jobs': -1 } if __name__ == '__main__': # Extract paths data_path, results_path, _ = generate_paths() # Load lucas dataset datasets = load_datasets(data_path=data_path, data_type='csv') # Extract oversamplers oversamplers = CONFIG['oversamplers'] # Generate oversamplers for oversampler in oversamplers: # Define and fit experiment experiment = ImbalancedExperiment( oversamplers=[oversampler], classifiers=CONFIG['classifiers'], scoring=CONFIG['scoring'],
def createGens(input_example=INPUT_EXAMPLE, output_example=OUTPUT_EXAMPLE,
               shape=(256, 256), batch_size=32, split=0.2):
    '''
    Build and return the train and validation data generators.

    Derives the image/mask roots and suffixes from the example paths,
    globs both directories, intersects the ids (not every image has a
    mask), optionally restricts masks to a dedicated JSON list and adds
    preprocessing masks, then slices the aligned path lists into a
    train/validation split.

    Parameters
    ----------
    input_example : str
        Example input-image path used to derive the input root/suffix.
    output_example : str
        Example mask path used to derive the output root/suffix/separator.
    shape : tuple of int
        Target image shape passed through to each Generator.
    batch_size : int
        Batch size passed through to each Generator.
    split : float
        Fraction of the dataset reserved for validation (0 <= split < 1).

    Returns
    -------
    (Generator, Generator)
        Train generator and validation generator.
    '''
    (input_root, input_suffix,
     output_root, output_suffix, output_separator) = generate_paths(
        input_example, output_example)
    print("Input: ", input_root)
    print("Output root: ", output_root)

    image_paths = glob(os.path.join(input_root, "*" + input_suffix))
    print("-- #Image_paths: ", len(image_paths))
    mask_paths = glob(os.path.join(output_root, "*" + output_suffix))
    print("-- #Mask_paths: ", len(mask_paths))

    if MASK_PATHS != "":
        # A JSON file of dedicated mask paths, keyed by attribute name,
        # overrides the globbed mask list when configured.
        with open(MASK_PATHS, 'r') as f:
            dedicated_mask_paths = json.load(f)
        # BUGFIX: use the output_example parameter, not the module-level
        # OUTPUT_EXAMPLE global, so caller-supplied arguments are honored.
        attribute = output_example.split(output_separator)[-1].split('.')[0]
        mask_paths = dedicated_mask_paths[attribute]
        print(" -- However we gonna consume only ", len(mask_paths))

    if PREPROPROCESSING_MASK_EXAMPLE:
        # Only the preprocessing root and suffix are needed here.
        # BUGFIX: pass the input_example parameter, not INPUT_EXAMPLE.
        _, _, preprocessing_root, preprocessing_suffix, _ = generate_paths(
            input_example, PREPROPROCESSING_MASK_EXAMPLE)
        preprocessing_paths = glob(
            os.path.join(preprocessing_root, "*" + preprocessing_suffix))
        # os.path.basename instead of split('/') so Windows separators work.
        preprocessing_ids = [
            os.path.basename(p).split(preprocessing_suffix)[0]
            for p in preprocessing_paths]
    else:
        preprocessing_paths = []

    # Not all images have a corresponding mask (or preprocessing mask),
    # so keep only the ids present in every collection.
    image_ids = [os.path.basename(p).split(input_suffix)[0]
                 for p in image_paths]
    mask_ids = [os.path.basename(p).split(output_suffix)[0]
                for p in mask_paths]
    if preprocessing_paths:
        common_ids = set(image_ids) & set(mask_ids) & set(preprocessing_ids)
    else:
        common_ids = set(image_ids) & set(mask_ids)
    # BUGFIX: sort the intersection so the train/validation split is
    # deterministic across runs (set order varies with hash randomization).
    intersection = sorted(common_ids)

    if preprocessing_paths:
        preprocessing_paths = [
            os.path.join(preprocessing_root, x + preprocessing_suffix)
            for x in intersection]
    image_paths = [os.path.join(input_root, x + input_suffix)
                   for x in intersection]
    mask_paths = [os.path.join(output_root, x + output_suffix)
                  for x in intersection]

    dataset_len = len(image_paths)
    # BUGFIX: one shared cut index. The original sliced the train set with
    # floor(dataset_len*(1-split)) and the validation set with ceil(...),
    # silently dropping one sample from BOTH sets whenever the product was
    # fractional (e.g. 10 items, split=0.25 -> train[:7], val[8:]).
    cut = floor(dataset_len * (1 - split))

    train_image_paths = image_paths[:cut]
    train_mask_paths = mask_paths[:cut]
    train_preprocessing_paths = (preprocessing_paths[:cut]
                                 if preprocessing_paths else [])

    validation_image_paths = image_paths[cut:]
    validation_mask_paths = mask_paths[cut:]
    validation_preprocessing_paths = (preprocessing_paths[cut:]
                                      if preprocessing_paths else [])

    return (Generator(train_image_paths, train_mask_paths, shape,
                      batch_size, train_preprocessing_paths),
            Generator(validation_image_paths, validation_mask_paths, shape,
                      batch_size, validation_preprocessing_paths))