def main():
    parser = get_input_arguments()

    # check for data directory
    if not os.path.isdir(parser.data_directory):
        print(f'Cannot locate data directory: {parser.data_directory}, please enter another directory.')
        exit(1)

    # check for save directory; create it if missing
    if not os.path.isdir(parser.save_dir):
        print(f'Creating directory: {parser.save_dir}')
        os.makedirs(parser.save_dir)

    device = set_device(parser.use_gpu)

    # map categories to their respective names
    cat_to_name = load_json(parser.category_json)
    output_size = len(cat_to_name)
    print(f'There are {output_size} categories in the dataset, meaning an output layer of {output_size} units')

    # take data from the train, validation and test set folders, apply the
    # transforms, and load the sets with their batch sizes
    train_transform, valid_transform, test_transform = transform_data()
    train_dataset, valid_dataset, test_dataset = load_datasets(parser.data_directory,
                                                               train_transform,
                                                               valid_transform,
                                                               test_transform)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)
    validationloader = torch.utils.data.DataLoader(valid_dataset, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

    # pass architecture and hidden units as arguments; returns the loaded
    # architecture's model, classifier, optimizer and NLLLoss criterion
    model, criterion, optimizer, classifier = select_model(parser.hidden_units, output_size,
                                                           parser.learnrate, device, parser.arch)

    train_model(device, parser.epochs, trainloader, validationloader, model, optimizer, criterion)
    test_model(device, testloader, model)
    save_checkpoint(parser.save_dir, train_dataset, model, classifier, optimizer,
                    parser.epochs, parser.arch)
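# Hypothetical sketch of the load_datasets helper called above, assuming the
# data directory follows the conventional train/valid/test ImageFolder layout.
# The helper itself is not shown in this file; names here are illustrative.
import os
from torchvision import datasets

def load_datasets(data_directory, train_transform, valid_transform, test_transform):
    train_dataset = datasets.ImageFolder(os.path.join(data_directory, 'train'),
                                         transform=train_transform)
    valid_dataset = datasets.ImageFolder(os.path.join(data_directory, 'valid'),
                                         transform=valid_transform)
    test_dataset = datasets.ImageFolder(os.path.join(data_directory, 'test'),
                                        transform=test_transform)
    return train_dataset, valid_dataset, test_dataset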
# repeat the full split / tune / evaluate procedure; 'epochs' here counts
# repetitions of the experiment, not training epochs
epochs = 100
for epoch in range(epochs):
    print(epoch + 1)

    ## Split the dataset into 80% training and 20% test sets.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                        shuffle=True, stratify=y)

    ## Scale features to the [0, 1] range; fit the scaler on the training set only
    sc = MinMaxScaler(feature_range=(0, 1))
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    ## Define which model and parameters we want to tune, their ranges, and the
    ## inner cross-validation method (n_splits, n_repeats)
    model, param_grid, cv = select_model(models)

    ## Based on the chosen model, create a grid to search for the optimal model
    grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv,
                        scoring='roc_auc', n_jobs=-1)

    ## Fit the grid to the training set
    grid_result = grid.fit(x_train, y_train)

    ## Print the best model and hyper-parameters chosen by the grid
    print('Best model:', grid_result.best_estimator_)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    ## Collect the AUC mean and standard deviation for each hyper-parameter
    ## setting tried during tuning, and print them out
    means = grid_result.cv_results_['mean_test_score']
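    # The snippet breaks off after collecting the means; a minimal,
    # hypothetical completion of the printing step the comment above promises,
    # following the usual cv_results_ pattern:
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, std, param))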
def collect(): name_list = [ "mnist", #"mnistcov" ] nr_of_epochs = 8000 record_all_flag = False rec_test_flag = True learning_r = [0.0004] save_data_flag = False show_flag = True separate_flag = False save_MI_and_plot_flag = False bin_size_or_nr = [True] bins = [0.01, 0.07, 0.15, 0.3] color_list = [ "red", "blue", "green", "orange", "purple", "brown", "pink", "teal", "goldenrod" ] models = [3, 4, 2, 1, 5, 6] for set_name in name_list: nrs = [3, 8, 1] #samples = "full" samples = 1600 seed(1337) set_random_seed(1337) X_train, X_test, y_train, y_test = data_selection.select_data( set_name, shuffle=True, samples_per_class=samples, list_of_nrs=nrs) batch_size = [256, X_train.shape[0], 128, 512] print("calculations starting for: ", set_name) for i in models: if (i <= 4) and ("cov" in set_name): continue if (i > 4) and ("cov" not in set_name): continue for batch in batch_size: seed(1337) set_random_seed(1337) # objects to record parameters outputs = classes.Outputs() # define and train model output_recording = LambdaCallback( on_epoch_end=lambda epoch, logs: Callbacks.record_activations( outputs, model, epoch, X_train, X_test, y_test, batch, record_all_flag, rec_test_flag)) model, architecture = model_selection.select_model( i, nr_of_epochs, set_name, X_train.shape, y_train) adam = optimizers.Adam(lr=learning_r) model.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"]) history = model.fit(X_train, y_train, epochs=nr_of_epochs, batch_size=batch, validation_split=0.2, callbacks=[output_recording]) # final model score score = model.evaluate(X_test, y_test, verbose=0) score = score[1] # save data common_name = architecture + "_lr_" + str( learning_r) + "_batchsize_" + str(batch) if "mnist" in set_name: common_name = str(samples) + str(nrs) + common_name aname = common_name + "_activations" outputs.model_score = score if save_data_flag == True: util.save(outputs, aname) hname = common_name + "_history" h_obj = history.history h_obj["model_score"] = score if save_data_flag == True: util.save(h_obj, hname) plotting.plot_history(h_obj, common_name, show_flag, save_MI_and_plot_flag) if rec_test_flag == True: plotting.plot_test_development(outputs.int_model_score, common_name, show_flag, save_MI_and_plot_flag) # compute binning MI for flag in bin_size_or_nr: for nr_of_bins in bins: if flag == True and nr_of_bins > 1: continue if flag == False and nr_of_bins < 1: continue seed(1337) set_random_seed(1337) est_type_flag = 1 info_plane.create_infoplane(common_name, X_train, y_train, outputs, est_type_flag, color_list, nr_of_bins, flag, show_flag, separate_flag, save_MI_and_plot_flag, par_flag=False) seed(1337) set_random_seed(1337) # compute EDGE if batch == 256: est_type_flag = 2 info_plane.create_infoplane( common_name, X_train, y_train, outputs, est_type_flag, color_list, show_flag, separate_flag, save_flag=save_MI_and_plot_flag, par_flag=True) seed(1337) set_random_seed(1337) # compute KDE upper est_type_flag = 3 info_plane.create_infoplane(common_name, X_train, y_train, outputs, est_type_flag, color_list, show_flag, separate_flag, save_flag=save_MI_and_plot_flag, par_flag=False) seed(1337) set_random_seed(1337) # compute KDE lower if batch == 256: est_type_flag = 4 info_plane.create_infoplane( common_name, X_train, y_train, outputs, est_type_flag, color_list, show_flag, separate_flag, save_flag=save_MI_and_plot_flag, par_flag=False) seed(1337) set_random_seed(1337) # compute KSG discrete est_type_flag = 5 info_plane.create_infoplane(common_name, X_train, y_train, outputs, 
est_type_flag, color_list, show_flag, separate_flag, save_flag=save_MI_and_plot_flag, par_flag=False)
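# For completeness: the reproducibility and Keras names this snippet leans on
# are presumably imported along these lines (TF 1.x API; in TF 2.x,
# tf.random.set_seed replaces set_random_seed):
from numpy.random import seed
from tensorflow import set_random_seed
from keras import optimizers
from keras.callbacks import LambdaCallback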
# repeat the full split / tune / evaluate procedure; 'epochs' here counts
# repetitions of the experiment, not training epochs
epochs = 25
for epoch in range(epochs):
    print(epoch + 1)

    ## Split the dataset into 80% training and 20% test sets.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                        shuffle=True)

    ## Scale the dataset by removing the mean and scaling to unit variance;
    ## fit the scaler on the training set only
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    ## Define which model and parameters we want to tune, their ranges, and
    ## the cross-validation method (n_splits, n_repeats)
    model, param_grid, cv = select_model("L2_Logistic_Regression")

    ## Based on the chosen model, create a grid to search for the optimal model
    grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv,
                        scoring="roc_auc", n_jobs=1)

    ## Fit the grid to the training set
    grid_result = grid.fit(x_train, y_train)

    print('Best C:', grid_result.best_estimator_.get_params()['C'])
    print('Best model:', grid_result.best_estimator_)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    ## AUC mean and standard deviation for each hyper-parameter setting tried
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
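# Hypothetical sketch of select_model for the "L2_Logistic_Regression" case
# used above: an L2-penalized logistic regression, a grid over its
# regularization strength C (whose best value is printed above), and a
# repeated stratified k-fold splitter. Grid values and CV settings are
# illustrative assumptions, not the project's actual configuration.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold

def select_model(name):
    if name == "L2_Logistic_Regression":
        model = LogisticRegression(penalty='l2', solver='liblinear')
        param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
        cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3)
        return model, param_grid, cv
    raise ValueError("Unknown model: %s" % name)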
def main(argv): """ This script will implement the training and testing of a deep learning model for the classification of Alzheimer's disease, based on structural MRI data. All settings and model parameters can be defined in the config.py configuration file. In short, data is split in a test, train and validation set. Cross-validation is performed for which in each fold data is augmented, normalized and a CNN is trained and evaluated. As output this script provides train, validation & test performance metrics, which are all stored in a dictionary called 'results.npy'. Plots of the performance per epoch and of the ROC-AUC of all folds will be provided. Also the configurations file will be saved, together with the model information. Live performance can be monitored through TensorBoard (only when running local) $ tensorboard --logdir=<results dir>/logs """ # start timer start = time.time() start_localtime = time.localtime() # if temp job dir is provided as input use this as data dir (server) if len(argv) > 1: config.data_dir = sys.argv[1] + "/" config.aug_dir = sys.argv[1] + "/augmented/" # if job nr is provided use this to define output dir (server) if len(argv) > 2: config.output_dir = f"{config.all_results_dir}{sys.argv[2]}_{config.roi}_{config.task}_{config.model}{config.comments}/" # save configuration file create_data_directory(config.output_dir) copyfile(config.config_file, f"{config.output_dir}configuration_{config.model}.py") # initialization of results dictionary results = { "train": { "loss": [], "acc": [], "fpr": [], "tpr": [], "auc": [], "sensitivity": [], "specificity": [] }, "validation": { "loss": [], "acc": [], "fpr": [], "tpr": [], "auc": [], "sensitivity": [], "specificity": [] }, "test": { "loss": [], "acc": [], "fpr": [], "tpr": [], "auc": [], "sensitivity": [], "specificity": [] } } # create labels partition_labels, labels = create_labels() # train test split partition_train_test = split_train_test(partition_labels, labels) np.save(config.output_dir + "train_test.npy", partition_train_test) # train val split partition_train_validation = split_train_val(partition_train_test, labels) np.save(config.output_dir + "train_val.npy", partition_train_validation) # START CROSS VALIDATION for i in range(config.k_cross_validation): # select model model = select_model(i) print("\n----------- CROSS VALIDATION " + str(i) + " ----------------\n") # augmentation of training data if config.augmentation: partition_train_validation["train"][i], labels = augmentation( partition_train_validation["train"][i], labels) count_sets(partition_train_validation, labels) # create results directory for fold results_dir = config.output_dir + "k" + str(i) create_data_directory(results_dir) file = open(results_dir + "/results.txt", 'w') # get mean + std of train data to standardize all data in generator if config.all_data: mean = np.load(config.mean_file) std = np.load(config.std_file) else: mean, std = standardization_matrix( partition_train_validation["train"][i]) # save mean + std np.save(results_dir + "/mean.npy", mean) np.save(results_dir + "/std.npy", std) # create data generators train_generator = DataGenerator(partition_train_validation["train"][i], labels, mean, std, batch_size=config.batch_size, dim=config.input_shape, n_channels=1, n_classes=2, shuffle=True) validation_generator = DataGenerator( partition_train_validation["validation"][i], labels, mean, std, batch_size=config.batch_size, dim=config.input_shape, n_channels=1, n_classes=2, shuffle=True) test_generator = 
DataGenerator(partition_train_test["test"][i], labels, mean, std, batch_size=1, dim=config.input_shape, n_channels=1, n_classes=2, shuffle=False) CM_train_generator = DataGenerator( partition_train_validation["train"][i], labels, mean, std, batch_size=1, dim=config.input_shape, n_channels=1, n_classes=2, shuffle=False) CM_validation_generator = DataGenerator( partition_train_validation["validation"][i], labels, mean, std, batch_size=1, dim=config.input_shape, n_channels=1, n_classes=2, shuffle=False) # set callbacks callback_list = callbacks_list(CM_train_generator, CM_validation_generator, labels, results_dir) if not config.test_only: # TRAINING history = model.fit_generator(generator=train_generator, validation_data=validation_generator, class_weight=None, callbacks=callback_list, epochs=config.epochs, verbose=1, use_multiprocessing=False, workers=0) # plot acc + loss plot_acc_loss(history, results_dir, i) # plot performance per epoch if config.epoch_performance: plot_epoch_performance(callback_list[0]) plot_epoch_performance(callback_list[1]) # load model of epoch with best performance model = load_best_model(results_dir) # TRAIN EVALUATION # roc auc Y_pred = model.predict_generator(CM_train_generator, verbose=0) y_pred = np.argmax(Y_pred, axis=1) y_true = [] for id in CM_train_generator.list_IDs: y_true.append(labels[id]) fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1]) roc_auc = auc(fpr, tpr) # save classification per subject (for statistical test) np.save(results_dir + "/train_IDs.npy", CM_train_generator.list_IDs) np.save(results_dir + "/train_y_true.npy", y_true) np.save(results_dir + "/train_y_pred.npy", y_pred) # sen / spe report = classification_report( y_true, y_pred, target_names=[config.class0, config.class1], output_dict=True) # loss, acc score = model.evaluate_generator(generator=train_generator, verbose=1) results["train"]["loss"].append(score[0]) results["train"]["acc"].append(score[1]) results["train"]["fpr"].append(fpr) results["train"]["tpr"].append(tpr) results["train"]["auc"].append(roc_auc) results["train"]["sensitivity"].append( report[config.class1]["recall"]) results["train"]["specificity"].append( report[config.class0]["recall"]) # report train results train_results = f"\nTrain\n loss: {score[0]:.4f}\n acc: {score[1]:.4f}\n AUC: {roc_auc:.4f}\n " \ f"sens: {report[config.class1]['recall']:.4f}\n spec: {report[config.class0]['recall']:.4f}\n\n" file.write(train_results), print(train_results) # VALIDATION EVALUATION # roc auc Y_pred = model.predict_generator(CM_validation_generator, verbose=0) y_pred = np.argmax(Y_pred, axis=1) y_true = [] for id in CM_validation_generator.list_IDs: y_true.append(labels[id]) fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1]) roc_auc = auc(fpr, tpr) # save classification per subject (for statistical test) np.save(results_dir + "/val_IDs.npy", CM_validation_generator.list_IDs) np.save(results_dir + "/val_y_true.npy", y_true) np.save(results_dir + "/val_y_pred.npy", y_pred) # sen / spe report = classification_report( y_true, y_pred, target_names=[config.class0, config.class1], output_dict=True) # loss, acc score = model.evaluate_generator(generator=validation_generator, verbose=1) results["validation"]["loss"].append(score[0]) results["validation"]["acc"].append(score[1]) results["validation"]["fpr"].append(fpr) results["validation"]["tpr"].append(tpr) results["validation"]["auc"].append(roc_auc) results["validation"]["sensitivity"].append( report[config.class1]["recall"]) results["validation"]["specificity"].append( 
report[config.class0]["recall"]) # report val results val_results = f"\nValidation\n loss: {score[0]:.4f}\n acc: {score[1]:.4f}\n AUC: {roc_auc:.4f}\n " \ f"sens: {report[config.class1]['recall']:.4f}\n spec: {report[config.class0]['recall']:.4f}\n\n" file.write(val_results), print(val_results) # TEST EVALUATION # roc auc Y_pred = model.predict_generator(test_generator, verbose=0) y_pred = np.argmax(Y_pred, axis=1) y_true = [] for id in test_generator.list_IDs: y_true.append(labels[id]) fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1]) roc_auc = auc(fpr, tpr) # save classification per subject (for statistical test) np.save(results_dir + "/test_IDs.npy", test_generator.list_IDs) np.save(results_dir + "/test_y_true.npy", y_true) np.save(results_dir + "/test_y_pred.npy", y_pred) # sen / spe report = classification_report( y_true, y_pred, target_names=[config.class0, config.class1], output_dict=True) # loss, acc score = model.evaluate_generator(generator=test_generator, verbose=1) results["test"]["loss"].append(score[0]) results["test"]["acc"].append(score[1]) results["test"]["fpr"].append(fpr) results["test"]["tpr"].append(tpr) results["test"]["auc"].append(roc_auc) results["test"]["sensitivity"].append(report[config.class1]["recall"]) results["test"]["specificity"].append(report[config.class0]["recall"]) # report test results test_results = f"\nTest\n loss: {score[0]:.4f}\n acc: {score[1]:.4f}\n AUC: {roc_auc:.4f}\n " \ f"sens: {report[config.class1]['recall']:.4f}\n spec: {report[config.class0]['recall']:.4f}\n\n" file.write(test_results), print(test_results) file.close() # delete augmented images if config.augmentation: os.system('rm -rf %s/*' % config.aug_dir) print("\n---------------------- RESULTS ----------------------\n\n") # plot test ROC of all folds + average + std plot_ROC(results["test"]["tpr"], results["test"]["fpr"], results["test"]["auc"]) # end timer end = time.time() end_localtime = time.localtime() # save results + model np.save(config.output_dir + "results.npy", results) save_DL_model(model) save_results(results, start, start_localtime, end, end_localtime) print('\nend')
if not os.path.exists(args.path_query_image):
    raise ValueError('The path to the source image you provided does not exist!')
if not os.path.exists(args.path_reference_image):
    raise ValueError('The path to the target image you provided does not exist!')
if not os.path.isdir(args.write_dir):
    os.makedirs(args.write_dir)

try:
    query_image = imageio.imread(args.path_query_image)
    reference_image = imageio.imread(args.path_reference_image)
    query_image, reference_image = pad_to_same_shape(query_image,
                                                     reference_image)
except Exception:
    raise ValueError('It seems that the paths to the images you provided do not work!')

with torch.no_grad():
    network = select_model(args.model, args.pre_trained_model, args.optim_iter,
                           local_optim_iter,
                           path_to_pre_trained_models=args.pre_trained_models_dir)

    # convert numpy arrays to torch tensors in the right shape (1x3xHxW)
    query_image_ = torch.from_numpy(query_image).permute(2, 0, 1).unsqueeze(0)
    reference_image_ = torch.from_numpy(reference_image).permute(2, 0, 1).unsqueeze(0)

    # ATTENTION: here source and target images are torch tensors of size
    # 1x3xHxW without further pre-processing; the specific pre-processing
    # (division by 255 and rescaling) is done within the function.

    # pass both images to the network; it pre-processes the images and outputs
    # the estimated flow with dimensions 1x2xHxW
    if args.flipping_condition and 'GLUNet' in args.model:
        estimated_flow = network.estimate_flow_with_flipping_condition(
            query_image_, reference_image_, mode='channel_first')
    else:
        estimated_flow = network.estimate_flow(query_image_, reference_image_,
                                               mode='channel_first')

    estimated_flow_numpy = estimated_flow.squeeze().permute(1, 2, 0).cpu().numpy()
    # warp the query image with the horizontal ([0]) and vertical ([1]) flow
    # components (the original call was truncated here; the [1] channel
    # completes the 1x2xHxW flow pattern established by the [0] channel)
    warped_query_image = remap_using_flow_fields(
        query_image, estimated_flow.squeeze()[0].cpu().numpy(),
        estimated_flow.squeeze()[1].cpu().numpy())
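# Hypothetical sketch of the pad_to_same_shape helper used above: zero-pads
# both images at the bottom/right so they share the same height and width,
# which the network requires. The real helper may differ in padding strategy.
import numpy as np

def pad_to_same_shape(im1, im2):
    h = max(im1.shape[0], im2.shape[0])
    w = max(im1.shape[1], im2.shape[1])
    im1 = np.pad(im1, ((0, h - im1.shape[0]), (0, w - im1.shape[1]), (0, 0)),
                 mode='constant')
    im2 = np.pad(im2, ((0, h - im2.shape[0]), (0, w - im2.shape[1]), (0, 0)),
                 mode='constant')
    return im1, im2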