def main(param):
    """Train and evaluate a k-NN gesture classifier from landmark files.

    Reads landmark vectors from every ``filmy_<name>_<number>_l.mov``
    directory under ``param.data_path``, fits a KNeighborsClassifier,
    dumps it to ``model.joblib`` and writes a classification report to
    ``report_5_neighbours.txt``.
    """
    data_path = param.data_path
    features, labels = [], []
    # Gather one landmark vector per frame directory; the class label is the
    # zero-based movie number.
    for movie_number in range(1, 7):
        for subject in ["ds", "ms", "mw"]:
            pattern = data_path + '/filmy_' + subject + '_' + str(movie_number) + '_l.mov/*'
            for frame_dir in glob.glob(pattern):
                landmark = utils.read_landmark(
                    os.path.join(frame_dir, "landmark.txt"))
                # A leading 0.0 is treated as an invalid sample and skipped
                # (presumably "no landmark detected" — confirm with read_landmark).
                if landmark[0] != 0.0:
                    features.append(landmark)
                    labels.append(movie_number - 1)
    print(len(features), len(labels))

    # Deterministic shuffle + 70/30 split for reproducible evaluation.
    X, y = shuffle(features, labels, random_state=2)
    trainX, testX, trainY, testY = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=4)

    model = KNeighborsClassifier()
    #model = GaussianNB()
    model.fit(trainX, trainY)
    dump(model, 'model.joblib')

    y_pred = model.predict(testX)
    class_report = classification_report(testY, y_pred)
    conf_matrix = confusion_matrix(testY, y_pred)
    conf_matrix_printed = utils.print_confusion_matrix(conf_matrix, testY)

    # with open("report_naive_bayes_gaussian.txt", "w") as f:
    with open("report_5_neighbours.txt", "w") as f:
        f.write(class_report)
        f.write('\n')
        f.write(conf_matrix_printed)
    print(class_report)
    print(conf_matrix)
# NOTE(review): fragment of a per-epoch training loop — the enclosing
# `for epoch ...` header and the initialisation of `losses` / `accs` are
# outside this chunk. Indentation below is reconstructed from a
# whitespace-mangled source; confirm against the original file.
# Train on each batch yielded by the fetcher.
for fnames, fundus_rescale_mean_subtract_lesions, grades in train_batch_fetcher():
    loss, acc = network.train_on_batch(
        fundus_rescale_mean_subtract_lesions, grades)
    # Repeat the per-batch metric once per sample so np.mean below yields a
    # per-sample (not per-batch) average.
    losses += [loss] * fundus_rescale_mean_subtract_lesions.shape[0]
    accs += [acc] * fundus_rescale_mean_subtract_lesions.shape[0]
utils.print_metrics(epoch + 1,
                    training_loss=np.mean(losses),
                    training_acc=np.mean(accs))

# evaluate on the validation set
if epoch in validation_epochs:
    pred_grades, true_grades = [], []
    for fnames, fundus_rescale_mean_subtract_lesions, grades in val_batch_fetcher():
        pred = network.predict(fundus_rescale_mean_subtract_lesions,
                               batch_size=batch_size,
                               verbose=0)
        pred_grades += pred.tolist()
        true_grades += grades.tolist()
    utils.print_confusion_matrix(true_grades, pred_grades, "DR")

# save the weight
if epoch in validation_epochs:
    network.save_weights(
        os.path.join(model_out_dir, "network_{}.h5".format(epoch + 1)))

duration = time.time() - start_time
# NOTE(review): Python 2 print statement — this file predates / was never
# ported to Python 3.
print "duration for {}th epoch: {}s".format(epoch + 1, duration)
sys.stdout.flush()
# get performance for each filtered dataset dataset_acc = dict() for dataset_name in dataset_path: path = dataset_path[dataset_name] dataset_for_test = dataset.listDataset(root_path=path, transform=transform, encoder=encoder, img_size=(img_width, img_height)) assert dataset_for_test print("num. " + str(dataset_name) + " data : " + str(len(dataset_for_test))) acc, mean_ed, conf_mat = \ get_performace(dataset_name, dataset_for_test, correct_dir, incorrect_dir) print('confusion mat for ' + str(dataset_name)) utils.print_confusion_matrix(conf_mat) conf_mat_path = os.path.join(result_dir, str(dataset_name) +'_conf_mat.txt') utils.write_confusion_matrix(conf_mat, conf_mat_path) print(str(dataset_name) + '. acc : ' + str(acc)) print(str(dataset_name) + '. MED : ' + str(mean_ed)) dataset_acc[dataset_name] = [acc, mean_ed] fout = open(os.path.join(result_dir, 'results.txt'), 'w') for iter_acc in dataset_acc: print("Acc. of " + str(iter_acc) + ": %.5f" % (dataset_acc[iter_acc][0])) print("MED. of " + str(iter_acc) + ": %.5f" % (dataset_acc[iter_acc][1])) fout.write("Acc. of " + str(iter_acc) + ": %.5f\n" % (dataset_acc[iter_acc][0])) fout.write("MED. of " + str(iter_acc) + ": %.5f\n" % (dataset_acc[iter_acc][1])) fout.close()
# NOTE(review): fragment of a per-epoch validation pass — the `epoch` loop
# header and the initialisation of `losses`, `accs` and
# `check_validation_batch` are outside this chunk. Indentation is
# reconstructed from a whitespace-mangled source.
pred_grades, true_grades = [], []
for fnames, imgs_mean_subt, imgs_z, vessels, grades_onehot in val_batch_fetcher():
    # Dump the first validation batch to disk once for a visual sanity check.
    if check_validation_batch:
        utils.check_input(imgs_mean_subt, imgs_z, vessels, val_img_check_dir)
        check_validation_batch = False
    pred = network.predict([imgs_mean_subt, imgs_z, vessels],
                           batch_size=batch_size,
                           verbose=0)
    # argmax converts one-hot targets / softmax outputs to grade indices.
    pred_grades += np.argmax(pred, axis=1).tolist()
    true_grades += np.argmax(grades_onehot, axis=1).tolist()
    loss, acc = network.evaluate([imgs_mean_subt, imgs_z, vessels],
                                 grades_onehot,
                                 batch_size=batch_size,
                                 verbose=0)
    # Repeat per-batch metrics once per sample so np.mean is per-sample.
    losses += [loss] * imgs_mean_subt.shape[0]
    accs += [acc] * imgs_mean_subt.shape[0]
utils.print_metrics(epoch + 1,
                    validation_loss=np.mean(losses),
                    validation_acc=np.mean(accs))
utils.print_confusion_matrix(true_grades, pred_grades, FLAGS.grade_type)

# save the weight
if epoch in validation_epochs:
    network.save_weights(
        os.path.join(model_out_dir, "network_{}.h5".format(epoch + 1)))
sys.stdout.flush()
if __name__ == '__main__':
    ##### Instantiate models, data and labels #####
    # Class-label sets for the three classification tasks.
    word_labels = ['Arriba', 'Abajo', 'Adelante', 'Atrás', 'Derecha', 'Izquierda']
    vowel_labels = ['/a/', '/e/', '/i/', '/o/', '/u/']
    all_class_labels = vowel_labels + word_labels
    # Look up the right label set per data type instead of an if/elif chain.
    label_sets = {
        'words': word_labels,
        'vowels': vowel_labels,
        'all_classes': all_class_labels,
    }

    # Evaluate every (architecture, task) combination and save a normalized
    # confusion matrix for each one.
    for model_type in ['shallow', 'deep', 'eegnet']:
        for data_type in ['vowels', 'words', 'all_classes']:
            # run the main function
            y_true_all, y_pred_all = get_stats(model_type, data_type)
            cm = confusion_matrix(y_true_all, y_pred_all)
            # folder to save to
            cm_filename = f"results_folder/results/misc/{model_type}_{data_type}"
            print_confusion_matrix(cm,
                                   label_sets[data_type],
                                   filename=cm_filename,
                                   normalize=True)
def main(train_images_path, train_labels_path, model_save_path):
    """ Load data, train model, display metrics, save model.

    NOTE(review): this function was recovered from a whitespace-mangled
    source (two string literals were even split mid-token); the
    indentation below is reconstructed and should be confirmed against
    the original file.

    Args:
        train_images_path: path to training images — unused while the
            CIFAR10 branch is active; kept for the commented-out
            MnistDataset variant.
        train_labels_path: path to training labels — unused, see above.
        model_save_path: destination file for the torch.save() checkpoint.
    """
    # Load data.
    #dataset = data.MnistDataset(
    #    train_images_path, train_labels_path,
    #    image_transform=None,
    #    label_transform=None,
    #)
    dataset = torchvision.datasets.CIFAR10('data/',
                                           train=True,
                                           transform=None,
                                           target_transform=None,
                                           download=True)
    #dataset = torchvision.datasets.MNIST(
    #    'data/', train=True,
    #    transform=None, target_transform=None,
    #    download=True)

    # Split data into train/validation sets (80/20).
    train_split_size = int(len(dataset) * 0.8)
    validation_split_size = len(dataset) - train_split_size
    training_dataset, validation_dataset = torch.utils.data.random_split(
        dataset, [train_split_size, len(dataset) - train_split_size])

    # These values come from a pytorch github issue, and are
    # computed on the whole dataset.
    #normalization = torchvision.transforms.Normalize(
    #    (0.4914, 0.4822, 0.4465),
    #    (0.247, 0.243, 0.261)
    #)
    # These values are close enough, and do not leak validation into training
    # set. (I could also compute them myself every split, but laziness)
    normalization = torchvision.transforms.Normalize((0.5, 0.5, 0.5),
                                                     (0.25, 0.25, 0.25))
    crop_size = 26
    # Training-time augmentation pipeline.
    augmentation = torchvision.transforms.Compose([
        #torchvision.transforms.RandomRotation(22.5, fill=(0,)),
        torchvision.transforms.RandomHorizontalFlip(p=0.5),
        torchvision.transforms.RandomCrop(crop_size, 0, fill=(0, )),
        torchvision.transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),
        torchvision.transforms.ToTensor(),
        normalization,
    ])
    # Deterministic transform for evaluation.
    test_transform = torchvision.transforms.Compose([
        torchvision.transforms.CenterCrop(crop_size),
        torchvision.transforms.ToTensor(),
        normalization,
    ])
    # NOTE(review): random_split returns Subset views over ONE shared
    # underlying dataset — setting .dataset.transform on one subset also
    # affects the other; the two assignments below overwrite each other.
    training_dataset.dataset.transform = augmentation
    validation_dataset.dataset.transform = test_transform

    batch_size = 2**8  # 256
    steps_per_epoch = (train_split_size // batch_size)
    #print(training_dataset.image_transform, validation_dataset.image_transform)
    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)
    validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)

    # Get a single batch from the training data.
    # We use this batch to set the shapes of layers according to the shape
    # of the actual data.
    for inputs, labels in training_dataloader:
        sample = inputs
        break
    # `Model` is project-defined (not visible in this chunk).
    model = Model(nb_classes=10, input_image=sample[0])
    epochs = 15
    print(steps_per_epoch, "steps per epoch.")

    # Learning-rate scheduling strategy selection.
    #strategy = None
    #strategy = 'ReduceOnPlateau'
    #strategy = 'CyclicLR'
    strategy = 'OneCycle'
    swa = False
    # Grab the optimizer's configured LR (last param group wins).
    for group in model.optimizer.param_groups:
        base_lr = group['lr']
    min_lr = base_lr  #* 0.1
    max_lr = base_lr * 25

    # Optional interactive LR range test; aborts training afterwards.
    if input('LR range test ? y/[n]: ') == 'y':
        import torch_lr_finder
        for group in model.optimizer.param_groups:
            group['lr'] = base_lr * 1e-2
        lr_finder = torch_lr_finder.LRFinder(model, model.optimizer,
                                             model.criterion)
        lr_finder.range_test(training_dataloader,
                             end_lr=base_lr * 1e2,
                             num_iter=steps_per_epoch * 1)
        lr_finder.plot()
        return
    print("LR (min, base, max) = ({}, {}, {}))".format(min_lr, base_lr, max_lr))

    # Callback hooks consumed by model.fit_generator below.
    strategy_functions = {
        'before_epoch_fn': None,
        'after_epoch_fn': None,
        'before_batch_fn': None,
        'after_batch_fn': None,
        'before_validation_fn': None,
        'after_validation_fn': None,
    }
    # NOTE(review): this branch is unreachable — `strategy` is hard-coded to
    # 'OneCycle' above and 'WarmUpReduceOnPlateau' is not among the
    # commented alternatives.
    if strategy == 'WarmUpReduceOnPlateau':
        pct_start = max(1 / epochs, 1 / 8)
        div_factor = max_lr / (base_lr * 0.1)
        warmup_steps = pct_start * epochs * steps_per_epoch
        print("OneCycle with {} warmup steps ({:.2f} epochs)".format(
            warmup_steps, warmup_steps / steps_per_epoch))
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            model.optimizer,
            max_lr=max_lr,
            div_factor=div_factor,
            final_div_factor=1,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            pct_start=pct_start,
            anneal_strategy='linear',
        )
        strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()
    if strategy == 'ReduceOnPlateau':
        for group in model.optimizer.param_groups:
            group['lr'] = max_lr
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            model.optimizer,
            mode='min',
            factor=0.2,
            patience=0,
            verbose=True,
            threshold=1e-2,
            threshold_mode='rel',
            cooldown=0,
            min_lr=base_lr * 1e-2)
        # Step on the epoch's validation loss passed via the hook kwargs.
        strategy_functions['after_epoch_fn'] = lambda *a, **k: scheduler.step(
            k['metrics']['loss'])
    if strategy == 'CyclicLR':
        # Cycle variables
        cycle_length = 0.5  #max(1, epochs // 8)
        steps_per_cycle = int(steps_per_epoch * cycle_length)
        step_up_ratio = 1 / 8
        step_down_ratio = 1 - step_up_ratio
        step_size_up = int(steps_per_epoch * step_up_ratio * cycle_length)
        step_size_down = steps_per_cycle - step_size_up  #int(steps_per_epoch * step_down_ratio * cycle_length)
        print("Cyclic LR with steps of sizes:", step_size_up, step_size_down)
        print("Cycling between {} and {}".format(min_lr, max_lr))
        scheduler = torch.optim.lr_scheduler.CyclicLR(
            model.optimizer,
            min_lr,
            max_lr,
            step_size_up=step_size_up,
            step_size_down=step_size_down,
            mode='triangular',
            gamma=1.0,
            scale_fn=None,
            scale_mode='iterations',
            cycle_momentum=False,
            base_momentum=0.8,
            max_momentum=0.9,
            last_epoch=-1)
        strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()
    if strategy == 'OneCycle':
        pct_start = 3 / epochs  #max(1/epochs, 3/10)
        # NOTE(review): these overwrite the max_lr computed above.
        max_lr = base_lr * 200
        div_factor = 200  #max_lr / (base_lr * 0.1)
        final_div_factor = 1 / 100
        warmup_steps = pct_start * epochs * steps_per_epoch
        print("OneCycle with {} warmup steps ({:.2f} epochs)".format(
            warmup_steps, warmup_steps / steps_per_epoch))
        print("start_lr, max_lr, final_lr = {:.4f}, {:.4f}, {:.4f}".format(
            max_lr / div_factor, max_lr,
            (max_lr / div_factor) / final_div_factor))
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            model.optimizer,
            max_lr=max_lr,
            div_factor=div_factor,
            final_div_factor=final_div_factor,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            pct_start=pct_start,
            anneal_strategy='linear',
        )
        strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()

    # Fit model.
    history = model.fit_generator(
        training_dataloader,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=validation_dataloader,
        validation_steps=min(300, validation_split_size // batch_size),
        **strategy_functions,
    )
    # Reset the hooks before the optional SWA phase.
    strategy_functions = {
        'before_epoch_fn': None,
        'after_epoch_fn': None,
        'before_batch_fn': None,
        'after_batch_fn': None,
        'before_validation_fn': None,
        'after_validation_fn': None,
    }

    # Stochastic Weight Averaging
    if swa:
        min_lr = base_lr
        max_lr = base_lr * 1e0
        # `torchcontrib` wraps the optimizer; not visible in this chunk.
        model.optimizer = torchcontrib.optim.SWA(model.optimizer,
                                                 swa_start=0,
                                                 swa_freq=steps_per_epoch // 2,
                                                 swa_lr=max_lr)
        scheduler = torch.optim.lr_scheduler.CyclicLR(
            model.optimizer,
            min_lr,
            max_lr,
            step_size_up=int(steps_per_epoch * 0.3),
            step_size_down=int(steps_per_epoch * 0.7),
            mode='triangular',
            gamma=1.0,
            scale_fn=None,
            scale_mode='iterations',
            cycle_momentum=False,
            base_momentum=0.8,
            max_momentum=0.9,
            last_epoch=-1)
        #strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()
        #print("SWA with lr: {} {}".format(min_lr, max_lr))
        print("SWA with lr: {}".format(max_lr))
        #strategy_functions['before_validation_fn'] = lambda *a, **k: \
        #    model.optimizer.swap_swa_sgd()
        #strategy_functions['after_validation_fn'] = lambda *a, **k: \
        #    model.optimizer.swap_swa_sgd()
        swa_history = model.fit_generator(
            training_dataloader,
            steps_per_epoch=steps_per_epoch,
            epochs=max(3, int(epochs * 0.25)),
            validation_data=validation_dataloader,
            validation_steps=300,
            **strategy_functions,
        )
        # Append SWA-phase metrics to the main learning history.
        for metric_name, values in swa_history.items():
            history[metric_name] = history[metric_name] + values
        model.optimizer.swap_swa_sgd()
        model.optimizer.bn_update(training_dataloader, model)

    # Disable augmentations.
    training_dataset.dataset.transform = test_transform
    validation_dataset.dataset.transform = test_transform
    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)
    validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)
    # Recompute batch-norm statistics with augmentation disabled.
    model.bn_update(training_dataloader)

    # Compute metrics on whole sets.
    training_metrics, training_confusion_matrix = model.evaluate(
        training_dataloader, steps=None, confusion=True)
    validation_metrics, validation_confusion_matrix = model.evaluate(
        validation_dataloader, steps=None, confusion=True)
    print()
    print("{:<12}".format("Training:"), utils.format_metrics(training_metrics))
    print("{:<12}".format("Validation:"),
          utils.format_metrics(validation_metrics))
    print()
    class_labels = training_dataset.dataset.classes
    utils.print_confusion_matrix(training_confusion_matrix,
                                 class_labels,
                                 normalize=True)
    print()
    utils.print_confusion_matrix(validation_confusion_matrix,
                                 class_labels,
                                 normalize=True)
    print()

    # Save the model and optimizer states.
    torch.save(
        {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': model.optimizer.state_dict(),
            'learning_history': history,
        }, model_save_path)

    # Plot learning curves.
    for metric_name, metric_values in history.items():
        plt.plot(metric_values, label=metric_name)
    plt.plot(np.full(epochs, 1.0), 'k--')
    plt.gca().set_ylim(bottom=0.0)
    plt.xlabel('epoch')
    plt.ylabel('metric value')
    plt.legend()
    plt.show()
# NOTE(review): fragment of an inference script — loads a Keras model from
# JSON + weights and predicts DR grades on the validation set. Indentation
# is reconstructed from a whitespace-mangled source.
network_file = utils.all_files_under(load_model_dir, extension=".json")
weight_file = utils.all_files_under(load_model_dir, extension=".h5")
# Expect exactly one architecture file and one weight file in the model dir.
assert len(network_file) == 1 and len(weight_file) == 1
with open(network_file[0], 'r') as f:
    network = model_from_json(f.read())
network.load_weights(weight_file[0])

# run inference
filepaths, filenames, pred_grades, true_grades = [], [], [], []
for fnames, fundus_rescale, fundus_rescale_mean_subtract, grades in val_batch_fetcher():
    pred = network.predict(fundus_rescale_mean_subtract,
                           batch_size=batch_size,
                           verbose=0)
    # Only the first output head is used for grading.
    pred_grades += pred[0].tolist()
    true_grades += grades.tolist()
    # Image IDs: basename without the .tif extension.
    filenames += [
        os.path.basename(fname).replace(".tif", "")
        for fname in fnames.tolist()
    ]
    filepaths += fnames.tolist()

# Convert raw scores to final grades (presumably threshold tuning against
# the ground truth — confirm with utils.adjust_threshold), write the
# submission CSV, then print the confusion matrix.
final_prediction = utils.adjust_threshold(true_grades, pred_grades)
df = pd.DataFrame({"Image No": filenames, "DR Grade": final_prediction})
df.to_csv("VRT_Disease_Grading_DR.csv", index=False)
# segmented_dir_tempalte = "../outputs//{}/"
# ori_img_dir = "../data/merged_training_set/"
# utils.save_wrong_files(true_grades, pred_grades, filepaths, segmented_dir_tempalte, ori_img_dir)
utils.print_confusion_matrix(true_grades, final_prediction, "DR")
# NOTE(review): fragment cut at both edges — the opening of the
# model.fit_generator(...) call precedes this chunk and the closing paren
# of the final save_weights(...) call follows it. Indentation is
# reconstructed from a whitespace-mangled source; original Chinese
# comments are translated inline.
batch_size=BATCH_SIZE),
                    steps_per_epoch=train_idx.sum() // BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=test_datagen.flow(Xv, yv,
                                                      batch_size=BATCH_SIZE),
                    validation_steps=valid_idx.sum() // BATCH_SIZE,
                    verbose=2)
print('Done!')

# Evaluate: predict class probabilities over the validation set
# (shuffle=False keeps predictions aligned with yv).
test_gen = test_datagen.flow(Xv, yv, batch_size=BATCH_SIZE, shuffle=False)
probabilities = model.predict_generator(test_gen,
                                        steps=len(yv) // BATCH_SIZE + 1)

# Plot the multi-class confusion matrix.
cnf_matrix = confusion_matrix(np.argmax(yv, axis=1),
                              np.argmax(probabilities, axis=1))
_ = print_confusion_matrix(cnf_matrix, selected_breed_list)
# NOTE(review): argument order looks swapped here — sklearn's
# classification_report expects (y_true, y_pred), but predictions are
# passed first (the confusion_matrix call above uses the opposite order);
# precision/recall columns would be transposed. Verify and fix upstream.
report = classification_report(np.argmax(probabilities, axis=1),
                               np.argmax(yv, axis=1),
                               target_names=selected_breed_list)
print(report)

# Save the trained model weights.
# !mkdir models
# model.save_weights('../tmp/models/tl_xception_weights.h5')
save_path = '/home/colin/Github/Computer-Vision/models/'  # save directory
if not os.path.exists(save_path):  # create the directory if it does not exist
    os.makedirs(save_path)
# !mkdir models
model.save_weights(
    '/home/colin/Github/Computer-Vision/models/tl_xception_weights.h5'