def main():
    device = get_device()
    models = {1: Model1Stage, 3: Model3Stages}
    pth_paths = {0: opt.pth_path0, 1: opt.pth_path1, 2: opt.pth_path2}
    workers = opt.workers
    stages = opt.stages

    evaluator = Evaluator()
    model = models[stages](device)

    for i in range(3):
        test_set = ColorCheckerDataset(train=False, folds_num=i)
        dataloader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=workers)
        print('\n Length of fold {}: {} \n'.format(i, len(test_set)))

        model.load(path_to_pretrained=pth_paths[i])
        model.evaluation_mode()

        with torch.no_grad():
            for _, data in enumerate(dataloader):
                img, label, file_name = data
                img, label = img.to(device), label.to(device)
                pred = model.predict(img)
                loss = model.get_angular_loss(pred, label)
                evaluator.add_error(loss.item())
                print('\t - Input: %s, AE: %f' % (file_name[0], loss.item()))

    metrics = evaluator.compute_metrics()
    print("\n Mean ............ : {}".format(metrics["mean"]))
    print(" Median .......... : {}".format(metrics["median"]))
    print(" Trimean ......... : {}".format(metrics["trimean"]))
    print(" Best 25% ........ : {}".format(metrics["bst25"]))
    print(" Worst 25% ....... : {}".format(metrics["wst25"]))
    print(" Percentile 95 ... : {} \n".format(metrics["pct95"]))
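# ---------------------------------------------------------------------------
# NOTE: every script in this section scores predictions via
# model.get_angular_loss, which is defined in the model classes rather than
# here. As a reference, a minimal sketch of the standard recovery angular
# error used in color constancy (the angle, in degrees, between predicted
# and ground-truth illuminant vectors), assuming batched N x 3 tensors:
# ---------------------------------------------------------------------------
import math

import torch
import torch.nn.functional as F


def angular_error(pred: torch.Tensor, label: torch.Tensor) -> torch.Tensor:
    """Mean angle in degrees between predicted and ground-truth illuminants."""
    cos = torch.clamp(F.cosine_similarity(pred, label, dim=1), -0.999999, 0.999999)
    return torch.mean(torch.acos(cos)) * 180.0 / math.pi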
def main():
    evaluator = Evaluator()

    path_to_log = os.path.join(PATH_TO_LOGS, "{}_{}_{}".format(MODEL_TYPE, DATA_FOLDER, str(time.time())))
    os.makedirs(path_to_log)
    path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
    path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
    log_experiment(MODEL_TYPE, DATA_FOLDER, LEARNING_RATE, path_to_experiment_log)

    print("\nLoading data from '{}':".format(DATA_FOLDER))
    training_set = TemporalColorConstancy(mode="train", split_folder=DATA_FOLDER)
    train_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
    test_set = TemporalColorConstancy(mode="test", split_folder=DATA_FOLDER)
    test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=8)
    training_set_size, test_set_size = len(training_set), len(test_set)
    print("Training set size: ... {}".format(training_set_size))
    print("Test set size: ....... {}\n".format(test_set_size))

    model = MODELS[MODEL_TYPE]()

    if RELOAD_CHECKPOINT:
        print('\n Reloading checkpoint - pretrained model stored at: {} \n'.format(PATH_TO_PTH_CHECKPOINT))
        model.load(PATH_TO_PTH_CHECKPOINT)
    elif PATH_TO_PTH_SUBMODULE != '':
        print('\n Loading pretrained submodules stored at: {} \n'.format(PATH_TO_PTH_SUBMODULE))
        model.load_submodules(PATH_TO_PTH_SUBMODULE)

    model.print_network()
    model.log_network(path_to_log)
    model.set_optimizer(learning_rate=LEARNING_RATE)

    print('\n Training starts... \n')

    best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
    train_l1, train_l2, train_l3, train_mal = LossTracker(), LossTracker(), LossTracker(), LossTracker()
    val_l1, val_l2, val_l3, val_mal = LossTracker(), LossTracker(), LossTracker(), LossTracker()

    for epoch in range(EPOCHS):

        # --- Training ---
        model.train_mode()
        train_l1.reset()
        train_l2.reset()
        train_l3.reset()
        train_mal.reset()
        start = time.time()

        for i, (sequence, mimic, label, file_name) in enumerate(train_loader):
            model.reset_gradient()
            sequence, mimic, label = sequence.to(DEVICE), mimic.to(DEVICE), label.to(DEVICE)
            o1, o2, o3 = model.predict(sequence, mimic)
            l1, l2, l3, mal = model.compute_loss([o1, o2, o3], label)
            mal.backward()
            model.optimize()

            train_l1.update(l1.item())
            train_l2.update(l2.item())
            train_l3.update(l3.item())
            train_mal.update(mal.item())

            if i % 5 == 0:
                print("[ Epoch: {}/{} - Batch: {}/{} ] | "
                      "[ Train L1: {:.4f} | Train L2: {:.4f} | Train L3: {:.4f} | Train MAL: {:.4f} ]"
                      .format(epoch, EPOCHS, i, training_set_size, l1.item(), l2.item(), l3.item(), mal.item()))

        train_time = time.time() - start
        log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

        # --- Validation ---
        val_l1.reset()
        val_l2.reset()
        val_l3.reset()
        val_mal.reset()
        start = time.time()

        if epoch % 5 == 0:
            print("\n--------------------------------------------------------------")
            print("\t\t Validation")
            print("--------------------------------------------------------------\n")

            with torch.no_grad():
                model.evaluation_mode()
                evaluator.reset_errors()

                for i, (sequence, mimic, label, file_name) in enumerate(test_loader):
                    sequence, mimic, label = sequence.to(DEVICE), mimic.to(DEVICE), label.to(DEVICE)
                    o1, o2, o3 = model.predict(sequence, mimic)
                    l1, l2, l3, mal = model.compute_loss([o1, o2, o3], label)

                    val_l1.update(l1.item())
                    val_l2.update(l2.item())
                    val_l3.update(l3.item())
                    val_mal.update(mal.item())
                    evaluator.add_error(l3.item())

                    if i % 5 == 0:
                        print("[ Epoch: {}/{} - Batch: {}/{} ] | "
                              "[ Val L1: {:.4f} | Val L2: {:.4f} | Val L3: {:.4f} | Val MAL: {:.4f} ]"
                              .format(epoch, EPOCHS, i, test_set_size, l1.item(), l2.item(), l3.item(), mal.item()))

            print("\n--------------------------------------------------------------\n")

        val_time = time.time() - start
        log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

        metrics = evaluator.compute_metrics()
        print("\n********************************************************************")
        print(" Train Time ... : {:.4f}".format(train_time))
        print(" Train MAL .... : {:.4f}".format(train_mal.avg))
        print(" Train L1 ..... : {:.4f}".format(train_l1.avg))
        print(" Train L2 ..... : {:.4f}".format(train_l2.avg))
        print(" Train L3 ..... : {:.4f}".format(train_l3.avg))

        if val_time > 0.1:
            print("....................................................................")
            print(" Val Time ..... : {:.4f}".format(val_time))
            print(" Val MAL ...... : {:.4f}".format(val_mal.avg))
            print(" Val L1 ....... : {:.4f}".format(val_l1.avg))
            print(" Val L2 ....... : {:.4f}".format(val_l2.avg))
            print(" Val L3 ....... : {:.4f} (Best: {:.4f})".format(val_l3.avg, best_val_loss))
            print("....................................................................")
            print_val_metrics(metrics, best_metrics)
        print("********************************************************************\n")

        if 0 < val_l3.avg < best_val_loss:
            best_val_loss = val_l3.avg
            best_metrics = evaluator.update_best_metrics()
            print("Saving new best model... \n")
            model.save(os.path.join(path_to_log, "model.pth"))

        log_metrics(train_mal.avg, val_mal.avg, metrics, best_metrics, path_to_metrics_log)
def main(opt):
    model_type = opt.model_type
    data_folder = opt.data_folder
    split_folder = opt.split_folder

    path_to_pth = os.path.join("trained_models", data_folder, model_type, split_folder, "model.pth")
    path_to_log = os.path.join(PATH_TO_LOGS, "{}_{}_{}_{}".format(model_type, data_folder, split_folder, time()))
    os.makedirs(path_to_log)

    evaluator = Evaluator()
    eval_data = {"file_names": [], "predictions": [], "ground_truths": []}
    inference_times = []

    test_set = GrayBall(mode="test", num_folds=1)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=20)
    print('Test set size: {}'.format(len(test_set)))

    model = MODELS[model_type]()

    print('\n Loading pretrained {} model stored at: {} \n'.format(model_type, path_to_pth))
    model.load(path_to_pth)
    model.evaluation_mode()

    print("\n *** Testing model {} on {}/{} *** \n".format(model_type, data_folder, split_folder))

    with torch.no_grad():
        for i, (seq, mimic, label, file_name) in enumerate(test_loader):
            seq, mimic, label = seq.to(DEVICE), mimic.to(DEVICE), label.to(DEVICE)

            tic = perf_counter()
            pred = model.predict(seq, mimic)
            toc = perf_counter()
            inference_times.append(toc - tic)

            loss = model.get_angular_loss(pred, label).item()
            evaluator.add_error(loss)

            eval_data["file_names"].append(file_name[0])
            eval_data["predictions"].append(pred.cpu().numpy())
            eval_data["ground_truths"].append(label.cpu().numpy())

            print(" - Item {}: {}, AE: {:.4f}".format(i, file_name[0].split(os.sep)[-1], loss))

    print(" \n Average inference time: {:.4f} \n".format(np.mean(inference_times)))

    eval_data["errors"] = evaluator.get_errors()
    metrics = evaluator.compute_metrics()
    print_test_metrics(metrics)

    pd.DataFrame({k: [v] for k, v in metrics.items()}).to_csv(os.path.join(path_to_log, "metrics.csv"), index=False)
    pd.DataFrame(eval_data).to_csv(os.path.join(path_to_log, "eval.csv"), index=False)
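# ---------------------------------------------------------------------------
# NOTE: one caveat on the inference timing above. CUDA kernels are launched
# asynchronously, so on a GPU perf_counter() can stop the clock before
# model.predict has actually finished. A sketch of a synchronized variant
# (the model/predict interface is the one assumed by the script above):
# ---------------------------------------------------------------------------
import torch
from time import perf_counter


def timed_predict(model, seq, mimic):
    """Wall-clock a single forward pass, synchronizing if CUDA is in use."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # flush pending kernels before starting the clock
    tic = perf_counter()
    pred = model.predict(seq, mimic)
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # wait for the forward pass to complete
    toc = perf_counter()
    return pred, toc - tic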
class Trainer:

    def __init__(self, model: Model, opt, log_dir: str, log_name: str, visdom_port: int, single_stage: bool):
        self.__device = get_device()
        self.__evaluator = Evaluator()
        self.__single_stage = single_stage

        vh = VisdomHandler(port=visdom_port, env=opt.env + '-' + datetime.now().isoformat())
        self.__mt = MetricsTracker(vh, single_stage)

        self.__epochs = opt.nepoch
        self.__fold_num = opt.foldnum
        batch_size = opt.batch_size
        workers = opt.workers

        self.__log_dir = log_dir
        self.__log_name = log_name

        self.__model = model
        self.__training_loader = self.__create_data_loader(batch_size=batch_size, workers=workers, train=True)
        self.__test_loader = self.__create_data_loader(batch_size=1, workers=workers, train=False)

    def __create_data_loader(self, batch_size: int, workers: int, train: bool = True):
        dataset = ColorCheckerDataset(train=train, folds_num=self.__fold_num)
        print("{} set size ... : {}".format("Training" if train else "Test", len(dataset)))
        # Shuffle only the training set; the test set is evaluated in a fixed order
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train, num_workers=workers)

    def train(self):
        print("\n Training starts... \n")

        for epoch in range(self.__epochs):
            self.__mt.reset_losses()

            # --- Training ---
            self.__model.train_mode()
            start = time.time()

            for _, data in enumerate(self.__training_loader):
                self.__model.reset_gradient()
                img, label, file_name = data
                img, label = img.to(self.__device), label.to(self.__device)
                train_loss = self.__model.compute_loss(img, label)
                self.__mt.update_train_loss(train_loss, epoch)
                self.__model.optimize()

            train_time = time.time() - start

            # --- Validation ---
            start = time.time()

            if epoch % 5 == 0:
                self.__evaluator.reset_errors()
                self.__model.evaluation_mode()

                with torch.no_grad():
                    for _, data in enumerate(self.__test_loader):
                        img, label, file_name = data
                        img, label = img.to(self.__device), label.to(self.__device)
                        val_loss = self.__model.compute_loss(img, label)
                        self.__mt.update_val_loss(val_loss, epoch)
                        self.__evaluator.add_error(val_loss if self.__single_stage else val_loss[3])

            val_time = time.time() - start

            train_loss, val_loss = self.__mt.get_train_loss_value(), self.__mt.get_val_loss_value()
            if self.__single_stage:
                print("Epoch: {}, Train_loss: {:.5}, Val_loss: {:.5}, T_time: {}, V_time: {}"
                      .format(epoch + 1, train_loss, val_loss, train_time, val_time))
            else:
                train_l3 = self.__mt.get_train_loss_tracker().get_step_loss(3)
                val_l3 = self.__mt.get_val_loss_tracker().get_step_loss(3)
                print("Epoch: {}, TL: {:.5}, TL3: {:.5}, VL: {:.5}, VL3: {:.5}, T_time: {:.5}, V_time: {:.5}"
                      .format(epoch + 1, train_loss, train_l3, val_loss, val_l3, train_time, val_time))

            metrics = self.__evaluator.compute_metrics()
            if 0 < self.__mt.get_val_loss_value() < self.__mt.get_best_val_loss():
                self.__mt.update_metrics(metrics)
                self.__model.save(os.path.join(self.__log_dir, "fold" + str(self.__fold_num) + ".pth"))

            self.__mt.log_metrics(self.__log_name, epoch)
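# ---------------------------------------------------------------------------
# NOTE: a sketch of how the Trainer above might be driven. The option names
# mirror those read in __init__ (nepoch, foldnum, batch_size, workers, env),
# but the argparse wiring, defaults, and concrete model choice are
# assumptions, not part of this section:
# ---------------------------------------------------------------------------
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--nepoch', type=int, default=2000)
    parser.add_argument('--foldnum', type=int, default=0)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--workers', type=int, default=8)
    parser.add_argument('--env', type=str, default='main')
    opt = parser.parse_args()

    model = Model3Stages(get_device())  # hypothetical choice; any Model subclass fits
    trainer = Trainer(model=model, opt=opt, log_dir='log', log_name='train.log',
                      visdom_port=8097, single_stage=False)
    trainer.train()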
def main():
    evaluator = Evaluator()

    for n in range(NUM_FOLDS):
        path_to_log = os.path.join(PATH_TO_LOGS, "ctccnet2_fold_{}_{}".format(n, time.time()))
        os.makedirs(path_to_log)
        path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
        path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
        log_experiment("ctccnet2", "fold_{}".format(n), LEARNING_RATE, path_to_experiment_log)

        print("\n Loading data for FOLD {}:".format(n))
        training_set = GrayBall(mode="train", fold=n, num_folds=NUM_FOLDS, return_labels=True)
        train_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
        test_set = GrayBall(mode="test", fold=n, num_folds=NUM_FOLDS, return_labels=True)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=8)
        training_set_size, test_set_size = len(training_set), len(test_set)

        print("\n TRAINING SET")
        print("\t Size: ..... {}".format(training_set_size))
        print("\t Scenes: ... {}".format(training_set.get_scenes()))
        print("\n TEST SET")
        print("\t Size: ..... {}".format(test_set_size))
        print("\t Scenes: ... {}".format(test_set.get_scenes()))

        model = ModelCTCCNet2()
        model.print_network()
        model.log_network(path_to_log)
        model.set_optimizer(learning_rate=LEARNING_RATE)

        print('\n Training starts... \n')

        best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
        train_losses, train_losses_cor, val_losses, val_losses_cor = [], [], [], []
        for _ in range(NUM_STAGES + 1):
            train_losses.append(LossTracker())
            train_losses_cor.append(LossTracker())
            val_losses.append(LossTracker())
            val_losses_cor.append(LossTracker())

        for epoch in range(EPOCHS):

            # --- Training ---
            model.train_mode()
            for tl, tlc in zip(train_losses, train_losses_cor):
                tl.reset()
                tlc.reset()
            start = time.time()

            for i, (seq_temp, seq_shot, labels, _) in enumerate(train_loader):
                seq_temp, seq_shot, labels = seq_temp.to(DEVICE), seq_shot.to(DEVICE), labels.to(DEVICE)

                model.reset_gradient()
                outputs = model.predict(seq_temp, seq_shot, return_preds=True)
                cas_loss, cas_mal, cor_loss, cor_mal = model.compute_corr_loss(outputs, labels)

                # NOTE: the flattened original never stepped the optimizer. The joint
                # objective below (sum of the cascade and correction MALs, matching the
                # total logged as `mal`) is an assumption made to restore training.
                (cas_mal + cor_mal).backward()
                model.optimize()

                for (tl, sl), (tlc, slc) in zip(zip(train_losses[:-1], cas_loss), zip(train_losses_cor[:-1], cor_loss)):
                    tl.update(sl.item())
                    tlc.update(slc.item())
                train_losses[-1].update(cas_mal.item())
                train_losses[-1].update(cor_mal.item())

                if i % 5 == 0:
                    mal = cas_mal.item() + cor_mal.item()
                    tl_log = " | ".join(["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cas_loss)])
                    tlc_log = " | ".join(["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cor_loss)])
                    print(" TRAIN: [ Epoch: {}/{} - Batch: {}/{} ] | Loss: {:.4f} |"
                          " Cascade: [ {} | MAL: {:.4f} ] |"
                          " Corrections: [ {} | MAL: {:.4f} ]"
                          .format(epoch + 1, EPOCHS, i + 1, training_set_size, mal,
                                  tl_log, cas_mal.item(), tlc_log, cor_mal.item()))

            train_time = time.time() - start
            log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

            # --- Validation ---
            for vl, vlc in zip(val_losses, val_losses_cor):
                vl.reset()
                vlc.reset()
            start = time.time()

            if epoch % 5 == 0:
                print("\n--------------------------------------------------------------")
                print("\t\t Validation")
                print("--------------------------------------------------------------\n")

                with torch.no_grad():
                    model.evaluation_mode()
                    evaluator.reset_errors()

                    for i, (seq_temp, seq_shot, labels, _) in enumerate(test_loader):
                        seq_temp, seq_shot, labels = seq_temp.to(DEVICE), seq_shot.to(DEVICE), labels.to(DEVICE)
                        outputs = model.predict(seq_temp, seq_shot, return_preds=True)
                        cas_loss, cas_mal, cor_loss, cor_mal = model.get_corr_loss(outputs, labels)

                        losses = zip(zip(val_losses[:-1], cas_loss), zip(val_losses_cor[:-1], cor_loss))
                        for (vl, sl), (vlc, slc) in losses:
                            vl.update(sl.item())
                            vlc.update(slc.item())
                        val_losses[-1].update(cas_mal.item())
                        val_losses[-1].update(cor_mal.item())
                        evaluator.add_error(cas_loss[-1].item())

                        if i % 5 == 0:
                            mal = cas_mal.item() + cor_mal.item()
                            log_cas = " | ".join(["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cas_loss)])
                            log_cor = " | ".join(["L{}: {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(cor_loss)])
                            print(" VAL: [ Epoch: {}/{} - Batch: {}/{} ] | Loss: {:.4f} |"
                                  " Cascade: [ {} | MAL: {:.4f} ] |"
                                  " Corrections: [ {} | MAL: {:.4f} ]"
                                  .format(epoch + 1, EPOCHS, i + 1, test_set_size, mal,
                                          log_cas, cas_mal.item(), log_cor, cor_mal.item()))

                print("\n--------------------------------------------------------------\n")

            val_time = time.time() - start
            log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

            metrics = evaluator.compute_metrics()
            print("\n********************************************************************")
            print(" Train Time ....... : {:.4f}".format(train_time))
            tl_log = " | ".join(["L{} {:.4f}".format(i + 1, tl.avg) for i, tl in enumerate(train_losses[:-1])])
            print(" AVG Train Loss ... : [ {} | MAL: {:.4f} ]".format(tl_log, train_losses[-1].avg))

            if val_time > 0.1:
                print("....................................................................")
                print(" Val Time ......... : {:.4f}".format(val_time))
                vl_log = " | ".join(["L{} {:.4f}".format(i + 1, vl.avg) for i, vl in enumerate(val_losses[:-1])])
                print(" AVG Val Loss ..... : [ {} | MAL: {:.4f} ]".format(vl_log, val_losses[-1].avg))
                print("....................................................................")
                print_val_metrics(metrics, best_metrics)
            print("********************************************************************\n")

            curr_val_loss = val_losses[-2].avg
            if 0 < curr_val_loss < best_val_loss:
                best_val_loss = curr_val_loss
                best_metrics = evaluator.update_best_metrics()
                print("Saving new best model... \n")
                model.save(os.path.join(path_to_log, "model.pth"))

            log_metrics(train_losses[-1].avg, val_losses[-1].avg, metrics, best_metrics, path_to_metrics_log)
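# ---------------------------------------------------------------------------
# NOTE: the LossTracker instances these training loops reset and update are
# defined elsewhere. The usage here (reset() per epoch, update(value) per
# batch, .avg for reporting) implies a plain running average; a minimal
# sketch under that assumption:
# ---------------------------------------------------------------------------
class LossTracker:

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, value: float):
        self.sum += value
        self.count += 1
        self.avg = self.sum / self.count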
def main():
    evaluator = Evaluator()

    for n in range(NUM_FOLDS):
        path_to_log = os.path.join(PATH_TO_LOGS, "{}_fold_{}_{}".format(MODEL_TYPE, n, time.time()))
        os.makedirs(path_to_log)
        path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
        path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
        log_experiment(MODEL_TYPE, "fold_{}".format(n), LEARNING_RATE, path_to_experiment_log)

        print("\n Loading data for FOLD {}:".format(n))
        training_set = GrayBall(mode="train", fold=n, num_folds=NUM_FOLDS)
        train_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
        test_set = GrayBall(mode="test", fold=n, num_folds=NUM_FOLDS)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=8)
        training_set_size, test_set_size = len(training_set), len(test_set)

        print("\n TRAINING SET")
        print("\t Size: ..... {}".format(training_set_size))
        print("\t Scenes: ... {}".format(training_set.get_scenes()))
        print("\n TEST SET")
        print("\t Size: ..... {}".format(test_set_size))
        print("\t Scenes: ... {}".format(test_set.get_scenes()))

        model = MODELS[MODEL_TYPE]()

        if RELOAD_CHECKPOINT:
            print('\n Reloading checkpoint - pretrained model stored at: {} \n'.format(PATH_TO_PTH_CHECKPOINT))
            model.load(PATH_TO_PTH_CHECKPOINT)

        model.print_network()
        model.log_network(path_to_log)
        model.set_optimizer(learning_rate=LEARNING_RATE)

        print('\n Training starts... \n')

        best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
        train_loss, val_loss = LossTracker(), LossTracker()

        for epoch in range(EPOCHS):

            # --- Training ---
            model.train_mode()
            train_loss.reset()
            start = time.time()

            for i, data in enumerate(train_loader):
                model.reset_gradient()
                sequence, mimic, label, file_name = data
                sequence = sequence.unsqueeze(1).to(DEVICE) if len(sequence.shape) == 4 else sequence.to(DEVICE)
                mimic = mimic.to(DEVICE)
                label = label.to(DEVICE)

                loss = model.compute_loss(sequence, label, mimic)
                model.optimize()
                train_loss.update(loss)

                if i % 5 == 0:
                    print("[ Epoch: {}/{} - Item: {}/{} ] | [ Train loss: {:.4f} ]"
                          .format(epoch, EPOCHS, i, training_set_size, loss))

            train_time = time.time() - start
            log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

            # --- Validation ---
            start = time.time()
            val_loss.reset()

            if epoch % 5 == 0:
                print("\n--------------------------------------------------------------")
                print("\t\t Validation")
                print("--------------------------------------------------------------\n")

                with torch.no_grad():
                    model.evaluation_mode()
                    evaluator.reset_errors()

                    for i, data in enumerate(test_loader):
                        sequence, mimic, label, file_name = data
                        sequence = sequence.unsqueeze(1).to(DEVICE) if len(sequence.shape) == 4 else sequence.to(DEVICE)
                        mimic = mimic.to(DEVICE)
                        label = label.to(DEVICE)

                        o = model.predict(sequence, mimic)
                        loss = model.get_angular_loss(o, label).item()
                        val_loss.update(loss)
                        evaluator.add_error(loss)

                        if i % 5 == 0:
                            print("[ Epoch: {}/{} - Item: {}/{} ] | [ Val loss: {:.4f} ]"
                                  .format(epoch, EPOCHS, i, test_set_size, loss))

                print("\n--------------------------------------------------------------\n")

            val_time = time.time() - start
            log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

            metrics = evaluator.compute_metrics()
            print("\n********************************************************************")
            print(" Train Time ... : {:.4f}".format(train_time))
            print(" Train Loss ... : {:.4f}".format(train_loss.avg))

            if val_time > 0.1:
                print("....................................................................")
                print(" Val Time ..... : {:.4f}".format(val_time))
                print(" Val Loss ..... : {:.4f}".format(val_loss.avg))
                print("....................................................................")
                print_val_metrics(metrics, best_metrics)
            print("********************************************************************\n")

            if 0 < val_loss.avg < best_val_loss:
                best_val_loss = val_loss.avg
                best_metrics = evaluator.update_best_metrics()
                print("Saving new best model... \n")
                model.save(os.path.join(path_to_log, "model.pth"))

            log_metrics(train_loss.avg, val_loss.avg, metrics, best_metrics, path_to_metrics_log)
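# ---------------------------------------------------------------------------
# NOTE: log_experiment, log_time, and log_metrics are small logging helpers
# shared by these scripts; their signatures can be read off the call sites.
# A plausible sketch of log_metrics appending one CSV row per epoch (the
# exact column names are an assumption):
# ---------------------------------------------------------------------------
import os

import pandas as pd


def log_metrics(train_loss: float, val_loss: float, current_metrics: dict,
                best_metrics: dict, path_to_log: str):
    """Append one row of current/best metrics to the epoch-level CSV log."""
    row = {"train_loss": [train_loss], "val_loss": [val_loss]}
    row.update({k: [v] for k, v in current_metrics.items()})
    row.update({"best_" + k: [v] for k, v in best_metrics.items()})
    pd.DataFrame(row).to_csv(path_to_log, mode='a', index=False,
                             header=not os.path.exists(path_to_log))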
def main(opt):
    data_folder = opt.data_folder
    epochs = opt.epochs
    learning_rate = opt.lr

    evaluator = Evaluator()

    path_to_log = os.path.join("train", "tcc", "logs", "ctccnet2_{}_{}".format(data_folder, str(time.time())))
    os.makedirs(path_to_log)
    path_to_metrics_log = os.path.join(path_to_log, "metrics.csv")
    path_to_experiment_log = os.path.join(path_to_log, "experiment.json")
    log_experiment("ctccnet2", data_folder, learning_rate, path_to_experiment_log)

    print("\n Loading data from '{}':".format(data_folder))
    training_set = TemporalColorConstancy(mode="train", split_folder=data_folder)
    train_loader = DataLoader(dataset=training_set, batch_size=1, shuffle=True, num_workers=8)
    test_set = TemporalColorConstancy(mode="test", split_folder=data_folder)
    test_loader = DataLoader(dataset=test_set, batch_size=1, num_workers=8)
    training_set_size, test_set_size = len(training_set), len(test_set)
    print("Training set size: ... {}".format(training_set_size))
    print("Test set size: ....... {}\n".format(test_set_size))

    model = ModelCTCCNet2()

    if RELOAD_CHECKPOINT:
        print('\n Reloading checkpoint - pretrained model stored at: {} \n'.format(PATH_TO_PTH_CHECKPOINT))
        model.load(PATH_TO_PTH_CHECKPOINT)

    model.print_network()
    model.log_network(path_to_log)
    model.set_optimizer(learning_rate)

    print('\n Training starts... \n')

    best_val_loss, best_metrics = 100.0, evaluator.get_best_metrics()
    train_losses, val_losses = [], []
    for _ in range(NUM_STAGES + 1):
        train_losses.append(LossTracker())
        val_losses.append(LossTracker())

    for epoch in range(epochs):

        # --- Training ---
        model.train_mode()
        for tl in train_losses:
            tl.reset()
        start = time.time()

        for i, (seq_temp, seq_shot, label, _) in enumerate(train_loader):
            seq_temp, seq_shot, label = seq_temp.to(DEVICE), seq_shot.to(DEVICE), label.to(DEVICE)

            outputs = model.predict(seq_temp, seq_shot)
            stages_loss, mal = model.compute_loss(outputs, label)

            # NOTE: the flattened original never stepped the optimizer; the standard
            # mal.backward() / optimize() step used by the other training scripts in
            # this section is restored here as an assumption.
            model.reset_gradient()
            mal.backward()
            model.optimize()

            for tl, sl in zip(train_losses[:-1], stages_loss):
                tl.update(sl.item())
            train_losses[-1].update(mal.item())

            if i % 5 == 0:
                tl_log = " | ".join(["TL{} {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(stages_loss)])
                print("[ Epoch: {}/{} - Batch: {}/{} ] | [ {} | Train MAL: {:.4f} ]"
                      .format(epoch + 1, epochs, i + 1, training_set_size, tl_log, mal.item()))

        train_time = time.time() - start
        log_time(time=train_time, time_type="train", path_to_log=path_to_experiment_log)

        # --- Validation ---
        for vl in val_losses:
            vl.reset()
        start = time.time()

        if epoch % 5 == 0:
            print("\n--------------------------------------------------------------")
            print("\t\t Validation")
            print("--------------------------------------------------------------\n")

            with torch.no_grad():
                model.evaluation_mode()
                evaluator.reset_errors()

                for i, (seq_temp, seq_shot, label, _) in enumerate(test_loader):
                    seq_temp, seq_shot, label = seq_temp.to(DEVICE), seq_shot.to(DEVICE), label.to(DEVICE)
                    outputs = model.predict(seq_temp, seq_shot)
                    stages_loss, mal = model.get_loss(outputs, label)

                    for vl, sl in zip(val_losses[:-1], stages_loss):
                        vl.update(sl.item())
                    val_losses[-1].update(mal.item())
                    evaluator.add_error(stages_loss[-1].item())

                    if i % 5 == 0:
                        vl_log = " | ".join(["VL{} {:.4f}".format(i + 1, sl.item()) for i, sl in enumerate(stages_loss)])
                        print("[ Epoch: {}/{} - Batch: {}/{} ] | [ {} | Val MAL: {:.4f} ]"
                              .format(epoch + 1, epochs, i + 1, test_set_size, vl_log, mal.item()))

            print("\n--------------------------------------------------------------\n")

        val_time = time.time() - start
        log_time(time=val_time, time_type="val", path_to_log=path_to_experiment_log)

        metrics = evaluator.compute_metrics()
        print("\n********************************************************************")
        print(" Train Time ....... : {:.4f}".format(train_time))
        tl_log = " | ".join(["L{} {:.4f}".format(i + 1, tl.avg) for i, tl in enumerate(train_losses[:-1])])
        print(" AVG Train Loss ... : [ {} | MAL: {:.4f} ]".format(tl_log, train_losses[-1].avg))

        if val_time > 0.1:
            print("....................................................................")
            print(" Val Time ......... : {:.4f}".format(val_time))
            vl_log = " | ".join(["L{} {:.4f}".format(i + 1, vl.avg) for i, vl in enumerate(val_losses[:-1])])
            print(" AVG Val Loss ..... : [ {} | MAL: {:.4f} ]".format(vl_log, val_losses[-1].avg))
            print("....................................................................")
            print_val_metrics(metrics, best_metrics)
        print("********************************************************************\n")

        curr_val_loss = val_losses[-2].avg
        if 0 < curr_val_loss < best_val_loss:
            best_val_loss = curr_val_loss
            best_metrics = evaluator.update_best_metrics()
            print("Saving new best model... \n")
            model.save(os.path.join(path_to_log, "model.pth"))

        log_metrics(train_losses[-1].avg, val_losses[-1].avg, metrics, best_metrics, path_to_metrics_log)
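# ---------------------------------------------------------------------------
# NOTE: the multi-stage angular loss (MAL) returned by compute_loss/get_loss
# alongside the per-stage losses is, in the cascade formulation, the sum of
# the angular errors of the progressively refined estimates. A sketch under
# that assumption, reusing the angular_error helper sketched earlier; the
# multiplicative refinement mirrors the p1/p2/p3 composition in the test
# script below:
# ---------------------------------------------------------------------------
import torch


def multi_stage_angular_loss(outputs: list, label: torch.Tensor):
    """Per-stage angular errors and their sum (MAL) for cascaded outputs."""
    pred = torch.ones_like(label)
    stages_loss = []
    for o in outputs:
        pred = torch.mul(pred, o)  # each stage refines the running estimate
        stages_loss.append(angular_error(pred, label))
    mal = torch.stack(stages_loss).sum()
    return stages_loss, mal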
def main(opt):
    model_type = opt.model_type
    data_folder = opt.data_folder
    split_folder = opt.split_folder
    plot_losses = opt.plot_losses

    path_to_pth = os.path.join("trained_models", data_folder, model_type, split_folder, "model.pth")
    path_to_log = os.path.join(PATH_TO_LOGS, "{}_{}_{}_{}".format(model_type, data_folder, split_folder, time()))
    os.makedirs(path_to_log)

    eval1, eval2, eval3 = Evaluator(), Evaluator(), Evaluator()
    eval_data = {"file_names": [], "predictions": [], "ground_truths": []}
    inference_times = []

    test_set = GrayBall(mode="test", num_folds=1)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=20)
    print('Test set size: {}'.format(len(test_set)))

    model = MODELS[model_type]()

    print('\n Loading pretrained {} model stored at: {} \n'.format(model_type, path_to_pth))
    model.load(path_to_pth)
    model.evaluation_mode()

    print("\n *** Testing model {} on {}/{} *** \n".format(model_type, data_folder, split_folder))

    with torch.no_grad():
        for i, (seq, mimic, label, file_name) in enumerate(test_loader):
            seq, mimic, label = seq.to(DEVICE), mimic.to(DEVICE), label.to(DEVICE)

            tic = perf_counter()
            o1, o2, o3 = model.predict(seq, mimic)
            toc = perf_counter()
            inference_times.append(toc - tic)

            # Cascaded predictions: each stage multiplicatively refines the previous estimate
            p1, p2, p3 = o1, torch.mul(o1, o2), torch.mul(torch.mul(o1, o2), o3)
            l1 = model.get_angular_loss(p1, label).item()
            l2 = model.get_angular_loss(p2, label).item()
            l3 = model.get_angular_loss(p3, label).item()
            eval1.add_error(l1)
            eval2.add_error(l2)
            eval3.add_error(l3)

            eval_data["file_names"].append(file_name[0])
            eval_data["predictions"].append(p3.cpu().numpy())
            eval_data["ground_truths"].append(label.cpu().numpy())

            print("Item {}: {} - [ L1: {:.4f} | L2: {:.4f} | L3: {:.4f} ]"
                  .format(i, file_name[0].split(os.sep)[-1], l1, l2, l3))

    print(" \n Average inference time: {:.4f} \n".format(np.mean(inference_times)))

    e1, e2, e3 = eval1.get_errors(), eval2.get_errors(), eval3.get_errors()
    eval_data["errors"] = e3
    metrics1, metrics2, metrics3 = eval1.compute_metrics(), eval2.compute_metrics(), eval3.compute_metrics()
    print_test_metrics((metrics1, metrics2, metrics3))

    if plot_losses:
        plt.plot(range(len(e1)), e1, label="AE1")
        plt.plot(range(len(e2)), e2, label="AE2")
        plt.plot(range(len(e3)), e3, label="AE3")
        plt.legend()
        plt.show()

    pd.DataFrame({k: [v] for k, v in metrics1.items()}).to_csv(os.path.join(path_to_log, "metrics_1.csv"), index=False)
    pd.DataFrame({k: [v] for k, v in metrics2.items()}).to_csv(os.path.join(path_to_log, "metrics_2.csv"), index=False)
    pd.DataFrame({k: [v] for k, v in metrics3.items()}).to_csv(os.path.join(path_to_log, "metrics_3.csv"), index=False)
    pd.DataFrame(eval_data).to_csv(os.path.join(path_to_log, "eval.csv"), index=False)