def more_metrics(self, metrics_: OrderedDict):
    """Register additional evaluation metrics on *metrics_* in place.

    Adds loss, accuracy, recall, precision and a confusion matrix
    (8 classes, recall-averaged — presumably matching the model's
    output classes; confirm against the classifier head) under
    fixed keys expected by the surrounding training loop.
    """
    extra = {
        'loss': metrics.Loss(nn.CrossEntropyLoss()),
        'accuracy': metrics.Accuracy(),
        'recall': metrics.Recall(),
        'precision': metrics.Precision(),
        'confusion_matrix': metrics.ConfusionMatrix(8, average='recall'),
    }
    metrics_.update(extra)
# Dataset and dataloader setup: one dataset built from four sources,
# split into train/validation index samplers.
# NOTE(review): aliases `tud`, `tn`, `to`, `im` are not visible in this
# chunk — presumably torch.utils.data, torch.nn, torch.optim and
# ignite.metrics; confirm against the file's imports.
DSet = NpyClfDatasets (CCSN, MSS, CHIRP, DSIR, transform=transforms)
train_l, val_l = DSet.train_test_split (random_state=24, test_size=0.25)
# Both loaders share the same dataset; the sampler restricts each to its split.
t_DataLoader = tud.DataLoader (DSet, sampler=train_l, batch_size=10, pin_memory=True)
v_DataLoader = tud.DataLoader (DSet, sampler=val_l, batch_size=10, pin_memory=True)
#########################
# Progress bar template: epoch number and running loss.
DESC = "Epoch {} - loss {:.2f}"
PBAR = tqdm (initial=0, leave=False, total=len(t_DataLoader), desc=DESC.format(0, 0))
# Model, loss function and optimizer.
CLF = CNN_ONE(idx=50)
LFN = tn.CrossEntropyLoss()
OPM = to.Adam(CLF.parameters(), lr=1e-3,)
# Validation metrics keyed by short names; ConfusionMatrix is built with
# 3 (presumably num_classes — confirm with the ignite API / model output).
VAL_METRICS = {
    'loss':im.Loss (LFN),
    'acc':im.Accuracy(),
    'recall':im.Recall(),
    'precision':im.Precision(),
    'cfm':im.ConfusionMatrix (3),
}
# Per-epoch history accumulators (train loss, eval loss, accuracy,
# precision, recall, confusion matrices).
L_TRAIN = []
L_EVAL = []
L_ACC = []
L_PRE = []
L_REC = []
L_CFM = []
#########################
def train_step(engine, batch):
    # One ignite training step: forward pass on batch['payload'],
    # cross-entropy against batch['target'] (squeezed from (N,1) to (N)),
    # then backprop.
    # NOTE(review): this function appears truncated in this chunk — no
    # OPM.step() and no return value are visible; confirm the full body.
    CLF.train()
    OPM.zero_grad()
    x, y = batch['payload'], batch['target']
    ypred = CLF (x)
    loss = LFN (ypred, y.squeeze(1))
    loss.backward()
# Evaluation fragment (enclosing function not visible in this chunk:
# `dataset`, `img_path`, `model`, `device`, `n_classes`, `config` come
# from the outer scope).
# NOTE(review): the mask path mixes "train" and "val" segments — verify
# this is the intended validation label directory.
label_path = f"/home/admin/segmentation/task2/data/{dataset}/train/cropped/masks/val"
testdataset = AerialDataset("val", dataset, img_path, label_path)
testloader = DataLoader(
    testdataset,
    batch_size=16,
    pin_memory=True,
    drop_last=True,
)
# ignite evaluator: model outputs are passed through sigmoid before the
# metrics see them (see output_transform below).
evaluator = engine.create_supervised_evaluator(
    model=model,
    metrics={
        # NOTE(review): duplicate dict key — the second "[email protected]"
        # entry silently replaces the first, and both use the same 0.3
        # threshold; the second was presumably meant to be a distinct
        # key with threshold 0.5. Confirm the intended metric names.
        "[email protected]": metrics.Accuracy(thresholded_transform(0.3)),
        "[email protected]": metrics.Accuracy(thresholded_transform(0.3)),
        "IOU": metrics.IoU(metrics.ConfusionMatrix(num_classes=n_classes)),
        "mIOU": metrics.mIoU(metrics.ConfusionMatrix(num_classes=n_classes)),
        # "FPS": metrics.Frequency(output_transform=lambda x: x[0]),
    },
    device=device,
    non_blocking=True,
    output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
)
# TensorBoard writer for the validation metrics, then run one evaluation pass.
writer = tensorboard.SummaryWriter(
    log_dir=f'summary/{config["model_tag"]}')
attach_metric_logger(evaluator, 'val', writer)
evaluator.run(testloader)
# Fine-tuning fragment (enclosing function not visible in this chunk:
# `_optimizer`, `model`, `loss_fn`, `device`, `config`, `eval_loader`,
# `finetune_loader`, `args` come from the outer scope).
optimizer = _optimizer(model.parameters(), lr = config["learning_rate"])
trainer = engine.create_supervised_trainer(
    model = model,
    optimizer = optimizer,
    loss_fn = loss_fn,
    device = device,
    non_blocking = True,
)
# Evaluator applies sigmoid to the model's "out" head before metrics
# (see output_transform below).
evaluator = engine.create_supervised_evaluator(
    model = model,
    metrics={
        "Loss": metrics.Loss(nn.CrossEntropyLoss()),
        # NOTE(review): duplicate dict key — the second "[email protected]"
        # entry silently replaces the first, and both use the same 0.3
        # threshold; the second was presumably meant to be a distinct
        # key with threshold 0.5. Confirm the intended metric names.
        "[email protected]": metrics.Accuracy(thresholded_transform(0.3)),
        "[email protected]": metrics.Accuracy(thresholded_transform(0.3)),
        "IOU": metrics.IoU(metrics.ConfusionMatrix(num_classes = config["n_classes"])),
        "mIOU": metrics.mIoU(metrics.ConfusionMatrix(num_classes = config["n_classes"])),
        # "FPS": metrics.Frequency(output_transform=lambda x: x[0]),
    },
    device = device,
    non_blocking=True,
    output_transform = lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
)
# Wire up TensorBoard logging, per-iteration training logs and model
# checkpointing, then run fine-tuning for the configured epoch count.
writer = tensorboard.SummaryWriter(log_dir=f'summary/{config["model_tag"]}')
attach_metric_logger(evaluator, eval_loader, 'val', writer=writer)
attach_training_logger(trainer, writer=writer, log_interval=1)
attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
trainer.run(finetune_loader, max_epochs=config["epochs"])
def train():
    """Train BiSeNetV2 on the aerial dataset selected on the command line.

    Parses CLI arguments and the config file, prepares the log folder,
    builds train/val datasets and loaders, wraps the model in
    ``nn.DataParallel``, wires ignite trainer/evaluator engines with
    TensorBoard logging and checkpointing, then runs training for
    ``config["epochs"]`` epochs.
    """
    # initiate command line arguments, configuration file and logging block
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        # Was a bare `except:` that swallowed every error; only an
        # already-existing folder is expected here. print() is used
        # because logging is not initialised yet.
        print(f"log folder {args.name} already exists.")
    init_logging(log_path=f"./logs/{args.name}")

    # determine train model on which device, cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    # NOTE(review): this also appends an index on the cpu path
    # ("cpu:0" etc.) — confirm that is intended.
    device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset
    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model, parallelised over the GPUs from main_cuda up to 3
    logger.info(
        f'creating BiseNetv2 and optimizer with initial lr of {config["learning_rate"]}'
    )
    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(
        model, device_ids=list(range(args.main_cuda, 4))).to(device)

    # initiate loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')
    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine; the evaluator
    # applies sigmoid to the model's "out" head before metrics see it.
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )
    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            'loss': metrics.Loss(nn.CrossEntropyLoss()),
            # NOTE(review): duplicate dict key — the second
            # "[email protected]" entry silently replaces the first, and
            # both use the same 0.3 threshold; the second was presumably
            # meant to be a distinct key with threshold 0.5. Confirm the
            # intended metric names before renaming (loggers key on them).
            "[email protected]": metrics.Accuracy(thresholded_transform(0.3)),
            "[email protected]": metrics.Accuracy(thresholded_transform(0.3)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listener to do post process after each iteration and epoch
    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module},
                            args.name)
    attach_training_logger(trainer, writer=writer)
    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader,
                         writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])