# NOTE(review): this name shadows the builtin `eval`; kept for caller
# compatibility, but consider renaming (e.g. `evaluate`) project-wide.
def eval(cfg, model, loader, criterion, publisher="test"):
    """Evaluate `model` over `loader` and return a tensorboard-ready log dict.

    Args:
        cfg: config object; reads `cfg.dataset.num_classes` and
            `cfg.general.device`.
        model: model to evaluate (switched to eval mode here).
        loader: iterable of evaluation batches.
        criterion: loss function forwarded to `processing`.
        publisher: tag prefix for the returned metric keys (default "test").

    Returns:
        dict mapping "<publisher>/loss", "<publisher>/mAcc",
        "<publisher>/oAcc" and "<publisher>/IoU" to epoch-level values.
    """
    model.eval()

    # metrics accumulated across the whole epoch
    acc_meter = MultiAssessmentMeter(
        num_classes=cfg.dataset.num_classes,
        metrics=["class", "overall", "iou"],
    )
    batch_loss = LossMeter()
    meters = (acc_meter, batch_loss)

    # no gradients needed for evaluation
    with torch.no_grad():
        for data in loader:
            # `processing` updates the meters as a side effect;
            # the returned loss is not needed here.
            _ = processing(model, criterion, data, meters, cfg.general.device)

    # get epoch loss and accuracy
    epoch_loss = batch_loss.compute()
    epoch_acc = acc_meter.compute()

    # save loss and acc to tensorboard
    log_dict = {
        "{}/loss".format(publisher): epoch_loss,
        "{}/mAcc".format(publisher): epoch_acc["class"],
        "{}/oAcc".format(publisher): epoch_acc["overall"],
        "{}/IoU".format(publisher): epoch_acc["iou"],
    }
    return log_dict
def train(cfg, model, loader, optimizer, criterion, scheduler, publisher="train"):
    """Run one training epoch and return a tensorboard-ready log dict.

    Args:
        cfg: config object; reads `cfg.dataset.num_classes` and
            `cfg.general.device`.
        model: model to train (switched to train mode here).
        loader: iterable of training batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss function forwarded to `processing`.
        scheduler: LR scheduler stepped once per epoch; its last LR is logged.
        publisher: tag prefix for the returned metric keys (default "train").

    Returns:
        dict with "lr" plus "<publisher>/loss", "<publisher>/mAcc",
        "<publisher>/oAcc" and "<publisher>/IoU".

    Exits the process (via `exit()`) if the loss becomes NaN.
    """
    model.train()

    # metrics accumulated across the whole epoch
    acc_meter = MultiAssessmentMeter(
        num_classes=cfg.dataset.num_classes,
        metrics=["class", "overall", "iou"],
    )
    batch_loss = LossMeter()
    meters = (acc_meter, batch_loss)

    for data in loader:
        optimizer.zero_grad()
        loss = processing(model, criterion, data, meters, cfg.general.device)

        # BUGFIX: check for NaN *before* backward/step. The original checked
        # after optimizer.step(), so NaN gradients had already been applied
        # to the weights before the guard fired.
        if torch.isnan(loss):
            print("Training loss is nan.")
            exit()  # NOTE(review): consider raising instead of exit()

        loss.backward()
        optimizer.step()

    # per-epoch LR schedule
    scheduler.step()

    # get epoch loss and accuracy
    epoch_loss = batch_loss.compute()
    epoch_acc = acc_meter.compute()

    # save loss and acc to tensorboard
    lr = scheduler.get_last_lr()[0]
    log_dict = {
        "lr": lr,
        "{}/loss".format(publisher): epoch_loss,
        "{}/mAcc".format(publisher): epoch_acc["class"],
        "{}/oAcc".format(publisher): epoch_acc["overall"],
        "{}/IoU".format(publisher): epoch_acc["iou"],
    }
    return log_dict