def train():
    """Train a UNet with an MSE objective on the configured training set.

    All settings are read from the module-level ``conf`` object: device
    name, dataset path, noise/crop parameters, channel count, learning
    rate, epoch count and checkpoint frequency.  After every
    ``conf.save_per_epoch`` epochs the model is handed to ``save_model``.

    The learning rate is halved every 100 epochs by a ``StepLR`` scheduler.
    """
    device = torch.device(conf.cuda if torch.cuda.is_available() else "cpu")

    dataset = Training_Dataset(conf.data_path_train,
                               conf.gaussian_noise_param,
                               conf.crop_img_size)
    dataset_length = len(dataset)
    train_loader = DataLoader(dataset, batch_size=4, shuffle=True,
                              num_workers=4)

    model = UNet(in_channels=conf.img_channel, out_channels=conf.img_channel)
    criterion = nn.MSELoss()
    model = model.to(device)

    optim = Adam(model.parameters(), lr=conf.learning_rate,
                 betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=True)
    scheduler = lr_scheduler.StepLR(optim, step_size=100, gamma=0.5)

    model.train()
    print(model)
    print("Starting Training Loop...")
    since = time.time()

    for epoch in range(conf.max_epoch):
        print('Epoch {}/{}'.format(epoch, conf.max_epoch - 1))
        print('-' * 10)
        running_loss = 0.0

        for batch_idx, (source, target) in enumerate(train_loader):
            source = source.to(device)
            target = target.to(device)

            optim.zero_grad()
            denoised_source = model(source)
            loss = criterion(denoised_source, target)
            loss.backward()
            optim.step()

            # Weight by the batch size so the epoch loss is a per-sample
            # mean even when the last batch is smaller.
            running_loss += loss.item() * source.size(0)
            print('Current loss {} and current batch idx {}'.format(
                loss.item(), batch_idx))

        epoch_loss = running_loss / dataset_length
        print('{} Loss: {:.4f}'.format('current ' + str(epoch), epoch_loss))

        # Step the scheduler only after this epoch's optimizer updates.
        # Stepping it first (as the code previously did) skips the first
        # value of the schedule and shifts every decay one epoch early.
        scheduler.step()

        if (epoch + 1) % conf.save_per_epoch == 0:
            save_model(model, epoch + 1)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
for i,p in enumerate(loss_all): for j,q in enumerate(p): table.write(i,j,q) file.save('/home/star/0_code_lhj/DL-SIM-github/Training_codes/UNet/loss_UNet_SRRF_microtubule.xls') lr = get_learning_rate(epoch) for p in optimizer.param_groups: p['lr'] = lr print("learning rate = {}".format(p['lr'])) for batch_idx, items in enumerate(train_dataloader): image = items['image_in'] gt = items['groundtruth'] model.train() image = np.swapaxes(image, 1,3) image = np.swapaxes(image, 2,3) image = image.float() image = image.cuda(cuda) gt = gt.squeeze() gt = gt.float() gt = gt.cuda(cuda) pred = model(image).squeeze() loss = (pred-gt).abs().mean() + 5 * ((pred-gt)**2).mean() optimizer.zero_grad()
'ssim': [] } data, target = get_train_data(X_test, y_test, batch_size) test_data = torch.from_numpy(data.astype(int)).float() test_target = torch.from_numpy(target.astype(int)).float() for epoch in range(1, NUM_EPOCHS + 1): running_results = { 'batch_sizes': 0, 'd_loss': 0, 'g_loss': 0, 'd_score': 0, 'g_score': 0 } netG.train() netD.train() data, target = get_train_data(X_train, y_train, batch_size) data = torch.from_numpy(data.astype(int)).float() target = torch.from_numpy(target.astype(int)).float() g_update_first = True batch_size = data.size(0) running_results['batch_sizes'] += batch_size ############################ # (1) Update D network: maximize D(x)-1-D(G(z)) ########################### real_img = Variable(target) if torch.cuda.is_available(): real_img = real_img.cuda() z = Variable(data)
class Train(object):
    """Training driver for a UNet segmentation model with a Dice loss.

    ``configs`` is a mapping (anything with ``.get``) that may provide:
    ``batch_size``, ``epochs``, ``lr``, ``device``, ``workers``,
    ``vis_images``, ``vis_freq``, ``weights``, ``logs``, ``images_path``,
    ``is_resize``, ``image_short_side``, ``is_padding``, ``DateParallel``,
    ``pre_train`` and ``model_path``.  Numeric settings may be given as
    numbers or as numeric strings; they are normalised to ``int``/``float``.
    """

    def __init__(self, configs):
        # Normalise numeric settings: the values feed range(), DataLoader
        # and the optimizer, none of which accept strings.
        self.batch_size = int(configs.get("batch_size", 16))
        self.epochs = int(configs.get("epochs", 100))
        self.lr = float(configs.get("lr", 0.0001))
        device_args = configs.get("device", "cuda")
        self.device = torch.device(
            device_args if torch.cuda.is_available() else "cpu")
        self.workers = int(configs.get("workers", 4))
        self.vis_images = int(configs.get("vis_images", 200))
        self.vis_freq = int(configs.get("vis_freq", 10))

        self.weights = configs.get("weights", "./weights")
        os.makedirs(self.weights, exist_ok=True)
        self.logs = configs.get("logs", "./logs")
        os.makedirs(self.logs, exist_ok=True)

        self.images_path = configs.get("images_path", "./data")
        self.is_resize = configs.get("is_resize", False)
        self.image_short_side = configs.get("image_short_side", 256)
        self.is_padding = configs.get("is_padding", False)
        # The key is spelled "DateParallel" in existing configs; it is a
        # runtime key, so the spelling is kept for compatibility.
        is_multi_gpu = configs.get("DateParallel", False)
        pre_train = configs.get("pre_train", False)
        model_path = configs.get("model_path",
                                 './weights/unet_idcard_adam.pth')

        self.step = 0  # global count of optimizer steps across epochs
        self.dsc_loss = DiceLoss()
        self.model = UNet(in_channels=Dataset.in_channels,
                          out_channels=Dataset.out_channels)
        if pre_train:
            # strict=False tolerates missing/unexpected keys in the
            # pre-trained state dict.
            self.model.load_state_dict(
                torch.load(model_path, map_location=self.device),
                strict=False)
        if is_multi_gpu:
            self.model = nn.DataParallel(self.model)
        self.model.to(self.device)

        self.best_validation_dsc = 0.0
        self.loader_train, self.loader_valid = self.data_loaders()
        self.params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    weight_decay=0.0005)
        self.scheduler = lr_scheduler.LR_Scheduler_Head(
            'poly', self.lr, self.epochs, len(self.loader_train))

    def datasets(self):
        """Build and return the ``(train, validation)`` datasets."""
        train_datasets = Dataset(
            images_dir=self.images_path,
            subset="train",
            transform=get_transforms(train=True),
            is_resize=self.is_resize,
            image_short_side=self.image_short_side,
            is_padding=self.is_padding)
        # Validation never pads, regardless of the configured is_padding.
        valid_datasets = Dataset(
            images_dir=self.images_path,
            subset="validation",
            transform=get_transforms(train=False),
            is_resize=self.is_resize,
            image_short_side=self.image_short_side,
            is_padding=False)
        return train_datasets, valid_datasets

    def data_loaders(self):
        """Wrap the datasets in ``DataLoader`` objects.

        Training batches are shuffled and incomplete last batches dropped;
        validation runs one sample at a time in dataset order.
        """
        dataset_train, dataset_valid = self.datasets()
        loader_train = DataLoader(
            dataset_train,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=self.workers,
        )
        loader_valid = DataLoader(
            dataset_valid,
            batch_size=1,
            drop_last=False,
            num_workers=self.workers,
        )
        return loader_train, loader_valid

    @staticmethod
    def dsc_per_volume(validation_pred, validation_true):
        """Return one ``dsc`` score per (prediction, ground-truth) pair.

        Each item is wrapped in a length-1 leading axis before being
        passed to ``dsc``.  Raises ``AssertionError`` when the two
        sequences differ in length.
        """
        assert len(validation_pred) == len(validation_true)
        return [
            dsc(np.array([pred]), np.array([true]))
            for pred, true in zip(validation_pred, validation_true)
        ]

    @staticmethod
    def get_logger(filename, verbosity=1, name=None):
        """Create a logger that writes to ``filename`` and to the console.

        ``verbosity`` selects the level: 0 = DEBUG, 1 = INFO, 2 = WARNING.
        The log file is opened in ``"w"`` mode, so an existing file is
        truncated.  A file handler and a stream handler are added on every
        call.
        """
        level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING}
        formatter = logging.Formatter(
            "[%(asctime)s][%(filename)s][line:%(lineno)d][%(levelname)s] %(message)s"
        )
        logger = logging.getLogger(name)
        logger.setLevel(level_dict[verbosity])

        fh = logging.FileHandler(filename, "w")
        fh.setFormatter(formatter)
        logger.addHandler(fh)

        sh = logging.StreamHandler()
        sh.setFormatter(formatter)
        logger.addHandler(sh)
        return logger

    def train_one_epoch(self, epoch):
        """Run one pass over the training loader, updating the model.

        The project scheduler is invoked before every batch with the
        batch index, the epoch and the best validation DSC so far.  The
        loss is printed whenever the global step is a multiple of 200.
        """
        self.model.train()
        for i, (x, y_true) in enumerate(self.loader_train):
            self.scheduler(self.optimizer, i, epoch,
                           self.best_validation_dsc)
            x, y_true = x.to(self.device), y_true.to(self.device)

            y_pred = self.model(x)
            loss = self.dsc_loss(y_pred, y_true)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if self.step % 200 == 0:
                print('Epoch:[{}/{}]\t iter:[{}]\t loss={:.5f}\t '.format(
                    epoch, self.epochs, i, loss.item()))
            self.step += 1

    def eval_model(self, patience):
        """Evaluate on the validation loader and checkpoint the best model.

        The mean of the per-sample DSC scores is compared with the best
        value seen so far; on improvement the state dict is saved as
        ``unet_xia_adam.pth`` in the weights directory.  ``patience`` is
        accepted for interface compatibility and is not used.
        """
        self.model.eval()
        validation_pred = []
        validation_true = []
        with torch.no_grad():
            for x, y_true in self.loader_valid:
                x, y_true = x.to(self.device), y_true.to(self.device)
                y_pred = self.model(x)

                # Debug preview of the thresholded prediction; overwritten
                # on every batch.  Cast to uint8 (0/255) because a bool
                # tensor times 255 is int64, which is not an 8-bit image.
                mask = (y_pred > 0.5).cpu().numpy()[0][0]
                cv2.imwrite('result.png', mask.astype(np.uint8) * 255)

                validation_pred.extend(y_pred.cpu().numpy())
                validation_true.extend(y_true.cpu().numpy())

        mean_dsc = np.mean(
            self.dsc_per_volume(validation_pred, validation_true))
        if mean_dsc > self.best_validation_dsc:
            self.best_validation_dsc = mean_dsc
            torch.save(self.model.state_dict(),
                       os.path.join(self.weights, "unet_xia_adam.pth"))
        # NOTE(review): the original nesting of this print was lost; it is
        # emitted after every evaluation here - confirm that is intended.
        print("Best validation mean DSC: {:4f}".format(
            self.best_validation_dsc))

    def main(self):
        """Train for ``self.epochs`` epochs, evaluating after each one.

        The final weights are saved as ``unet_final.pth`` in the weights
        directory once all epochs have completed.
        """
        for epoch in tqdm(range(self.epochs), total=self.epochs):
            self.train_one_epoch(epoch)
            self.eval_model(patience=10)
        torch.save(self.model.state_dict(),
                   os.path.join(self.weights, "unet_final.pth"))
def _poly_lr_factor(iteration, total_iterations, power=0.9):
    """Return the polynomial decay factor ``(1 - iteration/total) ** power``.

    The base is clamped at zero.  Without the clamp, an ``iteration``
    beyond ``total_iterations`` makes the base negative, and a negative
    float raised to a fractional power is a complex number in Python 3.
    """
    remaining = max(0.0, 1.0 - iteration / total_iterations)
    return remaining ** power


def _segmentation_loss(model, images, masks, ce_loss, dice_loss,
                       has_aux_output):
    """Run a forward pass and return the loss for one batch.

    When ``has_aux_output`` is true the model returns a pair and only the
    first element is used as the prediction.  When ``dice_loss`` is not
    ``None`` the result is the mean of the cross-entropy and Dice losses.
    """
    if has_aux_output:
        pred, _ = model(images)
    else:
        pred = model(images)
    loss = ce_loss(pred, masks.long())
    if dice_loss is not None:
        loss = 0.5 * (loss + dice_loss(pred, masks, softmax=True))
    return loss


def train(cont=False):
    """Train the network selected by the module-level ``net`` setting.

    Args:
        cont: when true, resume from ``best_ckpt_src`` (model, optimizer,
            epoch and best loss) instead of starting from scratch.

    Raises:
        ValueError: if ``net`` is not one of the supported architectures.

    Training runs for ``epoch_num`` epochs, or for enough epochs to cover
    ``iteration_num`` iterations when ``epoch_num`` is not positive.  The
    learning rate follows a per-iteration polynomial decay combined with a
    ``MultiStepLR`` drop.  Training stops early after
    ``early_stop_tolerance`` epochs without validation improvement.
    """
    # for tensorboard tracking
    logger = get_logger()
    logger.info("(1) Initiating Training ... ")
    logger.info("Training on device: {}".format(device))

    # init model
    aux_layers = None
    if net == "SETR-PUP":
        aux_layers, model = get_SETR_PUP()
    elif net == "SETR-MLA":
        aux_layers, model = get_SETR_MLA()
    elif net == "TransUNet-Base":
        model = get_TransUNet_base()
    elif net == "TransUNet-Large":
        model = get_TransUNet_large()
    elif net == "UNet":
        model = UNet(CLASS_NUM)
    else:
        # Fail here with a clear message instead of an unbound `model`.
        raise ValueError("Unsupported network: {}".format(net))
    # Only the SETR factories set aux_layers; in that case the model
    # returns a (prediction, auxiliary) pair.
    has_aux_output = aux_layers is not None

    writer = SummaryWriter()

    # prepare dataset
    cluster_model = get_clustering_model(logger)
    train_dataset = CityscapeDataset(img_dir=data_dir, img_dim=IMG_DIM,
                                     mode="train",
                                     cluster_model=cluster_model)
    valid_dataset = CityscapeDataset(img_dir=data_dir, img_dim=IMG_DIM,
                                     mode="val",
                                     cluster_model=cluster_model)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size,
                              shuffle=False)
    logger.info("(2) Dataset Initiated. ")

    # optimizer
    if epoch_num > 0:
        epochs = epoch_num
    else:
        epochs = iteration_num // len(train_loader) + 1
    optim = SGD(model.parameters(), lr=lrate, momentum=momentum,
                weight_decay=wdecay)
    scheduler = lr_scheduler.MultiStepLR(
        optim, milestones=[int(epochs * fine_tune_ratio)], gamma=0.1)
    cur_epoch = 0
    best_loss = float('inf')
    epochs_since_improvement = 0

    # for continue training
    if cont:
        model, optim, cur_epoch, best_loss = load_ckpt_continue_training(
            best_ckpt_src, model, optim, logger)
        logger.info("Current best loss: {0}".format(best_loss))
        # Fast-forward the scheduler to the resumed epoch; the warnings it
        # emits about stepping before the optimizer are irrelevant here.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for _ in range(cur_epoch):
                scheduler.step()
    else:
        model = nn.DataParallel(model)
    model = model.to(device)
    logger.info("(3) Model Initiated ... ")
    logger.info("Training model: {}".format(net) + ". Training Started.")

    # loss
    ce_loss = CrossEntropyLoss()
    dice_loss = DiceLoss(CLASS_NUM) if use_dice_loss else None

    # loop over epochs
    iter_count = 0
    # Poly factor currently baked into the param-group learning rates.
    applied_poly_factor = 1.0
    epoch_bar = tqdm.tqdm(total=epochs, desc="Epoch", position=cur_epoch,
                          leave=True)
    logger.info("Total epochs: {0}. Starting from epoch {1}.".format(
        epochs, cur_epoch + 1))

    try:
        for epoch in range(cur_epoch, epochs):
            # Training.
            model.train()
            trainLossMeter = LossMeter()
            train_batch_bar = tqdm.tqdm(total=len(train_loader),
                                        desc="TrainBatch", position=0,
                                        leave=True)
            for batch_num, (orig_img, mask_img) in enumerate(train_loader):
                orig_img = orig_img.float().to(device)
                mask_img = mask_img.float().to(device)
                loss = _segmentation_loss(model, orig_img, mask_img,
                                          ce_loss, dice_loss,
                                          has_aux_output)

                # Backward Propagation, Update weight and metrics
                optim.zero_grad()
                loss.backward()
                optim.step()

                # Update learning rate.  Rescale by the ratio of
                # consecutive poly factors so the effective rate is
                # base_lr * poly(iter) * (MultiStepLR gamma).  Multiplying
                # the current rate by the full factor every iteration, as
                # before, compounds the decay and drives the rate to ~0
                # early in training.
                poly_factor = _poly_lr_factor(iter_count, iteration_num)
                if applied_poly_factor > 0.0:
                    lr_scale = poly_factor / applied_poly_factor
                else:
                    lr_scale = 0.0
                for param_group in optim.param_groups:
                    param_group['lr'] *= lr_scale
                applied_poly_factor = poly_factor
                iter_count += 1

                # Update loss
                trainLossMeter.update(loss.item())

                # print status
                if (batch_num + 1) % print_freq == 0:
                    status = 'Epoch: [{0}][{1}/{2}]\t' \
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                            epoch + 1, batch_num + 1, len(train_loader),
                            loss=trainLossMeter)
                    logger.info(status)

                # log loss to tensorboard
                if (batch_num + 1) % tensorboard_freq == 0:
                    writer.add_scalar(
                        'Train_Loss_{0}'.format(tensorboard_freq),
                        trainLossMeter.avg,
                        epoch * (len(train_loader) / tensorboard_freq) +
                        (batch_num + 1) / tensorboard_freq)
                train_batch_bar.update(1)
            train_batch_bar.close()
            writer.add_scalar('Train_Loss_epoch', trainLossMeter.avg, epoch)

            # Validation.
            model.eval()
            validLossMeter = LossMeter()
            valid_batch_bar = tqdm.tqdm(total=len(valid_loader),
                                        desc="ValidBatch", position=0,
                                        leave=True)
            with torch.no_grad():
                for batch_num, (orig_img, mask_img) in enumerate(
                        valid_loader):
                    orig_img = orig_img.float().to(device)
                    mask_img = mask_img.float().to(device)
                    loss = _segmentation_loss(model, orig_img, mask_img,
                                              ce_loss, dice_loss,
                                              has_aux_output)

                    # Update loss
                    validLossMeter.update(loss.item())

                    # print status
                    if (batch_num + 1) % print_freq == 0:
                        status = 'Validation: [{0}][{1}/{2}]\t' \
                            'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                                epoch + 1, batch_num + 1, len(valid_loader),
                                loss=validLossMeter)
                        logger.info(status)

                    # log loss to tensorboard
                    if (batch_num + 1) % tensorboard_freq == 0:
                        writer.add_scalar(
                            'Valid_Loss_{0}'.format(tensorboard_freq),
                            validLossMeter.avg,
                            epoch * (len(valid_loader) / tensorboard_freq) +
                            (batch_num + 1) / tensorboard_freq)
                    valid_batch_bar.update(1)
            valid_batch_bar.close()

            valid_loss = validLossMeter.avg
            writer.add_scalar('Valid_Loss_epoch', valid_loss, epoch)
            logger.info("Validation Loss of epoch [{0}/{1}]: {2}\n".format(
                epoch + 1, epochs, valid_loss))

            # update optim scheduler
            scheduler.step()

            # save checkpoint
            is_best = valid_loss < best_loss
            best_loss_tmp = min(valid_loss, best_loss)
            if not is_best:
                epochs_since_improvement += 1
                logger.info("Epochs since last improvement: %d\n" %
                            (epochs_since_improvement, ))
                if epochs_since_improvement == early_stop_tolerance:
                    break  # early stopping.
            else:
                # NOTE(review): the checkpoint is written only on
                # improvement here; the original nesting was lost, so
                # confirm it was not meant to be saved every epoch.
                epochs_since_improvement = 0
                state = {
                    'epoch': epoch,
                    'loss': best_loss_tmp,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optim.state_dict(),
                }
                torch.save(state, ckpt_src)
                logger.info("Checkpoint updated.")
                best_loss = best_loss_tmp
            epoch_bar.update(1)
    finally:
        # Release the progress bar and flush tensorboard events even when
        # training stops early or raises.
        epoch_bar.close()
        writer.close()
class UNetObjPrior(nn.Module):
    """
    Wrapper around UNet that takes object priors (gaussians) and images as input.

    The image and the object prior are concatenated along the channel axis
    (dim 1) and fed to the wrapped UNet.
    """

    def __init__(self, params, depth=5):
        """Build the wrapped UNet.

        Args:
            params: settings dict; ``params['cuda']`` selects the device
                here, and ``train`` reads ``momentum``, ``lr``,
                ``weight_decay``, ``batch_size``, ``num_epochs`` and
                ``out_dir`` from it.
            depth: depth argument forwarded to ``UNet``.
        """
        super(UNetObjPrior, self).__init__()
        self.in_channels = 4
        self.model = UNet(1, self.in_channels, depth, cuda=params['cuda'])
        self.params = params
        self.device = torch.device('cuda' if params['cuda'] else 'cpu')

    def forward(self, im, obj_prior):
        """Concatenate ``im`` and ``obj_prior`` on dim 1 and run the UNet."""
        x = torch.cat((im, obj_prior), dim=1)
        return self.model(x)

    def train(self, dataloader_train=True, dataloader_val=None):
        """Fit the model, or switch training mode like ``nn.Module.train``.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` and
        parent modules call with a single boolean.  To keep those calls
        working, a call without ``dataloader_val`` is treated as
        ``nn.Module.train(mode)`` and returns ``self``.

        Args:
            dataloader_train: training data loader (or the boolean mode
                when used as ``nn.Module.train``).
            dataloader_val: validation data loader.

        Returns:
            ``None`` after fitting; ``self`` when only the mode is set.
        """
        if dataloader_val is None:
            return super(UNetObjPrior, self).train(dataloader_train)
        self._fit(dataloader_train, dataloader_val)
        return None

    def _fit(self, dataloader_train, dataloader_val):
        """Run the train/validation loop and write logs and checkpoints."""
        best_loss = float("inf")
        dataloader_train.mode = 'train'
        dataloader_val.mode = 'val'
        dataloaders = {'train': dataloader_train, 'val': dataloader_val}

        optimizer = optim.SGD(self.model.parameters(),
                              momentum=self.params['momentum'],
                              lr=self.params['lr'],
                              weight_decay=self.params['weight_decay'])

        loggers = {
            'train': LossLogger('train', self.params['batch_size'],
                                len(dataloader_train),
                                self.params['out_dir']),
            'val': LossLogger('val', self.params['batch_size'],
                              len(dataloader_val),
                              self.params['out_dir']),
        }

        self.criterion = nn.MSELoss()
        num_epochs = self.params['num_epochs']

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                is_train = phase == 'train'
                if is_train:
                    self.model.train()
                else:
                    self.model.eval()  # Set model to evaluate mode

                # Iterate over data; samples are numbered from 1.
                for samp, data in enumerate(dataloaders[phase], start=1):
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # track history only in the train phase
                    with torch.set_grad_enabled(is_train):
                        out = self.forward(data.image, data.obj_prior)
                        loss = self.criterion(out, data.truth)

                        # backward + optimize only if in training phase
                        if is_train:
                            loss.backward()
                            optimizer.step()

                    loggers[phase].update(epoch, samp, loss.item())

                loggers[phase].print_epoch(epoch)

                # Generate a preview prediction on a validation sample.
                if is_train:
                    path = os.path.join(self.params['out_dir'], 'previews',
                                        'epoch_{:04d}.jpg'.format(epoch))
                    os.makedirs(os.path.dirname(path), exist_ok=True)
                    data = dataloaders['val'].sample_uniform()
                    # No gradients are needed for a preview image.
                    with torch.no_grad():
                        pred = self.forward(data.image, data.obj_prior)
                    utls.save_tensors(data.image[0], pred[0, ...],
                                      data.truth[0], path)

                loggers[phase].save('log_{}.csv'.format(phase))

                # save checkpoint
                if phase == 'val':
                    val_loss = loggers['val'].get_loss(epoch)
                    # A tie with the previous best still counts as best.
                    is_best = val_loss <= best_loss
                    best_loss = min(best_loss, val_loss)
                    path = os.path.join(self.params['out_dir'],
                                        'checkpoint.pth.tar')
                    utls.save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'state_dict': self.model.state_dict(),
                            'best_loss': best_loss,
                            'optimizer': optimizer.state_dict()
                        },
                        is_best,
                        path=path)

    def load_checkpoint(self, path, device='gpu'):
        """Load the wrapped UNet's weights from a checkpoint file.

        Args:
            path: checkpoint file containing a ``'state_dict'`` entry.
            device: ``'gpu'`` loads tensors onto the devices they were
                saved from; any other value remaps them to the CPU.
        """
        map_location = None if device == 'gpu' else 'cpu'
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(path, map_location=map_location)
        self.model.load_state_dict(checkpoint['state_dict'])
return image, label def __len__(self): # get the size of data set return len(self.imgs_path) if __name__ == "__main__": dataset = DataLoader("data/train10/") train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=1, shuffle=True) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') net = UNet(n_channels=1, n_classes=1) net.to(device=device) net.train() for image, label in train_loader: image = image.to(device=device, dtype=torch.float32) label = label.to(device=device, dtype=torch.float32) pred = net(image) loss = F.cross_entropy(pred, label.to(torch.long)) print('Loss/train', loss.item()) if loss < best_loss: best_loss = loss torch.save(net.state_dict(), 'best_model.pth') loss.backward() optimizer.step() print(pred.shape, image.shape, label.shape)