def main(conf):
    """Build the dataset, the UNet/EMA pair and the diffusion process, then train."""
    # Fixed linear beta-schedule hyper-parameters for the diffusion process.
    schedule_name, schedule_start, schedule_end, schedule_steps = "linear", 1e-4, 2e-2, 1000

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    conf.distributed = dist.get_world_size() > 1

    # Augmentation plus normalisation into [-1, 1].
    transform = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True),
        ]
    )
    train_set = MultiResolutionDataset(
        conf.dataset.path, transform, conf.dataset.resolution
    )
    sampler = dist.data_sampler(
        train_set, shuffle=True, distributed=conf.distributed
    )
    train_loader = conf.training.dataloader.make(train_set, sampler=sampler)

    def build_unet():
        # The trained model and its EMA shadow share the exact same architecture.
        return UNet(
            conf.model.in_channel,
            conf.model.channel,
            channel_multiplier=conf.model.channel_multiplier,
            n_res_blocks=conf.model.n_res_blocks,
            attn_strides=conf.model.attn_strides,
            dropout=conf.model.dropout,
            fold=conf.model.fold,
        ).to(device)

    model = build_unet()
    ema = build_unet()

    if conf.distributed:
        local_rank = dist.get_local_rank()
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
        )

    optimizer = conf.training.optimizer.make(model.parameters())
    scheduler = conf.training.scheduler.make(optimizer)

    betas = make_beta_schedule(schedule_name, schedule_start, schedule_end, schedule_steps)
    diffusion = GaussianDiffusion(betas).to(device)

    train(conf, train_loader, model, ema, diffusion, optimizer, scheduler, device)
def train(epochs, batch_size, learning_rate):
    """Train a UNet on the SegTHOR dataset and save the final model.

    Args:
        epochs: number of passes over the training set.
        batch_size: mini-batch size for the DataLoader.
        learning_rate: currently unused (the active SGD optimizer is pinned to
            lr=0.001); kept so callers keep working.
    """
    train_loader = torch.utils.data.DataLoader(
        SegThorDataset(
            "data",
            phase='train',
            transform=transforms.Compose([Rescale(0.25), Normalize(), ToTensor()]),
            target_transform=transforms.Compose([Rescale(0.25), ToTensor()])),
        batch_size=batch_size,
        shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = UNet().to(device)
    model.apply(weight_init)

    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)  #learning rate to 0.001 for initial stage
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.95)
    #optimizer = adabound.AdaBound(params = model.parameters(), lr = 0.001, final_lr = 0.1)

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        running_loss = 0.0
        loss_seg = np.zeros(5)  # per-class dice accumulator: 5 classes incl. background
        for batch_idx, (train_data, labels) in enumerate(train_loader):
            train_data, labels = train_data.to(device, dtype=torch.float), labels.to(device, dtype=torch.uint8)
            print("train data size", train_data.size())
            print("label size", labels.size())

            optimizer.zero_grad()
            output = model(train_data)
            print("output: {} and taget: {}".format(output.size(), labels.size()))
            loss_label, loss = dice_loss(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # BUG FIX: accumulate all 5 classes. The original `range(4)` skipped
            # index 4, so the reported Aorta dice was always 0.
            for i in range(5):
                loss_seg[i] += loss_label[i]

        print("Length: ", len(train_loader))
        epoch_loss = running_loss / len(train_loader)
        epoch_loss_class = np.true_divide(loss_seg, len(train_loader))
        print("Dice per class: Background = {:.4f} Eusophagus = {:.4f} Heart = {:.4f} Trachea = {:.4f} Aorta = {:.4f}\n"
              .format(epoch_loss_class[0], epoch_loss_class[1], epoch_loss_class[2],
                      epoch_loss_class[3], epoch_loss_class[4]))
        print("Total Dice Loss: {:.4f}\n".format(epoch_loss))

    os.makedirs("models", exist_ok=True)
    torch.save(model, "models/model.pt")
def main():
    """Train a binary-mask UNet on CBIS-DDSM, then evaluate on the test split."""
    # Google-Drive-mounted dataset locations and training hyper-parameters.
    train_dir = '/content/drive/My Drive/DDSM/train/CBIS-DDSM'
    test_dir = '/content/drive/My Drive/DDSM/test/CBIS-DDSM'
    weights_dir = '/content/drive/My Drive/Cv/weights'
    batch_size, valid_size, nb_epochs = 3, 0.2, 20

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Train/validation loaders built from a single combined transform pipeline.
    loaders = dataloaders(train_dir, combined_transform, batch_size, valid_size)

    net = UNet(in_channels=3, out_channels=1)
    net.to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    step_scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.3)

    # train() returns the (best) trained model.
    net = train(net, optimizer, step_scheduler, loaders, nb_epochs, device, weights_dir)

    test_loader = DataLoader(
        MassSegmentationDataset(test_dir, combined_transform),
        batch_size=batch_size,
        num_workers=0,
    )
    test(net, test_loader, device)
def train():
    """Train a UNet on the nucleus dataset via the project Trainer.

    Relies on module-level ``args`` (CLI options) and ``output_dir``.
    """
    # Load the data sets (images and masks rescaled to 256 and tensorised).
    train_dataset = NucleusDataset(
        "data",
        train=True,
        transform=Compose([Rescale(256), ToTensor()]),
        target_transform=Compose([Rescale(256), ToTensor()]))

    # Use cuda if available
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Set model to GPU/CPU, optionally resuming from a checkpoint
    if args.from_checkpoint:
        model = UNet.load(args.from_checkpoint)
    else:
        model = UNet()
    model.to(device)

    # Initialize optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    # Initialize trainer.
    # BUG FIX: the model is placed on the locally detected `device`, but the
    # Trainer was handed `args.device`, which could disagree and put batches
    # on a different device than the model. Use the same `device` everywhere.
    trainer = Trainer(dataset=train_dataset,
                      model=model,
                      optimizer=optimizer,
                      batch_size=args.batch_size,
                      device=device,
                      output_dir=output_dir)

    # Run the training
    trainer.run_train_loop(epochs=args.epochs)
def main():
    """Train/validate a UNet on synthetic cell images, checkpointing on best val loss."""
    args = parser.parse_args()

    # BUG FIX: the original read `arg.img_dir` / `arg.mask_dir` — `arg` is undefined.
    dataset = SyntheticCellDataset(args.img_dir, args.mask_dir)

    # Random train/val split: the last `split_ratio` fraction becomes validation.
    indices = torch.randperm(len(dataset)).tolist()
    sr = int(args.split_ratio * len(dataset))
    train_set = torch.utils.data.Subset(dataset, indices[:-sr])
    val_set = torch.utils.data.Subset(dataset, indices[-sr:])

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, shuffle=True, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=args.batch_size, shuffle=False, pin_memory=True)

    device = torch.device("cpu" if not args.use_cuda else "cuda:0")
    model = UNet()
    model.to(device)

    # NOTE(review): dsc_loss is constructed but never used here — presumably the
    # train()/validate() helpers own the loss; confirm before removing.
    dsc_loss = DiceLoss()
    optimizer = torch.optim.Adam(model.parameters(), args.lr)

    best_val_loss = 1000  # lowest validation loss observed so far
    # BUG FIX: iterate over range(args.N_epoch) — the original iterated the int itself.
    for epoch in range(args.N_epoch):
        model, train_loss, optimizer = train(model, train_loader, device, optimizer)
        val_loss = validate(model, val_loader, device)
        if val_loss < best_val_loss:
            save_checkpoint(args.model_save_dir + '/epoch_' + str(epoch + 1),
                            model, train_loss, val_loss, epoch)
            best_val_loss = val_loss
        # BUG FIX: `num_epoch` was undefined; report against args.N_epoch.
        print('[{}/{}] train loss :{} val loss : {}'.format(
            epoch + 1, args.N_epoch, train_loss, val_loss))
    # BUG FIX: the closing quote of this literal was missing (SyntaxError).
    print('Training completed')
def main(args):
    """Train a 21-class segmentation UNet, then evaluate it on the test set."""
    train_dataloader, test_dataloader = dataloader.load_datasets(
        batch_size=args.batch_size,
        image_resize=args.image_resize,
        train_dataset_size=args.train_data_size,
        test_dataset_size=args.test_data_size,
        download=args.download_dataset
    )

    model = UNet(out_channels=21)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Class-balancing weights for cross entropy; fall back to the unweighted
    # loss when fewer than the expected 21 class weights were produced.
    # FIX: reuse the already-computed weights — the original called
    # utils.get_weight a second time (a full pass over the dataset) to build
    # the criterion.
    ce_weight = utils.get_weight(train_dataloader.dataset)
    if len(ce_weight) < 21:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.CrossEntropyLoss(ce_weight)

    print(f'Start training for {args.epochs} epochs')
    train(model=model,
          dataloader=train_dataloader,
          epochs=args.epochs,
          optimizer=optimizer,
          criterion=criterion,
          save_output_every=1,
          )
    print(f'Training finished')

    print(f'Start evaluating with {len(test_dataloader.dataset)} images')
    eval(model, test_dataloader)
    print('All done')
def main():
    """Entry point: train a 2-class UNet, or load saved weights and print eval mode.

    Expects ``sys.argv[1]`` to be 'train'; any other value loads the checkpoint.

    NOTE(review): x_train/y_train/x_val/y_val/width_out/height_out are used in
    the 'train' branch but never defined here — the commented-out
    get_dataset(...) call presumably produced them; confirm before running.
    """
    # width_in = 284
    # height_in = 284
    # width_out = 196
    # height_out = 196
    # x_train, y_train, x_val, y_val = get_dataset(width_in, height_in, width_out, height_out)
    # print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)

    # FIX: PATH existed only inside a comment, so the load branch raised
    # NameError; restore it as a real constant.
    PATH = './unet.pt'

    batch_size = 3
    epochs = 1
    epoch_lapse = 50
    threshold = 0.5
    learning_rate = 0.01

    unet = UNet(in_channel=1, out_channel=2)
    if use_gpu:
        unet = unet.cuda()
    criterion = torch.nn.CrossEntropyLoss()
    # FIX: use the learning_rate variable (same value, 0.01) instead of a
    # hard-coded lr that could silently diverge from it.
    optimizer = torch.optim.SGD(unet.parameters(), lr=learning_rate, momentum=0.99)

    if sys.argv[1] == 'train':
        train(unet, batch_size, epochs, epoch_lapse, threshold, learning_rate,
              criterion, optimizer, x_train, y_train, x_val, y_val, width_out, height_out)
    else:
        if use_gpu:
            unet.load_state_dict(torch.load(PATH))
        else:
            unet.load_state_dict(torch.load(PATH, map_location='cpu'))
        print(unet.eval())
def main():
    """Training entry point: build or restore the model/optimizer, then loop over epochs."""
    global start_epoch, epoch, checkpoint

    # Either start fresh or resume everything from the checkpoint file.
    if checkpoint is None:
        model = UNet(in_channels, out_channels)
        # Only optimise parameters that require gradients.
        trainable = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params=trainable, lr=lr)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    criterion = nn.L1Loss().to(device)

    # Custom dataloaders over triplet crops (both shuffled, as in the original).
    def make_loader(folder):
        return torch.utils.data.DataLoader(
            TripletDataset(folder, crop_size, scale),
            batch_size=batch_size,
            shuffle=True,
            num_workers=workers,
            pin_memory=True)

    train_loader = make_loader(train_folder)
    test_loader = make_loader(test_folder)

    # Convert the iteration budget into a number of epochs.
    epochs = int(iterations // len(train_loader) + 1)

    for epoch in range(start_epoch, epochs):
        # One epoch's training followed by a test pass.
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch,
              epochs=epochs)
        test(test_loader=test_loader, model=model, criterion=criterion)

        # Persist whole objects (not just state_dicts), matching the resume path above.
        torch.save({'epoch': epoch, 'model': model, 'optimizer': optimizer},
                   f'checkpoints/checkpoint_unet_{epoch}.pth.tar')
def main():
    """CLI entry point: seed all RNGs, pick a device, then train or test the LAB UNet."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--action", type=str, default='train', help="train or test")
    args = parser.parse_args()

    config = load_config()

    # TensorBoard writer (one run directory per config).
    time_now = datetime.now().isoformat()
    if not os.path.exists(config.RUN_PATH):
        os.mkdir(config.RUN_PATH)
    writer = SummaryWriter(log_dir=config.RUN_PATH)

    # Seed every RNG for reproducibility.
    torch.manual_seed(config.SEED)
    torch.cuda.manual_seed(config.SEED)
    np.random.seed(config.SEED)
    random.seed(config.SEED)

    # Restrict visible GPUs to the configured list, then choose the device.
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(e) for e in config.GPU)
    if torch.cuda.is_available():
        config.DEVICE = torch.device("cuda")
        print('\nGPU IS AVAILABLE')
        torch.backends.cudnn.benchmark = True
    else:
        config.DEVICE = torch.device("cpu")

    net = UNet(2).to(config.DEVICE)
    # FIX: removed leftover debug statement that instantiated
    # torchvision.models.resnet18 just to print its last layer block — it built
    # (and could download) an entire unrelated model on every run.

    optimizer = optim.Adam(net.parameters(), betas=(0.5, 0.999), lr=config.LR)
    loss = nn.L1Loss()

    # Build the requested dataset split and run.
    if args.action == 'train':
        train_dataset = LABDataset(config, config.TRAIN_PATH)
        len_train = len(train_dataset)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=config.BATCH_SIZE, shuffle=True)
        iter_per_epoch = len(train_loader)
        train_(config, train_loader, net, optimizer, loss, len_train, iter_per_epoch, writer)
    if args.action == "test":
        test_dataset = LABDataset(config, config.TEST_PATH)
        test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)
        test(config, test_loader, net, loss)
def main():
    """Train a binary-segmentation UNet with albumentations augmentation and AMP."""
    # FIX: `A.normalize` -> `A.Normalize` (correct class name) and
    # `ToTensorV2` -> `ToTensorV2()` — the transform must be instantiated,
    # otherwise Compose receives the class object and fails when called.
    train_transform = A.Compose([
        A.Resize(height=config.IMAGE_HEIGHT, width=config.IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])
    val_transform = A.Compose([
        A.Resize(height=config.IMAGE_HEIGHT, width=config.IMAGE_WIDTH),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])

    model = UNet(in_channels=3, out_channels=1).to(config.DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()  # expects raw logits (no sigmoid in the model head)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        config.TRAIN_IMAGE_DIR,
        config.TRAIN_MASK_DIR,
        config.VAL_IMG_DIR,
        config.VAL_MASK_DIR,
        config.BATCH_SIZE,
        train_transform,
        val_transform,
    )

    if config.LOAD_MODEL:
        load_checkpoint(torch.load('my_checkpoint.pth.tar'), model)

    scaler = torch.cuda.amp.GradScaler()  # mixed-precision loss scaling
    for epoch in range(config.NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check acc
        check_accuracy(val_loader, model, device=config.DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, folder='saved_images', device=config.DEVICE)
def main():
    """Train a landmark UNet from file/landmark lists supplied on the CLI."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_paths', default="data/files.txt")
    parser.add_argument('--landmark_paths', default="data/landmarks.txt")
    parser.add_argument('--landmark', type=int, default=0)
    parser.add_argument('--save_path')
    parser.add_argument('--num_epochs', type=int, default=int(1e9))
    parser.add_argument('--log_freq', type=int, default=100)
    parser.add_argument('--separator', default=",")
    parser.add_argument('--batch_size', type=int, default=8)
    args = parser.parse_args()

    file_paths = args.file_paths
    landmark_paths = args.landmark_paths
    landmark_wanted = args.landmark
    num_epochs = args.num_epochs
    log_freq = args.log_freq
    save_path = args.save_path

    x, y = get_data(file_paths, landmark_paths, landmark_wanted, separator=args.separator)
    print(f"Got {len(x)} images with {len(y)} landmarks")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device", device)

    dataset = TensorDataset(torch.Tensor(x), torch.Tensor(y))
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

    unet = UNet(in_dim=1, out_dim=6, num_filters=4)
    # NOTE(review): on CUDA the class-weight tensor may also need .to(device) —
    # confirm what get_weigths returns.
    criterion = torch.nn.CrossEntropyLoss(weight=get_weigths(y))
    optimizer = optim.SGD(unet.parameters(), lr=0.001, momentum=0.9)
    unet.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(dataloader):
            inputs, labels = data
            # BUG FIX: batches must live on the same device as the model;
            # previously they stayed on CPU and the forward pass crashed on CUDA.
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = unet(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f"[{epoch+1}/{num_epochs}] loss: {running_loss}")
        if epoch % log_freq == log_freq - 1:
            if save_path is not None:
                torch.save(unet.state_dict(), os.path.join(save_path, f"unet-{epoch}.pt"))
def main():
    """Dispatch nuclei-detection train / val / test according to args.phase."""
    global args

    # Restore the network from the configured checkpoint and push it to GPU 0.
    net = UNet(3, 1)
    net.load(opt.ckpt_path)
    criterion = Loss('soft_dice_loss')
    torch.cuda.set_device(0)
    net = net.cuda()
    criterion = criterion.cuda()

    if args.phase == 'train':
        # train
        loader = DataLoader(NucleiDetector(opt, phase=args.phase),
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers,
                            pin_memory=opt.pin_memory)
        lr = opt.lr
        optimizer = torch.optim.Adam(net.parameters(), lr=lr,
                                     weight_decay=opt.weight_decay)
        previous_loss = None  # no epoch has run yet
        for epoch in range(opt.epoch + 1):
            now_loss = train(loader, net, criterion, epoch, optimizer,
                             opt.model_save_freq, opt.model_save_path)
            # Decay the learning rate whenever the epoch loss stops improving.
            if previous_loss is not None and now_loss > previous_loss:
                lr *= opt.lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                save_lr(net.model_name, opt.lr_save_path, lr)
            previous_loss = now_loss
    elif args.phase == 'val':
        # val phase
        loader = DataLoader(NucleiDetector(opt, phase='val'),
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers,
                            pin_memory=opt.pin_memory)
        val(loader, net, criterion)
    else:
        # test phase
        loader = DataLoader(NucleiDetector(opt, phase='test'),
                            batch_size=1,
                            shuffle=True,
                            num_workers=opt.num_workers,
                            pin_memory=opt.pin_memory)
        test(loader, net, opt)
def main(mode):
    """Prepare logging, seeding and the device, then train (mode 1) or test (mode 2)."""
    config = load_config()

    # One TensorBoard run directory per config.
    time_now = datetime.now().isoformat()
    if not os.path.exists(config.RUN_PATH):
        os.mkdir(config.RUN_PATH)
    writer = SummaryWriter(log_dir=config.RUN_PATH)

    # Seed every RNG we rely on so runs are reproducible.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed,
                    np.random.seed, random.seed):
        seed_fn(config.SEED)

    # Restrict visible GPUs to the configured list, then pick the device.
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(e) for e in config.GPU)
    if torch.cuda.is_available():
        config.DEVICE = torch.device("cuda")
        print('\nGPU IS AVAILABLE')
        torch.backends.cudnn.benchmark = True
    else:
        config.DEVICE = torch.device("cpu")

    net = UNet(1).to(config.DEVICE)
    optimizer = optim.Adam(net.parameters(), betas=(0.5, 0.999), lr=config.LR)
    #criterion = nn.CrossEntropyLoss()
    criterion = nn.BCELoss()  # binary-mask objective

    if mode == 1:
        dataset = MyDataset(config, config.TRAIN_PATH)
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=config.BATCH_SIZE, shuffle=True)
        train_(config, loader, net, optimizer, criterion,
               len(dataset), len(loader), writer)
    if mode == 2:
        dataset = MyDataset(config, config.TEST_PATH)
        loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
        test(config, loader, net, criterion)
def train(args):
    """Fit the liver-segmentation UNet using the parsed CLI options, then evaluate."""
    model = UNet(n_channels=125, n_classes=10).to(device)

    # Unpack the CLI options used below.
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    save_path = args.save_path

    criterion = L.CrossEntropyLoss2d()  # 2-D cross entropy over class maps
    # Adam with default hyper-parameters over all model parameters.
    optimizer = optim.Adam(model.parameters())

    # Dataset and loader: 8 worker processes, reshuffled every epoch.
    liver_dataset = LiverDataset("data",
                                 transform=x_transform,
                                 target_transform=y_transform)
    dataloader = DataLoader(liver_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=8)

    train_model(model, criterion, optimizer, dataloader, num_epochs, save_path)
    test(args)
class Unetmodel(LightningModule):
    """Lightning wrapper around a UNet for binding-pocket segmentation.

    Hyper-parameters (HDF5 path, grid settings, augmentation flag, lr and
    batch size) arrive via ``hparams`` and are mirrored onto attributes.
    """

    def __init__(self, hparams):
        super(Unetmodel, self).__init__()
        self.hparams = hparams
        # Mirror the hyper-parameters used by the dataloader and optimizer.
        self.hdf_path = self.hparams.hdf_path
        self.max_dist = self.hparams.max_dist
        self.grid_resolution = self.hparams.grid_resolution
        self.augment = self.hparams.augment
        self.lr = self.hparams.lr
        self.batch_size = self.hparams.batch_size
        # Network, metric and loss.
        self.net = UNet()
        self.metric = Ovl('Volume_overlap')
        self.loss = Dice_loss()

    def forward(self, x):
        """Run the wrapped UNet on a grid batch."""
        return self.net(x)

    def training_step(self, batch, batch_idx):
        """One optimisation step: dice loss plus volume-overlap metric."""
        inputs, target = batch
        prediction = self(inputs)
        step_loss = self.loss(target, prediction)
        step_metric = self.metric(target, prediction)
        logs = {'train_loss': step_loss, 'train_metric': step_metric}
        return {'loss': step_loss, 'log': logs, 'progress_bar': logs}

    def training_epoch_end(self, outputs):
        """Average the per-step losses into the epoch summary."""
        avg_loss = torch.stack([step['loss'] for step in outputs]).mean()
        return {'train_loss': avg_loss}

    def configure_optimizers(self):
        """Plain Adam over the UNet parameters."""
        return [Adam(self.net.parameters(), lr=self.lr)]

    def train_dataloader(self):
        """Open the HDF5 file once to enumerate sample ids, then build the loader."""
        self.data_handle = h5py.File(self.hdf_path, mode='r')
        self.ids = list(self.data_handle.keys())
        self.data_handle.close()
        self.train_dataset = Binding_pocket_dataset(
            self.hdf_path, self.max_dist, self.grid_resolution, self.ids, self.augment)
        return DataLoader(self.train_dataset,
                          batch_size=self.batch_size,
                          num_workers=4,
                          shuffle=True)
def noise_seg(datapath: Path, identifier=None, lr=0.001, batch_size=8, wf=7, depth=3, data="brain") -> Trainer:
    """Assemble a masked-BCE UNet segmentation Trainer over the given data path.

    Args:
        datapath: root directory handed to get_segmentation_dataloaders.
        identifier: experiment tag for callbacks/checkpointing (may be None).
        lr: Adam learning rate.
        batch_size: dataloader batch size.
        wf: UNet width factor; depth: UNet depth.
        data: dataset selector (default "brain") for the dataloader factory.

    Returns:
        A fully wired, reset Trainer with train/val dataloaders attached.
    """
    device = torch.device("cuda")

    def loss_f(trainer, batch, batch_results):
        # Per-pixel BCE-with-logits against batch[1], masked so that only
        # pixels whose input intensity exceeds 0.05 contribute to the mean
        # (assumes batch = (image, target) — background is near-zero).
        return (F.binary_cross_entropy_with_logits(
            batch_results, batch[1].float(), reduction="none") * (batch[0] > 0.05)).mean()

    model = UNet(in_channels=1, n_classes=1, batch_norm=False, up_mode="upconv",
                 depth=depth, wf=wf, padding=True).to(device)
    train_step = partial(simple_train_step, loss_f=loss_f)
    val_step = partial(simple_val_step, loss_f=loss_f)
    optimiser = adam(model.parameters(), lr=lr)
    callback_dict = basic_callback_dict(identifier, save="val_loss")
    # NOTE: the lambda closes over `trainer`, which is assigned only below —
    # this works because the lambda is invoked after the Trainer exists
    # (late binding), but the ordering is deliberate and fragile.
    Loss(lambda batch_res, batch: loss_f(trainer, batch, batch_res)).register(
        callback_dict, log=True, tensorboard=True, train=True, val=True)
    trainer = Trainer(model=model, train_dataloader=None, val_dataloader=None,
                      optimiser=optimiser, train_step=train_step, val_step=val_step,
                      callback_dict=callback_dict, device=device, identifier=identifier)
    # Dataloaders are attached after construction rather than passed in.
    trainer.train_dataloader, trainer.val_dataloader = get_segmentation_dataloaders(
        batch_size, data, datapath)
    trainer.reset_state()
    return trainer
def train(epochs, batch_size, learning_rate):
    """Simple BCE training loop for a UNet on the nucleus dataset.

    Saves the whole trained model to models/model.pt when done.
    """
    def pipeline():
        # Fresh transform chain per target so no state is shared.
        return transforms.Compose([Normalize(), Rescale(256), ToTensor()])

    train_loader = torch.utils.data.DataLoader(
        NucleusDataset("data", train=True,
                       transform=pipeline(),
                       target_transform=pipeline()),
        batch_size=batch_size,
        shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = UNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        total = 0.0
        for batch_idx, (images, masks) in enumerate(train_loader):
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            prediction = model(images)
            batch_loss = F.binary_cross_entropy(prediction, masks)
            batch_loss.backward()
            optimizer.step()
            total += batch_loss.item()

        print("Loss: {:.4f}\n".format(total / len(train_loader)))

    os.makedirs("models", exist_ok=True)
    torch.save(model, "models/model.pt")
b = random.randint(100, 160) b1 = b + 352 for i in range(bathsize): e1 = random.randint(f[2, 0], f[2, 1]) x2[0, :, :] = x1[a:a1, b:b1, e1] y2[0, :, :] = y1[a:a1, b:b1, e1] x = torch.Tensor(x2) y = torch.Tensor(y2) return x, y def __len__(self): # 返回图像的数量 return self.Data optimizer = torch.optim.SGD(unet.parameters(), lr=0.01) # 传入 net 的所有参数, 学习率 loss_func = torch.nn.BCELoss() # 预测值和真实值的误差计算公式 (均方差) loss_func2 = DiceLoss() ship_train_dataset = getDataset(path) # 利用dataloader读取我们的数据对象,并设定batch-size和工作现场 # batch_size=2 ship_train_loader = DataLoader(ship_train_dataset, batch_size=4, num_workers=32, shuffle=False) for epoch in range(300): epoch_loss = 0 Diceloss = 0 Sen = 0
# Training-script hyper-parameters.
epochs = 20
batch_size = 2
alpha = .5  # presumably loss-mixing weights — TODO confirm against DiceBCELoss usage
beta = .5
lr = .05
use_wandb = True

if use_wandb:
    # Experiment tracking; `model` is expected to already exist at module level.
    wandb.init(project="cs446", entity="weustis")
    wandb.watch(model)

crit = DiceBCELoss()
opt = torch.optim.Adam(model.parameters(), lr=lr)
# LambdaLR multiplies the base lr by this factor: decays linearly from .05
# at epoch 0 toward .0001 at epoch == epochs.
lambdalr = lambda epoch: .05 - (.0499*(epoch/epochs))
scheduler = LambdaLR(opt, lr_lambda=[lambdalr])
# data = [np.load('data_pub/train/001_imgs.npy'), np.load('data_pub/train/001_seg.npy')]

for epoch in tqdm.tqdm(range(epochs)):
    # One full pass: fetch a volume, then slide over it in mini-batches.
    X, y = get_batch(dataset, 1, 'cpu')
    eloss = 0
    for batch in range(0, len(X) - batch_size, batch_size):
        x_batch = X[batch:batch + batch_size].to(device)
        opt.zero_grad()
def main(args):
    """End-to-end train/validate driver for the blur-interpolation UNet.

    Builds the blur-dataset loaders, optionally restores a checkpoint, then on
    every epoch runs the validation loop (the training inner loop is currently
    a dead triple-quoted string), logs losses/PSNR/SSIM to TensorBoard and
    keeps only the best-test-PSNR checkpoint on disk.

    NOTE(review): because the training loop is commented out, `psnr_`,
    `dssim_` and the train-side stats stay at their initial values (and
    `total_steps` at 0.01), so all "train" metrics logged below are
    effectively zero — confirm this is intentional.
    """
    writer = SummaryWriter(os.path.join('./logs'))
    # torch.backends.cudnn.benchmark = True
    if not os.path.isdir(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('[MODEL] CUDA DEVICE : {}'.format(device))

    # TODO DEFINE TRAIN AND TEST TRANSFORMS
    train_tf = None
    test_tf = None

    # Channel wise mean calculated on adobe240-fps training dataset
    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    test_valid = 'validation' if args.valid else 'test'

    train_data = BlurDataset(os.path.join(args.dataset_root, 'train'),
                             seq_len=args.sequence_length,
                             tau=args.num_frame_blur,
                             delta=5,
                             transform=train_tf)
    test_data = BlurDataset(os.path.join(args.dataset_root, test_valid),
                            seq_len=args.sequence_length,
                            tau=args.num_frame_blur,
                            delta=5,
                            transform=train_tf)

    train_loader = DataLoader(train_data, batch_size=args.train_batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False)

    # TODO IMPORT YOUR CUSTOM MODEL
    model = UNet(3, 3, device, decode_mode=args.decode_mode)
    if args.checkpoint:
        # Checkpoints may store either a wrapped dict or a bare state_dict.
        store_dict = torch.load(args.checkpoint)
        try:
            model.load_state_dict(store_dict['state_dict'])
        except KeyError:
            model.load_state_dict(store_dict)
    if args.train_continue:
        store_dict = torch.load(args.checkpoint)
        model.load_state_dict(store_dict['state_dict'])
    else:
        store_dict = {'loss': [], 'valLoss': [], 'valPSNR': [], 'epoch': -1}

    model.to(device)
    model.train(True)
    # model = nn.DataParallel(model)

    # TODO DEFINE MORE CRITERIA
    # input(True if device == torch.device('cuda:0') else False)
    criterion = {
        'MSE': nn.MSELoss(),
        'L1': nn.L1Loss(),
        # 'Perceptual': PerceptualLoss(model='net-lin', net='vgg', dataparallel=True,
        #                              use_gpu=True if device == torch.device('cuda:0') else False)
    }
    # Relative weights applied to each criterion when summed into the total loss.
    criterion_w = {'MSE': 1.0, 'L1': 10.0, 'Perceptual': 10.0}

    # Define optimizers
    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9,weight_decay=5e-4)
    optimizer = optim.Adam(model.parameters(), lr=args.init_learning_rate)

    # Define lr scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=0.1)

    # best_acc = 0.0
    # start = time.time()
    cLoss = store_dict['loss']
    valLoss = store_dict['valLoss']
    valPSNR = store_dict['valPSNR']
    checkpoint_counter = 0
    loss_tracker = {}
    loss_tracker_test = {}
    psnr_old = 0.0
    dssim_old = 0.0

    for epoch in range(1, 10 * args.epochs):  # loop over the dataset multiple times

        # Append and reset
        cLoss.append([])
        valLoss.append([])
        valPSNR.append([])
        running_loss = 0

        # Increment scheduler count
        scheduler.step()

        tqdm_loader = tqdm(range(len(train_loader)), ncols=150)
        loss = 0.0
        psnr_ = 0.0
        dssim_ = 0.0
        loss_tracker = {}
        for loss_fn in criterion.keys():
            loss_tracker[loss_fn] = 0.0

        # Train
        model.train(True)
        total_steps = 0.01
        total_steps_test = 0.01
        # The entire training inner loop below is disabled (dead string).
        '''for train_idx, data in enumerate(train_loader, 1):
            loss = 0.0
            blur_data, sharpe_data = data
            #import pdb; pdb.set_trace()
            # input(sharpe_data.shape)
            #import pdb; pdb.set_trace()
            interp_idx = int(math.ceil((args.num_frame_blur/2) - 0.49))
            #input(interp_idx)
            if args.decode_mode == 'interp':
                sharpe_data = sharpe_data[:, :, 1::2, :, :]
            elif args.decode_mode == 'deblur':
                sharpe_data = sharpe_data[:, :, 0::2, :, :]
            else:
                #print('\nBoth\n')
                sharpe_data = sharpe_data
            #print(sharpe_data.shape)
            #input(blur_data.shape)
            blur_data = blur_data.to(device)[:, :, :, :352, :].permute(0, 1, 2, 4, 3)
            try:
                sharpe_data = sharpe_data.squeeze().to(device)[:, :, :, :352, :].permute(0, 1, 2, 4, 3)
            except:
                sharpe_data = sharpe_data.squeeze(3).to(device)[:, :, :, :352, :].permute(0, 1, 2, 4, 3)

            # clear gradient
            optimizer.zero_grad()

            # forward pass
            sharpe_out = model(blur_data)
            # import pdb; pdb.set_trace()
            # input(sharpe_out.shape)

            # compute losses
            # import pdb;
            # pdb.set_trace()
            sharpe_out = sharpe_out.permute(0, 2, 1, 3, 4)
            B, C, S, Fx, Fy = sharpe_out.shape
            for loss_fn in criterion.keys():
                loss_tmp = 0.0
                if loss_fn == 'Perceptual':
                    for bidx in range(B):
                        loss_tmp += criterion_w[loss_fn] * \
                                    criterion[loss_fn](sharpe_out[bidx].permute(1, 0, 2, 3),
                                                       sharpe_data[bidx].permute(1, 0, 2, 3)).sum()
                    # loss_tmp /= B
                else:
                    loss_tmp = criterion_w[loss_fn] * \
                               criterion[loss_fn](sharpe_out, sharpe_data)
                # try:
                # import pdb; pdb.set_trace()
                loss += loss_tmp
                # if
                # except :
                try:
                    loss_tracker[loss_fn] += loss_tmp.item()
                except KeyError:
                    loss_tracker[loss_fn] = loss_tmp.item()

            # Backpropagate
            loss.backward()
            optimizer.step()

            # statistics
            # import pdb; pdb.set_trace()
            sharpe_out = sharpe_out.detach().cpu().numpy()
            sharpe_data = sharpe_data.cpu().numpy()
            for sidx in range(S):
                for bidx in range(B):
                    psnr_ += psnr(sharpe_out[bidx, :, sidx, :, :],
                                  sharpe_data[bidx, :, sidx, :, :]) #, peak=1.0)
                    """dssim_ += dssim(np.moveaxis(sharpe_out[bidx, :, sidx, :, :], 0, 2),
                                       np.moveaxis(sharpe_data[bidx, :, sidx, :, :], 0, 2) )"""
            """sharpe_out = sharpe_out.reshape(-1,3, sx, sy).detach().cpu().numpy()
            sharpe_data = sharpe_data.reshape(-1, 3, sx, sy).cpu().numpy()
            for idx in range(sharpe_out.shape[0]):
                # import pdb; pdb.set_trace()
                psnr_ += psnr(sharpe_data[idx], sharpe_out[idx])
                dssim_ += dssim(np.swapaxes(sharpe_data[idx], 2, 0),
                                np.swapaxes(sharpe_out[idx], 2, 0))"""
            # psnr_ /= sharpe_out.shape[0]
            # dssim_ /= sharpe_out.shape[0]
            running_loss += loss.item()
            loss_str = ''
            total_steps += B*S
            for key in loss_tracker.keys():
                loss_str += ' {0} : {1:6.4f} '.format(key, 1.0*loss_tracker[key] / total_steps)

            # set display info
            if train_idx % 5 == 0:
                tqdm_loader.set_description(('\r[Training] [Ep {0:6d}] loss: {1:6.4f} PSNR: {2:6.4f} SSIM: {3:6.4f} '.format
                                             (epoch, running_loss / total_steps, psnr_ / total_steps, dssim_ / total_steps) + loss_str))
                tqdm_loader.update(5)
        tqdm_loader.close()'''

        # Validation
        running_loss_test = 0.0
        psnr_test = 0.0
        dssim_test = 0.0
        # print('len', len(test_loader))
        tqdm_loader_test = tqdm(range(len(test_loader)), ncols=150)
        # import pdb; pdb.set_trace()
        loss_tracker_test = {}
        for loss_fn in criterion.keys():
            loss_tracker_test[loss_fn] = 0.0
        with torch.no_grad():
            model.eval()
            total_steps_test = 0.0
            for test_idx, data in enumerate(test_loader, 1):
                loss = 0.0
                blur_data, sharpe_data = data
                interp_idx = int(math.ceil((args.num_frame_blur / 2) - 0.49))
                # input(interp_idx)
                # Keep only the ground-truth frames matching the decoding task.
                if args.decode_mode == 'interp':
                    sharpe_data = sharpe_data[:, :, 1::2, :, :]
                elif args.decode_mode == 'deblur':
                    sharpe_data = sharpe_data[:, :, 0::2, :, :]
                else:
                    # print('\nBoth\n')
                    sharpe_data = sharpe_data
                # print(sharpe_data.shape)
                # input(blur_data.shape)
                blur_data = blur_data.to(device)[:, :, :, :352, :].permute(
                    0, 1, 2, 4, 3)
                try:
                    sharpe_data = sharpe_data.squeeze().to(
                        device)[:, :, :, :352, :].permute(0, 1, 2, 4, 3)
                except:
                    sharpe_data = sharpe_data.squeeze(3).to(
                        device)[:, :, :, :352, :].permute(0, 1, 2, 4, 3)

                # clear gradient
                optimizer.zero_grad()

                # forward pass
                sharpe_out = model(blur_data)
                # import pdb; pdb.set_trace()
                # input(sharpe_out.shape)

                # compute losses
                sharpe_out = sharpe_out.permute(0, 2, 1, 3, 4)
                B, C, S, Fx, Fy = sharpe_out.shape
                for loss_fn in criterion.keys():
                    loss_tmp = 0.0
                    if loss_fn == 'Perceptual':
                        for bidx in range(B):
                            loss_tmp += criterion_w[loss_fn] * \
                                        criterion[loss_fn](sharpe_out[bidx].permute(1, 0, 2, 3),
                                                           sharpe_data[bidx].permute(1, 0, 2, 3)).sum()
                        # loss_tmp /= B
                    else:
                        loss_tmp = criterion_w[loss_fn] * \
                                   criterion[loss_fn](sharpe_out, sharpe_data)
                    loss += loss_tmp
                    try:
                        loss_tracker_test[loss_fn] += loss_tmp.item()
                    except KeyError:
                        loss_tracker_test[loss_fn] = loss_tmp.item()

                if ((test_idx % args.progress_iter) == args.progress_iter - 1):
                    itr = test_idx + epoch * len(test_loader)
                    # itr_train
                    writer.add_scalars(
                        'Loss', {
                            'trainLoss': running_loss / total_steps,
                            'validationLoss': running_loss_test / total_steps_test
                        }, itr)
                    writer.add_scalar('Train PSNR', psnr_ / total_steps, itr)
                    writer.add_scalar('Test PSNR', psnr_test / total_steps_test, itr)
                    # import pdb; pdb.set_trace()
                    # writer.add_image('Validation', sharpe_out.permute(0, 2, 3, 1), itr)

                # statistics
                sharpe_out = sharpe_out.detach().cpu().numpy()
                sharpe_data = sharpe_data.cpu().numpy()
                for sidx in range(S):
                    for bidx in range(B):
                        psnr_test += psnr(
                            sharpe_out[bidx, :, sidx, :, :],
                            sharpe_data[bidx, :, sidx, :, :])  #, peak=1.0)
                        dssim_test += dssim(
                            np.moveaxis(sharpe_out[bidx, :, sidx, :, :], 0, 2),
                            np.moveaxis(sharpe_data[bidx, :, sidx, :, :], 0, 2))  #,range=1.0 )
                running_loss_test += loss.item()
                total_steps_test += B * S
                loss_str = ''
                for key in loss_tracker.keys():
                    loss_str += ' {0} : {1:6.4f} '.format(
                        key, 1.0 * loss_tracker_test[key] / total_steps_test)

                # set display info
                tqdm_loader_test.set_description((
                    '\r[Test ] [Ep {0:6d}] loss: {1:6.4f} PSNR: {2:6.4f} SSIM: {3:6.4f} '
                    .format(epoch, running_loss_test / total_steps_test,
                            psnr_test / total_steps_test,
                            dssim_test / total_steps_test) + loss_str))
                tqdm_loader_test.update(1)
            tqdm_loader_test.close()

        # save model: keep only the single best-test-PSNR checkpoint, removing
        # the previously saved one first.
        if psnr_old < (psnr_test / total_steps_test):
            if epoch != 1:
                os.remove(
                    os.path.join(
                        args.checkpoint_dir,
                        'epoch-{}-test-psnr-{}-ssim-{}.ckpt'.format(
                            epoch_old,
                            str(round(psnr_old, 4)).replace('.', 'pt'),
                            str(round(dssim_old, 4)).replace('.', 'pt'))))
            epoch_old = epoch
            psnr_old = psnr_test / total_steps_test
            dssim_old = dssim_test / total_steps_test
            checkpoint_dict = {
                'epoch': epoch_old,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'train_psnr': psnr_ / total_steps,
                'train_dssim': dssim_ / total_steps,
                'train_mse': loss_tracker['MSE'] / total_steps,
                'train_l1': loss_tracker['L1'] / total_steps,
                # 'train_percp': loss_tracker['Perceptual'] / total_steps,
                'test_psnr': psnr_old,
                'test_dssim': dssim_old,
                'test_mse': loss_tracker_test['MSE'] / total_steps_test,
                'test_l1': loss_tracker_test['L1'] / total_steps_test,
                # 'test_percp': loss_tracker_test['Perceptual'] / total_steps_test,
            }
            torch.save(
                checkpoint_dict,
                os.path.join(
                    args.checkpoint_dir,
                    'epoch-{}-test-psnr-{}-ssim-{}.ckpt'.format(
                        epoch_old,
                        str(round(psnr_old, 4)).replace('.', 'pt'),
                        str(round(dssim_old, 4)).replace('.', 'pt'))))
    # if epoch % args.checkpoint_epoch == 0:
    #     torch.save(model.state_dict(),args.checkpoint_dir + str(int(epoch/100))+".ckpt")
    return None
# norm=2, # usegpu=True) weights = [ 1, 1, 3.7, 3.9, 1, 8.5, 12.6 / 2, 3.3, 22.1 / 2, 4.1, 7.1, 24.1 / 2, 10.7 / 2, 23.6 / 2, 14.3 / 2, 19.1 / 2, 7.3, 10.2 / 2, 4.2, 5.4, 51.4 / 2, 1592 / 40, 38.1 / 2, 4.0, 8.3, 3.3, 31.6 / 2, 2.5, 14.0 / 2, 3.3, 67.9 / 2, 74.5 / 2, 6.6, 5.1, 21.2 / 2, 40.1 / 2, 42.2 / 2, 8.1, 15.2 / 2, 14.8 / 2, 3.0 ] #e padding criterion_ce = nn.CrossEntropyLoss(ignore_index=99, weight=torch.Tensor(weights)).cuda() discriminative_loss = DiscriminativeLoss().cuda() # Optimizer parameters = model.parameters() optimizer = optim.Adam(parameters, lr=1e-3) #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, # mode='min', # factor=0.1, # patience=10, # verbose=True) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1) def train_model(model, optimizer, scheduler, num_epochs=10): #early = time.time() # Train n_iter_tr = 0 n_iter_val = 0
class Solver:
    """Training / validation / inference driver.

    Supports two tasks selected by ``config.task``:
      * ``'cls'`` — binary classification (accuracy tracked),
      * ``'seg'`` — segmentation (pixel accuracy and mIoU tracked).

    The constructor builds the model/criterion/optimizer, optionally resumes
    from ``config.pre_model``, and opens a tensorboardX writer when
    ``config.use_tensorboard`` is set.
    """

    def __init__(self, config, train_loader=None, val_loader=None,
                 test_loader=None):
        self.cfg = config
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.n_gpus = self.cfg.n_gpus
        # NOTE(review): 'test' mode keeps train/val loaders here; only other
        # modes get test_loader — confirm that is intended by callers.
        if self.cfg.mode in ['train', 'test']:
            self.train_loader = train_loader
            self.val_loader = val_loader
        else:
            self.test_loader = test_loader

        # Build model, criterion and optimizer
        self.build_model()

        if self.cfg.resume:
            self.load_pre_model()
        else:
            self.start_epoch = 0

        # Trigger Tensorboard logger; disable the flag if tensorboardX is absent
        if self.cfg.use_tensorboard:
            try:
                from tensorboardX import SummaryWriter
                self.writer = SummaryWriter()
            except ImportError:
                print(
                    '=> There is no module named tensorboardX, tensorboard disabled'
                )
                self.cfg.use_tensorboard = False

    def train_val(self):
        """Run the training loop and periodically validate."""
        # Build record objs
        self.build_recorder()

        # Ceiling division: count a partial final batch as one iteration
        iter_per_epoch = len(
            self.train_loader.dataset) // self.cfg.train_batch_size
        if len(self.train_loader.dataset) % self.cfg.train_batch_size != 0:
            iter_per_epoch += 1

        for epoch in range(self.start_epoch,
                           self.start_epoch + self.cfg.n_epochs):
            self.model.train()
            self.train_time.reset()
            self.train_loss.reset()
            self.train_cls_acc.reset()
            self.train_pix_acc.reset()
            self.train_mIoU.reset()

            for i, (image, label) in enumerate(self.train_loader):
                start_time = time.time()
                image_var = image.to(self.device)
                label_var = label.to(self.device)

                output = self.model(image_var)
                loss = self.criterion(output, label_var)

                self.optim.zero_grad()
                loss.backward()
                self.optim.step()
                end_time = time.time()

                self.train_time.update(end_time - start_time)
                self.train_loss.update(loss.item())

                if self.cfg.task == 'cls':
                    # Record classification accuracy
                    cls_acc = cal_acc(output, label_var)
                    # Update recorder
                    self.train_cls_acc.update(cls_acc.item())

                    if (i + 1) % self.cfg.log_step == 0:
                        print(
                            'Epoch[{0}][{1}/{2}]\t'
                            'Time {train_time.val:.3f} ({train_time.avg:.3f})\t'
                            'Loss {train_loss.val:.4f} ({train_loss.avg:.4f})\t'
                            'Accuracy {train_cls_acc.val:.4f} ({train_cls_acc.avg:.4f})'
                            .format(epoch + 1, i + 1, iter_per_epoch,
                                    train_time=self.train_time,
                                    train_loss=self.train_loss,
                                    train_cls_acc=self.train_cls_acc))

                    if self.cfg.use_tensorboard:
                        self.writer.add_scalar('train/loss', loss.item(),
                                               epoch * iter_per_epoch + i)
                        self.writer.add_scalar('train/accuracy', cls_acc.item(),
                                               epoch * iter_per_epoch + i)

                elif self.cfg.task == 'seg':
                    # Record mIoU and pixel-wise accuracy
                    pix_acc = cal_pixel_acc(output, label_var)
                    mIoU = cal_mIoU(output, label_var)[-1]
                    mIoU = torch.mean(mIoU)
                    # Update recorders
                    self.train_pix_acc.update(pix_acc.item())
                    self.train_mIoU.update(mIoU.item())

                    if (i + 1) % self.cfg.log_step == 0:
                        print(
                            'Epoch[{0}][{1}/{2}]\t'
                            'Time {train_time.val:.3f} ({train_time.avg:.3f})\t'
                            'Loss {train_loss.val:.4f} ({train_loss.avg:.4f})\t'
                            'Pixel-Acc {train_pix_acc.val:.4f} ({train_pix_acc.avg:.4f})\t'
                            'mIoU {train_mIoU.val:.4f} ({train_mIoU.avg:.4f})'.
                            format(epoch + 1, i + 1, iter_per_epoch,
                                   train_time=self.train_time,
                                   train_loss=self.train_loss,
                                   train_pix_acc=self.train_pix_acc,
                                   train_mIoU=self.train_mIoU))

                    if self.cfg.use_tensorboard:
                        self.writer.add_scalar('train/loss', loss.item(),
                                               epoch * iter_per_epoch + i)
                        self.writer.add_scalar('train/pix_acc', pix_acc.item(),
                                               epoch * iter_per_epoch + i)
                        self.writer.add_scalar('train/mIoU', mIoU.item(),
                                               epoch * iter_per_epoch + i)

            #FIXME currently test validation code
            #if (i + 1) % 100 == 0:
            if (epoch + 1) % self.cfg.val_step == 0:
                self.validate(epoch)

        # Close logging
        # FIX: only close the writer when it was actually created; the original
        # unconditional close raised AttributeError with tensorboard disabled.
        if self.cfg.use_tensorboard:
            self.writer.close()

    def validate(self, epoch):
        """ Validate with validation dataset """
        self.model.eval()
        self.val_time.reset()
        self.val_loss.reset()
        self.val_cls_acc.reset()
        self.val_mIoU.reset()
        self.val_pix_acc.reset()

        iter_per_epoch = len(
            self.val_loader.dataset) // self.cfg.val_batch_size
        if len(self.val_loader.dataset) % self.cfg.val_batch_size != 0:
            iter_per_epoch += 1

        for i, (image, label) in enumerate(self.val_loader):
            start_time = time.time()
            image_var = image.to(self.device)
            label_var = label.to(self.device)

            output = self.model(image_var)
            loss = self.criterion(output, label_var)
            end_time = time.time()

            self.val_time.update(end_time - start_time)
            self.val_loss.update(loss.item())

            if self.cfg.task == 'cls':
                # Record classification accuracy
                cls_acc = cal_acc(output, label_var)
                # Update recorder
                self.val_cls_acc.update(cls_acc.item())

                if (i + 1) % self.cfg.log_step == 0:
                    print(
                        'Epoch[{0}][{1}/{2}]\t'
                        'Time {val_time.val:.3f} ({val_time.avg:.3f})\t'
                        'Loss {val_loss.val:.4f} ({val_loss.avg:.4f})\t'
                        'Accuracy {val_cls_acc.val:.4f} ({val_cls_acc.avg:.4f})'
                        .format(epoch + 1, i + 1, iter_per_epoch,
                                val_time=self.val_time,
                                val_loss=self.val_loss,
                                val_cls_acc=self.val_cls_acc))

                if self.cfg.use_tensorboard:
                    self.writer.add_scalar('val/loss', loss.item(),
                                           epoch * iter_per_epoch + i)
                    self.writer.add_scalar('val/accuracy', cls_acc.item(),
                                           epoch * iter_per_epoch + i)

            elif self.cfg.task == 'seg':
                # Record mIoU and pixel-wise accuracy
                pix_acc = cal_pixel_acc(output, label_var)
                mIoU = cal_mIoU(output, label_var)[-1]
                mIoU = torch.mean(mIoU)
                # Update recorders
                self.val_pix_acc.update(pix_acc.item())
                self.val_mIoU.update(mIoU.item())

                if (i + 1) % self.cfg.log_step == 0:
                    print(
                        ' ##### Validation\t'
                        'Epoch[{0}][{1}/{2}]\t'
                        'Time {val_time.val:.3f} ({val_time.avg:.3f})\t'
                        'Loss {val_loss.val:.4f} ({val_loss.avg:.4f})\t'
                        'Pixel-Acc {val_pix_acc.val:.4f} ({val_pix_acc.avg:.4f})\t'
                        'mIoU {val_mIoU.val:.4f} ({val_mIoU.avg:.4f})'.format(
                            epoch + 1, i + 1, iter_per_epoch,
                            val_time=self.val_time,
                            val_loss=self.val_loss,
                            val_pix_acc=self.val_pix_acc,
                            val_mIoU=self.val_mIoU))

                if self.cfg.use_tensorboard:
                    self.writer.add_scalar('val/loss', loss.item(),
                                           epoch * iter_per_epoch + i)
                    self.writer.add_scalar('val/pix_acc', pix_acc.item(),
                                           epoch * iter_per_epoch + i)
                    self.writer.add_scalar('val/mIoU', mIoU.item(),
                                           epoch * iter_per_epoch + i)

        # Checkpointing / sample dumps use the tensors of the LAST validation
        # batch (image/label/output fall out of the loop above).
        if self.cfg.task == 'cls':
            if (epoch + 1) % self.cfg.model_save_epoch == 0:
                state = {
                    'epoch': epoch + 1,
                    'state_dict': self.model.state_dict(),
                    'optim': self.optim.state_dict()
                }
                if self.best_cls < self.val_cls_acc.avg:
                    self.best_cls = self.val_cls_acc.avg
                    torch.save(
                        state, './model/cls_model_' + str(epoch + 1) + '_' +
                        str(self.val_cls_acc.avg)[0:5] + '.pth')
        elif self.cfg.task == 'seg':
            # FIX: compute pred unconditionally; the original only assigned it
            # inside the sample_save_epoch branch, so the tensorboard block
            # below could hit a NameError on other epochs.
            pred = torch.argmax(output, dim=1)

            # Save segmentation samples and model
            if (epoch + 1) % self.cfg.sample_save_epoch == 0:
                save_image(image, './sample/ori_' + str(epoch + 1) + '.png')
                save_image(label.unsqueeze(1),
                           './sample/true_' + str(epoch + 1) + '.png')
                save_image(pred.cpu().unsqueeze(1),
                           './sample/pred_' + str(epoch + 1) + '.png')

            if (epoch + 1) % self.cfg.model_save_epoch == 0:
                state = {
                    'epoch': epoch + 1,
                    'state_dict': self.model.state_dict(),
                    'optim': self.optim.state_dict()
                }
                if self.best_seg < self.val_pix_acc.avg:
                    self.best_seg = self.val_pix_acc.avg
                    torch.save(
                        state, './model/seg_model_' + str(epoch + 1) + '_' +
                        str(self.val_pix_acc.avg)[0:5] + '.pth')

                if self.cfg.use_tensorboard:
                    image = make_grid(image)
                    label = make_grid(label.unsqueeze(1))
                    # FIX: 'unqueeze' -> 'unsqueeze' (original raised
                    # AttributeError); tag typo 'Origianl' -> 'Original'.
                    pred = make_grid(pred.cpu().unsqueeze(1))
                    self.writer.add_image('Original', image, epoch + 1)
                    self.writer.add_image('Labels', label, epoch + 1)
                    self.writer.add_image('Predictions', pred, epoch + 1)

    def build_model(self):
        """ Rough """
        # Pick the architecture by task; both share criterion and optimizer.
        if self.cfg.task == 'cls':
            self.model = BinaryClassifier(num_classes=2)
        elif self.cfg.task == 'seg':
            self.model = UNet(num_classes=2)

        self.criterion = nn.CrossEntropyLoss()
        self.optim = torch.optim.Adam(self.model.parameters(),
                                      lr=self.cfg.lr,
                                      betas=(self.cfg.beta0, self.cfg.beta1))

        if self.n_gpus > 1:
            print('### {} of gpus are used!!!'.format(self.n_gpus))
            self.model = nn.DataParallel(self.model)
        self.model = self.model.to(self.device)

    def build_recorder(self):
        """Create AverageMeter recorders for train/val metrics."""
        # Train recorder
        self.train_time = AverageMeter()
        self.train_loss = AverageMeter()
        # For classification
        self.train_cls_acc = AverageMeter()
        # For segmentation
        self.train_mIoU = AverageMeter()
        self.train_pix_acc = AverageMeter()

        # Validation recorder
        self.val_time = AverageMeter()
        self.val_loss = AverageMeter()
        # For classification
        self.val_cls_acc = AverageMeter()
        # For segmentation
        self.val_mIoU = AverageMeter()
        self.val_pix_acc = AverageMeter()

        # self.logger = Logger('./logs')

        # Best metrics seen so far (gate checkpoint saving)
        self.best_cls = 0
        self.best_seg = 0

    def load_pre_model(self):
        """ Load pretrained model """
        print('=> loading checkpoint {}'.format(self.cfg.pre_model))
        checkpoint = torch.load(self.cfg.pre_model)
        self.start_epoch = checkpoint['epoch']
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optim.load_state_dict(checkpoint['optim'])
        print('=> loaded checkpoint {}(epoch {})'.format(
            self.cfg.pre_model, self.start_epoch))

    #TODO:Inference part:
    def infer(self, data):
        """
        input
        @data: iterable 256 x 256 patches
        output
        @output : segmentation results from each patch
        i) If classifier's result is that there is a tissue inside of patch,
           outcome is a masked result.
        ii) Otherwise, output is segmentated mask which all of pixels are
            background
        """
        # Data Loading
        # Load models of classification and segmetation and freeze them
        self.freeze()
        # Forward images to Classification model / Select targeted images
        # Forward images to Segmentation model
        # Record Loss / Accuracy / Pixel-Accuracy
        # Print samples out..

    def freeze(self):
        # NOTE(review): placeholder — prints but does not actually freeze
        # any parameters yet.
        pass
        print('{}, {} have frozen!!!'.format('model_name_1', 'model_name_2'))
criterion = dice_BCE elif LOSS_NUM == 3: print("Using distance weighted BCE") criterion = boundary_distance_BCE elif LOSS_NUM == 4: print("Using distance weighted Dice") criterion = boundary_distance_dice elif LOSS_NUM == 5: print("Using Lovasz") criterion = lovasz_hinge_loss elif LOSS_NUM == 6: criterion = lovasz_hinge # Pick optimizer optimizer = Adam(model.parameters(), lr=LR) print("Reading data") all_data, labels, img_name, class_labels = get_data(num_points_fetch) all_data = all_data / 255.0 print("Loaded data") if LOSS_NUM == 3 or LOSS_NUM == 4: print("calculating weights") obj = DistToBoundary(labels) rowWts, colWts = obj.computeWeights() weights = 5 * np.exp(-(rowWts + colWts)/50.0) # Split data into train and test test_data = all_data[train_num_pts:]
# checkpoint ckpt_list = ['model_ckpt_2000.pt', 'model_ckpt_10000.pt'] # Define the model device = torch.device("cuda" if torch.cuda.is_available() else "cpu") depth = 3 model = UNet(in_channels=1, base_filters=16, out_channels=1, depth=depth) if torch.cuda.device_count() > 1: print('---Using {} GPUs---'.format(torch.cuda.device_count())) model = nn.DataParallel(model) model.to(device) # criterion criterion = nn.BCELoss(reduction='mean') # optimizer optimizer = torch.optim.SGD(model.parameters(), lr=5e-4, momentum=0.9, weight_decay=0.00005, nesterov=True) network = NeuralNetwork(model, criterion, optimizer, device) input_sz = (108, 108) step = (68, 68) # Load checkpoints for ckpt in ckpt_list: print("Test checkpoint {}".format(ckpt)) for idx in range(len(img_mask_name)): curr_name = img_mask_name[idx] assert os.path.exists(curr_name[0]) and os.path.exists(curr_name[1]), \
def UNet(self):
    """Train or test a UNet image-to-image model depending on ``self.mode``.

    'train': builds the training dataset/loader, optionally resumes from a
    checkpoint, trains with MSE loss + Adam, logs images/loss to TensorBoard,
    and saves the model every 20 epochs.
    'test': loads the checkpoint and writes thresholded predictions as PNGs.

    NOTE(review): this method shares its name with the ``UNet`` network class
    it instantiates below (``net = UNet()...``) — presumably the class is
    resolved from the module scope; confirm there is no shadowing issue.
    """
    if self.mode == 'train':
        transform = transforms.Compose([Normalization(mean=0.5, std=0.5, mode='train'), ToTensor()])

        dataset_train = Dataset(mode = self.mode, data_dir=self.data_dir, image_type = self.image_type, transform=transform)
        loader_train = DataLoader(dataset_train, batch_size=self.batch_size, shuffle=True, num_workers=8)

        # dataset_val = Dataset(data_dir=os.path.join(data_dir, 'val'), transform=transform)
        # loader_val = DataLoader(dataset_val, batch_size=batch_size, shuffle=False, num_workers=8)

        # Set up the remaining auxiliary variables
        num_data_train = len(dataset_train)
        # num_data_val = len(dataset_val)

        num_batch_train = np.ceil(num_data_train / self.batch_size)
        # num_batch_val = np.ceil(num_data_val / batch_size)
    elif self.mode == 'test':
        transform = transforms.Compose([Normalization(mean=0.5, std=0.5, mode='test'), ToTensor()])

        dataset_test = Dataset(mode = self.mode, data_dir=self.data_dir, image_type = self.image_type, transform=transform)
        loader_test = DataLoader(dataset_test, batch_size=self.batch_size, shuffle=False, num_workers=8)

        # Set up the remaining auxiliary variables
        num_data_test = len(dataset_test)
        num_batch_test = np.ceil(num_data_test / self.batch_size)

    # Helpers: NCHW tensor -> NHWC numpy; undo 0.5/0.5 normalization;
    # binarize sigmoid-ish outputs at 0.5.
    fn_tonumpy = lambda x: x.to('cpu').detach().numpy().transpose(0, 2, 3, 1)
    fn_denorm = lambda x, mean, std: (x * std) + mean
    fn_class = lambda x: 1.0 * (x > 0.5)

    net = UNet().to(self.device)
    criterion = torch.nn.MSELoss().to(self.device)
    optimizer = torch.optim.Adam(net.parameters(), lr=self.lr)

    # NOTE(review): the train writer is created even in 'test' mode.
    writer_train = SummaryWriter(log_dir=os.path.join(self.log_dir, 'train'))

    if self.mode == 'train':
        if self.train_continue == "on":
            # Resume weights and optimizer state from the checkpoint dir
            net, optimizer = load_model(ckpt_dir=self.ckpt_dir, net=net, optim=optimizer)

        for epoch in range(1, self.num_epoch + 1):
            net.train()
            loss_arr = []

            for batch, data in enumerate(loader_train, 1):
                # forward pass
                label = data['label'].to(self.device)
                input = data['input'].to(self.device)

                output = net(input)

                # backward pass
                optimizer.zero_grad()

                loss = criterion(output, label)
                loss.backward()

                optimizer.step()

                # Accumulate the loss values
                loss_arr += [loss.item()]

                # Log to TensorBoard
                label = fn_tonumpy(label)
                input = fn_tonumpy(fn_denorm(input, mean=0.5, std=0.5))
                output = fn_tonumpy(fn_class(output))

                writer_train.add_image('label', label, num_batch_train * (epoch - 1) + batch, dataformats='NHWC')
                writer_train.add_image('input', input, num_batch_train * (epoch - 1) + batch, dataformats='NHWC')
                writer_train.add_image('output', output, num_batch_train * (epoch - 1) + batch, dataformats='NHWC')

            writer_train.add_scalar('loss', np.mean(loss_arr), epoch)

            print("TRAIN: EPOCH %04d / %04d | LOSS %.4f" %(epoch, self.num_epoch, np.mean(loss_arr)))

            if epoch % 20 == 0:
                # NOTE(review): epoch=0 is passed regardless of the actual
                # epoch — presumably the real epoch was intended; confirm.
                save_model(ckpt_dir=self.ckpt_dir, net=net, optim=optimizer, epoch=0)

        writer_train.close()

    # TEST MODE
    elif self.mode == 'test':
        net, optimizer = load_model(ckpt_dir=self.ckpt_dir, net=net, optim=optimizer)

        with torch.no_grad():
            net.eval()
            loss_arr = []
            # Running output-file index; jumps from 800 to 2350 below —
            # NOTE(review): looks dataset-specific, confirm numbering scheme.
            id = 1
            for batch, data in enumerate(loader_test, 1):
                # forward pass
                input = data['input'].to(self.device)

                output = net(input)

                # Compute the loss
                #loss = criterion(output, label)
                #loss_arr += [loss.item()]
                #print("TEST: BATCH %04d / %04d | " %
                #      (batch, num_batch_test))

                # Save outputs (TensorBoard-style post-processing)
                output = fn_tonumpy(fn_class(output))

                for j in range(input.shape[0]):
                    if id == 800:
                        id = 2350
                    print(id)
                    #plt.imsave(os.path.join(self.result_dir, 'png', 'label_%04d.png' % id), label[j].squeeze(), cmap='gray')
                    #plt.imsave(os.path.join(self.result_dir, 'png', 'input_%04d.png' % id), input[j].squeeze(), cmap='gray')
                    plt.imsave(os.path.join(self.result_dir, 'png', 'gt%06d.png' % id), output[j].squeeze(), cmap='gray')
                    id+=1

                    # np.save(os.path.join(result_dir, 'numpy', 'label_%04d.npy' % id), label[j].squeeze())
                    # np.save(os.path.join(result_dir, 'numpy', 'input_%04d.npy' % id), input[j].squeeze())
                    # np.save(os.path.join(result_dir, 'numpy', 'output_%04d.npy' % id), output[j].squeeze())

            # NOTE(review): loss_arr stays empty (loss computation is commented
            # out), so np.mean(loss_arr) is NaN here — confirm intent.
            print("AVERAGE TEST: BATCH %04d / %04d | LOSS %.4f" % (batch, num_batch_test, np.mean(loss_arr)))
def main():
    """Entry point: parse CLI options, build HDF5-backed train/test loaders,
    train a UNet with SGD + StepLR, and checkpoint the weights every epoch.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=100, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=67, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=80, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.3, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=13, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=5, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--output_nc', type=int, default=1, metavar='N',
                        help='output channels')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # Dataset making
    img_dir = '/n/holyscratch01/wadduwage_lab/temp20200620/20-Jun-2020/beads_tr_data_5sls_20-Jun-2020.h5'
    train_dataset = HDF5Dataset(img_dir=img_dir, isTrain=True)
    test_dataset = HDF5Dataset(img_dir=img_dir, isTrain=False)

    # Data Loading
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=0,
                              drop_last=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=0,
                             drop_last=True)

    # FIX: original used .cuda(), which ignored --no-cuda and crashed on
    # CPU-only machines; .to(device) honors the device selected above.
    model = UNet(n_classes=args.output_nc).to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=1e-6)
    scheduler = StepLR(optimizer, step_size=20, gamma=args.gamma)
    criterion = torch.nn.SmoothL1Loss()

    for epoch in range(1, args.epochs + 1):
        tloss = train(args, model, device, train_loader, optimizer, epoch,
                      criterion)
        vloss = test(args, model, device, test_loader, criterion)
        print("epoch:%.1f" % epoch, "Train_loss:%.4f" % tloss,
              "Val_loss:%.4f" % vloss)
        scheduler.step()
        # model_path is assumed to be a module-level constant — TODO confirm.
        # Idiomatic replacement for the original try/mkdir/except-OSError.
        os.makedirs(model_path, exist_ok=True)
        torch.save(model.state_dict(),
                   model_path + "/fcn_deep_" + str(epoch) + ".pth")
class Instructor:
    '''
    Model training and evaluation driver for a 3-channel -> 1-class UNet.

    In inference mode (``opt.inference``) only a test dataset is built;
    otherwise train and validation datasets are built. A checkpoint is
    optionally restored, DataParallel is applied when ``opt.multi_gpu == 'on'``,
    and the model is moved to ``opt.device``.
    '''
    def __init__(self, opt):
        self.opt = opt
        if opt.inference:
            # Inference only needs the unlabeled test images
            self.testset = TestImageDataset(fdir=opt.impaths['test'],
                                            imsize=opt.imsize)
        else:
            self.trainset = ImageDataset(fdir=opt.impaths['train'],
                                         bdir=opt.impaths['btrain'],
                                         imsize=opt.imsize,
                                         mode='train',
                                         aug_prob=opt.aug_prob,
                                         prefetch=opt.prefetch)
            self.valset = ImageDataset(fdir=opt.impaths['val'],
                                       bdir=opt.impaths['bval'],
                                       imsize=opt.imsize,
                                       mode='val',
                                       aug_prob=opt.aug_prob,
                                       prefetch=opt.prefetch)
        self.model = UNet(n_channels=3,
                          n_classes=1,
                          bilinear=self.opt.use_bilinear)
        if opt.checkpoint:
            self.model.load_state_dict(
                torch.load('./state_dict/{:s}'.format(opt.checkpoint),
                           map_location=self.opt.device))
            print('checkpoint {:s} has been loaded'.format(opt.checkpoint))
        if opt.multi_gpu == 'on':
            self.model = torch.nn.DataParallel(self.model)
        self.model = self.model.to(opt.device)
        self._print_args()

    def _print_args(self):
        """Print parameter counts and all CLI arguments; cache the text in
        ``self.info`` so it can be written into the final report."""
        n_trainable_params, n_nontrainable_params = 0, 0
        for p in self.model.parameters():
            n_params = torch.prod(torch.tensor(p.shape))
            if p.requires_grad:
                n_trainable_params += n_params
            else:
                n_nontrainable_params += n_params
        self.info = 'n_trainable_params: {0}, n_nontrainable_params: {1}\n'.format(
            n_trainable_params, n_nontrainable_params)
        self.info += 'training arguments:\n' + '\n'.join([
            '>>> {0}: {1}'.format(arg, getattr(self.opt, arg))
            for arg in vars(self.opt)
        ])
        if self.opt.device.type == 'cuda':
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(opt.device.index))
        print(self.info)

    def _reset_records(self):
        """Reset the per-run bookkeeping (best epoch/dice, loss curves and the
        list of checkpoint files written so far)."""
        self.records = {
            'best_epoch': 0,
            'best_dice': 0,
            'train_loss': list(),
            'val_loss': list(),
            'val_dice': list(),
            'checkpoints': list()
        }

    def _update_records(self, epoch, train_loss, val_loss, val_dice):
        """Append this epoch's metrics; save a new checkpoint whenever the
        validation dice improves on the best seen so far."""
        if val_dice > self.records['best_dice']:
            path = './state_dict/{:s}_dice{:.4f}_temp{:s}.pt'.format(
                self.opt.model_name, val_dice, str(time.time())[-6:])
            if self.opt.multi_gpu == 'on':
                # Unwrap DataParallel so the state dict keys stay portable
                torch.save(self.model.module.state_dict(), path)
            else:
                torch.save(self.model.state_dict(), path)
            self.records['best_epoch'] = epoch
            self.records['best_dice'] = val_dice
            self.records['checkpoints'].append(path)
        self.records['train_loss'].append(train_loss)
        self.records['val_loss'].append(val_loss)
        self.records['val_dice'].append(val_dice)

    def _draw_records(self):
        """Summarize the run: keep only the best checkpoint (renamed with a
        timestamp), plot loss/dice curves, and write a text report."""
        timestamp = str(int(time.time()))
        print('best epoch: {:d}'.format(self.records['best_epoch']))
        print('best train loss: {:.4f}, best val loss: {:.4f}'.format(
            min(self.records['train_loss']), min(self.records['val_loss'])))
        print('best val dice {:.4f}'.format(self.records['best_dice']))
        os.rename(
            self.records['checkpoints'][-1],
            './state_dict/{:s}_dice{:.4f}_save{:s}.pt'.format(
                self.opt.model_name, self.records['best_dice'], timestamp))
        # Drop the superseded intermediate checkpoints
        for path in self.records['checkpoints'][0:-1]:
            os.remove(path)
        # Draw figures
        plt.figure()
        trainloss, = plt.plot(self.records['train_loss'])
        valloss, = plt.plot(self.records['val_loss'])
        plt.legend([trainloss, valloss], ['train', 'val'], loc='upper right')
        plt.title('{:s} loss curve'.format(timestamp))
        plt.savefig('./figs/{:s}_loss.png'.format(timestamp),
                    format='png',
                    transparent=True,
                    dpi=300)
        plt.figure()
        valdice, = plt.plot(self.records['val_dice'])
        plt.title('{:s} dice curve'.format(timestamp))
        plt.savefig('./figs/{:s}_dice.png'.format(timestamp),
                    format='png',
                    transparent=True,
                    dpi=300)
        # Save report
        report = '\t'.join(
            ['val_dice', 'train_loss', 'val_loss', 'best_epoch', 'timestamp'])
        report += "\n{:.4f}\t{:.4f}\t{:.4f}\t{:d}\t{:s}\n{:s}".format(
            self.records['best_dice'], min(self.records['train_loss']),
            min(self.records['val_loss']), self.records['best_epoch'],
            timestamp, self.info)
        with open('./logs/{:s}_log.txt'.format(timestamp), 'w') as f:
            f.write(report)
        print('report saved:', './logs/{:s}_log.txt'.format(timestamp))

    def _train(self, train_dataloader, criterion, optimizer):
        """Run one training epoch; returns the average loss per sample unit.

        NOTE(review): ``len(sample_batched)`` is the length of the
        (inputs, target) tuple (always 2), not the batch size — presumably
        ``inputs.size(0)`` was intended. The constant factor cancels in the
        final ``train_loss / n_total``, so the returned mean is per-batch.
        """
        self.model.train()
        train_loss, n_total, n_batch = 0, 0, len(train_dataloader)
        for i_batch, sample_batched in enumerate(train_dataloader):
            inputs, target = sample_batched[0].to(
                self.opt.device), sample_batched[1].to(self.opt.device)
            predict = self.model(inputs)
            optimizer.zero_grad()
            loss = criterion(predict, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * len(sample_batched)
            n_total += len(sample_batched)
            # Text progress bar: 50 chars wide
            ratio = int((i_batch + 1) * 50 / n_batch)
            sys.stdout.write("\r[" + ">" * ratio + " " * (50 - ratio) +
                             "] {}/{} {:.2f}%".format(i_batch + 1, n_batch,
                                                      (i_batch + 1) * 100 /
                                                      n_batch))
            sys.stdout.flush()
        print()
        return train_loss / n_total

    def _evaluation(self, val_dataloader, criterion):
        """Evaluate on the validation set; returns (mean loss, mean dice).

        NOTE(review): same ``len(sample_batched)`` caveat as in ``_train``.
        """
        self.model.eval()
        val_loss, val_dice, n_total = 0, 0, 0
        with torch.no_grad():
            for sample_batched in val_dataloader:
                inputs, target = sample_batched[0].to(
                    self.opt.device), sample_batched[1].to(self.opt.device)
                predict = self.model(inputs)
                loss = criterion(predict, target)
                dice = dice_coeff(predict, target)
                val_loss += loss.item() * len(sample_batched)
                val_dice += dice.item() * len(sample_batched)
                n_total += len(sample_batched)
        return val_loss / n_total, val_dice / n_total

    def run(self):
        """Full training run: Adam + BCELoss2d over ``opt.num_epoch`` epochs,
        tracking records and producing the final plots/report."""
        _params = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = torch.optim.Adam(_params,
                                     lr=self.opt.lr,
                                     weight_decay=self.opt.l2reg)
        criterion = BCELoss2d()
        train_dataloader = DataLoader(dataset=self.trainset,
                                      batch_size=self.opt.batch_size,
                                      shuffle=True)
        val_dataloader = DataLoader(dataset=self.valset,
                                    batch_size=self.opt.batch_size,
                                    shuffle=False)
        self._reset_records()
        for epoch in range(self.opt.num_epoch):
            train_loss = self._train(train_dataloader, criterion, optimizer)
            val_loss, val_dice = self._evaluation(val_dataloader, criterion)
            self._update_records(epoch, train_loss, val_loss, val_dice)
            print(
                '{:d}/{:d} > train loss: {:.4f}, val loss: {:.4f}, val dice: {:.4f}'
                .format(epoch + 1, self.opt.num_epoch, train_loss, val_loss,
                        val_dice))
        self._draw_records()

    def inference(self):
        """Predict masks for the test set one image at a time and save them
        via the dataset's own ``save_img`` (optionally CRF-refined).

        NOTE(review): ``self.model.eval()`` is never called here — confirm
        whether dropout/batchnorm should be in eval mode for inference.
        """
        test_dataloader = DataLoader(dataset=self.testset,
                                     batch_size=1,
                                     shuffle=False)
        n_batch = len(test_dataloader)
        with torch.no_grad():
            for i_batch, sample_batched in enumerate(test_dataloader):
                index, inputs = sample_batched[0], sample_batched[1].to(
                    self.opt.device)
                predict = self.model(inputs)
                self.testset.save_img(index.item(), predict, self.opt.use_crf)
                ratio = int((i_batch + 1) * 50 / n_batch)
                sys.stdout.write(
                    "\r[" + ">" * ratio + " " * (50 - ratio) +
                    "] {}/{} {:.2f}%".format(i_batch + 1, n_batch,
                                             (i_batch + 1) * 100 / n_batch))
                sys.stdout.flush()
        print()
# criterion = nn.MSELoss(reduction = 'sum') # PyTorch 0.4.1 # criterion = sum_squared_error() # criterion = nn.MSELoss() criterion = Intensity_loss() cri_chk = nn.MSELoss() if cuda: model = model.cuda() pre_model = pre_model.cuda() # device_ids = [0] # model = nn.DataParallel(model, device_ids=device_ids).cuda() criterion = criterion.cuda() cri_chk = cri_chk.cuda() optimizer = optim.Adam(model.parameters(), lr=args.lr) scheduler = MultiStepLR(optimizer, milestones=[20, 240, 360], gamma=0.2) # learning rates for epoch in range(initial_epoch, n_epoch): scheduler.step(epoch) # step to the learning rate in this epcoh xs = dg.datagenerator(data_dir=args.train_data) xs = xs.astype('float32') / 255.0 xs = torch.from_numpy(xs.transpose((0, 3, 1, 2))) # tensor of the clean patches, NXCXHXW DDataset = DenoisingDataset(xs, sigma) batch_y, batch_x = DDataset[:238336] # fig = plt.figure() # gs = GridSpec(nrows=1, ncols=2) # # plot1 = fig.add_subplot(gs[0, 0])
def train(n_channels, n_classes, bilinear, epochs, batch_size, lr, val_rate,
          num_workers, pin_memory, roots, threshold):
    """Train a UNet on the Supervisely dataset with MSE loss + RMSprop.

    Args:
        n_channels / n_classes / bilinear: UNet constructor arguments.
        epochs, batch_size, lr: usual training hyperparameters.
        val_rate: fraction of the dataset held out for validation.
        num_workers, pin_memory: DataLoader options.
        roots: (data_root, model_root, log_root) triple of paths.
        threshold: sigmoid cutoff used by the (currently commented-out)
            validation/visualization block.
    """
    data_root, model_root, log_root = roots

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # FIX: only query CUDA device info when a GPU is present; the original
    # unconditional calls raised on CPU-only machines.
    if device.type == 'cuda':
        print(torch.cuda.get_device_properties(device))
        print("id:{}".format(device))
        print("name:{}".format(torch.cuda.get_device_name(0)))
    logging.info(f'Using device {device}')

    model = UNet(n_channels, n_classes, bilinear).to(device)
    logging.info(f'Network:\n'
                 f'\t{n_channels} input channels\n'
                 f'\t{n_classes} output channels (classes)\n'
                 f'\t{"Bilinear" if bilinear else "Dilated conv"} upscaling')

    dataset = SuperviselyDataset(data_root)
    num_val = int(len(dataset) * val_rate)
    num_train = len(dataset) - num_val
    train_data, val_data = random_split(dataset, [num_train, num_val])
    train_loader = DataLoader(train_data,
                              batch_size,
                              shuffle=True,
                              num_workers=num_workers,
                              pin_memory=pin_memory)
    val_loader = DataLoader(val_data,
                            batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=pin_memory)

    writer = SummaryWriter(comment=f'LR_{lr}_BS_{batch_size}')
    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate (original):   {lr}
        Training item:   {num_train}
        Validation item: {num_val}
        Device:          {device.type}''')

    # criterion = nn.CrossEntropyLoss() if n_classes > 1 else nn.BCEWithLogitsLoss()
    criterion = nn.MSELoss()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=lr,
                              weight_decay=1e-8,
                              momentum=0.9)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min' if n_classes > 1 else 'max', patience=2)

    losses = []
    global_step = 0
    for epoch in range(epochs):
        model.train()
        with tqdm(total=num_train) as t:
            t.set_description('epoch: {}/{}'.format(epoch + 1, epochs))
            for img, mask in train_loader:
                # Masks are float for the single-class (regression/BCE-style)
                # case, long class indices otherwise.
                mask_type = torch.float32 if n_classes == 1 else torch.long
                mask = mask.to(device=device, dtype=mask_type)
                img = img.to(device=device, dtype=torch.float32)

                # update
                pred = model(img)
                loss = criterion(pred, mask)
                optimizer.zero_grad()
                loss.backward()
                # FIX: clip AFTER backward so the freshly computed gradients
                # are clipped; the original clipped before backward, which
                # operated on stale/absent gradients and left the real update
                # unclipped.
                nn.utils.clip_grad_value_(model.parameters(), 0.1)
                optimizer.step()

                losses.append(loss.item())
                # writer.add_scalar('Loss/train', loss.item(), global_step)
                t.set_postfix(loss='{:.6f}'.format(loss),
                              lr='%.8f' % optimizer.param_groups[0]['lr'])
                t.update(img.shape[0])  # value

                # global_step += 1
                # if global_step % (len(dataset) // (10 * batch_size)) == 0:
                #     score = evaluate(model, val_loader, n_classes, device, num_val)
                #     model.train()
                #     logging.info('Validation Dice Coeff: {}'.format(score))
                #     scheduler.step(score)
                #     if n_classes > 1:
                #         logging.info('Validation cross entropy: {}'.format(score))
                #         writer.add_scalar('Loss/test', score, global_step)
                #     else:
                #         logging.info('Validation Dice Coeff: {}'.format(score))
                #         writer.add_scalar('Dice/test', score, global_step)
                #     # writer.add_images('images', img, global_step)
                #     if n_classes == 1:
                #         writer.add_images('masks/true', mask, global_step)
                #         writer.add_images('masks/pred', torch.sigmoid(pred) > threshold, global_step)

    save_model_and_loss(model, model_root, losses, log_root)
    writer.close()
def main():
    """Entry point: parse the action flag, set up logging, seeding and the
    device, build the UNet generator / DNet discriminator pair with their
    optimizers and criteria, then dispatch to training or testing."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--action", type=str, default='test',
                            help="train or test")
    args = arg_parser.parse_args()

    config = load_config()

    # One TensorBoard run directory per invocation, keyed by timestamp
    run_stamp = datetime.now().isoformat()
    if not os.path.exists(config.RUN_PATH):
        os.mkdir(config.RUN_PATH)
    writer = SummaryWriter(log_dir=os.path.join(config.RUN_PATH, run_stamp))

    # Seed every RNG source we rely on, for reproducibility
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed,
                    np.random.seed, random.seed):
        seed_fn(config.SEED)

    # INIT GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, config.GPU))
    if torch.cuda.is_available():
        config.DEVICE = torch.device("cuda")
        print('\nGPU IS AVAILABLE')
        torch.backends.cudnn.benchmark = True
    else:
        config.DEVICE = torch.device("cpu")

    # Generator (UNet) and discriminator (DNet)
    generator = UNet(True).to(config.DEVICE)
    discriminator = DNet().to(config.DEVICE)

    # ADAM optimizers for both nets, plus the BCE / L1 criteria used for
    # the adversarial and reconstruction losses.
    def make_adam(params):
        return optim.Adam(params, betas=(0.5, 0.999), lr=config.LR)

    d_optimizer = make_adam(discriminator.parameters())
    g_optimizer = make_adam(generator.parameters())
    d_loss = nn.BCELoss()
    g_loss_bce = nn.BCELoss()
    g_loss_l1 = nn.L1Loss()

    # Load the dataset matching the requested action and dispatch
    if args.action == 'train':
        train_dataset = LABDataset(config, config.TRAIN_PATH)
        len_train = len(train_dataset)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=config.BATCH_SIZE, shuffle=True)
        iter_per_epoch = len(train_loader)
        train_(config, train_loader, generator, discriminator, d_optimizer,
               g_optimizer, d_loss, g_loss_bce, g_loss_l1, len_train,
               iter_per_epoch, writer)

    if args.action == "test":
        test_dataset = LABDataset(config, config.TEST_PATH)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=1,
                                                  shuffle=False)
        test(config, test_loader, generator, g_loss_l1)