def __init__(self, args):
    """Build everything needed for training: data loaders, model, loss,
    optimizer, poly LR schedule and evaluation metric.

    Args:
        args: parsed command-line namespace with dataset, model, and
            optimization options.
    """
    self.args = args
    # image transform: ImageNet mean/std normalization
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    data_kwargs = {'transform': input_transform,
                   'base_size': args.base_size,
                   'crop_size': args.crop_size}
    train_dataset = get_segmentation_dataset(args.dataset, split=args.train_split,
                                             mode='train', **data_kwargs)
    val_dataset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='val', **data_kwargs)
    self.train_loader = data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                                        drop_last=True, shuffle=True)
    self.val_loader = data.DataLoader(dataset=val_dataset, batch_size=1,
                                      drop_last=False, shuffle=False)
    # create network
    self.model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                        backbone=args.backbone, aux=args.aux,
                                        norm_layer=nn.BatchNorm2d).to(args.device)
    # create criterion (optional auxiliary head loss; label -1 is ignored)
    self.criterion = MixSoftmaxCrossEntropyLoss(args.aux, args.aux_weight,
                                                ignore_label=-1).to(args.device)
    # for multi-GPU
    # if torch.cuda.is_available():
    #     self.model = DataParallelModel(self.model).cuda()
    #     self.criterion = DataParallelCriterion(self.criterion).cuda()

    # resume checkpoint if needed
    if args.resume:
        if os.path.isfile(args.resume):
            name, ext = os.path.splitext(args.resume)
            # BUG FIX: `ext == '.pkl' or '.pth'` was always true because the
            # non-empty string '.pth' is truthy; use a membership test instead.
            assert ext in ('.pkl', '.pth'), 'Sorry only .pth and .pkl files supported.'
            print('Resuming training, loading {}...'.format(args.resume))
            # map_location keeps the checkpoint on CPU regardless of where it was saved
            self.model.load_state_dict(
                torch.load(args.resume, map_location=lambda storage, loc: storage))
    # optimizer
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)
    # lr scheduling: polynomial decay over all training iterations
    self.lr_scheduler = LRScheduler(mode='poly', base_lr=args.lr, nepochs=args.epochs,
                                    iters_per_epoch=len(self.train_loader), power=0.9)
    # evaluation metrics
    self.metric = SegmentationMetric(train_dataset.num_class)
    self.best_pred = 0.0
def __init__(self, args):
    """Prepare the validation loader, pretrained model and metric for evaluation."""
    self.args = args
    self.device = torch.device(args.device)

    # normalize inputs with ImageNet statistics
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # one image per batch over the whole validation split
    val_dataset = get_segmentation_dataset(args.dataset, split='val', mode='testval',
                                           transform=to_normalized_tensor)
    sampler = make_data_sampler(val_dataset, False, args.distributed)
    batch_sampler = make_batch_data_sampler(sampler, images_per_batch=1)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=batch_sampler,
                                      num_workers=args.workers,
                                      pin_memory=True)

    # pretrained segmentation network (no separately pretrained backbone)
    self.model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                        backbone=args.backbone, pretrained=True,
                                        pretrained_base=False)
    if args.distributed:
        # NOTE(review): assumes the model exposes a `.module` attribute when
        # running distributed — confirm against get_segmentation_model.
        self.model = self.model.module
    self.model.to(self.device)

    self.metric = SegmentationMetric(val_dataset.num_class)
def __init__(self, args):
    """Build the distributed-aware evaluation pipeline: loader, model, metric."""
    self.args = args
    self.device = torch.device(args.device)

    # ImageNet mean/std normalization
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # validation split, one image per batch
    val_dataset = get_segmentation_dataset(args.dataset, split='val', mode='testval',
                                           transform=normalize)
    sampler = make_data_sampler(val_dataset, False, args.distributed)
    batch_sampler = make_batch_data_sampler(sampler, images_per_batch=1)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=batch_sampler,
                                      num_workers=args.workers,
                                      pin_memory=True)

    # SyncBatchNorm is only meaningful across distributed processes
    norm_layer = nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d
    self.model = get_segmentation_model(model=args.model,
                                        dataset=args.dataset,
                                        backbone=args.backbone,
                                        aux=args.aux,
                                        pretrained=True,
                                        pretrained_base=False,
                                        local_rank=args.local_rank,
                                        norm_layer=norm_layer).to(self.device)
    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(self.model,
                                                         device_ids=[args.local_rank],
                                                         output_device=args.local_rank)
    self.model.to(self.device)

    self.metric = SegmentationMetric(val_dataset.num_class)
def demo(config):
    """Run demo inference either on a single picture or on the test split.

    Args:
        config: namespace with `outdir`, `model`, `save_folder`, `input_pic`
            and `dataset` attributes.

    BUG FIX: the original body read `args.model`, `args.save_folder`,
    `args.input_pic` and `args.dataset` even though the parameter is named
    `config` — a NameError unless a global `args` happened to exist. All
    accesses now go through the `config` parameter.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # output folder
    if not os.path.exists(config.outdir):
        os.makedirs(config.outdir)
    # ImageNet mean/std normalization
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    model = get_model(config.model, pretrained=True, root=config.save_folder).to(device)
    print('Finished loading model!')
    if config.input_pic is not None:
        # single-image mode
        image = Image.open(config.input_pic).convert('RGB')
        images = transform(image).unsqueeze(0).to(device)
        test(model, images, config.input_pic)
    else:
        # dataset mode: iterate the test split one image at a time
        test_dataset = get_segmentation_dataset(config.dataset, split='test',
                                                mode='test', transform=transform)
        test_sampler = make_data_sampler(test_dataset, True, False)
        test_batch_sampler = make_batch_data_sampler(test_sampler, images_per_batch=1)
        test_loader = data.DataLoader(dataset=test_dataset,
                                      batch_sampler=test_batch_sampler,
                                      num_workers=4, pin_memory=True)
        for i, (image, target) in enumerate(test_loader):
            image = image.to(device)  # `device` is already a torch.device
            test(model, image, ''.join(target))
def __init__(self, config):
    """Build the evaluation pipeline from a nested config dict.

    Args:
        config: dict with 'run_config', 'optim_config', 'data_config' and
            'model_config' sub-dicts.

    BUG FIX: the original referenced the undefined bare name `run_config`
    in three places (model root, pretrained flag, gpu_ids check) — a
    NameError at runtime; every access now goes through `self.run_config`.
    """
    self.config = config
    self.run_config = config['run_config']
    self.optim_config = config['optim_config']
    self.data_config = config['data_config']
    self.model_config = config['model_config']
    self.device = torch.device(self.run_config["device"])
    # image transform: ImageNet mean/std normalization
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    val_dataset = get_segmentation_dataset(
        self.data_config['dataset_name'],
        root=self.data_config['dataset_root'],
        split='test', mode='test', transform=input_transform)
    val_sampler = make_data_sampler(val_dataset, False, self.run_config['distributed'])
    val_batch_sampler = make_batch_data_sampler(val_sampler, images_per_batch=10,
                                                drop_last=False)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=4, pin_memory=True)
    # create network (SyncBatchNorm only in distributed mode)
    BatchNorm2d = nn.SyncBatchNorm if self.run_config['distributed'] else nn.BatchNorm2d
    self.model = get_segmentation_model(
        model=self.model_config['model'],
        dataset=self.data_config['dataset_name'],
        backbone=self.model_config['backbone'],
        aux=self.optim_config['aux'],
        jpu=self.model_config['jpu'],
        norm_layer=BatchNorm2d,
        root=self.run_config['path']['eval_model_root'],
        pretrained=self.run_config['eval_model'],
        pretrained_base=False,
        local_rank=self.run_config['local_rank']).to(self.device)
    if self.run_config['distributed']:
        self.model = nn.parallel.DistributedDataParallel(
            self.model,
            device_ids=[self.run_config['local_rank']],
            output_device=self.run_config['local_rank'])
    elif len(self.run_config['gpu_ids']) > 1:
        # plain DataParallel for single-node multi-GPU evaluation
        assert torch.cuda.is_available()
        self.model = nn.DataParallel(self.model)
    self.model.to(self.device)
    self.metric = SegmentationMetric(val_dataset.num_class)
def eval(args):
    """Evaluate a pretrained model on the validation split, printing running
    pixAcc/mIoU and optionally saving colorized prediction masks."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # output folder for saved masks
    outdir = 'test_result'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # ImageNet mean/std normalization
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # dataset and dataloader (one image per batch, fixed order)
    test_dataset = get_segmentation_dataset(args.dataset, split='val', mode='testval',
                                            transform=input_transform)
    test_loader = data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

    # pretrained segmentation network
    model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                   backbone=args.backbone, aux=args.aux,
                                   pretrained=True, pretrained_base=False).to(device)
    print('Finished loading model!')

    metric = SegmentationMetric(test_dataset.num_class)
    model.eval()
    for idx, (batch, gt) in enumerate(test_loader):
        batch = batch.to(device)
        with torch.no_grad():
            outputs = model(batch)
        # class index per pixel from the primary output head
        prediction = torch.argmax(outputs[0], 1).cpu().data.numpy()
        metric.update(prediction, gt.numpy())
        pixAcc, mIoU = metric.get()
        print('Sample %d, validation pixAcc: %.3f%%, mIoU: %.3f%%' % (
            idx + 1, pixAcc * 100, mIoU * 100))
        if args.save_result:
            mask = get_color_pallete(prediction.squeeze(0), args.dataset)
            mask.save(os.path.join(outdir, 'seg_{}.png'.format(idx)))
def __init__(self, args):
    """Set up validation data, the model (optionally restored from a
    checkpoint) and the evaluation metric.

    Args:
        args: parsed command-line namespace.
    """
    self.args = args
    self.device = torch.device(args.device)
    # image transform: ImageNet mean/std normalization
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size
    }
    val_dataset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='testval', **data_kwargs)
    val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    val_batch_sampler = make_batch_data_sampler(val_sampler, images_per_batch=1)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=args.workers,
                                      pin_memory=True)
    # create network (SyncBatchNorm only in distributed mode)
    BatchNorm2d = nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d
    self.model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                        backbone=args.backbone, aux=args.aux,
                                        norm_layer=BatchNorm2d).to(self.device)
    # resume checkpoint if needed
    if args.resume:
        if os.path.isfile(args.resume):
            name, ext = os.path.splitext(args.resume)
            # BUG FIX: `ext == '.pkl' or '.pth'` was always true because the
            # non-empty string '.pth' is truthy; use a membership test instead.
            assert ext in ('.pkl', '.pth'), 'Sorry only .pth and .pkl files supported.'
            print('Resuming training, loading {}...'.format(args.resume))
            self.model.load_state_dict(
                torch.load(args.resume, map_location=lambda storage, loc: storage))
    # self.model.to(self.device)
    if args.mutilgpu:
        self.model = nn.DataParallel(self.model, device_ids=args.gpu_ids)
    self.metric = SegmentationMetric(val_dataset.num_class)
def __init__(self, args):
    """Prepare iteration-based training: visualizer, data loaders, model,
    loss, optimizer, warmup-poly LR schedule and metric.

    Args:
        args: parsed command-line namespace; `iters_per_epoch` and
            `max_iters` are computed here and written back onto it.
    """
    self.args = args
    self.device = torch.device(args.device)
    # Visualizer
    self.visualizer = TensorboardVisualizer(args, sys.argv)
    # image transform: ImageNet mean/std normalization
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size
    }
    train_dataset = get_segmentation_dataset(args.dataset, split='train',
                                             mode='train', **data_kwargs)
    val_dataset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='val', **data_kwargs)
    # iteration-based training: the train sampler yields exactly max_iters batches
    args.iters_per_epoch = len(train_dataset) // (args.num_gpus * args.batch_size)
    args.max_iters = args.epochs * args.iters_per_epoch
    train_sampler = make_data_sampler(train_dataset, shuffle=True,
                                      distributed=args.distributed)
    train_batch_sampler = make_batch_data_sampler(train_sampler, args.batch_size,
                                                  args.max_iters)
    val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    val_batch_sampler = make_batch_data_sampler(val_sampler, args.batch_size)
    self.train_loader = data.DataLoader(dataset=train_dataset,
                                        batch_sampler=train_batch_sampler,
                                        num_workers=args.workers,
                                        pin_memory=True)
    self.val_loader = data.DataLoader(dataset=val_dataset,
                                      batch_sampler=val_batch_sampler,
                                      num_workers=args.workers,
                                      pin_memory=True)
    # create network (SyncBatchNorm only in distributed mode)
    BatchNorm2d = nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d
    self.model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                        backbone=args.backbone, aux=args.aux,
                                        norm_layer=BatchNorm2d).to(self.device)  # jpu=args.jpu
    # resume checkpoint if needed
    if args.resume:
        if os.path.isfile(args.resume):
            name, ext = os.path.splitext(args.resume)
            # BUG FIX: `ext == '.pkl' or '.pth'` was always true because the
            # non-empty string '.pth' is truthy; use a membership test instead.
            assert ext in ('.pkl', '.pth'), 'Sorry only .pth and .pkl files supported.'
            print('Resuming training, loading {}...'.format(args.resume))
            self.model.load_state_dict(
                torch.load(args.resume, map_location=lambda storage, loc: storage))
    # create criterion
    self.criterion = get_segmentation_loss(args.model, use_ohem=args.use_ohem,
                                           aux=args.aux, aux_weight=args.aux_weight,
                                           ignore_index=-1).to(self.device)
    # optimizer: pretrained backbone at base lr, heads ('exclusive') at 10x lr
    params_list = list()
    if hasattr(self.model, 'pretrained'):
        params_list.append({
            'params': self.model.pretrained.parameters(),
            'lr': args.lr
        })
    if hasattr(self.model, 'exclusive'):
        for module in self.model.exclusive:
            params_list.append({
                'params': getattr(self.model, module).parameters(),
                'lr': args.lr * 10
            })
    self.optimizer = torch.optim.SGD(params_list, lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)
    # lr scheduling: warmup followed by polynomial decay over max_iters
    self.lr_scheduler = WarmupPolyLR(self.optimizer,
                                     max_iters=args.max_iters,
                                     power=0.9,
                                     warmup_factor=args.warmup_factor,
                                     warmup_iters=args.warmup_iters,
                                     warmup_method=args.warmup_method)
    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(
            self.model, device_ids=[args.local_rank],
            output_device=args.local_rank)
    # evaluation metrics
    self.metric = SegmentationMetric(train_dataset.num_class)
    self.best_pred = 0.0
def train(args, model, enc=False):
    """Train `model` (full net, or encoder only when `enc=True`) on ADE20K.

    Runs an epoch loop with per-epoch validation, checkpointing of the best
    model, periodic weight snapshots, and tab-separated logging to
    `automated_log*.txt`. Returns the trained model.
    """
    # image transform: ImageNet mean/std normalization
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    data_kwargs = {
        'dataset_root': args.datadir,
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
        'encode': enc
    }
    train_dataset = get_segmentation_dataset('ade20k', split='train', mode='train',
                                             **data_kwargs)
    val_dataset = get_segmentation_dataset('ade20k', split='val', mode='val',
                                           **data_kwargs)
    train_sampler = make_data_sampler(train_dataset, shuffle=True, distributed=False)
    train_batch_sampler = make_batch_data_sampler(train_sampler, args.batch_size)
    val_sampler = make_data_sampler(val_dataset, shuffle=False, distributed=False)
    val_batch_sampler = make_batch_data_sampler(val_sampler, args.batch_size)
    loader = data.DataLoader(dataset=train_dataset,
                             batch_sampler=train_batch_sampler,
                             num_workers=args.num_workers,
                             pin_memory=True)
    loader_val = data.DataLoader(dataset=val_dataset,
                                 batch_sampler=val_batch_sampler,
                                 num_workers=args.num_workers,
                                 pin_memory=True)
    criterion = CrossEntropyLoss2d()
    print(type(criterion))

    savedir = f'../save/{args.savedir}'
    # separate log/model-dump files for the encoder-only phase
    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"
    if (not os.path.exists(automated_log_path)):  # don't add the header line if the log already exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )
    with open(modeltxtpath, "w") as myfile:
        # dump the model architecture as text for reference
        myfile.write(str(model))

    optimizer = Adam(model.parameters(), args.lr, (0.9, 0.999),
                     eps=1e-08, weight_decay=1e-4)
    start_epoch = 1
    best_acc = 0.0
    # resume from the phase-appropriate checkpoint if requested
    if args.resume:
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
        assert os.path.exists(filenameCheckpoint)
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler ## scheduler 1
    # polynomial-style decay over num_epochs (exponent 0.7)
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.7)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)  ## scheduler 2

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----", " LR",
              optimizer.param_groups[0]['lr'], "-----")
        epoch_loss = []
        time_train = []
        doIouTrain = args.iouTrain
        doIouVal = args.iouVal
        if (doIouTrain):
            iouEvalTrain = iouEval(args.NUM_CLASSES)
        usedLr = optimizer.param_groups[0]['lr']
        model.train()
        total_train_step = len(train_dataset) // args.batch_size
        total_val_step = len(val_dataset) // args.batch_size
        for step, (images, labels, _) in enumerate(loader):
            start_time = time.time()
            imgs_batch = images.shape[0]
            # skip the (partial) last batch
            if imgs_batch != args.batch_size:
                break
            # NOTE(review): `inputs`/`targets` are only assigned under
            # args.cuda — CPU-only runs would hit a NameError below; confirm
            # args.cuda is always set in supported configurations.
            if args.cuda:
                inputs = images.cuda()
                targets = labels.cuda()
            outputs = model(inputs, only_encode=enc)
            optimizer.zero_grad()
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # NOTE(review): scheduler is stepped once per *batch* with the
            # epoch index, not once per epoch — verify this is intentional.
            scheduler.step(epoch)  ## scheduler 2
            epoch_loss.append(loss.item())
            time_train.append(time.time() - start_time)
            if (doIouTrain):
                targets = torch.unsqueeze(targets, 1)
                iouEvalTrain.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step}/{total_train_step})',
                    "// Remaining time: %.1f s" %
                    ((total_train_step - step) * sum(time_train) / len(time_train)))
        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        iouTrain = 0
        if (doIouTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            print("EPOCH IoU on TRAIN set: ", iouTrain.item() * 100, "%")

        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []
        if (doIouVal):
            iouEvalVal = iouEval(args.NUM_CLASSES)
        for step, (images, labels, _) in enumerate(loader_val):
            start_time = time.time()
            imgs_batch = images.shape[0]
            if imgs_batch != args.batch_size:
                break
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            with torch.no_grad():
                inputs = Variable(images)
                targets = Variable(labels)
                outputs = model(inputs, only_encode=enc)
                loss = criterion(outputs, targets)
                epoch_loss_val.append(loss.item())
                time_val.append(time.time() - start_time)
            # Add batch to calculate TP, FP and FN for iou estimation
            if (doIouVal):
                targets = torch.unsqueeze(targets, 1)
                iouEvalVal.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(
                    f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step}/{total_val_step})',
                    "// Remaining time: %.1f s" %
                    ((total_val_step - step) * sum(time_val) / len(time_val)))
        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        # scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1 # update lr if needed

        iouVal = 0
        if (doIouVal):
            iouVal, iou_classes = iouEvalVal.getIoU()
            print("EPOCH IoU on VAL set: ", iouVal.item() * 100, "%")

        # remember best valIoU and save checkpoint; when IoU is disabled the
        # negated validation loss stands in as the selection score
        if iouVal == 0:
            current_acc = -average_epoch_loss_val
        else:
            current_acc = iouVal
        print('best acc:', best_acc, ' current acc:', current_acc.item())
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, filenameCheckpoint, filenameBest)

        # SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        # NOTE(review): this condition reuses `step` left over from the
        # validation loop, and the printed message looks garbled (likely
        # meant f'save: {filename} ...') — confirm against the upstream code.
        if args.epochs_save > 0 and step > 0 and step % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: (unknown) (epoch: {epoch})')
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            if (not enc):
                with open(savedir + "/best.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                 (epoch, iouVal))
            else:
                with open(savedir + "/best_encoder.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                 (epoch, iouVal))

        # append one tab-separated row per epoch to the automated log
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))
    return (model)
def __init__(self, args, logger):
    """Prepare iteration-based training with TensorBoard events and a
    MetricLogger: data loaders, model, loss, optimizer, LR schedule, metric.

    Args:
        args: parsed command-line namespace; `iters_per_epoch` and
            `max_iters` are computed here and written back onto it.
        logger: project logger instance, stored for later use.
    """
    self.args = args
    self.logger = logger
    # only rank 0 writes TensorBoard events
    if get_rank() == 0:
        TBWriter.init(
            os.path.join(args.project_dir, args.task_dir, "tbevents")
        )
    self.device = torch.device(args.device)
    self.meters = MetricLogger(delimiter="  ")
    # image transform: ImageNet mean/std normalization
    input_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(
                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
            ),
        ]
    )
    # dataset and dataloader
    data_kwargs = {
        "transform": input_transform,
        "base_size": args.base_size,
        "crop_size": args.crop_size,
        "root": args.dataroot,
    }
    train_dataset = get_segmentation_dataset(
        args.dataset, split="train", mode="train", **data_kwargs
    )
    val_dataset = get_segmentation_dataset(
        args.dataset, split="val", mode="val", **data_kwargs
    )
    # iteration-based training: the train sampler yields exactly max_iters batches
    args.iters_per_epoch = len(train_dataset) // (
        args.num_gpus * args.batch_size
    )
    args.max_iters = args.epochs * args.iters_per_epoch
    train_sampler = make_data_sampler(
        train_dataset, shuffle=True, distributed=args.distributed
    )
    train_batch_sampler = make_batch_data_sampler(
        train_sampler, args.batch_size, args.max_iters
    )
    val_sampler = make_data_sampler(val_dataset, False, args.distributed)
    val_batch_sampler = make_batch_data_sampler(
        val_sampler, args.batch_size
    )
    self.train_loader = data.DataLoader(
        dataset=train_dataset,
        batch_sampler=train_batch_sampler,
        num_workers=args.workers,
        pin_memory=True,
    )
    self.val_loader = data.DataLoader(
        dataset=val_dataset,
        batch_sampler=val_batch_sampler,
        num_workers=args.workers,
        pin_memory=True,
    )
    # create network (SyncBatchNorm only in distributed mode)
    BatchNorm2d = nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d
    self.model = get_segmentation_model(
        model=args.model,
        dataset=args.dataset,
        backbone=args.backbone,
        aux=args.aux,
        jpu=args.jpu,
        norm_layer=BatchNorm2d,
    ).to(self.device)
    # resume checkpoint if needed
    if args.resume:
        if os.path.isfile(args.resume):
            name, ext = os.path.splitext(args.resume)
            # BUG FIX: `ext == ".pkl" or ".pth"` was always true because the
            # non-empty string ".pth" is truthy; use a membership test instead.
            assert ext in (".pkl", ".pth"), "Sorry only .pth and .pkl files supported."
            print("Resuming training, loading {}...".format(args.resume))
            self.model.load_state_dict(
                torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage
                )
            )
    # create criterion
    self.criterion = get_segmentation_loss(
        args.model,
        use_ohem=args.use_ohem,
        aux=args.aux,
        aux_weight=args.aux_weight,
        ignore_index=-1,
    ).to(self.device)
    # optimizer: pretrained backbone at base lr, heads ('exclusive') at lr * lr_scale
    params_list = list()
    if hasattr(self.model, "pretrained"):
        params_list.append(
            {"params": self.model.pretrained.parameters(), "lr": args.lr}
        )
    if hasattr(self.model, "exclusive"):
        for module in self.model.exclusive:
            params_list.append(
                {
                    "params": getattr(self.model, module).parameters(),
                    "lr": args.lr * args.lr_scale,
                }
            )
    self.optimizer = torch.optim.SGD(
        params_list,
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
    # lr scheduling
    self.lr_scheduler = get_lr_scheduler(self.optimizer, args)
    if args.distributed:
        self.model = nn.parallel.DistributedDataParallel(
            self.model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
        )
    # evaluation metrics
    self.metric = SegmentationMetric(train_dataset.num_class)
    self.best_pred = 0.0