def test(args):
    """Load a checkpoint into a segmentation model and re-save its weights.

    Builds the evaluation dataset, its data loader and the model, loads
    ``checkpoint['state_dict']`` from ``args.resume`` into the model, and
    writes a new checkpoint named ``DANet101_reduce.pth.tar`` that contains
    only the ``state_dict`` entry.

    Args:
        args: parsed options. This function reads ``eval``, ``dataset``,
            ``workers``, ``cuda``, ``test_batch_size``, ``model_zoo``,
            ``model``, ``backbone``, ``aux``, ``se_loss``, ``base_size``,
            ``crop_size`` and ``resume``.

    Raises:
        RuntimeError: if ``args.resume`` is ``None`` or is not a file.
    """
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # dataset: validation split when evaluating, test split otherwise
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='testval',
                                           transform=input_transform)
    else:
        testset = get_segmentation_dataset(args.dataset, split='test',
                                           mode='test',
                                           transform=input_transform)

    # dataloader (worker / pinned-memory options only when CUDA is requested)
    loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)

    # model
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone, aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)

    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    # Loaded with the default key matching (no ``strict=False`` is passed).
    model.load_state_dict(checkpoint['state_dict'])

    # Re-save a reduced checkpoint holding only the weights.
    # The original call referenced ``self.args`` and ``is_best``, neither of
    # which exists in this function. ``False`` is passed because nothing is
    # evaluated here.
    # NOTE(review): assumes utils.save_checkpoint(state, args, is_best,
    # filename) -- confirm against utils.
    utils.save_checkpoint({
        'state_dict': checkpoint['state_dict'],
    }, args, False, 'DANet101_reduce.pth.tar')
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
def test_dataset(dataset_name):
    """Scan a dataset's targets and assert that every label is in range.

    Iterates the ``val`` split (loaded in ``train`` mode) in shuffled batches
    of 16 and asserts, per batch, that the smallest label is at least -1 and
    that the largest label seen so far is below ``trainset.NUM_CLASS``.

    Args:
        dataset_name: name passed to ``get_segmentation_dataset``.
    """
    normalize = transform.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transform.Compose([transform.ToTensor(), normalize])
    trainset = get_segmentation_dataset(dataset_name, split='val',
                                        mode='train',
                                        transform=input_transform)
    trainloader = data.DataLoader(trainset, batch_size=16, drop_last=True,
                                  shuffle=True)
    progress = tqdm(trainloader)

    # Running maximum over all batches; starts below any accepted label.
    max_label = -10
    for batch_idx, (_, target) in enumerate(progress):
        batch_max = target.max().item()
        batch_min = target.min().item()
        assert batch_min >= -1
        if batch_max > max_label:
            max_label = batch_max
        assert max_label < trainset.NUM_CLASS
        progress.set_description(
            "Batch %d, max label %d" % (batch_idx, max_label))
def __init__(self, args):
    """Set up data loaders, model, optimizer, criterion and LR scheduler.

    When ``args.resume`` names a checkpoint file, the model weights, the
    start epoch, the best score and (unless fine-tuning) the optimizer state
    are restored from it.

    Args:
        args: parsed options. ``args.checkname`` gets an ``"_se"`` suffix
            when ``args.se_loss`` is set, and ``args.start_epoch`` is
            overwritten when resuming or fine-tuning.

    Raises:
        RuntimeError: if ``args.resume`` is non-empty but is not a file.
    """
    if args.se_loss:
        args.checkname = args.checkname + "_se"
    self.args = args

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])])

    # dataset
    data_kwargs = {'transform': input_transform,
                   'base_size': args.base_size,
                   'crop_size': args.crop_size}
    trainset = get_segmentation_dataset(args.dataset, split='train',
                                        mode='train', **data_kwargs)
    testset = get_segmentation_dataset(args.dataset, split='val',
                                       mode='val', **data_kwargs)

    # dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': False} \
        if args.cuda else {}
    self.trainloader = data.DataLoader(trainset, batch_size=args.batch_size,
                                       drop_last=True, shuffle=True, **kwargs)
    self.valloader = data.DataLoader(testset, batch_size=args.batch_size,
                                     drop_last=False, shuffle=False, **kwargs)
    self.nclass = trainset.num_class

    # model
    model = get_segmentation_model(args.model, dataset=args.dataset,
                                   backbone=args.backbone, aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=BatchNorm2d,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size)
    print(model)

    # count parameter number
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Total number of parameters: %d" % pytorch_total_params)

    # optimizer using different LR: the optional ``head`` and ``auxlayer``
    # sub-modules get 10x the base learning rate when ``args.diflr`` is set.
    params_list = [{'params': model.pretrained.parameters(), 'lr': args.lr}]
    for attr in ('head', 'auxlayer'):
        if hasattr(model, attr):
            group_lr = args.lr * 10 if args.diflr else args.lr
            params_list.append({'params': getattr(model, attr).parameters(),
                                'lr': group_lr})
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # criterions
    self.criterion = SegmentationLosses(se_loss=args.se_loss, aux=args.aux,
                                        nclass=self.nclass)
    self.model, self.optimizer = model, optimizer

    # using cuda
    if args.cuda:
        self.model = DataParallelModel(self.model).cuda()
        self.criterion = DataParallelCriterion(self.criterion).cuda()

    # Default best score. It must be set BEFORE resuming: the original
    # assigned 0.0 after the resume block and so discarded the value
    # restored from the checkpoint.
    self.best_pred = 0.0

    # resuming checkpoint
    if args.resume is not None and len(args.resume) > 0:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'"
                               .format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # The model and the checkpoint may have different structures, so
            # copy only the entries whose names exist in the wrapped model.
            pretrained_dict = checkpoint['state_dict']
            model_dict = self.model.module.state_dict()
            for name, param in pretrained_dict.items():
                if name not in model_dict:
                    continue
                if isinstance(param, Parameter):
                    # backwards compatibility for serialized parameters
                    param = param.data
                model_dict[name].copy_(param)
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))

    # clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0

    # lr scheduler
    self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr,
                                        args.epochs, len(self.trainloader),
                                        lr_step=args.lr_step)
def test(args):
    """Evaluate a segmentation model or dump colourised predictions.

    With ``args.eval`` set, accumulates pixel accuracy and IoU over the
    validation split. Otherwise predicted masks are written as PNG files
    under ``'<dataset>/msdanet_vis'``.

    Args:
        args: parsed options. This function reads ``dataset``, ``eval``,
            ``workers``, ``cuda``, ``test_batch_size``, ``model_zoo``,
            ``model``, ``backbone``, ``aux``, ``se_loss``, ``base_size``,
            ``crop_size``, ``multi_grid``, ``multi_dilation``, ``resume``
            and ``multi_scales``.

    Returns:
        Tuple ``(pixAcc, mIoU, IoU, num_class)``. The three metrics are 0
        when not in eval mode or when the loader yields no batches.

    Raises:
        RuntimeError: if ``args.resume`` is ``None`` or is not a file.
    """
    # output folder
    outdir = '%s/msdanet_vis' % (args.dataset)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])])

    # dataset
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='testval',
                                           transform=input_transform)
    else:
        # set split='test' for test set
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='vis',
                                           transform=input_transform)

    # dataloader
    loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)

    # model
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone, aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size,
                                       multi_grid=args.multi_grid,
                                       multi_dilation=args.multi_dilation)

    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    # strict=False, so that it is compatible with old pytorch saved models
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    print(model)

    num_class = testset.num_class
    evaluator = MultiEvalModule(model, testset.num_class,
                                multi_scales=args.multi_scales).cuda()
    evaluator.eval()

    tbar = tqdm(test_data)

    def eval_batch(image, dst, evaluator, eval_mode):
        """Run one batch; return (correct, labeled, inter, union) counts."""
        if eval_mode:
            # evaluation mode on validation set: ``dst`` holds the targets
            targets = dst
            outputs = evaluator.parallel_forward(image)
            batch_inter, batch_union, batch_correct, batch_label = 0, 0, 0, 0
            for output, target in zip(outputs, targets):
                correct, labeled = utils.batch_pix_accuracy(
                    output.data.cpu(), target)
                inter, union = utils.batch_intersection_union(
                    output.data.cpu(), target, testset.num_class)
                batch_correct += correct
                batch_label += labeled
                batch_inter += inter
                batch_union += union
            return batch_correct, batch_label, batch_inter, batch_union

        # Visualize and dump the results: ``dst`` holds the image paths
        im_paths = dst
        outputs = evaluator.parallel_forward(image)
        predicts = [
            torch.max(output, 1)[1].cpu().numpy() + testset.pred_offset
            for output in outputs
        ]
        for predict, impath in zip(predicts, im_paths):
            mask = utils.get_mask_pallete(predict, args.dataset)
            outname = os.path.splitext(impath)[0] + '.png'
            mask.save(os.path.join(outdir, outname))
        # dummy outputs for compatibility with eval mode
        return 0, 0, 0, 0

    total_inter, total_union, total_correct, total_label = \
        np.int64(0), np.int64(0), np.int64(0), np.int64(0)
    # Initialised before the loop so the return below is defined even when
    # the loader yields no batches (the original bound these names only
    # inside the loop body).
    pixAcc, mIoU, IoU = 0, 0, 0
    for i, (image, dst) in enumerate(tbar):
        if torch_ver == "0.3":
            image = Variable(image, volatile=True)
            correct, labeled, inter, union = eval_batch(
                image, dst, evaluator, args.eval)
        else:
            with torch.no_grad():
                correct, labeled, inter, union = eval_batch(
                    image, dst, evaluator, args.eval)
        if args.eval:
            total_correct += correct.astype('int64')
            total_label += labeled.astype('int64')
            total_inter += inter.astype('int64')
            total_union += union.astype('int64')
            # np.spacing(1) keeps the denominators non-zero
            pixAcc = np.float64(1.0) * total_correct / (
                np.spacing(1, dtype=np.float64) + total_label)
            IoU = np.float64(1.0) * total_inter / (
                np.spacing(1, dtype=np.float64) + total_union)
            mIoU = IoU.mean()
            tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
    return pixAcc, mIoU, IoU, num_class
def test(args):
    """Evaluate a segmentation model or write its predictions as PNG masks.

    In eval mode (``args.eval``) the running pixel accuracy and mIoU are
    shown on the progress bar. Otherwise each prediction is colourised and
    saved under ``outdir`` using the input image's base name.

    Args:
        args: parsed options. This function reads ``data_root``, ``eval``,
            ``test_val``, ``dataset``, ``workers``, ``cuda``,
            ``test_batch_size``, ``model_zoo``, ``model``, ``backbone``,
            ``aux``, ``se_loss``, ``base_size``, ``crop_size`` and
            ``resume``.

    Raises:
        RuntimeError: if ``args.resume`` is ``None`` or is not a file.
    """
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    normalize = transform.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transform.Compose([transform.ToTensor(), normalize])

    # dataset: pick the split / mode pair first, then build it once
    data_kwargs = {'root': args.data_root}
    if args.eval:
        split, mode = 'val', 'testval'
    elif args.test_val:
        split, mode = 'val', 'test'
    else:
        split, mode = 'test', 'test'
    testset = get_segmentation_dataset(args.dataset, split=split, mode=mode,
                                       transform=input_transform,
                                       **data_kwargs)

    # dataloader
    if args.cuda:
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}
    else:
        loader_kwargs = {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)

    # model
    if args.model_zoo is None:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone, aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=SyncBatchNorm,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        # resuming checkpoint
        resume_path = args.resume
        if resume_path is None or not os.path.isfile(resume_path):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(resume_path)
        # Loaded with the default key matching (no ``strict=False`` here).
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    else:
        model = get_model(args.model_zoo, pretrained=True)

    print(model)
    # single-scale evaluation
    scales = [1.0]
    evaluator = MultiEvalModule(model, testset.num_class,
                                scales=scales).cuda()
    evaluator.eval()
    metric = utils.SegmentationMetric(testset.num_class)

    tbar = tqdm(test_data)
    for _, (image, dst) in enumerate(tbar):
        if args.eval:
            # ``dst`` holds the targets
            with torch.no_grad():
                predicts = evaluator.parallel_forward(image)
                metric.update(dst, predicts)
                pixAcc, mIoU = metric.get()
                tbar.set_description(
                    'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
            continue

        # ``dst`` holds the image paths
        with torch.no_grad():
            outputs = evaluator.parallel_forward(image)
            predicts = [
                testset.make_pred(torch.max(output, 1)[1].cpu().numpy())
                for output in outputs
            ]
        for predict, impath in zip(predicts, dst):
            mask = utils.get_mask_pallete(predict, args.dataset)
            stem = os.path.splitext(impath)[0]
            mask.save(os.path.join(outdir, stem + '.png'))
def test(args):
    """Evaluate a segmentation model and save colourised masks as PNG files.

    In eval mode, pixel accuracy and mIoU are accumulated over the
    validation split, and both the prediction and the ground truth of every
    image are saved under ``outdir``. In test mode only predictions are
    saved.

    Args:
        args: parsed options. This function reads ``eval``, ``dataset``,
            ``workers``, ``cuda``, ``test_batch_size``, ``model_zoo``,
            ``model``, ``backbone``, ``aux``, ``se_loss``, ``base_size``,
            ``crop_size`` and ``resume``.

    Raises:
        RuntimeError: if ``args.resume`` is ``None`` or is not a file.
    """
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    normalize = transform.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transform.Compose([transform.ToTensor(), normalize])

    # dataset
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='val',
                                           transform=input_transform,
                                           return_file=True)
    else:
        testset = get_segmentation_dataset(args.dataset, split='test',
                                           mode='test',
                                           transform=input_transform)

    # dataloader
    if args.cuda:
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}
    else:
        loader_kwargs = {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)

    # model
    if args.model_zoo is None:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone, aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        # resuming checkpoint
        if args.resume is None or not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        # Copy only the checkpoint entries whose names exist in the model,
        # writing in place into the tensors returned by state_dict().
        own_state = model.state_dict()
        for key, value in checkpoint['state_dict'].items():
            if key in own_state:
                if isinstance(value, Parameter):
                    # backwards compatibility for serialized parameters
                    value = value.data
                own_state[key].copy_(value)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    else:
        model = get_model(args.model_zoo, pretrained=True)

    print(model)
    # count parameter number
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Total number of parameters: %d" % pytorch_total_params)

    evaluator = MultiEvalModule(model, testset.num_class).cuda()
    evaluator.eval()

    tbar = tqdm(test_data)

    def save_palette_png(label_map, impath, suffix):
        """Colourise ``label_map`` and save it next to the image base name."""
        picture = utils.get_mask_pallete(label_map, args.dataset)
        outname = os.path.splitext(impath)[0] + suffix
        picture.save(os.path.join(outdir, outname))

    def eval_batch(image, dst, im_paths, evaluator, eval_mode):
        """Run one batch; return (correct, labeled, inter, union) counts."""
        if not eval_mode:
            # test mode, dump the results: ``dst`` carries the image paths
            im_paths = dst
            outputs = evaluator.parallel_forward(image)
            predicts = [torch.max(output, 1)[1].cpu().numpy()
                        for output in outputs]
            for predict, impath in zip(predicts, im_paths):
                save_palette_png(predict, impath, '.png')
            # dummy outputs for compatibility with eval mode
            return 0, 0, 0, 0

        # evaluation mode on validation set: ``dst`` carries the targets
        targets = dst
        outputs = evaluator.parallel_forward(image)
        batch_inter, batch_union, batch_correct, batch_label = 0, 0, 0, 0
        for output, target in zip(outputs, targets):
            correct, labeled = utils.batch_pix_accuracy(
                output.data.cpu(), target)
            inter, union = utils.batch_intersection_union(
                output.data.cpu(), target, testset.num_class)
            batch_correct += correct
            batch_label += labeled
            batch_inter += inter
            batch_union += union
        # save outputs
        predicts = [torch.max(output, 1)[1].cpu().numpy()
                    for output in outputs]
        for predict, impath, target in zip(predicts, im_paths, targets):
            save_palette_png(predict, impath, '.png')
            # save ground truth into png format
            save_palette_png(target.data.cpu().numpy(), impath,
                             '_gtruth.png')
        return batch_correct, batch_label, batch_inter, batch_union

    total_inter = np.int64(0)
    total_union = np.int64(0)
    total_correct = np.int64(0)
    total_label = np.int64(0)
    for _, (image, dst, img_paths) in enumerate(tbar):
        if torch_ver == "0.3":
            image = Variable(image, volatile=True)
            correct, labeled, inter, union = eval_batch(
                image, dst, img_paths, evaluator, args.eval)
        else:
            with torch.no_grad():
                correct, labeled, inter, union = eval_batch(
                    image, dst, img_paths, evaluator, args.eval)
        if not args.eval:
            continue
        total_correct += correct.astype('int64')
        total_label += labeled.astype('int64')
        total_inter += inter.astype('int64')
        total_union += union.astype('int64')
        # np.spacing(1) keeps the denominators non-zero
        eps = np.spacing(1, dtype=np.float64)
        pixAcc = np.float64(1.0) * total_correct / (eps + total_label)
        IoU = np.float64(1.0) * total_inter / (eps + total_union)
        mIoU = IoU.mean()
        tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
def __init__(self, args):
    """Set up logger, data loaders, model, optimizer, loss and scheduler.

    When ``args.resume`` is set, the model weights, the start epoch, the
    best score and (unless fine-tuning) the optimizer state are restored
    from that checkpoint.

    Args:
        args: parsed options. ``args.log_name`` and ``args.log_root`` are
            rewritten here, and ``args.start_epoch`` is overwritten when
            resuming.

    Raises:
        RuntimeError: if ``args.resume`` is set but is not a file.
    """
    self.args = args
    args.log_name = str(args.checkname)
    # logs are placed under <dataset>/<log_root>
    args.log_root = os.path.join(args.dataset, args.log_root)
    self.logger = utils.create_logger(args.log_root, args.log_name)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])])

    # dataset
    data_kwargs = {'transform': input_transform,
                   'base_size': args.base_size,
                   'crop_size': args.crop_size,
                   'logger': self.logger,
                   'scale': args.scale}
    trainset = get_segmentation_dataset(args.dataset, split='trainval',
                                        mode='trainval', **data_kwargs)
    testset = get_segmentation_dataset(args.dataset, split='val',
                                       mode='val',  # crop fixed size as model input
                                       **data_kwargs)

    # dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    self.trainloader = data.DataLoader(trainset, batch_size=args.batch_size,
                                       drop_last=True, shuffle=True, **kwargs)
    self.valloader = data.DataLoader(testset, batch_size=args.batch_size,
                                     drop_last=False, shuffle=False, **kwargs)
    self.nclass = trainset.num_class

    # model
    model = get_segmentation_model(args.model, dataset=args.dataset,
                                   backbone=args.backbone,
                                   norm_layer=BatchNorm2d,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size)
    self.logger.info(model)

    # optimizer using different LR: the optional head gets 10x the base rate
    params_list = [{'params': model.pretrained.parameters(), 'lr': args.lr}]
    if hasattr(model, 'head'):
        print("this model has object, head")
        params_list.append({'params': model.head.parameters(),
                            'lr': args.lr * 10})
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    self.criterion = SegmentationLosses(nclass=self.nclass)
    self.model, self.optimizer = model, optimizer

    # using cuda
    if args.cuda:
        self.model = DataParallelModel(self.model).cuda()
        self.criterion = DataParallelCriterion(self.criterion).cuda()

    # Default best score. It must be set BEFORE resuming: the original
    # assigned 0.0 after the resume block and so discarded the value
    # restored from the checkpoint.
    self.best_pred = 0.0

    # resuming checkpoint
    if args.resume:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'"
                               .format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            # the wrapped model is reached through ``.module``
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # lr scheduler
    self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr,
                                        args.epochs, len(self.trainloader),
                                        logger=self.logger,
                                        lr_step=args.lr_step)
def main():
    """Train a segmentation model and log the run to TensorBoard.

    Reads the configuration from ``parse_args()``, seeds the RNGs, builds the
    datasets, loaders, model, optimizer, loss and LR scheduler, optionally
    restores a checkpoint, then runs the train / validate / predict loop.
    The model is saved to ``<exp_dir>/model.pth`` each time the validation
    IoU improves.

    Raises:
        NotImplementedError: if ``config['optimizer']`` is neither ``"Adam"``
            nor ``'SGD'``.
    """
    config = vars(parse_args())
    seed = config['random_seed']
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    if config['name'] is None:
        config['name'] = '%s_%s' % (config['arch'], config['dataset'])
    cur_time = time.strftime("%Y%m%d_%H%M%S", time.localtime())[2:]
    # TODO: reconsider the naming scheme of the experiment directory
    exp_dir = os.path.join(
        sys.path[0], 'exps', config['name'],
        cur_time + "_" + config["optimizer"]
        + "_lr_" + '{:0.0e}_'.format(config["lr"])
        + "wd_" + '{:0.0e}'.format(config["weight_decay"])
        + '_fa_' + str(config['fuse_attention']))

    print('-' * 20)
    for key, value in config.items():
        print('%s:%s' % (key, value))
    print('-' * 20)

    # A missing (None) or negative gpu id means: run on the CPU.
    use_cuda = config['gpu id'] is not None and config['gpu id'] >= 0
    if use_cuda:
        device = torch.device("cuda")
        os.environ["CUDA_VISIBLE_DEVICES"] = str(config['gpu id'])
        print(os.environ["CUDA_VISIBLE_DEVICES"])
    else:
        device = torch.device("cpu")
    # reportedly speeds things up
    cudnn.benchmark = True

    # Load the dataset (only VOC is available at the moment).
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    data_kwargs = {'transform': input_transform,
                   'root': config['data_dir'],
                   'base_size': config["base_size"],
                   'crop_size': config["crop_size"],
                   'logger': None,
                   'scale': config["scale"]}
    trainset = get_segmentation_dataset(config["dataset"], split='train',
                                        mode='train', **data_kwargs)
    testset = get_segmentation_dataset(config["dataset"], split='val',
                                       mode='val', **data_kwargs)

    # dataloader
    # The original tested ``config['gpu id'] >= 0`` here, which raises
    # TypeError when the id is None; reuse the None-safe flag instead.
    kwargs = {'num_workers': config["num_workers"], 'pin_memory': True} \
        if use_cuda else {}
    train_iter = data.DataLoader(trainset, batch_size=config["batch_size"],
                                 drop_last=True, shuffle=True, **kwargs)
    val_iter = data.DataLoader(testset, batch_size=config["batch_size"],
                               drop_last=False, shuffle=False, **kwargs)
    num_classes = trainset.num_class
    # NOTE: gradient accumulation is not implemented here; its interaction
    # with batch-norm layers has not been considered.

    # create model
    model_kwargs = {'fuse_attention': config['fuse_attention']}
    print("=> creating model %s" % config['arch'])
    model = archs.__dict__[config['arch']](
        num_classes=num_classes,
        input_channels=config['input_channels'],
        **model_kwargs)
    model = model.to(device)
    print("training on", device)

    # optimize only the parameters that require gradients
    params = filter(lambda p: p.requires_grad, model.parameters())
    if config['optimizer'] == "Adam":
        optimizer = torch.optim.Adam(
            params, lr=config['lr'], weight_decay=config['weight_decay'])
    elif config['optimizer'] == 'SGD':
        optimizer = torch.optim.SGD(
            params, lr=config['lr'], momentum=config['momentum'],
            weight_decay=config['weight_decay'])
    else:
        raise NotImplementedError

    # loss function
    criterion = nn.CrossEntropyLoss()

    # learning-rate policy
    scheduler = LR_Scheduler(config['scheduler'], base_lr=config['lr'],
                             num_epochs=config['epochs'],
                             iters_per_epoch=len(train_iter))

    # Create the directory that stores the experiment results; the config
    # file is written after the writer has been constructed on ``exp_dir``.
    writer = SummaryWriter(exp_dir)
    with open(os.path.join(exp_dir, 'config.yml'), 'w') as f:
        yaml.dump(config, f)
    X, _ = next(iter(train_iter))
    writer.add_graph(model, X.to(device))

    best_iou = 0
    epoch_begin = 0
    if config['checkpoint_PATH'] is None:
        # Look at the outputs before any training (logged as epoch -1).
        # The validation values are written under both the "train" and the
        # "val" keys.
        epoch = -1
        predict(model, exp_dir, epoch, config, device, writer)
        val_log = validate(config, val_iter, model, criterion, device)
        writer.add_scalars('0_Loss', {"train": val_log['loss'],
                                      "val": val_log['loss']}, epoch)
        writer.add_scalars('1_mIoU', {"train": val_log['iou'],
                                      "val": val_log['iou']}, epoch)
        writer.add_scalar("1_mIoU/best_iou", val_log['iou'], epoch)
        writer.add_scalars('2_Acc_cls', {"train": val_log['acc_cls'],
                                         "val": val_log['acc_cls']}, epoch)
        writer.add_scalars('3_Acc', {"train": val_log['acc'],
                                     "val": val_log['acc']}, epoch)
    elif config['only_read_model']:
        model, _, _, _, _ = load_checkpoint(model, config['checkpoint_PATH'])
    else:
        model, epoch_begin, best_iou, optimizer, scheduler = load_checkpoint(
            model, config['checkpoint_PATH'], epoch_begin, best_iou,
            optimizer, scheduler)

    # the actual training starts here
    for epoch in range(epoch_begin, config['epochs']):
        print('Epoch [%d/%d]' % (epoch, config['epochs']))

        # train for one epoch
        train_log = train(config, train_iter, model, criterion, optimizer,
                          scheduler, best_iou, epoch, device)
        val_log = validate(config, val_iter, model, criterion, device)
        predict(model, exp_dir, epoch, config, device, writer)

        print('loss %.4f - iou %.4f - val_loss %.4f - val_iou %.4f'
              % (train_log['loss'], train_log['iou'],
                 val_log['loss'], val_log['iou']))

        if val_log['iou'] > best_iou:
            best_iou = val_log['iou']
            torch.save({'epoch': epoch,
                        'state_dict': model.state_dict(),
                        'best_iou': best_iou,
                        'optimizer': optimizer.state_dict()},
                       os.path.join(exp_dir, 'model.pth'))
            print("=> saved best model")

        writer.add_scalars('0_Loss', {"train": train_log['loss'],
                                      "val": val_log['loss']}, epoch)
        writer.add_scalar('0_Loss/LR', optimizer.param_groups[0]['lr'], epoch)
        writer.add_scalars('1_mIoU', {"train": train_log['iou'],
                                      "val": val_log['iou']}, epoch)
        writer.add_scalar("1_mIoU/best_iou", best_iou, epoch)
        writer.add_scalars('2_Acc_cls', {"train": train_log['acc_cls'],
                                         "val": val_log['acc_cls']}, epoch)
        writer.add_scalars('3_Acc', {"train": train_log['acc'],
                                     "val": val_log['acc']}, epoch)

        torch.cuda.empty_cache()

    # flush pending events and release the event file
    writer.close()
def test(args):
    """Run a segmentation model over a dataset split (forward passes only).

    Builds the dataset selected by ``args.split`` / ``args.mode``, loads the
    model and checkpoint, and runs the multi-scale evaluator on every batch.
    Metric accumulation and result dumping are currently disabled, so the
    per-batch outputs are not used any further.

    Args:
        args: parsed options. This function reads ``save_folder``,
            ``dataset``, ``split``, ``mode``, ``workers``, ``cuda``,
            ``test_batch_size``, ``model_zoo``, ``model``, ``backbone``,
            ``dilated``, ``multi_grid``, ``stride``, ``lateral``, ``jpu``,
            ``aux``, ``se_loss``, ``base_size``, ``crop_size``, ``resume``
            and ``ms``.

    Raises:
        RuntimeError: if ``args.resume`` is ``None`` or is not a file.
    """
    # output folder
    outdir = args.save_folder
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    normalize = transform.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transform.Compose([transform.ToTensor(), normalize])

    # dataset
    testset = get_segmentation_dataset(args.dataset, split=args.split,
                                       mode=args.mode,
                                       transform=input_transform)

    # dataloader
    if args.cuda:
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}
    else:
        loader_kwargs = {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)

    # model
    if args.model_zoo is None:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone,
                                       dilated=args.dilated,
                                       multi_grid=args.multi_grid,
                                       stride=args.stride,
                                       lateral=args.lateral,
                                       jpu=args.jpu,
                                       aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        # resuming checkpoint
        if args.resume is None or not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        # strict=False, so that it is compatible with old pytorch saved models
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    else:
        model = get_model(args.model_zoo, pretrained=True)

    # Evaluation scales: a single scale unless multi-scale testing is on.
    if args.dataset == 'citys':
        scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25]
    else:
        scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    if not args.ms:
        scales = [1.0]
    evaluator = MultiEvalModule(model, testset.num_class, scales=scales,
                                flip=args.ms).cuda()
    evaluator.eval()
    metric = utils.SegmentationMetric(testset.num_class)

    tbar = tqdm(test_data)

    # Accumulators kept from the disabled per-image metric code.
    total_inter = total_union = total_correct = total_label = all_label = 0

    for _, (image, dst) in enumerate(tbar):
        if 'val' in args.mode:
            # metric.update / progress-bar reporting is currently disabled
            with torch.no_grad():
                predicts = evaluator.parallel_forward(image)
        else:
            with torch.no_grad():
                outputs = evaluator.parallel_forward(image)
def test(args):
    """Time forward passes of a segmentation model on the GPU.

    Builds the dataset, loader and model, then runs 100 forward passes on a
    random ``1 x 3 x 512 x 512`` input, printing each elapsed time. The first
    measurement is dropped before the mean is printed. No checkpoint is
    loaded.

    Args:
        args: parsed options. This function reads ``eval``, ``dataset``,
            ``workers``, ``cuda``, ``test_batch_size``, ``model_zoo``,
            ``model``, ``backbone``, ``aux``, ``se_loss``, ``base_size``
            and ``crop_size``.
    """
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    normalize = transform.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transform.Compose([transform.ToTensor(), normalize])

    # dataset
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='val',
                                           transform=input_transform)
    else:
        testset = get_segmentation_dataset(args.dataset, split='test',
                                           mode='test',
                                           transform=input_transform)

    # dataloader
    if args.cuda:
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}
    else:
        loader_kwargs = {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)

    # model
    if args.model_zoo is None:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone, aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
    else:
        model = get_model(args.model_zoo, pretrained=True)

    print(model)
    model = model.cuda()
    model.eval()

    timings = []
    for _ in range(100):
        dummy_input = torch.randn(1, 3, 512, 512).cuda()
        # ensure that context initialization and normal_() operations
        # finish before you start measuring time
        torch.cuda.synchronize()
        torch.cuda.synchronize()
        tic = time.perf_counter()
        with torch.no_grad():
            output = model(dummy_input)
        torch.cuda.synchronize()  # wait for mm to finish
        toc = time.perf_counter()
        print(toc - tic)
        timings.append(toc - tic)

    # drop the first measurement before averaging
    del timings[0]
    print('Mean running time is ', np.mean(timings))
def __init__(self, args):
    """Build datasets, loaders, model, optimizer, loss and LR scheduler.

    Args:
        args: parsed command-line namespace.  Besides the dataset/model
            options it reads ``lr``, ``momentum``, ``weight_decay``,
            ``cuda``, ``resume``, ``ft``, ``lr_scheduler`` and ``epochs``;
            ``args.start_epoch`` is overwritten when resuming or
            fine-tuning.

    Raises:
        RuntimeError: if ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # dataset
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
    }
    trainset = get_segmentation_dataset(args.dataset,
                                        split=args.train_split,
                                        mode='train',
                                        **data_kwargs)
    testset = get_segmentation_dataset(args.dataset,
                                       split='val',
                                       mode='val',
                                       **data_kwargs)

    # dataloader
    if args.cuda:
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
    else:
        kwargs = {}
    self.trainloader = data.DataLoader(trainset,
                                       batch_size=args.batch_size,
                                       drop_last=False,
                                       shuffle=True,
                                       **kwargs)
    self.valloader = data.DataLoader(testset,
                                     batch_size=args.batch_size,
                                     drop_last=False,
                                     shuffle=False,
                                     **kwargs)
    self.nclass = trainset.num_class

    # model
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   dilated=args.dilated,
                                   multi_grid=args.multi_grid,
                                   stride=args.stride,
                                   lateral=args.lateral,
                                   jpu=args.jpu,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=SyncBatchNorm,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size)

    # optimizer: the backbone trains at the base LR, every other part at 10x
    param_groups = [{
        'params': model.pretrained.parameters(),
        'lr': args.lr
    }]
    if hasattr(model, 'jpu') and model.jpu:
        param_groups.append({
            'params': model.jpu.parameters(),
            'lr': args.lr * 10
        })
    for part in ('head', 'auxlayer'):
        if hasattr(model, part):
            param_groups.append({
                'params': getattr(model, part).parameters(),
                'lr': args.lr * 10
            })
    optimizer = torch.optim.SGD(param_groups,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # per-class loss weights, keyed by dataset name
    balance_tables = {
        "pcontext60": [
            1.3225e-01, 2.0757e+00, 1.8146e+01, 5.5052e+00, 2.2060e+00,
            2.8054e+01, 2.0566e+00, 1.8598e+00, 2.4027e+00, 9.3435e+00,
            3.5990e+00, 2.7487e-01, 1.4216e+00, 2.4986e+00, 7.7258e-01,
            4.9020e-01, 2.9067e+00, 1.2197e+00, 2.2744e+00, 2.0444e+01,
            3.0057e+00, 1.8167e+01, 3.7405e+00, 5.6749e-01, 3.2631e+00,
            1.5007e+00, 5.5519e-01, 1.0056e+01, 1.8952e+01, 2.6792e-01,
            2.7479e-01, 1.8309e+00, 2.0428e+01, 1.4788e+01, 1.4908e+00,
            1.9113e+00, 2.6166e+02, 2.3233e-01, 1.9096e+01, 6.7025e+00,
            2.8756e+00, 6.8804e-01, 4.4140e+00, 2.5621e+00, 4.4409e+00,
            4.3821e+00, 1.3774e+01, 1.9803e-01, 3.6944e+00, 1.0397e+00,
            2.0601e+00, 5.5811e+00, 1.3242e+00, 3.0088e-01, 1.7344e+01,
            2.1569e+00, 2.7216e-01, 5.8731e-01, 1.9956e+00, 4.4004e+00
        ],
        "ade20k": [
            0.0772, 0.0431, 0.0631, 0.0766, 0.1095, 0.1399, 0.1502, 0.1702,
            0.2958, 0.3400, 0.3738, 0.3749, 0.4059, 0.4266, 0.4524, 0.5725,
            0.6145, 0.6240, 0.6709, 0.6517, 0.6591, 0.6818, 0.9203, 0.9965,
            1.0272, 1.0967, 1.1202, 1.2354, 1.2900, 1.5038, 1.5160, 1.5172,
            1.5036, 2.0746, 2.1426, 2.3159, 2.2792, 2.6468, 2.8038, 2.8777,
            2.9525, 2.9051, 3.1050, 3.1785, 3.3533, 3.5300, 3.6120, 3.7006,
            3.6790, 3.8057, 3.7604, 3.8043, 3.6610, 3.8268, 4.0644, 4.2698,
            4.0163, 4.0272, 4.1626, 4.3702, 4.3144, 4.3612, 4.4389, 4.5612,
            5.1537, 4.7653, 4.8421, 4.6813, 5.1037, 5.0729, 5.2657, 5.6153,
            5.8240, 5.5360, 5.6373, 6.6972, 6.4561, 6.9555, 7.9239, 7.3265,
            7.7501, 7.7900, 8.0528, 8.5415, 8.1316, 8.6557, 9.0550, 9.0081,
            9.3262, 9.1391, 9.7237, 9.3775, 9.4592, 9.7883, 10.6705,
            10.2113, 10.5845, 10.9667, 10.8754, 10.8274, 11.6427, 11.0687,
            10.8417, 11.0287, 12.2030, 12.8830, 12.5082, 13.0703, 13.8410,
            12.3264, 12.9048, 12.9664, 12.3523, 13.9830, 13.8105, 14.0345,
            15.0054, 13.9801, 14.1048, 13.9025, 13.6179, 17.0577, 15.8351,
            17.7102, 17.3153, 19.4640, 17.7629, 19.9093, 16.9529, 19.3016,
            17.6671, 19.4525, 20.0794, 18.3574, 19.1219, 19.5089, 19.2417,
            20.2534, 20.0332, 21.7496, 21.5427, 20.3008, 21.1942, 22.7051,
            23.3359, 22.4300, 20.9934, 26.9073, 31.7362, 30.0784
        ],
        "cocostuff": [
            4.8557e-02, 6.4709e-02, 3.9255e+00, 9.4797e-01, 1.2703e+00,
            1.4151e+00, 7.9733e-01, 8.4903e-01, 1.0751e+00, 2.4001e+00,
            8.9736e+00, 5.3036e+00, 6.0410e+00, 9.3285e+00, 1.5952e+00,
            3.6090e+00, 9.8772e-01, 1.2319e+00, 1.9194e+00, 2.7624e+00,
            2.0548e+00, 1.2058e+00, 3.6424e+00, 2.0789e+00, 1.7851e+00,
            6.7138e+00, 2.1315e+00, 6.9813e+00, 1.2679e+02, 2.0357e+00,
            2.2933e+01, 2.3198e+01, 1.7439e+01, 4.1294e+01, 7.8678e+00,
            4.3444e+01, 6.7543e+01, 1.0066e+01, 6.7520e+00, 1.3174e+01,
            3.3499e+00, 6.9737e+00, 2.1482e+00, 1.9428e+01, 1.3240e+01,
            1.9218e+01, 7.6836e-01, 2.6041e+00, 6.1822e+00, 1.4070e+00,
            4.4074e+00, 5.7792e+00, 1.0321e+01, 4.9922e+00, 6.7408e-01,
            3.1554e+00, 1.5832e+00, 8.9685e-01, 1.1686e+00, 2.6487e+00,
            6.5354e-01, 2.3801e-01, 1.9536e+00, 1.5862e+00, 1.7797e+00,
            2.7385e+01, 1.2419e+01, 3.9287e+00, 7.8897e+00, 7.5737e+00,
            1.9758e+00, 8.1962e+01, 3.6922e+00, 2.0039e+00, 2.7333e+00,
            5.4717e+00, 3.9048e+00, 1.9184e+01, 2.2689e+00, 2.6091e+02,
            4.7366e+01, 2.3844e+00, 8.3310e+00, 1.4857e+01, 6.5076e+00,
            2.0854e-01, 1.0425e+00, 1.7386e+00, 1.1973e+01, 5.2862e+00,
            1.7341e+00, 8.6124e-01, 9.3702e+00, 2.8545e+00, 6.0123e+00,
            1.7560e-01, 1.8128e+00, 1.3784e+00, 1.3699e+00, 2.3728e+00,
            6.2819e-01, 1.3097e+00, 4.7892e-01, 1.0268e+01, 1.2307e+00,
            5.5662e+00, 1.2867e+00, 1.2745e+00, 4.7505e+00, 8.4029e+00,
            1.8679e+00, 1.0519e+01, 1.1240e+00, 1.4975e-01, 2.3146e+00,
            4.1265e-01, 2.5896e+00, 1.4537e+00, 4.5575e+00, 7.8143e+00,
            1.4603e+01, 2.8812e+00, 1.8868e+00, 7.8131e+01, 1.9323e+00,
            7.4980e+00, 1.2446e+01, 2.1856e+00, 3.0973e+00, 4.1270e-01,
            4.9016e+01, 7.1001e-01, 7.4035e+00, 2.3395e+00, 2.9207e-01,
            2.4156e+00, 3.3211e+00, 2.1300e+00, 2.4533e-01, 1.7081e+00,
            4.6621e+00, 2.9199e+00, 1.0407e+01, 7.6207e-01, 2.7806e-01,
            3.7711e+00, 1.1852e-01, 8.8280e+00, 3.1700e-01, 6.3765e+01,
            6.6032e+00, 5.2177e+00, 4.3596e+00, 6.2965e-01, 1.0207e+00,
            1.1731e+01, 2.3935e+00, 9.2767e+00, 1.1023e-01, 3.6947e+00,
            1.3943e+00, 2.3407e+00, 1.2112e-01, 2.8518e+00, 2.8195e+00,
            1.0078e+00, 1.6614e+00, 6.5307e-01, 1.9070e+01, 2.7231e+00,
            6.0769e-01
        ],
    }
    if args.dataset in balance_tables:
        class_balance_weight = torch.tensor(balance_tables[args.dataset])
    else:
        # NOTE(review): the fallback is the *string* 'None', not the None
        # object -- confirm SegmentationLosses really expects that.
        class_balance_weight = 'None'

    # criterions
    self.criterion = SegmentationLosses(se_loss=args.se_loss,
                                        aux=args.aux,
                                        nclass=self.nclass,
                                        se_weight=args.se_weight,
                                        aux_weight=args.aux_weight,
                                        weight=class_balance_weight)
    self.model, self.optimizer = model, optimizer

    # using cuda
    if args.cuda:
        self.model = DataParallelModel(self.model).cuda()
        self.criterion = DataParallelCriterion(self.criterion).cuda()

    # resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # the DataParallel wrapper keeps the real network under ``.module``
        target = self.model.module if args.cuda else self.model
        target.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0

    # lr scheduler
    self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr,
                                        args.epochs, len(self.trainloader))
def test(args):
    """Evaluate a segmentation checkpoint or dump its predictions as PNGs.

    With ``args.eval`` set, pixel accuracy and mIoU are accumulated over the
    validation split and logged to TensorBoard after every batch.  Otherwise
    the colour-mapped predictions are written into the ``outdir`` folder.

    Args:
        args: parsed command-line namespace.  Reads ``dataset``, ``model``,
            ``resume``, ``eval``, ``test_val``, ``workers``, ``cuda``,
            ``test_batch_size``, ``model_zoo``, ``backbone``, ``aux``,
            ``multi_grid``, ``num_center``, ``backbone_path``,
            ``base_size``, ``crop_size`` and ``ms``.

    Raises:
        RuntimeError: if ``args.resume`` is missing or not an existing file.
    """
    directory = "runs/val_summary/%s/%s/%s/" % (args.dataset, args.model,
                                                args.resume)
    os.makedirs(directory, exist_ok=True)
    writer = SummaryWriter(directory)
    # try/finally so the event file is flushed and closed even when
    # something below raises
    try:
        # output folder
        outdir = 'outdir'
        os.makedirs(outdir, exist_ok=True)

        # data transforms
        input_transform = transform.Compose([
            transform.ToTensor(),
            transform.Normalize([.485, .456, .406], [.229, .224, .225])
        ])

        # dataset
        if args.eval:
            testset = get_segmentation_dataset(args.dataset,
                                               split='val',
                                               mode='testval',
                                               transform=input_transform)
        elif args.test_val:
            testset = get_segmentation_dataset(args.dataset,
                                               split='val',
                                               mode='test',
                                               transform=input_transform)
        else:
            testset = get_segmentation_dataset(args.dataset,
                                               split='test',
                                               mode='test',
                                               transform=input_transform)

        # dataloader
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \
            if args.cuda else {}
        test_data = data.DataLoader(testset,
                                    batch_size=args.test_batch_size,
                                    drop_last=False,
                                    shuffle=False,
                                    collate_fn=test_batchify_fn,
                                    **loader_kwargs)

        Norm_method = torch.nn.BatchNorm2d
        # model
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(args.model,
                                           dataset=args.dataset,
                                           backbone=args.backbone,
                                           aux=args.aux,
                                           multi_grid=args.multi_grid,
                                           num_center=args.num_center,
                                           norm_layer=Norm_method,
                                           root=args.backbone_path,
                                           base_size=args.base_size,
                                           crop_size=args.crop_size)

        # resuming checkpoint
        if args.resume is None or not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)

        # Strip wrapper prefixes so the keys match the bare model.  The
        # previous code tested for 'module.' but sliced off
        # len('model.module.') characters, i.e. six characters too many.
        # Now exactly the prefix that matched is removed (longest first).
        wrapper_prefixes = ('model.module.', 'module.')
        new_state_dict = dict()
        for key, value in checkpoint['state_dict'].items():
            for prefix in wrapper_prefixes:
                if key.startswith(prefix):
                    key = key[len(prefix):]
                    break
            new_state_dict[key] = value
        model.load_state_dict(new_state_dict)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
        print(model)

        # scales used for multi-scale testing; single scale unless args.ms
        if not args.ms:
            scales = [1.0]
        elif args.dataset == 'ade20k':
            scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]
        elif args.dataset == 'citys':
            scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25]
        else:
            scales = [0.75, 1.0, 1.25, 1.5, 1.75, 2.0]

        # ADE20K uses its own evaluator class
        evaluator_cls = MultiEvalModule2 if args.dataset == 'ade20k' \
            else MultiEvalModule
        evaluator = evaluator_cls(model,
                                  testset.num_class,
                                  scales=scales,
                                  flip=args.ms).cuda()
        evaluator.eval()
        metric = utils.SegmentationMetric(testset.num_class)

        tbar = tqdm(test_data)
        for i, (image, dst) in enumerate(tbar):
            with torch.no_grad():
                outputs = evaluator.parallel_forward(image)
            if args.eval:
                # dst holds the ground-truth targets in this mode
                metric.update(dst, outputs)
                pixAcc, mIoU = metric.get()
                tbar.set_description('pixAcc: %.4f, mIoU: %.4f' %
                                     (pixAcc, mIoU))
                writer.add_scalar('pixAcc', pixAcc, i)
                writer.add_scalar('mIoU', mIoU, i)
            else:
                # dst holds the image paths in this mode
                predicts = [
                    testset.make_pred(torch.max(output, 1)[1].cpu().numpy())
                    for output in outputs
                ]
                for predict, impath in zip(predicts, dst):
                    mask = utils.get_mask_pallete(predict, args.dataset)
                    outname = os.path.splitext(impath)[0] + '.png'
                    mask.save(os.path.join(outdir, outname))
    finally:
        writer.close()
def __init__(self, args):
    """Set up logging, data, model, optimizer, loss, metric and scheduler.

    After construction the untrained model is validated once (epoch ``-1``)
    and, if ``args.checkpoint_PATH`` is given, a checkpoint is loaded.

    Args:
        args: parsed command-line namespace.  Reads, among others,
            ``exp_dir``, ``cuda``, ``gpu_id``, ``random_seed``, ``dataset``,
            ``data_dir``, ``base_size``, ``crop_size``, ``scale``,
            ``num_workers``, ``batch_size``, ``arch``, ``backbone``,
            ``optimizer``, ``lr``, ``momentum``, ``weight_decay``,
            ``scheduler``, ``epochs``, ``checkpoint_PATH`` and
            ``only_read_model``.

    Raises:
        NotImplementedError: if ``args.optimizer`` is neither ``"Adam"`` nor
            ``'SGD'``.
    """
    self.args = args
    self.logger = utils.create_logger(self.args.exp_dir, "log")
    for k, v in vars(self.args).items():
        self.logger.info((k, v))

    if self.args.cuda:
        device = torch.device("cuda")
        # str() so a numeric gpu_id does not break the concatenation
        self.logger.info("training on gpu:" + str(self.args.gpu_id))
    else:
        self.logger.info("training on cpu")
        device = torch.device("cpu")
    self.device = device

    # fix the random seed
    set_seed(args.random_seed)

    # reportedly speeds training up
    cudnn.benchmark = True

    # load the dataset (only VOC is supported for now)
    self.logger.info('training on dataset ' + self.args.dataset)
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    data_kwargs = {
        'transform': input_transform,
        'root': self.args.data_dir,
        'base_size': self.args.base_size,
        'crop_size': self.args.crop_size,
        'logger': self.logger,
        'scale': self.args.scale
    }
    trainset = get_segmentation_dataset(self.args.dataset,
                                        split='train',
                                        mode='train',
                                        **data_kwargs)
    testset = get_segmentation_dataset(self.args.dataset,
                                       split='val',
                                       mode='val',
                                       **data_kwargs)

    # dataloader
    loader_kwargs = {'num_workers': self.args.num_workers,
                     'pin_memory': True} if self.args.cuda else {}
    self.train_iter = data.DataLoader(trainset,
                                      batch_size=self.args.batch_size,
                                      drop_last=True,
                                      shuffle=True,
                                      **loader_kwargs)
    self.val_iter = data.DataLoader(testset,
                                    batch_size=self.args.batch_size,
                                    drop_last=False,
                                    shuffle=False,
                                    **loader_kwargs)
    self.num_classes = trainset.num_classes
    self.input_channels = trainset.input_channels

    # create model
    # NOTE(review): these kwargs are built but not passed to the model
    # factory -- confirm whether fuse_attention should be forwarded.
    model_kwargs = {'fuse_attention': self.args.fuse_attention}
    self.model = get_segmentation_model(args.arch,
                                        dataset=args.dataset,
                                        backbone=args.backbone)
    print("=> creating model %s" % self.args.arch)
    self.model = self.model.to(device)

    # optimizer over the trainable parameters only
    self.optimizer = None
    params = filter(lambda p: p.requires_grad, self.model.parameters())
    if self.args.optimizer == "Adam":
        self.optimizer = torch.optim.Adam(
            params, lr=self.args.lr, weight_decay=self.args.weight_decay)
    elif self.args.optimizer == 'SGD':
        self.optimizer = torch.optim.SGD(
            params,
            lr=self.args.lr,
            momentum=self.args.momentum,
            weight_decay=self.args.weight_decay)
    else:
        raise NotImplementedError

    # loss function
    self.criterion = nn.CrossEntropyLoss(
        ignore_index=trainset.IGNORE_INDEX)

    # semantic-segmentation evaluation metric
    self.metric = SegmentationMetric(self.num_classes)

    # learning-rate schedule
    self.scheduler = LR_Scheduler(self.args.scheduler,
                                  base_lr=self.args.lr,
                                  num_epochs=self.args.epochs,
                                  iters_per_epoch=len(self.train_iter))

    # writer for the experiment results directory
    self.writer = SummaryWriter(args.exp_dir)

    # log the model graph to TensorBoard using one training batch
    X, _ = next(iter(self.train_iter))
    self.writer.add_graph(self.model, X.to(device))

    self.epoch_begin = 0
    self.best_iou = 0.0

    # look at what the model outputs before any training happens
    val_log = self.validate(epoch=-1, is_visualize_segmentation=True)
    self.write_into_tensorboard(val_log, val_log, epoch=-1)

    # checkpoint_PATH
    # The previous code referenced bare locals (model, epoch_begin,
    # best_iou, optimizer, scheduler) that do not exist in this scope and
    # never stored the loaded model back on the instance.
    if self.args.checkpoint_PATH is not None:
        if self.args.only_read_model:
            self.model, _, _, _, _ = load_checkpoint(
                self.model, self.args.checkpoint_PATH)
        else:
            # NOTE(review): this call unpacks four values while the one
            # above unpacks five -- confirm load_checkpoint's return shape.
            self.model, self.epoch_begin, self.best_iou, self.optimizer = \
                load_checkpoint(self.model, self.args.checkpoint_PATH,
                                self.epoch_begin, self.best_iou,
                                self.optimizer, self.scheduler)
def test(args):
    """Run single-image inference, reporting speed and (optionally) IoU.

    Only the first image of every batch is evaluated.  From the 22nd
    iteration on, the frames-per-second of each forward pass is recorded and
    the running mean/std is printed.  When ``'val'`` is part of
    ``args.mode`` per-image and whole-set class accuracy/IoU are printed;
    otherwise the colour-mapped prediction is saved under
    ``args.save_folder``.

    Args:
        args: parsed command-line namespace.  Reads ``save_folder``,
            ``dataset``, ``split``, ``mode``, ``workers``, ``cuda``,
            ``test_batch_size``, ``model_zoo``, the model options,
            ``resume`` and ``ms``.

    Raises:
        RuntimeError: if ``args.resume`` is missing or not an existing file.
    """
    # output folder
    outdir = args.save_folder
    os.makedirs(outdir, exist_ok=True)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])

    # dataset
    testset = get_segmentation_dataset(args.dataset,
                                       split=args.split,
                                       mode=args.mode,
                                       transform=input_transform)

    # dataloader
    loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    test_data = data.DataLoader(testset,
                                batch_size=args.test_batch_size,
                                drop_last=False,
                                shuffle=False,
                                collate_fn=test_batchify_fn,
                                **loader_kwargs)

    # model
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(args.model,
                                       dataset=args.dataset,
                                       backbone=args.backbone,
                                       dilated=args.dilated,
                                       multi_grid=args.multi_grid,
                                       stride=args.stride,
                                       lateral=args.lateral,
                                       jpu=args.jpu,
                                       aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)

    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    # strict=False, so that it is compatible with old pytorch saved models
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))

    # multi-scale testing only when requested
    if not args.ms:
        scales = [1.0]
    elif args.dataset == 'citys':
        scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25]
    else:
        scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    evaluator = MultiEvalModule(model,
                                testset.num_class,
                                scales=scales,
                                flip=args.ms).cuda()
    evaluator.eval()

    compute_metrics = 'val' in args.mode
    tbar = tqdm(test_data)
    total_inter, total_union, total_label = 0, 0, 0
    result = []  # per-image frames-per-second samples
    for i, (image, dst) in enumerate(tbar):
        # the first 21 iterations are treated as warm-up and not timed
        timed = i > 20
        with torch.no_grad():
            if timed:
                st = time.time()
            outputs = evaluator.forward(image[0].unsqueeze(0).cuda())
            if timed:
                result.append(1 / (time.time() - st))
                print(np.mean(result), np.std(result))

        if compute_metrics:
            # compute image IoU metric
            inter, union, _, area_lab = batch_intersection_union(
                outputs, dst[0], testset.num_class)
            total_label += area_lab
            total_inter += inter
            total_union += union
            class_pixAcc = 1.0 * inter / (np.spacing(1) + area_lab)
            class_IoU = 1.0 * inter / (np.spacing(1) + union)
            print("img Classes pixAcc:", class_pixAcc)
            print("img Classes IoU:", class_IoU)
        else:
            # save prediction results; this used to read an undefined
            # name ``output`` and crashed with NameError
            predict = testset.make_pred(
                torch.max(outputs, 1)[1].cpu().numpy())
            mask = utils.get_mask_pallete(predict, args.dataset)
            outname = os.path.splitext(dst[0])[0] + '.png'
            mask.save(os.path.join(outdir, outname))

    if compute_metrics:
        # compute set IoU metric
        pixAcc = 1.0 * total_inter / (np.spacing(1) + total_label)
        IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
        mIoU = IoU.mean()
        print("set Classes pixAcc:", pixAcc)
        print("set Classes IoU:", IoU)
        print("set mean IoU:", mIoU)
def test(args):
    """Evaluate on Cityscapes-sized images or dump test predictions.

    In evaluation mode (``args.eval``) every output is upsampled to
    1024 x 2048, pixel accuracy / IoU and a confusion matrix are
    accumulated, and both the label map and a colourised copy are saved
    under ``args.save``.  In test mode the colour-mapped predictions are
    written into the ``outdir`` folder instead.

    Args:
        args: parsed command-line namespace.  Reads ``save``, ``eval``,
            ``dataset``, ``workers``, ``cuda``, ``test_batch_size``,
            ``model_zoo``, ``model``, ``backbone``, ``aux``, ``se_loss`` and
            ``resume``.

    Raises:
        RuntimeError: if ``args.resume`` is missing or not an existing file.
    """
    # output folders
    outdir = 'outdir'
    os.makedirs(outdir, exist_ok=True)
    os.makedirs(args.save, exist_ok=True)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])])

    # dataset
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='testval',
                                           transform=input_transform)
    else:
        testset = get_segmentation_dataset(args.dataset, split='test',
                                           mode='test',
                                           transform=input_transform)

    # dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **kwargs)

    # model
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone,
                                       aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d)

    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'"
                           .format(args.resume))
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
    print(model)

    evaluator = MultiEvalModuleCityscapes(model, testset.num_class).cuda()
    evaluator.eval()
    tbar = tqdm(test_data)
    interp = nn.Upsample(size=(1024, 2048), mode='bilinear')

    def eval_batch(image, dst, evaluator, eval_mode, hist, names):
        """Process one batch.

        Returns ``(correct, labeled, inter, union, hist)`` in both modes;
        the first four entries are dummy zeros in test mode.
        """
        if eval_mode:
            # evaluation mode on validation set
            targets = dst
            outputs = evaluator.parallel_forward(image)
            batch_inter, batch_union, batch_correct, batch_label = 0, 0, 0, 0
            n = testset.num_class
            for output, target, name in zip(outputs, targets, names):
                output = interp(output)
                scores = output.data.cpu()
                correct, labeled = utils.batch_pix_accuracy(scores, target)
                inter, union = utils.batch_intersection_union(
                    scores, target, n)
                batch_correct += correct
                batch_label += labeled
                batch_inter += inter
                batch_union += union

                # accumulate the confusion matrix (rows: ground truth,
                # columns: prediction), ignoring labels outside [0, n)
                a = target.numpy().flatten()
                _, b = torch.max(scores, 1)
                b = b.numpy().flatten()
                k = (a >= 0) & (a < n)
                hist += np.bincount(n * a[k].astype(int) + b[k],
                                    minlength=n ** 2).reshape(n, n)

                # save the label map and its colourised version
                label_map = scores.numpy()[0].transpose(1, 2, 0)
                label_map = np.asarray(np.argmax(label_map, axis=2),
                                       dtype=np.uint8)
                output_col = colorize_mask(label_map)
                label_img = Image.fromarray(label_map)
                name = name.split('/')[-1]
                label_img.save('%s/%s' % (args.save, name))
                output_col.save('%s/%s_color.png' %
                                (args.save, name.split('.')[0]))
            return batch_correct, batch_label, batch_inter, batch_union, hist

        # test mode, dump the results
        im_paths = dst
        outputs = evaluator.parallel_forward(image)
        predicts = [torch.max(output, 1)[1].cpu().numpy() +
                    testset.pred_offset for output in outputs]
        for predict, impath in zip(predicts, im_paths):
            mask = utils.get_mask_pallete(predict, args.dataset)
            outname = os.path.splitext(impath)[0] + '.png'
            mask.save(os.path.join(outdir, outname))
        # Dummy outputs for compatibility with eval mode.  This used to
        # return only four values while the caller unpacks five, which
        # raised ValueError on the first test-mode batch.
        return 0, 0, 0, 0, hist

    total_inter, total_union, total_correct, total_label = \
        np.int64(0), np.int64(0), np.int64(0), np.int64(0)
    hist = np.zeros((testset.num_class, testset.num_class))
    for i, (image, dst, name) in enumerate(tbar):
        if torch_ver == "0.3":
            # legacy PyTorch: disable autograd through a volatile Variable
            image = Variable(image, volatile=True)
            correct, labeled, inter, union, hist = eval_batch(
                image, dst, evaluator, args.eval, hist, name)
        else:
            with torch.no_grad():
                correct, labeled, inter, union, hist = eval_batch(
                    image, dst, evaluator, args.eval, hist, name)
        if args.eval:
            total_correct += correct.astype('int64')
            total_label += labeled.astype('int64')
            total_inter += inter.astype('int64')
            total_union += union.astype('int64')
            pixAcc = np.float64(1.0) * total_correct / (
                np.spacing(1, dtype=np.float64) + total_label)
            IoU = np.float64(1.0) * total_inter / (
                np.spacing(1, dtype=np.float64) + total_union)
            mIoU = IoU.mean()
            tbar.set_description(
                'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))

    if args.eval:
        # per-class IoU from the confusion matrix; skipped in test mode,
        # where the matrix is never filled and every entry would be NaN
        mIoUs = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
        print(str(round(np.nanmean(mIoUs) * 100, 2)))
        print(mIoUs)
def test(args):
    """Evaluate a checkpoint or save per-pixel probability maps as BMPs.

    When ``'val'`` is part of ``args.mode`` the running pixel accuracy and
    mIoU are shown on the progress bar.  Otherwise the softmax probability
    of channel 1 of every prediction is scaled to 0-255 and written as a
    ``.bmp`` file under ``args.save_folder``.

    Args:
        args: parsed command-line namespace.  Reads ``save_folder``,
            ``dataset``, ``split``, ``mode``, ``workers``, ``cuda``,
            ``test_batch_size``, ``model_zoo``, the model options,
            ``resume`` and ``ms``.

    Raises:
        RuntimeError: if ``args.resume`` is missing or not an existing file.
    """
    # output folder
    outdir = args.save_folder
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # dataset
    testset = get_segmentation_dataset(args.dataset,
                                       split=args.split,
                                       mode=args.mode,
                                       transform=input_transform)

    # dataloader
    if args.cuda:
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}
    else:
        loader_kwargs = {}
    test_data = data.DataLoader(testset,
                                batch_size=args.test_batch_size,
                                drop_last=False,
                                shuffle=False,
                                collate_fn=test_batchify_fn,
                                **loader_kwargs)

    # model
    if args.model_zoo is None:
        model = get_segmentation_model(args.model,
                                       dataset=args.dataset,
                                       backbone=args.backbone,
                                       dilated=args.dilated,
                                       multi_grid=args.multi_grid,
                                       stride=args.stride,
                                       lateral=args.lateral,
                                       jpu=args.jpu,
                                       aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
    else:
        model = get_model(args.model_zoo, pretrained=True)

    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))

    # single scale unless multi-scale testing was requested
    if not args.ms:
        scales = [1.0]
    elif args.dataset == 'citys':
        scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25]
    else:
        scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    evaluator = MultiEvalModule(model,
                                testset.num_class,
                                scales=scales,
                                flip=args.ms).cuda()
    evaluator.eval()
    metric = utils.SegmentationMetric(testset.num_class)

    tbar = tqdm(test_data)
    for i, (image, dst) in enumerate(tbar):
        if 'val' in args.mode:
            with torch.no_grad():
                predicts = evaluator.parallel_forward(image)
                metric.update(dst, predicts)
                pixAcc, mIoU = metric.get()
                tbar.set_description('pixAcc: %.4f, mIoU: %.4f' %
                                     (pixAcc, mIoU))
            continue

        # prediction mode: dst holds the image paths
        with torch.no_grad():
            outputs = evaluator.parallel_forward(image)
            prob_maps = [
                torch.softmax(output, 1).cpu().numpy() for output in outputs
            ]
        # imported here so validation runs do not depend on PIL
        import numpy as np
        from PIL import Image
        for prob, impath in zip(prob_maps, dst):
            # probability of channel 1 of the first sample, as 8-bit grey
            grey = (prob[0, 1, :, :] * 255).astype(np.uint8)
            mask = Image.fromarray(grey)
            outname = os.path.splitext(impath)[0] + '.bmp'
            mask.save(os.path.join(outdir, outname))
def __init__(self, args):
    """Build logger, data loaders, model, optimizer, loss and scheduler.

    Args:
        args: parsed command-line namespace.  ``args.log_name`` is set from
            ``args.checkname`` and ``args.start_epoch`` is overwritten when
            fine-tuning (``args.ft``) or resuming (``args.resume``).

    Raises:
        RuntimeError: if ``args.resume`` is set but is not an existing file.
    """
    self.args = args
    args.log_name = str(args.checkname)
    self.logger = utils.create_logger(args.log_root, args.log_name)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])

    # dataset
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
        'logger': self.logger,
        'scale': args.scale
    }
    trainset = get_segmentation_dataset(args.dataset,
                                        split='train',
                                        mode='train',
                                        **data_kwargs)
    testset = get_segmentation_dataset(args.dataset,
                                       split='val',
                                       mode='val',
                                       **data_kwargs)

    # dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    self.trainloader = data.DataLoader(trainset,
                                       batch_size=args.batch_size,
                                       drop_last=True,
                                       shuffle=True,
                                       **kwargs)
    self.valloader = data.DataLoader(testset,
                                     batch_size=args.batch_size,
                                     drop_last=False,
                                     shuffle=False,
                                     **kwargs)
    self.nclass = trainset.num_class

    # model
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=BatchNorm2d,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size,
                                   multi_grid=args.multi_grid,
                                   multi_dilation=args.multi_dilation)
    self.logger.info(model)

    # optimizer using different LR: backbone at the base LR, head and
    # auxiliary layer at 10x
    params_list = [
        {
            'params': model.pretrained.parameters(),
            'lr': args.lr
        },
    ]
    if hasattr(model, 'head'):
        params_list.append({
            'params': model.head.parameters(),
            'lr': args.lr * 10
        })
    if hasattr(model, 'auxlayer'):
        params_list.append({
            'params': model.auxlayer.parameters(),
            'lr': args.lr * 10
        })
    # Per-class weights for a class-balanced loss.  Currently unused: the
    # criterion below is built without ``weight=cityscape_weight``.
    cityscape_weight = torch.FloatTensor([
        0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489,
        0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955,
        1.0865, 1.1529, 1.0507
    ])
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    self.criterion = SegmentationMultiLosses(nclass=self.nclass)
    self.model, self.optimizer = model, optimizer

    # using cuda
    if args.cuda:
        self.model = DataParallelModel(self.model).cuda()
        self.criterion = DataParallelCriterion(self.criterion).cuda()

    # Initialise before any checkpoint is read.  This assignment used to
    # sit at the very end of __init__, where it silently reset the
    # best_pred restored from a resumed checkpoint back to zero.
    self.best_pred = 0.0

    # finetune from a trained model
    if args.ft:
        args.start_epoch = 0
        checkpoint = torch.load(args.ft_resume)
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'],
                                              strict=False)
        else:
            self.model.load_state_dict(checkpoint['state_dict'],
                                       strict=False)
        self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            args.ft_resume, checkpoint['epoch']))

    # resuming checkpoint
    if args.resume:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # lr scheduler
    self.scheduler = utils.LR_Scheduler(args.lr_scheduler,
                                        args.lr,
                                        args.epochs,
                                        len(self.trainloader),
                                        logger=self.logger,
                                        lr_step=args.lr_step)
def __init__(self, args):
    """Build data loaders, model, optimizer, loss and LR scheduler.

    Args:
        args: parsed command-line namespace.  ``args.start_epoch`` is
            overwritten when resuming (``args.resume``) or fine-tuning
            (``args.ft``).

    Raises:
        RuntimeError: if ``args.resume`` is set but is not an existing file.
    """
    self.args = args

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])

    # dataset
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size
    }
    trainset = get_segmentation_dataset(args.dataset,
                                        split=args.train_split,
                                        mode='train',
                                        **data_kwargs)
    testset = get_segmentation_dataset(args.dataset,
                                       split='val',
                                       mode='val',
                                       **data_kwargs)

    # dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    self.trainloader = data.DataLoader(trainset,
                                       batch_size=args.batch_size,
                                       drop_last=True,
                                       shuffle=True,
                                       **kwargs)
    self.valloader = data.DataLoader(testset,
                                     batch_size=args.batch_size,
                                     drop_last=False,
                                     shuffle=False,
                                     **kwargs)
    self.nclass = trainset.num_class

    # model
    model = get_segmentation_model(
        args.model,
        dataset=args.dataset,
        backbone=args.backbone,
        dilated=args.dilated,
        lateral=args.lateral,
        jpu=args.jpu,
        aux=args.aux,
        se_loss=args.se_loss,
        norm_layer=torch.nn.BatchNorm2d,
        base_size=args.base_size,
        crop_size=args.crop_size)
    print(model)

    # optimizer using different LR: backbone at the base LR, everything
    # else at 10x
    params_list = [
        {
            'params': model.pretrained.parameters(),
            'lr': args.lr
        },
    ]
    # the attribute may exist but be None when the model has no JPU, so
    # check the value and not only its presence
    if getattr(model, 'jpu', None) is not None:
        params_list.append({
            'params': model.jpu.parameters(),
            'lr': args.lr * 10
        })
    if hasattr(model, 'head'):
        params_list.append({
            'params': model.head.parameters(),
            'lr': args.lr * 10
        })
    if hasattr(model, 'auxlayer'):
        params_list.append({
            'params': model.auxlayer.parameters(),
            'lr': args.lr * 10
        })
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # criterions
    self.criterion = SegmentationLosses(se_loss=args.se_loss,
                                        aux=args.aux,
                                        nclass=self.nclass,
                                        se_weight=args.se_weight,
                                        aux_weight=args.aux_weight)
    self.model, self.optimizer = model, optimizer

    # using cuda
    if args.cuda:
        self.model = DataParallelModel(self.model).cuda()
        self.criterion = DataParallelCriterion(self.criterion).cuda()

    # resuming checkpoint
    # best_pred is initialised *before* the checkpoint is read; it used to
    # be reset to 0.0 at the end of __init__, discarding the resumed value.
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0

    # lr scheduler
    self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr,
                                        args.epochs, len(self.trainloader))
def __init__(self, args):
    """Build logger, datasets, loaders, model, optimizer, criterion, scheduler.

    Args:
        args: Option namespace. Attributes read here: ``checkname``,
            ``log_root``, ``base_size``, ``crop_size``, ``scale``,
            ``dataset``, ``workers``, ``cuda``, ``batch_size``, ``model``,
            ``backbone``, ``aux``, ``se_loss``, ``multi_grid``,
            ``multi_dilation``, ``wo_backbone``, ``lr``, ``momentum``,
            ``weight_decay``, ``ft``, ``ft_resume``, ``resume``,
            ``lr_scheduler``, ``epochs`` and ``lr_step``. Optional
            attributes ``data_root`` (default ``'../datasets'``) and
            ``resume_wo_head`` (default ``False``) are read via ``getattr``.
            ``args.log_name`` is written, and ``args.start_epoch`` is
            written when fine-tuning and/or resuming.

    Raises:
        RuntimeError: If ``args.resume`` is set but is not an existing file.
    """
    from collections import OrderedDict

    self.args = args
    args.log_name = str(args.checkname)
    root_dir = getattr(args, "data_root", '../datasets')
    wo_head = getattr(args, "resume_wo_head", False)
    self.logger = utils.create_logger(args.log_root, args.log_name)

    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # dataset
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
        'logger': self.logger,
        'scale': args.scale,
    }
    trainset = get_segmentation_dataset(args.dataset,
                                        split='train',
                                        mode='train',
                                        root=root_dir,
                                        **data_kwargs)
    testset = get_segmentation_dataset(args.dataset,
                                       split='val',
                                       mode='val',
                                       root=root_dir,
                                       **data_kwargs)

    # dataloader (worker / pinned-memory options are only passed with CUDA)
    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    self.trainloader = data.DataLoader(trainset,
                                       batch_size=args.batch_size,
                                       drop_last=True,
                                       shuffle=True,
                                       **kwargs)
    self.valloader = data.DataLoader(testset,
                                     batch_size=args.batch_size,
                                     drop_last=False,
                                     shuffle=False,
                                     **kwargs)
    self.nclass = trainset.num_class

    # model
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=BatchNorm2d,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size,
                                   multi_grid=args.multi_grid,
                                   multi_dilation=args.multi_dilation)
    self.logger.info(model)

    # optimizer using different LR: the backbone (unless excluded through
    # args.wo_backbone) trains at the base LR, the remaining sub-modules
    # present on the model at 10x the base LR.
    params_list = []
    if not args.wo_backbone:
        params_list.append({
            'params': model.pretrained.parameters(),
            'lr': args.lr,
        })
    for name in ('head', 'auxlayer'):
        # getattr(..., None) instead of hasattr: an attribute that exists
        # but is None has no parameters() and must be skipped, not crash.
        submodule = getattr(model, name, None)
        if submodule is not None:
            params_list.append({
                'params': submodule.parameters(),
                'lr': args.lr * 10,
            })
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    self.criterion = SegmentationMultiLosses(nclass=self.nclass)
    self.model, self.optimizer = model, optimizer

    # using cuda
    if args.cuda:
        self.model = DataParallelModel(self.model).cuda()
        self.criterion = DataParallelCriterion(self.criterion).cuda()

    # Without CUDA, remap storages to the CPU so that checkpoints saved
    # from GPU tensors can still be deserialized.
    map_location = None if args.cuda else 'cpu'
    # Under CUDA the network is wrapped, so weights go into the inner module.
    net = self.model.module if args.cuda else self.model

    # Default best score. It must be set BEFORE the resume step; assigning
    # it afterwards (as the code used to) discards the restored value.
    self.best_pred = 0.0

    # finetune from a trained model
    if args.ft:
        args.start_epoch = 0
        checkpoint = torch.load(args.ft_resume, map_location=map_location)
        if wo_head:
            print("WITHout HEAD !!!!!!!!!!")
            # Drop every entry whose key starts with "head" so those
            # weights are not taken from the fine-tuning checkpoint.
            checkpoint['state_dict'] = OrderedDict(
                (key, value)
                for key, value in checkpoint['state_dict'].items()
                if not key.startswith("head"))
        else:
            print("With HEAD !!!!!!!!!!")
        # strict=False tolerates the keys removed above (and any other
        # mismatch between checkpoint and model).
        net.load_state_dict(checkpoint['state_dict'], strict=False)

    # resuming checkpoint
    if args.resume:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume, map_location=map_location)
        args.start_epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            # Optimizer state and best score are only carried over when
            # continuing the same run; fine-tuning starts both afresh.
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
        self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # lr scheduler
    self.scheduler = utils.LR_Scheduler(args.lr_scheduler,
                                        args.lr,
                                        args.epochs,
                                        len(self.trainloader),
                                        logger=self.logger,
                                        lr_step=args.lr_step)