def main(): global args, logger args = get_parser() check(args) logger = get_logger() os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] gray_folder = os.path.join(args.save_folder, 'gray') color_folder = os.path.join(args.save_folder, 'color') test_transform = transform.Compose([transform.ToTensor()]) test_data = dataset.SemData(split=args.split, data_root=args.data_root, data_list=args.test_list, transform=test_transform) index_start = args.index_start if args.index_step == 0: index_end = len(test_data.data_list) else: index_end = min(index_start + args.index_step, len(test_data.data_list)) test_data.data_list = test_data.data_list[index_start:index_end] test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=args.workers, pin_memory=True) colors = np.loadtxt(args.colors_path).astype('uint8') names = [line.rstrip('\n') for line in open(args.names_path)] if not args.has_prediction: if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False) elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, pretrained=False) logger.info(model) model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = True if os.path.isfile(args.model_path): logger.info("=> loading checkpoint '{}'".format(args.model_path)) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict'], strict=False) logger.info("=> loaded checkpoint '{}'".format(args.model_path)) else: raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path)) test(test_loader, test_data.data_list, model, args.classes, mean, std, args.base_size, args.test_h, args.test_w, args.scales, gray_folder, color_folder, colors) if args.split != 'test': cal_acc(test_data.data_list, gray_folder, args.classes, names)
def main(): global args, logger args = get_parser() check(args) logger = get_logger() os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] colors = np.loadtxt(args.colors_path).astype('uint8') if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False) elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, pretrained=False) logger.info(model) model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = False #True if os.path.isfile(args.model_path): logger.info("=> loading checkpoint '{}'".format(args.model_path)) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict'], strict=False) logger.info("=> loaded checkpoint '{}'".format(args.model_path)) else: raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path)) with open(args.image) as f: image_files = f.read().splitlines() for file in image_files: image = file.split() image = os.path.join('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/', image[0]) test(model.eval(), image, args.classes, mean, std, args.base_size, args.test_h, args.test_w, args.scales, colors) if (args.image).split('/')[-1] == 'training.txt' train_label_list = os.listdir('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/label/train_label') with open('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/label/training.txt', 'w') as f: for label in train_label_list: f.write('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/label/train_label/'+label+'\n') else: val_label_list = os.listdir('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/label/val_label/') with open('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/label/validation.txt', 'w') as f: for label in val_label_list: f.write('/scratch/mw3706/dim/Deep_Image_Matting_Reproduce/pspnet/data/portrait/label/val_label/'+label+'\n')
def main(): global args, logger args = get_parser() check(args) logger = get_logger() os.environ["CUDA_VISIBLE_DEVICES"] = ','.join( str(x) for x in args.test_gpu) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] colors = np.loadtxt(args.colors_path).astype('uint8') if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False) elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, pretrained=False) logger.info(model) model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = True if os.path.isfile(args.model_path): logger.info("=> loading checkpoint '{}'".format(args.model_path)) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict'], strict=False) logger.info("=> loaded checkpoint '{}'".format(args.model_path)) else: raise RuntimeError("=> no checkpoint found at '{}'".format( args.model_path)) paths = glob.glob(args.image + '/scene*/color/*00.jpg') for path in paths: test(model.eval(), path, args.classes, mean, std, args.base_size, args.test_h, args.test_w, args.scales, colors)
def __setupSegmentation(self): if torch.cuda.device_count() == 0: device = torch.device("cpu") else: device = torch.device("cuda") model = PSPNet(layers=self.args.layers, classes=self.args.classes, zoom_factor=self.args.zoom_factor, pretrained=False) model = torch.nn.DataParallel(model) cudnn.benchmark = True if os.path.isfile(self.args.model_path): checkpoint = torch.load(self.args.model_path, map_location=torch.device('cpu')) model.load_state_dict(checkpoint['state_dict'], strict=False) return model else: raise RuntimeError("=> no checkpoint found at '{}'".format(self.args.model_path))
def main_worker(gpu, ngpus_per_node, argss): global args args = argss ## step.1 设置分布式相关参数 # 1.1 分布式初始化 if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) # 分布式初始化 ## step.2 构建网络 # ---------------------------------------------- 根据实际情况自己写 ---------------------------------------------# criterion = nn.CrossEntropyLoss( ignore_index=args.ignore_label) # 交叉熵损失函数, 根据情况自己修改 if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, criterion=criterion) modules_ori = [ model.layer0, model.layer1, model.layer2, model.layer3, model.layer4 ] modules_new = [model.ppm, model.cls, model.aux] elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, psa_type=args.psa_type, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, criterion=criterion) modules_ori = [ model.layer0, model.layer1, model.layer2, model.layer3, model.layer4 ] modules_new = [model.psa, model.cls, model.aux] # ---------------------------------------------------- END ---------------------------------------------------# ## step.3 设置优化器 params_list = [] # 模型参数列表 for module in modules_ori: params_list.append(dict(params=module.parameters(), lr=args.base_lr)) # 原来backbone网络 学习率 0.01 for module in modules_new: params_list.append( dict(params=module.parameters(), lr=args.base_lr * 10)) # 新加入预测网络 学习率 0.1 args.index_split = 5 optimizer = torch.optim.SGD(params_list, lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) # SGD优化器 # 3.x 设置sync_bn from torch.nn.SyncBatchNorm if args.sync_bn: model = nn.SyncBatchNorm.convert_sync_batchnorm(model) ## step.4 多线程分布式工作 # 4.1 判断是否是在主进程中, 如果在进行如下程序 if main_process(): global logger, writer logger = get_logger() # 设置logger writer = SummaryWriter(args.save_path) # 设置writer logger.info(args) # 输出参数列表 logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) logger.info(model) # 输出网络列表 # 4.2 分布式工作 if args.distributed: torch.cuda.set_device(gpu) # 指定编号为gpu的那一张显卡 args.batch_size = int(args.batch_size / ngpus_per_node) # 每张卡的训练的batch size args.batch_size_val = int(args.batch_size_val / ngpus_per_node) # 每张卡的评测的batch size args.workers = int( (args.workers + ngpus_per_node - 1) / ngpus_per_node) # 每张卡工作的数目 model = torch.nn.parallel.DistributedDataParallel( model.cuda(), device_ids=[gpu]) # 加载torch分布式 else: model = torch.nn.DataParallel(model.cuda()) # 数据并行 ## step.5 加载网络权重 # 5.1 直接加载网络预权重 if args.weight: if os.path.isfile(args.weight): if main_process(): logger.info("=> loading weight '{}'".format(args.weight)) checkpoint = torch.load(args.weight) model.load_state_dict(checkpoint['state_dict']) if main_process(): logger.info("=> loaded weight '{}'".format(args.weight)) else: if main_process(): logger.info("=> no weight found at '{}'".format(args.weight)) # 5.2 加载上次没训练完的模型权重 if args.resume: if os.path.isfile(args.resume): if main_process(): logger.info("=> loading checkpoint '{}'".format(args.resume)) # checkpoint = torch.load(args.resume) checkpoint = torch.load( args.resume, map_location=lambda storage, loc: storage.cuda()) args.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if main_process(): logger.info("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: if main_process(): logger.info("=> no checkpoint found at '{}'".format( args.resume)) ## step.7 设置数据loader # 7.1 loader参数设置 value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] train_transform = transform.Compose([ transform.RandScale([args.scale_min, args.scale_max]), transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label), transform.RandomGaussianBlur(), transform.RandomHorizontalFlip(), transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ]) # 组合数据预处理 # 7.2 训练数据, 可以根据需要自己修改或写 # ---------------------------------------------- 根据实际情况自己写 ---------------------------------------------# train_data = dataset.SemData(split='train', data_root=args.data_root, data_list=args.train_list, transform=train_transform) # ---------------------------------------------------- END ---------------------------------------------------# if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_data) # 分布式下数据loader else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) if args.evaluate: # evaluate数据 val_transform = transform.Compose([ transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ]) val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=val_transform) if args.distributed: val_sampler = torch.utils.data.distributed.DistributedSampler( val_data) else: val_sampler = None val_loader = torch.utils.data.DataLoader( val_data, batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler) ## step.8 主循环 for epoch in range(args.start_epoch, args.epochs): epoch_log = epoch + 1 if args.distributed: train_sampler.set_epoch(epoch) # 8.1 训练函数 # ---------------------------------------------- 根据实际情况自己写 ---------------------------------------------# loss_train, mIoU_train, mAcc_train, allAcc_train = train( train_loader, model, optimizer, epoch) # ---------------------------------------------------- END ---------------------------------------------------# if main_process(): writer.add_scalar('loss_train', loss_train, epoch_log) writer.add_scalar('mIoU_train', mIoU_train, epoch_log) writer.add_scalar('mAcc_train', mAcc_train, epoch_log) writer.add_scalar('allAcc_train', allAcc_train, epoch_log) # 8.2 保存checkpoint if (epoch_log % args.save_freq == 0) and main_process(): filename = args.save_path + '/train_epoch_' + str( epoch_log) + '.pth' logger.info('Saving checkpoint to: ' + filename) torch.save( { 'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, filename) if epoch_log / args.save_freq > 2: deletename = args.save_path + '/train_epoch_' + str( epoch_log - args.save_freq * 2) + '.pth' os.remove(deletename) # 训练一个epoch之后evaluate if args.evaluate: loss_val, mIoU_val, mAcc_val, allAcc_val = validate( val_loader, model, criterion) if main_process(): writer.add_scalar('loss_val', loss_val, epoch_log) writer.add_scalar('mIoU_val', mIoU_val, epoch_log) writer.add_scalar('mAcc_val', mAcc_val, epoch_log) writer.add_scalar('allAcc_val', allAcc_val, epoch_log)
def main( config_name, weights_url='https://github.com/deepparrot/semseg/releases/download/0.1/pspnet50-ade20k.pth', weights_name='pspnet50-ade20k.pth'): args = config.load_cfg_from_cfg_file(config_name) check(args) os.environ["CUDA_VISIBLE_DEVICES"] = ','.join( str(x) for x in args.test_gpu) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] gray_folder = os.path.join(args.save_folder, 'gray') color_folder = os.path.join(args.save_folder, 'color') args.data_root = './.data/vision/ade20k' args.val_list = './.data/vision/ade20k/validation.txt' args.test_list = './.data/vision/ade20k/validation.txt' print(args.data_root) test_transform = transform.Compose([transform.ToTensor()]) test_data = dataset.SemData(split=args.split, data_root=args.data_root, data_list=args.test_list, transform=test_transform) index_start = args.index_start if args.index_step == 0: index_end = len(test_data.data_list) else: index_end = min(index_start + args.index_step, len(test_data.data_list)) test_data.data_list = test_data.data_list[index_start:index_end] test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=args.workers, pin_memory=True) colors = np.loadtxt(args.colors_path).astype('uint8') names = [] if not args.has_prediction: if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False) elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, pretrained=False) model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = True local_checkpoint, _ = urllib.request.urlretrieve( weights_url, weights_name) if os.path.isfile(local_checkpoint): checkpoint = torch.load(local_checkpoint) model.load_state_dict(checkpoint['state_dict'], strict=False) else: raise RuntimeError( "=> no checkpoint found at '{}'".format(local_checkpoint)) test(test_loader, test_data.data_list, model, args.classes, mean, std, args.base_size, args.test_h, args.test_w, args.scales, gray_folder, color_folder, colors) if args.split != 'test': cal_acc(test_data.data_list, gray_folder, args.classes, names)
def main_worker(gpu, ngpus_per_node, argss): global args args = argss if args.sync_bn: if args.multiprocessing_distributed: BatchNorm = apex.parallel.SyncBatchNorm else: from lib.sync_bn.modules import BatchNorm2d BatchNorm = BatchNorm2d else: BatchNorm = nn.BatchNorm2d if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label) if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, criterion=criterion, BatchNorm=BatchNorm) modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4] modules_new = [model.ppm, model.cls, model.aux] elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, psa_type=args.psa_type, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, criterion=criterion, BatchNorm=BatchNorm) modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4] modules_new = [model.psa, model.cls, model.aux] params_list = [] for module in modules_ori: params_list.append(dict(params=module.parameters(), lr=args.base_lr)) for module in modules_new: params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10)) args.index_split = 5 optimizer = torch.optim.SGD(params_list, lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) if main_process(): global logger, writer logger = get_logger() writer = SummaryWriter(args.save_path) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) logger.info(model) if args.distributed: torch.cuda.set_device(gpu) args.batch_size = int(args.batch_size / ngpus_per_node) args.batch_size_val = int(args.batch_size_val / ngpus_per_node) args.workers = int(args.workers / ngpus_per_node) if args.use_apex: model, optimizer = apex.amp.initialize(model.cuda(), optimizer, opt_level=args.opt_level, keep_batchnorm_fp32=args.keep_batchnorm_fp32, loss_scale=args.loss_scale) model = apex.parallel.DistributedDataParallel(model) else: model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu]) else: model = torch.nn.DataParallel(model.cuda()) if args.weight: if os.path.isfile(args.weight): if main_process(): logger.info("=> loading weight '{}'".format(args.weight)) checkpoint = torch.load(args.weight) model.load_state_dict(checkpoint['state_dict']) if main_process(): logger.info("=> loaded weight '{}'".format(args.weight)) else: if main_process(): logger.info("=> no weight found at '{}'".format(args.weight)) if args.resume: if os.path.isfile(args.resume): if main_process(): logger.info("=> loading checkpoint '{}'".format(args.resume)) # checkpoint = torch.load(args.resume) checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda()) args.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if main_process(): logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) else: if main_process(): logger.info("=> no checkpoint found at '{}'".format(args.resume)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] train_transform = transform.Compose([ transform.RandScale([args.scale_min, args.scale_max]), transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label), transform.RandomGaussianBlur(), transform.RandomHorizontalFlip(), transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std)]) train_data = dataset.SemData(split='train', data_root=args.data_root, data_list=args.train_list, transform=train_transform) if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(train_data) else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) if args.evaluate: val_transform = transform.Compose([ transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std)]) val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=val_transform) if args.distributed: val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) else: val_sampler = None val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler) for epoch in range(args.start_epoch, args.epochs): epoch_log = epoch + 1 if args.distributed: train_sampler.set_epoch(epoch) loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch) if main_process(): writer.add_scalar('loss_train', loss_train, epoch_log) writer.add_scalar('mIoU_train', mIoU_train, epoch_log) writer.add_scalar('mAcc_train', mAcc_train, epoch_log) writer.add_scalar('allAcc_train', allAcc_train, epoch_log) if (epoch_log % args.save_freq == 0) and main_process(): filename = args.save_path + '/train_epoch_' + str(epoch_log) + '.pth' logger.info('Saving checkpoint to: ' + filename) torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, filename) if epoch_log / args.save_freq > 2: deletename = args.save_path + '/train_epoch_' + str(epoch_log - args.save_freq * 2) + '.pth' os.remove(deletename) if args.evaluate: loss_val, mIoU_val, mAcc_val, allAcc_val = validate(val_loader, model, criterion) if main_process(): writer.add_scalar('loss_val', loss_val, epoch_log) writer.add_scalar('mIoU_val', mIoU_val, epoch_log) writer.add_scalar('mAcc_val', mAcc_val, epoch_log) writer.add_scalar('allAcc_val', allAcc_val, epoch_log)
def main_worker(local_rank, ngpus_per_node, argss): global args args = argss dist.init_process_group(backend=args.dist_backend) teacher_model = None if args.teacher_model_path: teacher_model = PSPNet(layers=args.teacher_layers, classes=args.classes, zoom_factor=args.zoom_factor) kd_path = 'alpha_' + str(args.alpha) + '_Temp_' + str(args.temperature) args.save_path = os.path.join(args.save_path, kd_path) if not os.path.exists(args.save_path): os.mkdir(args.save_path) if args.arch == 'psp': model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor) modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4] modules_new = [model.ppm, model.cls, model.aux] elif args.arch == 'bise_v1': model = BiseNet(num_classes=args.classes) modules_ori = [model.sp, model.cp] modules_new = [model.ffm, model.conv_out, model.conv_out16, model.conv_out32] params_list = [] for module in modules_ori: params_list.append(dict(params=module.parameters(), lr=args.base_lr)) for module in modules_new: params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10)) args.index_split = 5 optimizer = torch.optim.SGD(params_list, lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) if args.sync_bn: model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if teacher_model is not None: teacher_model = nn.SyncBatchNorm.convert_sync_batchnorm(teacher_model) if main_process(): global logger, writer logger = get_logger() writer = SummaryWriter(args.save_path) # tensorboardX logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) logger.info(model) if teacher_model is not None: logger.info(teacher_model) if args.distributed: torch.cuda.set_device(local_rank) args.batch_size = int(args.batch_size / ngpus_per_node) args.batch_size_val = int(args.batch_size_val / ngpus_per_node) args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[local_rank]) if teacher_model is not None: teacher_model = torch.nn.parallel.DistributedDataParallel(teacher_model.cuda(), device_ids=[local_rank]) else: model = torch.nn.DataParallel(model.cuda()) if teacher_model is not None: teacher_model = torch.nn.DataParallel(teacher_model.cuda()) if teacher_model is not None: checkpoint = torch.load(args.teacher_model_path, map_location=lambda storage, loc: storage.cuda()) teacher_model.load_state_dict(checkpoint['state_dict'], strict=False) print("=> loading teacher checkpoint '{}'".format(args.teacher_model_path)) criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label).cuda(local_rank) kd_criterion = None if teacher_model is not None: kd_criterion = KDLoss(ignore_index=args.ignore_label).cuda(local_rank) if args.weight: if os.path.isfile(args.weight): if main_process(): logger.info("=> loading weight: '{}'".format(args.weight)) checkpoint = torch.load(args.weight) model.load_state_dict(checkpoint['state_dict']) if main_process(): logger.info("=> loaded weight '{}'".format(args.weight)) else: if main_process(): logger.info("=> mp weight found at '{}'".format(args.weight)) best_mIoU_val = 0.0 if args.resume: if os.path.isfile(args.resume): if main_process(): logger.info("=> loading checkpoint '{}'".format(args.resume)) # Load all tensors onto GPU checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda()) args.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) best_mIoU_val = checkpoint['best_mIoU_val'] if main_process(): logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['point'])) else: if main_process(): logger.info("=> no checkpoint found at '{}'".format(args.resume)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] train_transform = transform.Compose([ transform.RandScale([args.scale_min, args.scale_max]), transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label), transform.RandomGaussianBlur(), transform.RandomHorizontalFlip(), transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std)]) train_data = dataset.SemData(split='train', data_root=args.data_root, data_list=args.train_list, transform=train_transform) if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(train_data) else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) if args.evaluate: val_transform = transform.Compose([ transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std)]) val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=val_transform) if args.distributed: val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) else: val_sampler = None val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler) for epoch in range(args.start_epoch, args.epochs): epoch_log = epoch + 1 if args.distributed: # Use .set_epoch() method to reshuffle the dataset partition at every iteration train_sampler.set_epoch(epoch) loss_train, mIoU_train, mAcc_train, allAcc_train = train(local_rank, train_loader, model, teacher_model, criterion, kd_criterion, optimizer, epoch) if main_process(): writer.add_scalar('loss_train', loss_train, epoch_log) writer.add_scalar('mIoU_train', mIoU_train, epoch_log) writer.add_scalar('mAcc_train', mAcc_train, epoch_log) writer.add_scalar('allAcc_train', allAcc_train, epoch_log) is_best = False if args.evaluate: loss_val, mIoU_val, mAcc_val, allAcc_val = validate(local_rank, val_loader, model, criterion) if main_process(): writer.add_scalar('loss_val', loss_val, epoch_log) writer.add_scalar('mIoU_val', mIoU_val, epoch_log) writer.add_scalar('mAcc_val', mAcc_val, epoch_log) writer.add_scalar('allAcc_val', allAcc_val, epoch_log) if best_mIoU_val < mIoU_val: is_best = True best_mIoU_val = mIoU_val logger.info('==>The best val mIoU: %.3f' % (best_mIoU_val)) if (epoch_log % args.save_freq == 0) and main_process(): save_checkpoint( { 'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'best_mIoU_val': best_mIoU_val }, is_best, args.save_path ) if is_best: logger.info('Saving checkpoint to:' + args.save_path + '/best.pth with mIoU: ' + str(best_mIoU_val) ) else: logger.info('Saving checkpoint to:' + args.save_path + '/last.pth with mIoU: ' + str(mIoU_val) ) if main_process(): writer.close() # it must close the writer, otherwise it will appear the EOFError! logger.info('==>Training done!\nBest mIoU: %.3f' % (best_mIoU_val))
def main(): global args, logger args = get_parser() if args.test_in_nyu_label_space: args.colors_path = 'nyu/nyu_colors.txt' args.names_path = 'nyu/nyu_names.txt' if args.if_cluster: args.data_root = args.data_root_cluster args.project_path = args.project_path_cluster args.data_config_path = 'data' for key in ['train_list', 'val_list', 'test_list', 'colors_path', 'names_path']: args[key] = os.path.join(args.data_config_path, args[key]) for key in ['save_path', 'model_path', 'save_folder']: args[key] = os.path.join(args.project_path, args[key]) # for key in ['save_path', 'model_path', 'save_folder']: # args[key] = args[key] % args.exp_name check(args) logger = get_logger() os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] gray_folder = os.path.join(args.save_folder, 'gray') color_folder = os.path.join(args.save_folder, 'color') transform_list_test = [] if args.resize: transform_list_test.append(transform.Resize((args.resize_h_test, args.resize_w_test))) transform_list_test += [ transform.Crop([args.test_h, args.test_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ] test_transform = transform.Compose(transform_list_test) test_data = dataset.SemData(split=args.split, data_root=args.data_root, data_list=args.test_list, transform=test_transform, is_master=True, args=args) # test_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=test_transform, is_master=True, args=args) index_start = args.index_start if args.index_step == 0: index_end = len(test_data.data_list) else: index_end = min(index_start + args.index_step, len(test_data.data_list)) test_data.data_list = test_data.data_list[index_start:index_end] test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=args.workers, pin_memory=True) colors = np.loadtxt(args.colors_path).astype('uint8') names = [line.rstrip('\n') for line in open(args.names_path)] args.read_image = test_data.read_image if not args.has_prediction: if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False) elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, pretrained=False) logger.info(model) model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = True if os.path.isfile(args.model_path): logger.info("=> loading checkpoint '{}'".format(args.model_path)) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict'], strict=True) logger.info("=> loaded checkpoint '{}'".format(args.model_path)) else: raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path)) pred_path_list, target_path_list = test(test_loader, test_data.data_list, model, args.classes, mean, std, args.base_size, args.test_h, args.test_w, args.scales, gray_folder, color_folder, colors) if args.split != 'test' or (args.split == 'test' and args.test_has_gt): cal_acc(test_data.data_list, gray_folder, args.classes, names, pred_path_list=pred_path_list, target_path_list=target_path_list)
def main(): global args, logger args = get_parser() check(args) logger = get_logger() os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] test_h = int(args.base_h * args.scale + 1) test_w = int(args.base_w * args.scale + 1) if args.teacher_model_path: kd_path = 'alpha_' + str(args.alpha) + '_Temp_' + str(args.temperature) kd_save = kd_path + '/val/ss' args.save_folder = os.path.join(args.save_folder, kd_save) args.save_path = os.path.join(args.save_path, kd_path) args.model_path = os.path.join(args.save_path, args.model_path) gray_folder = os.path.join(args.save_folder, 'gray') color_folder = os.path.join(args.save_folder, 'color') test_transform = transform.Compose([transform.ToTensor()]) test_data = dataset.SemData(split='test', data_root=args.data_root, data_list=args.test_list, transform=test_transform) index_start = args.index_start if args.index_step == 0: index_end = len(test_data.data_list) else: index_end = min(index_start + args.index_step, len(test_data.data_list)) test_data.data_list = test_data.data_list[index_start:index_end] test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=4, pin_memory=True) colors = np.loadtxt(args.colors_path).astype('uint8') names = [line.rstrip('\n') for line in open(args.names_path)] if not args.has_prediction: if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor) elif args.arch == 'nonlocal': from model.nonlocal_net import Nonlocal model = Nonlocal(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor) elif args.arch == 'sanet': from model.sanet import SANet model = SANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor) elif args.arch == 'bise_v1': from model.bisenet_v1 import BiseNet model = BiseNet(layers=args.layers, classes=args.classes, with_sp=args.with_sp) elif args.arch == 'fanet': from model.fanet import FANet model = FANet(layers=args.layers, classes=args.classes) logger.info(model) model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = True if os.path.isfile(args.model_path): logger.info("=> loading checkpoint '{}'".format(args.model_path)) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict']) logger.info("=> loaded checkpoint '{}'".format(args.model_path)) else: raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path)) test(test_loader, test_data.data_list, model, args.classes, mean, std, args.base_size, test_h, test_w, args.scales, gray_folder, color_folder, colors) if args.split != 'test': cal_acc(test_data.data_list, gray_folder, args.classes, names)
def main_worker(gpu, ngpus_per_node, argss): global args args = argss if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label) if args.arch == 'psp': from model.pspnet import PSPNet model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, criterion=criterion, args=args) modules_ori = [ model.layer0, model.layer1, model.layer2, model.layer3, model.layer4 ] modules_new = [model.ppm, model.cls, model.aux] elif args.arch == 'psa': from model.psanet import PSANet model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, psa_type=args.psa_type, compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, criterion=criterion) modules_ori = [ model.layer0, model.layer1, model.layer2, model.layer3, model.layer4 ] modules_new = [model.psa, model.cls, model.aux] params_list = [] for module in modules_ori: params_list.append(dict(params=module.parameters(), lr=args.base_lr)) for module in modules_new: params_list.append( dict(params=module.parameters(), lr=args.base_lr * 10)) args.index_split = 5 optimizer = torch.optim.SGD(params_list, lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) if args.sync_bn: model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if main_process(): global logger, writer logger = get_logger() writer = SummaryWriter(args.save_path) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) logger.info(model) else: logger = None if args.distributed: torch.cuda.set_device(gpu) args.batch_size = int(args.batch_size / ngpus_per_node) args.batch_size_val = int(args.batch_size_val / ngpus_per_node) args.workers = int( (args.workers + ngpus_per_node - 1) / ngpus_per_node) model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu]) else: model = torch.nn.DataParallel(model.cuda()) if args.weight: if os.path.isfile(args.weight): if main_process(): logger.info("=> loading weight '{}'".format(args.weight)) checkpoint = torch.load(args.weight) model.load_state_dict(checkpoint['state_dict']) if main_process(): logger.info("=> loaded weight '{}'".format(args.weight)) else: if main_process(): logger.info("=> no weight found at '{}'".format(args.weight)) if args.resume != 'none': if os.path.isfile(args.resume): if main_process(): logger.info("=> loading checkpoint '{}'".format(args.resume)) # checkpoint = torch.load(args.resume) checkpoint = torch.load( args.resume, map_location=lambda storage, loc: storage.cuda()) args.start_epoch = checkpoint['epoch'] # model.load_state_dict(checkpoint['state_dict']) # optimizer.load_state_dict(checkpoint['optimizer']) # print(checkpoint['optimizer'].keys()) if args.if_remove_cls: if main_process(): logger.info( '=====!!!!!!!===== Remove cls layer in resuming...') checkpoint['state_dict'] = { x: checkpoint['state_dict'][x] for x in checkpoint['state_dict'].keys() if ('module.cls' not in x and 'module.aux' not in x) } # checkpoint['optimizer'] = {x: checkpoint['optimizer'][x] for x in checkpoint['optimizer'].keys() if ('module.cls' not in x and 'module.aux' not in x)} # if main_process(): # print('----', checkpoint['state_dict'].keys()) # print('----', checkpoint['optimizer'].keys()) # print('----1', checkpoint['optimizer']['state'].keys()) model.load_state_dict(checkpoint['state_dict'], strict=False) if not args.if_remove_cls: optimizer.load_state_dict(checkpoint['optimizer']) if main_process(): logger.info("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: if main_process(): logger.info("=> no checkpoint found at '{}'".format( args.resume)) value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] transform_list_train = [] if args.resize: transform_list_train.append( transform.Resize((args.resize_h, args.resize_w))) transform_list_train += [ transform.RandScale([args.scale_min, args.scale_max]), transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label), transform.RandomGaussianBlur(), transform.RandomHorizontalFlip(), transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ] train_transform = transform.Compose(transform_list_train) train_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.train_list, transform=train_transform, logger=logger, is_master=main_process(), args=args) if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_data) else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) if args.evaluate: transform_list_val = [] if args.resize: transform_list_val.append( transform.Resize((args.resize_h, args.resize_w))) transform_list_val += [ transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ] val_transform = transform.Compose(transform_list_val) val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=val_transform, is_master=main_process(), args=args) args.read_image = val_data.read_image if args.distributed: val_sampler = torch.utils.data.distributed.DistributedSampler( val_data) else: val_sampler = None val_loader = torch.utils.data.DataLoader( val_data, batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler) for epoch in range(args.start_epoch, args.epochs): epoch_log = epoch + 1 # if args.evaluate and args.val_every_iter == -1: # # logger.info('Validating.....') # loss_val, mIoU_val, mAcc_val, allAcc_val, return_dict = validate(val_loader, model, criterion, args) # if main_process(): # writer.add_scalar('VAL/loss_val', loss_val, epoch_log) # writer.add_scalar('VAL/mIoU_val', mIoU_val, epoch_log) # writer.add_scalar('VAL/mAcc_val', mAcc_val, epoch_log) # writer.add_scalar('VAL/allAcc_val', allAcc_val, epoch_log) # for sample_idx in range(len(return_dict['image_name_list'])): # writer.add_text('VAL-image_name/%d'%sample_idx, return_dict['image_name_list'][sample_idx], epoch) # writer.add_image('VAL-image/%d'%sample_idx, return_dict['im_list'][sample_idx], epoch, dataformats='HWC') # writer.add_image('VAL-color_label/%d'%sample_idx, return_dict['color_GT_list'][sample_idx], epoch, dataformats='HWC') # writer.add_image('VAL-color_pred/%d'%sample_idx, return_dict['color_pred_list'][sample_idx], epoch, dataformats='HWC') if args.distributed: train_sampler.set_epoch(epoch) loss_train, mIoU_train, mAcc_train, allAcc_train = train( train_loader, model, optimizer, epoch, epoch_log, val_loader, criterion) if main_process(): writer.add_scalar('TRAIN/loss_train', loss_train, epoch_log) writer.add_scalar('TRAIN/mIoU_train', mIoU_train, epoch_log) writer.add_scalar('TRAIN/mAcc_train', mAcc_train, epoch_log) writer.add_scalar('TRAIN/allAcc_train', allAcc_train, epoch_log)
def main(): global args criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label) model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, criterion=criterion, pretrained=args.pretrained, naive_ppm=args.naive_ppm) # set diffrent learning rate on different part of models modules_ori = [ model.layer0, model.layer1, model.layer2, model.layer3, model.layer4 ] modules_new = [model.ppm, model.cls_head, model.aux_head] params_list = [] for module in modules_ori: params_list.append(dict(params=module.parameters(), lr=args.base_lr)) for module in modules_new: params_list.append( dict(params=module.parameters(), lr=args.base_lr * 10)) args.index_split = 5 optimizer = torch.optim.SGD(params_list, lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) global logger, writer logger = get_logger() writer = SummaryWriter(args.save_path) logger.info(args) logger.info("=> creating model ...") logger.info("Classes: {}".format(args.classes)) logger.info(model) model = model.cuda() model = torch.nn.DataParallel(model).cuda() if args.weight: if os.path.isfile(args.weight): logger.info("=> loading weight '{}'".format(args.weight)) checkpoint = torch.load(args.weight) model.load_state_dict(checkpoint['state_dict']) logger.info("=> loaded weight '{}'".format(args.weight)) else: logger.info("=> no weight found at '{}'".format(args.weight)) if args.resume: if os.path.isfile(args.resume): logger.info("=> loading checkpoint '{}'".format(args.resume)) # checkpoint = torch.load(args.resume) checkpoint = torch.load( args.resume, map_location=lambda storage, loc: storage.cuda()) args.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) logger.info("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: logger.info("=> no checkpoint found at '{}'".format(args.resume)) # image pre-processing and augmentation value_scale = 255 mean = [0.485, 0.456, 0.406] mean = [item * value_scale for item in mean] std = [0.229, 0.224, 0.225] std = [item * value_scale for item in std] train_transform = transform.Compose([ transform.Resize((args.train_h, args.train_w)), # augmentation transform.RandScale([args.scale_min, args.scale_max]), transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label), transform.RandomGaussianBlur(), transform.RandomHorizontalFlip(), transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ]) # initialize dataloader train_data = dataset.SemData(split='trainval', transform=train_transform) train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) if args.evaluate: val_transform = transform.Compose([ transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), transform.ToTensor(), transform.Normalize(mean=mean, std=std) ]) val_data = dataset.SemData(split='test', transform=val_transform) val_sampler = None val_loader = torch.utils.data.DataLoader( val_data, batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler) # start training logger.info('Starting training.') for epoch in range(args.start_epoch, args.epochs): epoch_log = epoch + 1 loss_train, mIoU_train, mAcc_train, allAcc_train = train( train_loader, model, optimizer, epoch) writer.add_scalar('loss/train', loss_train, epoch_log) writer.add_scalar('mIoU/train', mIoU_train, epoch_log) writer.add_scalar('mAcc/train', mAcc_train, epoch_log) writer.add_scalar('allAcc/train', allAcc_train, epoch_log) if args.evaluate: loss_val, mIoU_val, mAcc_val, allAcc_val = validate( val_loader, model, criterion) writer.add_scalar('loss/val', loss_val, epoch_log) writer.add_scalar('mIoU/val', mIoU_val, epoch_log) writer.add_scalar('mAcc/val', mAcc_val, epoch_log) writer.add_scalar('allAcc/val', allAcc_val, epoch_log) if (epoch_log % args.save_freq == 0): filename = args.save_path + '/train_epoch_' + str( epoch_log) + '.pth' logger.info('Saving checkpoint to: ' + filename) torch.save( { 'epoch': epoch_log, 'state_dict': model.module.state_dict(), 'optimizer': optimizer.state_dict() }, filename)