def main():
    global args, logger
    args = get_parser('config/cod_mgl50.yaml')
    check(args)
    logger = get_logger()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu)
    logger.info(args)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))

    # ImageNet mean/std scaled to the 0-255 value range.
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    # Save predictions under a date-stamped folder.
    date_str = str(datetime.datetime.now().date())
    save_folder = args.save_folder + '/' + date_str
    check_makedirs(save_folder)
    cod_folder = os.path.join(save_folder, 'cod')
    coee_folder = os.path.join(save_folder, 'coee')

    test_transform = transform.Compose([
        transform.Resize((args.test_h, args.test_w)),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])
    test_data = dataset.SemData(split=args.split, data_root=args.data_root,
                                data_list=args.test_list, transform=test_transform)

    # Optionally restrict the test list to a [index_start, index_end) slice.
    index_start = args.index_start
    if args.index_step == 0:
        index_end = len(test_data.data_list)
    else:
        index_end = min(index_start + args.index_step, len(test_data.data_list))
    test_data.data_list = test_data.data_list[index_start:index_end]
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.test_batch_size,
                                              shuffle=False, num_workers=args.workers,
                                              pin_memory=True)

    if not args.has_prediction:
        if args.arch == 'mgl':
            from model.mglnet import MGLNet
            model = MGLNet(layers=args.layers, classes=args.classes,
                           zoom_factor=args.zoom_factor, pretrained=False, args=args)
        # logger.info(model)
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True
        if os.path.isfile(args.model_path):
            logger.info("=> loading checkpoint '{}'".format(args.model_path))
            checkpoint = torch.load(args.model_path, map_location='cuda:0')
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            logger.info("=> loaded checkpoint '{}', epoch {}".format(args.model_path, checkpoint['epoch']))
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path))
        test(test_loader, test_data.data_list, model, cod_folder, coee_folder)
    if args.split != 'test':
        calc_acc(test_data.data_list, cod_folder, coee_folder)
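# The test pipeline above calls check_makedirs() to create the date-stamped output folders,
# but that helper is not shown in this section. A minimal sketch of what it presumably does
# (an assumption, not necessarily the repo's exact implementation):
import os

def check_makedirs(dir_name):
    # Create the directory (including parents) if it does not exist yet.
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name, exist_ok=True)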
def main_worker(gpu, ngpus_per_node, argss):
    global args
    args = argss
    BatchNorm = nn.BatchNorm2d
    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label)
    model = PFENet(layers=args.layers, classes=2, zoom_factor=8,
                   criterion=nn.CrossEntropyLoss(ignore_index=255), BatchNorm=BatchNorm,
                   pretrained=True, shot=args.shot, ppm_scales=args.ppm_scales, vgg=args.vgg)

    global logger, writer
    logger = get_logger()
    writer = SummaryWriter(args.save_path)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))
    logger.info(model)
    print(args)

    model = torch.nn.DataParallel(model.cuda())

    if args.weight:
        if os.path.isfile(args.weight):
            logger.info("=> loading weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            logger.info("=> no weight found at '{}'".format(args.weight))

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    assert args.split in [0, 1, 2, 3, 999]
    if args.resized_val:
        val_transform = transform.Compose([
            transform.Resize(size=args.val_size),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)])
    else:
        val_transform = transform.Compose([
            transform.test_Resize(size=args.val_size),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)])
    val_data = dataset.SemData(split=args.split, shot=args.shot, data_root=args.data_root,
                               data_list=args.val_list, transform=val_transform, mode='val',
                               use_coco=args.use_coco, use_split_coco=args.use_split_coco)
    val_sampler = None
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val,
                                             shuffle=False, num_workers=args.workers,
                                             pin_memory=True, sampler=val_sampler)
    loss_val, mIoU_val, mAcc_val, allAcc_val, class_miou = validate(val_loader, model, criterion)
def main_worker(gpu, ngpus_per_node, argss):
    global args
    args = argss
    BatchNorm = nn.BatchNorm2d
    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label)
    model = eval(args.arch).Model(args)

    # Freeze the backbone; only the newly added modules are trained.
    for param in model.layer0.parameters():
        param.requires_grad = False
    for param in model.layer1.parameters():
        param.requires_grad = False
    for param in model.layer2.parameters():
        param.requires_grad = False
    for param in model.layer3.parameters():
        param.requires_grad = False
    for param in model.layer4.parameters():
        param.requires_grad = False
    optimizer = model._optimizer(args)

    global logger, writer
    logger = get_logger()
    writer = SummaryWriter(args.save_path)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))
    logger.info(model)
    print(args)
    model = torch.nn.DataParallel(model.cuda(), device_ids=[0])

    if args.weight:
        if os.path.isfile(args.weight):
            logger.info("=> loading weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            logger.info("=> no weight found at '{}'".format(args.weight))

    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    assert args.split in [0, 1, 2, 3, 999]
    train_transform = [
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.padding_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.padding_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)]
    train_transform = transform.Compose(train_transform)
    train_data = dataset.SemData(split=args.split, shot=args.shot, max_sp=args.max_sp, data_root=args.data_root,
                                 data_list=args.train_list, transform=train_transform, mode='train',
                                 use_coco=args.use_coco, use_split_coco=args.use_split_coco)
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None), num_workers=args.workers,
                                               pin_memory=True, sampler=train_sampler, drop_last=True)

    if args.evaluate:
        if args.resized_val:
            val_transform = transform.Compose([
                transform.Resize(size=args.val_size),
                transform.ToTensor(),
                transform.Normalize(mean=mean, std=std)])
        else:
            val_transform = transform.Compose([
                transform.test_Resize(size=args.val_size),
                transform.ToTensor(),
                transform.Normalize(mean=mean, std=std)])
        val_data = dataset.SemData(split=args.split, shot=args.shot, max_sp=args.max_sp, data_root=args.data_root,
                                   data_list=args.val_list, transform=val_transform, mode='val',
                                   use_coco=args.use_coco, use_split_coco=args.use_split_coco)
        val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    max_iou = 0.
    filename = 'ASGNet.pth'

    for epoch in range(args.start_epoch, args.epochs):
        if args.fix_random_seed_val:
            torch.cuda.manual_seed(args.manual_seed + epoch)
            np.random.seed(args.manual_seed + epoch)
            torch.manual_seed(args.manual_seed + epoch)
            torch.cuda.manual_seed_all(args.manual_seed + epoch)
            random.seed(args.manual_seed + epoch)

        epoch_log = epoch + 1
        loss_train, aux_loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch)
        if main_process():
            writer.add_scalar('loss_train', loss_train, epoch_log)
            writer.add_scalar('aux_loss_train', aux_loss_train, epoch_log)
            writer.add_scalar('mIoU_train', mIoU_train, epoch_log)
            writer.add_scalar('mAcc_train', mAcc_train, epoch_log)
            writer.add_scalar('allAcc_train', allAcc_train, epoch_log)

        if args.evaluate and (epoch % 2 == 0 or (args.epochs <= 50 and epoch % 1 == 0)):
            loss_val, mIoU_val, mAcc_val, allAcc_val, class_miou = validate(val_loader, model, criterion)
            if main_process():
                writer.add_scalar('loss_val', loss_val, epoch_log)
                writer.add_scalar('mIoU_val', mIoU_val, epoch_log)
                writer.add_scalar('mAcc_val', mAcc_val, epoch_log)
                writer.add_scalar('class_miou_val', class_miou, epoch_log)
                writer.add_scalar('allAcc_val', allAcc_val, epoch_log)
            if class_miou > max_iou:
                # Keep only the best checkpoint: remove the previous best before saving the new one.
                max_iou = class_miou
                if os.path.exists(filename):
                    os.remove(filename)
                filename = args.save_path + '/train_epoch_' + str(epoch) + '_' + str(max_iou) + '.pth'
                logger.info('Saving checkpoint to: ' + filename)
                torch.save({'epoch': epoch, 'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict()}, filename)

    filename = args.save_path + '/final.pth'
    logger.info('Saving checkpoint to: ' + filename)
    torch.save({'epoch': args.epochs, 'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()}, filename)
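# main_process() is called by the logging and checkpointing code above but is not defined in
# this section. A plausible minimal definition for this single-process, DataParallel setup
# (an assumption; distributed variants typically also check the process rank):
def main_process():
    # With DataParallel there is only one Python process, so it is always the main one.
    return True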
def main_worker(gpu, ngpus_per_node, argss):
    global args
    args = argss
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label)
    if args.arch == 'psp':
        from model.pspnet import PSPNet
        model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                       criterion=criterion, args=args)
        modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
        modules_new = [model.ppm, model.cls, model.aux]
    elif args.arch == 'psa':
        from model.psanet import PSANet
        model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                       psa_type=args.psa_type, compact=args.compact, shrink_factor=args.shrink_factor,
                       mask_h=args.mask_h, mask_w=args.mask_w,
                       normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax,
                       criterion=criterion)
        modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
        modules_new = [model.psa, model.cls, model.aux]

    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10))
    args.index_split = 5
    optimizer = torch.optim.SGD(params_list, lr=args.base_lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.sync_bn:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if main_process():
        global logger, writer
        logger = get_logger()
        writer = SummaryWriter(args.save_path)
        logger.info(args)
        logger.info("=> creating model ...")
        logger.info("Classes: {}".format(args.classes))
        logger.info(model)
    else:
        logger = None

    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.batch_size_val = int(args.batch_size_val / ngpus_per_node)
        args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu])
    else:
        model = torch.nn.DataParallel(model.cuda())

    if args.weight:
        if os.path.isfile(args.weight):
            if main_process():
                logger.info("=> loading weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            if main_process():
                logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            if main_process():
                logger.info("=> no weight found at '{}'".format(args.weight))

    if args.resume != 'none':
        if os.path.isfile(args.resume):
            if main_process():
                logger.info("=> loading checkpoint '{}'".format(args.resume))
            # checkpoint = torch.load(args.resume)
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_epoch = checkpoint['epoch']
            # model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            # print(checkpoint['optimizer'].keys())
            if args.if_remove_cls:
                if main_process():
                    logger.info('=====!!!!!!!===== Remove cls layer in resuming...')
                checkpoint['state_dict'] = {
                    x: checkpoint['state_dict'][x] for x in checkpoint['state_dict'].keys()
                    if ('module.cls' not in x and 'module.aux' not in x)}
                # checkpoint['optimizer'] = {x: checkpoint['optimizer'][x] for x in checkpoint['optimizer'].keys()
                #                            if ('module.cls' not in x and 'module.aux' not in x)}
            # if main_process():
            #     print('----', checkpoint['state_dict'].keys())
            #     print('----', checkpoint['optimizer'].keys())
            #     print('----1', checkpoint['optimizer']['state'].keys())
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            if not args.if_remove_cls:
                optimizer.load_state_dict(checkpoint['optimizer'])
            if main_process():
                logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            if main_process():
                logger.info("=> no checkpoint found at '{}'".format(args.resume))

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    transform_list_train = []
    if args.resize:
        transform_list_train.append(transform.Resize((args.resize_h, args.resize_w)))
    transform_list_train += [
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ]
    train_transform = transform.Compose(transform_list_train)
    train_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.train_list,
                                 transform=train_transform, logger=logger,
                                 is_master=main_process(), args=args)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None), num_workers=args.workers,
                                               pin_memory=True, sampler=train_sampler, drop_last=True)

    if args.evaluate:
        transform_list_val = []
        if args.resize:
            transform_list_val.append(transform.Resize((args.resize_h, args.resize_w)))
        transform_list_val += [
            transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)
        ]
        val_transform = transform.Compose(transform_list_val)
        val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list,
                                   transform=val_transform, is_master=main_process(), args=args)
        args.read_image = val_data.read_image
        if args.distributed:
            val_sampler = torch.utils.data.distributed.DistributedSampler(val_data)
        else:
            val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    for epoch in range(args.start_epoch, args.epochs):
        epoch_log = epoch + 1
        # if args.evaluate and args.val_every_iter == -1:
        #     # logger.info('Validating.....')
        #     loss_val, mIoU_val, mAcc_val, allAcc_val, return_dict = validate(val_loader, model, criterion, args)
        #     if main_process():
        #         writer.add_scalar('VAL/loss_val', loss_val, epoch_log)
        #         writer.add_scalar('VAL/mIoU_val', mIoU_val, epoch_log)
        #         writer.add_scalar('VAL/mAcc_val', mAcc_val, epoch_log)
        #         writer.add_scalar('VAL/allAcc_val', allAcc_val, epoch_log)
        #         for sample_idx in range(len(return_dict['image_name_list'])):
        #             writer.add_text('VAL-image_name/%d' % sample_idx, return_dict['image_name_list'][sample_idx], epoch)
        #             writer.add_image('VAL-image/%d' % sample_idx, return_dict['im_list'][sample_idx], epoch, dataformats='HWC')
        #             writer.add_image('VAL-color_label/%d' % sample_idx, return_dict['color_GT_list'][sample_idx], epoch, dataformats='HWC')
        #             writer.add_image('VAL-color_pred/%d' % sample_idx, return_dict['color_pred_list'][sample_idx], epoch, dataformats='HWC')
        if args.distributed:
            train_sampler.set_epoch(epoch)
        loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch,
                                                                 epoch_log, val_loader, criterion)
        if main_process():
            writer.add_scalar('TRAIN/loss_train', loss_train, epoch_log)
            writer.add_scalar('TRAIN/mIoU_train', mIoU_train, epoch_log)
            writer.add_scalar('TRAIN/mAcc_train', mAcc_train, epoch_log)
            writer.add_scalar('TRAIN/allAcc_train', allAcc_train, epoch_log)
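# main_worker(gpu, ngpus_per_node, argss) above is written to be run once per GPU. A minimal
# launcher sketch using torch.multiprocessing.spawn (an assumption about the surrounding
# main(); the exact handling of args.world_size and args.train_gpu in the repo may differ):
import torch
import torch.multiprocessing as mp

def launch_training(args):
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # One process per GPU; world_size becomes the total process count across nodes.
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        main_worker(args.train_gpu, ngpus_per_node, args)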
def main():
    global args, logger
    args = get_parser()
    if args.test_in_nyu_label_space:
        args.colors_path = 'nyu/nyu_colors.txt'
        args.names_path = 'nyu/nyu_names.txt'
    if args.if_cluster:
        args.data_root = args.data_root_cluster
        args.project_path = args.project_path_cluster
        args.data_config_path = 'data'
    for key in ['train_list', 'val_list', 'test_list', 'colors_path', 'names_path']:
        args[key] = os.path.join(args.data_config_path, args[key])
    for key in ['save_path', 'model_path', 'save_folder']:
        args[key] = os.path.join(args.project_path, args[key])
    # for key in ['save_path', 'model_path', 'save_folder']:
    #     args[key] = args[key] % args.exp_name
    check(args)
    logger = get_logger()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu)
    logger.info(args)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    gray_folder = os.path.join(args.save_folder, 'gray')
    color_folder = os.path.join(args.save_folder, 'color')

    transform_list_test = []
    if args.resize:
        transform_list_test.append(transform.Resize((args.resize_h_test, args.resize_w_test)))
    transform_list_test += [
        transform.Crop([args.test_h, args.test_w], crop_type='center', padding=mean, ignore_label=args.ignore_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ]
    test_transform = transform.Compose(transform_list_test)
    test_data = dataset.SemData(split=args.split, data_root=args.data_root, data_list=args.test_list,
                                transform=test_transform, is_master=True, args=args)
    # test_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list,
    #                             transform=test_transform, is_master=True, args=args)

    index_start = args.index_start
    if args.index_step == 0:
        index_end = len(test_data.data_list)
    else:
        index_end = min(index_start + args.index_step, len(test_data.data_list))
    test_data.data_list = test_data.data_list[index_start:index_end]
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False,
                                              num_workers=args.workers, pin_memory=True)

    colors = np.loadtxt(args.colors_path).astype('uint8')
    names = [line.rstrip('\n') for line in open(args.names_path)]
    args.read_image = test_data.read_image

    if not args.has_prediction:
        if args.arch == 'psp':
            from model.pspnet import PSPNet
            model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False)
        elif args.arch == 'psa':
            from model.psanet import PSANet
            model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                           compact=args.compact, shrink_factor=args.shrink_factor,
                           mask_h=args.mask_h, mask_w=args.mask_w,
                           normalization_factor=args.normalization_factor,
                           psa_softmax=args.psa_softmax, pretrained=False)
        logger.info(model)
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True
        if os.path.isfile(args.model_path):
            logger.info("=> loading checkpoint '{}'".format(args.model_path))
            checkpoint = torch.load(args.model_path)
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            logger.info("=> loaded checkpoint '{}'".format(args.model_path))
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path))
        pred_path_list, target_path_list = test(test_loader, test_data.data_list, model, args.classes,
                                                mean, std, args.base_size, args.test_h, args.test_w,
                                                args.scales, gray_folder, color_folder, colors)
    if args.split != 'test' or (args.split == 'test' and args.test_has_gt):
        cal_acc(test_data.data_list, gray_folder, args.classes, names,
                pred_path_list=pred_path_list, target_path_list=target_path_list)
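# The colors array loaded above is a (num_classes, 3) uint8 palette read from colors_path.
# A minimal sketch of turning a grayscale class-index map into a color image with that
# palette (an assumption: the repo's own colorize utility may differ in detail):
import numpy as np
from PIL import Image

def colorize(gray, palette):
    # gray: HxW array of class indices; palette: (num_classes, 3) uint8 RGB colors.
    color = Image.fromarray(gray.astype(np.uint8)).convert('P')
    color.putpalette(palette.flatten().tolist())
    return color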
def main():
    global args
    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label)
    model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                   criterion=criterion, pretrained=args.pretrained, naive_ppm=args.naive_ppm)

    # set different learning rates on different parts of the model
    modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
    modules_new = [model.ppm, model.cls_head, model.aux_head]
    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10))
    args.index_split = 5
    optimizer = torch.optim.SGD(params_list, lr=args.base_lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    global logger, writer
    logger = get_logger()
    writer = SummaryWriter(args.save_path)
    logger.info(args)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))
    logger.info(model)

    model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()

    if args.weight:
        if os.path.isfile(args.weight):
            logger.info("=> loading weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            logger.info("=> no weight found at '{}'".format(args.weight))

    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            # checkpoint = torch.load(args.resume)
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    # image pre-processing and augmentation
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    train_transform = transform.Compose([
        transform.Resize((args.train_h, args.train_w)),
        # augmentation
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])

    # initialize dataloader
    train_data = dataset.SemData(split='trainval', transform=train_transform)
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None), num_workers=args.workers,
                                               pin_memory=True, sampler=train_sampler, drop_last=True)
    if args.evaluate:
        val_transform = transform.Compose([
            transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)
        ])
        val_data = dataset.SemData(split='test', transform=val_transform)
        val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    # start training
    logger.info('Starting training.')
    for epoch in range(args.start_epoch, args.epochs):
        epoch_log = epoch + 1
        loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch)
        writer.add_scalar('loss/train', loss_train, epoch_log)
        writer.add_scalar('mIoU/train', mIoU_train, epoch_log)
        writer.add_scalar('mAcc/train', mAcc_train, epoch_log)
        writer.add_scalar('allAcc/train', allAcc_train, epoch_log)

        if args.evaluate:
            loss_val, mIoU_val, mAcc_val, allAcc_val = validate(val_loader, model, criterion)
            writer.add_scalar('loss/val', loss_val, epoch_log)
            writer.add_scalar('mIoU/val', mIoU_val, epoch_log)
            writer.add_scalar('mAcc/val', mAcc_val, epoch_log)
            writer.add_scalar('allAcc/val', allAcc_val, epoch_log)

        if (epoch_log % args.save_freq == 0):
            filename = args.save_path + '/train_epoch_' + str(epoch_log) + '.pth'
            logger.info('Saving checkpoint to: ' + filename)
            torch.save({'epoch': epoch_log,
                        'state_dict': model.module.state_dict(),
                        'optimizer': optimizer.state_dict()}, filename)
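# args.index_split = 5 marks where the backbone parameter groups end in params_list, but the
# learning-rate update that consumes it happens inside train() and is not shown here. A sketch
# of the poly decay commonly paired with this split (an assumption about train(); the function
# names and the power value are illustrative):
def poly_learning_rate(base_lr, curr_iter, max_iter, power=0.9):
    # Polynomially decay the base learning rate over the course of training.
    return base_lr * (1 - float(curr_iter) / max_iter) ** power

def adjust_learning_rate(optimizer, base_lr, curr_iter, max_iter, index_split, power=0.9):
    current_lr = poly_learning_rate(base_lr, curr_iter, max_iter, power=power)
    for index, param_group in enumerate(optimizer.param_groups):
        # Backbone groups (before the split) keep the base rate; the new heads use 10x.
        param_group['lr'] = current_lr if index < index_split else current_lr * 10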
def main():
    global args, logger
    args = get_parser()
    check(args)
    logger = get_logger()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.test_gpu)
    logger.info(args)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    gray_folder = os.path.join(args.save_folder, 'gray')
    color_folder = os.path.join(args.save_folder, 'color')

    test_transform = transform.Compose([
        transform.Resize(args.scale_image),
        transform.ToTensor()
    ])
    test_list = args.test_list
    if (args.split == 'pca'):
        test_list = args.pca_list
    test_data = dataset.SemData(split=args.split, data_root=args.data_root,
                                data_list=test_list, transform=test_transform)
    index_start = args.index_start
    if args.index_step == 0:
        index_end = len(test_data.data_list)
    else:
        index_end = min(index_start + args.index_step, len(test_data.data_list))
    test_data.data_list = test_data.data_list[index_start:index_end]
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False,
                                              num_workers=args.workers, pin_memory=True)
    print(args.save_folder)
    colors = np.loadtxt(args.colors_path).astype('uint8')
    # colors = np.loadtxt('/home/agalex/PycharmProjects/torch_1_3_0/segmentation/dataset/voc2012/voc2012_colors.txt').astype('uint8')
    names = [line.rstrip('\n') for line in open(args.names_path)]
    # names = [line.rstrip('\n') for line in open('/home/agalex/PycharmProjects/torch_1_3_0/segmentation/dataset/voc2012/voc2012_names.txt')]

    if not args.has_prediction:
        if args.arch == 'psp':
            from model.pspnet import PSPNet
            model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False)
        elif args.arch == 'deeplabv3':
            print('deeplab')
            from model.deepLabV3 import DeepLabV3
            model = DeepLabV3(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, pretrained=False)
        elif args.arch == 'psa':
            from model.psanet import PSANet
            model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                           compact=args.compact, shrink_factor=args.shrink_factor,
                           mask_h=args.mask_h, mask_w=args.mask_w,
                           normalization_factor=args.normalization_factor,
                           psa_softmax=args.psa_softmax, pretrained=False)
        logger.info(model)
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True
        if os.path.isfile(args.model_path):
            logger.info("=> loading checkpoint '{}'".format(args.model_path))
            checkpoint = torch.load(args.model_path)
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            logger.info("=> loaded checkpoint '{}'".format(args.model_path))
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path))
        if args.split != 'pca':
            test(test_loader, test_data.data_list, model, args.classes, mean, std, args.base_size,
                 args.test_h, args.test_w, args.scales, gray_folder, color_folder, colors)
        else:
            return pca(test_loader, test_data.data_list, model, args.test_h, args.test_w, mean, std)
    if args.split not in ['test', 'pca']:
        cal_acc(test_data.data_list, gray_folder, args.classes, names, args.scale_image)
def get_dataloder():
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    assert args.split in [0, 1, 2, 3, 999]
    train_transform = [
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.padding_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.padding_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ]
    train_transform = transform.Compose(train_transform)
    train_data = dataset.SemData(split=args.split, shot=args.shot, max_sp=args.max_sp, data_root=args.data_root,
                                 data_list=args.train_list, transform=train_transform, mode='train',
                                 use_coco=args.use_coco, use_split_coco=args.use_split_coco)
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None), num_workers=args.workers,
                                               pin_memory=True, sampler=train_sampler, drop_last=True)

    # Initialize val_loader so the return statement is valid even when evaluation is disabled.
    val_loader = None
    if args.evaluate:
        if args.resized_val:
            val_transform = transform.Compose([
                transform.Resize(size=args.val_size),
                transform.ToTensor(),
                transform.Normalize(mean=mean, std=std)
            ])
        else:
            val_transform = transform.Compose([
                transform.test_Resize(size=args.val_size),
                transform.ToTensor(),
                transform.Normalize(mean=mean, std=std)
            ])
        val_data = dataset.SemData(split=args.split, shot=args.shot, max_sp=args.max_sp, data_root=args.data_root,
                                   data_list=args.val_list, transform=val_transform, mode='val',
                                   use_coco=args.use_coco, use_split_coco=args.use_split_coco)
        val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, sampler=val_sampler)
    return train_loader, val_loader
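# A minimal usage sketch for get_dataloder() (assumptions: the global args has already been
# populated by the config parser used elsewhere in this file, and val_loader is None when
# args.evaluate is False):
def iterate_one_epoch():
    train_loader, val_loader = get_dataloder()
    for batch_idx, batch in enumerate(train_loader):
        # Each batch is one episode sampled by dataset.SemData in 'train' mode.
        pass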
def main_worker(gpu, ngpus_per_node, argss):
    global args
    args = argss
    if args.sync_bn:
        if args.multiprocessing_distributed:
            BatchNorm = apex.parallel.SyncBatchNorm
        else:
            from segmentation.lib.sync_bn.modules import BatchNorm2d
            BatchNorm = BatchNorm2d
    else:
        BatchNorm = nn.BatchNorm2d

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label)
    if args.arch == 'psp':
        from model.pspnet import PSPNet
        model = PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                       criterion=criterion, BatchNorm=BatchNorm)
        modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
        modules_new = [model.ppm, model.cls, model.aux]
    elif args.arch == 'deeplabv3':
        from model.deepLabV3 import DeepLabV3
        model = DeepLabV3(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                          criterion=criterion, BatchNorm=BatchNorm)
        modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
        modules_new = [model.aspp, model.cls, model.aux]
    elif args.arch == 'psa':
        from model.psanet import PSANet
        model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor,
                       psa_type=args.psa_type, compact=args.compact, shrink_factor=args.shrink_factor,
                       mask_h=args.mask_h, mask_w=args.mask_w,
                       normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax,
                       criterion=criterion, BatchNorm=BatchNorm)
        modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
        modules_new = [model.psa, model.cls, model.aux]

    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10))
    args.index_split = 5
    optimizer = torch.optim.SGD(params_list, lr=args.base_lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if main_process():
        global logger, writer
        logger = get_logger()
        writer = SummaryWriter(args.save_path)
        logger.info(args)
        logger.info("=> creating model ...")
        logger.info("Classes: {}".format(args.classes))
        logger.info(model)

    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.batch_size_val = int(args.batch_size_val / ngpus_per_node)
        args.workers = int(args.workers / ngpus_per_node)
        if args.use_apex:
            model, optimizer = apex.amp.initialize(model.cuda(), optimizer, opt_level=args.opt_level,
                                                   keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                                                   loss_scale=args.loss_scale)
            model = apex.parallel.DistributedDataParallel(model)
        else:
            model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu])
    else:
        model = torch.nn.DataParallel(model.cuda())

    if args.weight:
        if os.path.isfile(args.weight):
            if main_process():
                logger.info("=> loading weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            if main_process():
                logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            if main_process():
                logger.info("=> no weight found at '{}'".format(args.weight))

    if args.resume:
        if os.path.isfile(args.resume):
            if main_process():
                logger.info("=> loading checkpoint '{}'".format(args.resume))
            # checkpoint = torch.load(args.resume)
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if main_process():
                logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            if main_process():
                logger.info("=> no checkpoint found at '{}'".format(args.resume))

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    train_transform = transform.Compose([
        transform.Resize(0.8),
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])
    # train_transform = transform.Compose([
    #     transform.Resize((args.train_h, args.train_w)),  # crop_type='rand', padding=mean, ignore_label=args.ignore_label),
    #     transform.ToTensor(),
    #     transform.Normalize(mean=mean, std=std)])
    train_data = dataset.SemData(split='train', data_root=args.data_root,
                                 data_list=args.train_list, transform=train_transform)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None), num_workers=args.workers,
                                               pin_memory=True, sampler=train_sampler, drop_last=True)

    if args.evaluate:
        val_transform = transform.Compose([
            transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label),
            transform.ToTensor(),
            transform.Normalize(mean=mean, std=std)
        ])
        val_data = dataset.SemData(split='val', data_root=args.data_root,
                                   data_list=args.val_list, transform=val_transform)
        if args.distributed:
            val_sampler = torch.utils.data.distributed.DistributedSampler(val_data)
        else:
            val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    for epoch in range(args.start_epoch, args.epochs):
        epoch_log = epoch + 1
        if args.distributed:
            train_sampler.set_epoch(epoch)
        loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch)
        if main_process():
            writer.add_scalar('loss_train', loss_train, epoch_log)
            writer.add_scalar('mIoU_train', mIoU_train, epoch_log)
            writer.add_scalar('mAcc_train', mAcc_train, epoch_log)
            writer.add_scalar('allAcc_train', allAcc_train, epoch_log)

        if (epoch_log % args.save_freq == 0) and main_process():
            filename = args.save_path + '/train_epoch_' + str(epoch_log) + '.pth'
            logger.info('Saving checkpoint to: ' + filename)
            torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()}, filename)
            if epoch_log / args.save_freq > 2:
                deletename = args.save_path + '/train_epoch_' + str(epoch_log - args.save_freq * 2) + '.pth'
                os.remove(deletename)

        if args.evaluate:
            loss_val, mIoU_val, mAcc_val, allAcc_val = validate(val_loader, model, criterion)
            if main_process():
                writer.add_scalar('loss_val', loss_val, epoch_log)
                writer.add_scalar('mIoU_val', mIoU_val, epoch_log)
                writer.add_scalar('mAcc_val', mAcc_val, epoch_log)
                writer.add_scalar('allAcc_val', allAcc_val, epoch_log)
def main_worker(argss):
    global args
    args = argss
    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label)

    # Build the model.
    model = FSSNet(layers=args.layers, classes=2, criterion=nn.CrossEntropyLoss(ignore_index=255),
                   pretrained=True, shot=args.shot, ppm_scales=args.ppm_scales, vgg=args.vgg, FPN=args.FPN)
    # Set up the optimizer for the backbone.
    optimizer = backbone_optimizer(model, args)

    global logger, writer
    logger = get_logger()
    writer = SummaryWriter(args.save_path)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))
    logger.info(model)
    print(args)

    # Move the model to GPU.
    model = model.cuda()

    # Load pretrained weights for fine-tuning or testing.
    if args.weight:
        if os.path.isfile(args.weight):
            logger.info("=> loading weight '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            logger.info("=> no weight found at '{}'".format(args.weight))

    # Resume training from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    # Normalization statistics (ImageNet mean/std scaled to the 0-255 range).
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    assert args.split in [0, 1, 2, 999]

    # Training transform, train data and train loader.
    train_transform = [
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.padding_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.padding_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)]
    train_transform = transform.Compose(train_transform)
    train_data = dataset.SemData(split=args.split, shot=args.shot, normal=args.normal, data_root=args.data_root,
                                 data_list=args.train_list, nom_list=args.trainnom_list,
                                 transform=train_transform, mode='train')
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None), num_workers=args.workers,
                                               pin_memory=True, sampler=train_sampler, drop_last=True)

    # Validation transform, val data and val loader.
    if args.evaluate:
        if args.resized_val:
            val_transform = transform.Compose([
                transform.Resize(size=args.val_size),
                transform.ToTensor(),
                transform.Normalize(mean=mean, std=std)])
        else:
            val_transform = transform.Compose([
                transform.test_Resize(size=args.val_size),
                transform.ToTensor(),
                transform.Normalize(mean=mean, std=std)])
        val_data = dataset.SemData(split=args.split, shot=args.shot, normal=args.normal, data_root=args.data_root,
                                   data_list=args.val_list, nom_list=args.valnom_list,
                                   transform=val_transform, mode='val')
        val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    max_iou = 0.
    max_fbiou = 0
    best_epoch = 0
    filename = 'FSSNet.pth'

    # Train and evaluate epoch by epoch.
    for epoch in range(args.start_epoch, args.epochs):
        # Fix the random seeds used during evaluation.
        if args.fix_random_seed_val:
            torch.cuda.manual_seed(args.manual_seed + epoch)
            np.random.seed(args.manual_seed + epoch)
            torch.manual_seed(args.manual_seed + epoch)
            torch.cuda.manual_seed_all(args.manual_seed + epoch)
            random.seed(args.manual_seed + epoch)

        epoch_log = epoch + 1

        # Train.
        loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch)
        if main_process():
            writer.add_scalar('loss_train', loss_train, epoch_log)
            writer.add_scalar('mIoU_train', mIoU_train, epoch_log)
            writer.add_scalar('mAcc_train', mAcc_train, epoch_log)
            writer.add_scalar('allAcc_train', allAcc_train, epoch_log)

        # Evaluate.
        if args.evaluate and (epoch % 2 == 0 or (args.epochs <= 50 and epoch % 1 == 0)):
            loss_val, mIoU_val, mAcc_val, allAcc_val, class_miou = validate(val_loader, model, criterion)
            if main_process():
                writer.add_scalar('loss_val', loss_val, epoch_log)
                writer.add_scalar('mIoU_val', mIoU_val, epoch_log)
                writer.add_scalar('mAcc_val', mAcc_val, epoch_log)
                writer.add_scalar('class_miou_val', class_miou, epoch_log)
                writer.add_scalar('allAcc_val', allAcc_val, epoch_log)
            if class_miou > max_iou:
                max_iou = class_miou
                best_epoch = epoch
                # Keep only the best checkpoint: remove the previous best before saving the new one.
                if os.path.exists(filename):
                    os.remove(filename)
                filename = args.save_path + '/train_epoch_' + str(epoch) + '_' + str(max_iou) + '.pth'
                logger.info('Saving checkpoint to: ' + filename)
                torch.save({'epoch': epoch, 'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict()}, filename)
            if mIoU_val > max_fbiou:
                max_fbiou = mIoU_val
            logger.info('Best Epoch {:.1f} Best IoU {:.4f} Best FB-IoU {:.4f}'.format(best_epoch, max_iou, max_fbiou))

    filename = args.save_path + '/final.pth'
    logger.info('Saving checkpoint to: ' + filename)
    torch.save({'epoch': args.epochs, 'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()}, filename)
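# The checkpoints saved above store the model state_dict (no DataParallel 'module.' prefix,
# since the model is only moved to GPU), the optimizer state, and the epoch. A minimal reload
# sketch for later evaluation (assumption: the model is rebuilt with the same FSSNet arguments):
import torch

def load_final_checkpoint(model, save_path):
    checkpoint = torch.load(save_path + '/final.pth', map_location=lambda storage, loc: storage.cuda())
    model.load_state_dict(checkpoint['state_dict'])
    return checkpoint['epoch']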