def _do_evaluate(self, runner):
    """Perform evaluation and save checkpoint."""
    # Synchronization of BatchNorm's buffers (running_mean
    # and running_var) is not supported in the DDP of pytorch,
    # which may cause inconsistent performance of models on
    # different ranks, so we broadcast BatchNorm's buffers
    # of rank 0 to the other ranks to avoid this.
    if self.broadcast_bn_buffer:
        model = runner.model
        for name, module in model.named_modules():
            if isinstance(module,
                          _BatchNorm) and module.track_running_stats:
                dist.broadcast(module.running_var, 0)
                dist.broadcast(module.running_mean, 0)

    if not self._should_evaluate(runner):
        return

    tmpdir = self.tmpdir
    if tmpdir is None:
        tmpdir = osp.join(runner.work_dir, '.eval_hook')

    from mmseg.apis import multi_gpu_test
    results = multi_gpu_test(
        runner.model,
        self.dataloader,
        tmpdir=tmpdir,
        gpu_collect=self.gpu_collect)
    if runner.rank == 0:
        print('\n')
        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
        key_score = self.evaluate(runner, results)
        if self.save_best:
            self._save_ckpt(runner, key_score)
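# A minimal, self-contained sketch of the BatchNorm-buffer broadcast used
# above, runnable outside the hook. The name `demo_broadcast_bn_buffers` is
# hypothetical; it assumes torch.distributed has already been initialized
# (e.g. via dist.init_process_group).
import torch
import torch.distributed as dist
from torch.nn.modules.batchnorm import _BatchNorm


def demo_broadcast_bn_buffers(model):
    """Copy rank 0's BN running stats to every other rank."""
    for _, module in model.named_modules():
        if isinstance(module, _BatchNorm) and module.track_running_stats:
            # dist.broadcast mutates the tensor in place on non-zero ranks
            dist.broadcast(module.running_var, 0)
            dist.broadcast(module.running_mean, 0)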
def after_train_iter(self, runner):
    """After train iter hook."""

    ######### zzg
    # extra evaluation schedule: besides the regular interval, also
    # evaluate every 100 iters once training passes a chosen iteration
    def choice_iters(self, runner):
        # return (runner.iter + 1) % self.eval_kwargs['interval2'] == 0 and (runner.iter + 1) >= self.eval_kwargs['iter2']
        # return (runner.iter + 1) % 100 == 0 and (runner.iter + 1) >= 17000
        return (runner.iter + 1) % 100 == 0 and (runner.iter + 1) >= 37000
        # return (runner.iter + 1) % 100 == 0 and (runner.iter + 1) >= 77000

    if not (self.every_n_iters(runner, self.interval)
            or choice_iters(self, runner)):
        return
    ###############################
    # if not self.every_n_iters(runner, self.interval):
    #     return
    from mmseg.apis import multi_gpu_test
    runner.log_buffer.clear()
    results = multi_gpu_test(
        runner.model,
        self.dataloader,
        tmpdir=osp.join(runner.work_dir, '.eval_hook'),
        gpu_collect=self.gpu_collect)
    if runner.rank == 0:
        print('\n')
        self.evaluate(runner, results)
def after_train_iter(self, runner):
    """After train iter hook."""
    if not self.every_n_iters(runner, self.interval):
        return
    from mmseg.apis import multi_gpu_test
    runner.log_buffer.clear()
    results = multi_gpu_test(
        runner.model,
        self.dataloader,
        tmpdir=osp.join(runner.work_dir, '.eval_hook'),
        gpu_collect=self.gpu_collect)
    if runner.rank == 0:
        print('\n')
        self.evaluate(runner, results)
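# For reference, a minimal sketch of the interval check used above; this
# mirrors mmcv's Hook.every_n_iters semantics (runner.iter is 0-indexed,
# so the hook fires on iterations n, 2n, 3n, ...):
def every_n_iters_sketch(runner, n):
    """True every n-th iteration; never True when n <= 0."""
    return (runner.iter + 1) % n == 0 if n > 0 else False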
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init the logger before other steps
    logger = None
    if args.eval:
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        log_file = osp.join(cfg.work_dir, f'test_{timestamp}.log')
        logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)
        if logger is not None:
            logger.info(f'Set random seed to {args.seed}, deterministic: '
                        f'{args.deterministic}')
        else:
            print(f'Set random seed to {args.seed}, deterministic: '
                  f'{args.deterministic}')

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.val, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_segmentor(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    model.CLASSES = checkpoint['meta']['CLASSES']
    model.PALETTE = checkpoint['meta']['PALETTE']

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show,
                                  args.show_dir)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, logger, **kwargs)
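# Hypothetical invocation of the script above. The flag spellings are taken
# from the assert message it prints; the script path and config/checkpoint
# paths are placeholders:
#
#   python tools/test.py configs/example_config.py checkpoints/example.pth \
#       --eval mIoU --out results.pkl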
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    model.CLASSES = checkpoint['meta']['CLASSES']
    model.PALETTE = checkpoint['meta']['PALETTE']

    efficient_test = False
    if args.eval_options is not None:
        efficient_test = args.eval_options.get('efficient_test', False)

    if not distributed:
        # for concatenated (multi-)image input
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test_multi(model, data_loader, args.show,
                                        args.show_dir,
                                        args.show_original_dir,
                                        efficient_test)
    else:
        # multi-image input is currently not supported here
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect, efficient_test)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, **kwargs)
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    rank, _ = get_dist_info()
    # allows not to create the work_dir when it is not needed
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        print('"CLASSES" not found in meta, use dataset.CLASSES instead')
        model.CLASSES = dataset.CLASSES
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    else:
        print('"PALETTE" not found in meta, use dataset.PALETTE instead')
        model.PALETTE = dataset.PALETTE

    # clean gpu memory when starting a new evaluation.
    torch.cuda.empty_cache()
    eval_kwargs = {} if args.eval_options is None else args.eval_options

    # Deprecated
    efficient_test = eval_kwargs.get('efficient_test', False)
    if efficient_test:
        warnings.warn(
            '``efficient_test=True`` does not have effect in tools/test.py, '
            'the evaluation and format results are CPU memory efficient by '
            'default')

    eval_on_format_results = (
        args.eval is not None and 'cityscapes' in args.eval)
    if eval_on_format_results:
        assert len(args.eval) == 1, 'eval on format results is not ' \
                                    'applicable for metrics other than ' \
                                    'cityscapes'
    if args.format_only or eval_on_format_results:
        if 'imgfile_prefix' in eval_kwargs:
            tmpdir = eval_kwargs['imgfile_prefix']
        else:
            tmpdir = '.format_cityscapes'
            eval_kwargs.setdefault('imgfile_prefix', tmpdir)
        mmcv.mkdir_or_exist(tmpdir)
    else:
        tmpdir = None

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        results = single_gpu_test(
            model,
            data_loader,
            args.show,
            args.show_dir,
            False,
            args.opacity,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        results = multi_gpu_test(
            model,
            data_loader,
            args.tmpdir,
            args.gpu_collect,
            False,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            warnings.warn(
                'The behavior of ``args.out`` has been changed since MMSeg '
                'v0.16, the pickled outputs could be seg map as type of '
                'np.array, pre-eval results or file paths for '
                '``dataset.format_results()``.')
            print(f'\nwriting results to {args.out}')
            mmcv.dump(results, args.out)
        if args.eval:
            eval_kwargs.update(metric=args.eval)
            metric = dataset.evaluate(results, **eval_kwargs)
            metric_dict = dict(config=args.config, metric=metric)
            if args.work_dir is not None and rank == 0:
                mmcv.dump(metric_dict, json_file, indent=4)
        if tmpdir is not None and eval_on_format_results:
            # remove tmp dir when cityscapes evaluation is done
            shutil.rmtree(tmpdir)
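# The pre_eval/format_only flags passed to single_gpu_test/multi_gpu_test
# above reduce to a small truth table; a hypothetical helper (not part of
# mmseg) making the selection explicit:
def select_test_mode(eval_metrics, format_only):
    """Return (pre_eval, format_results) flags for the test runners."""
    eval_on_format_results = (eval_metrics is not None
                              and 'cityscapes' in eval_metrics)
    # pre-eval path: any metric except cityscapes' format-based evaluation
    pre_eval = eval_metrics is not None and not eval_on_format_results
    # format path: explicit --format-only, or cityscapes evaluation
    format_results = format_only or eval_on_format_results
    return pre_eval, format_results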
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    if args.gpu_id is not None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        cfg.gpu_ids = [args.gpu_id]
        distributed = False
        if len(cfg.gpu_ids) > 1:
            warnings.warn(f'The gpu-ids is reset from {cfg.gpu_ids} to '
                          f'{cfg.gpu_ids[0:1]} to avoid potential error in '
                          'non-distribute testing time.')
            cfg.gpu_ids = cfg.gpu_ids[0:1]
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    rank, _ = get_dist_info()
    # allows not to create the work_dir when it is not needed
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        if args.aug_test:
            json_file = osp.join(args.work_dir,
                                 f'eval_multi_scale_{timestamp}.json')
        else:
            json_file = osp.join(args.work_dir,
                                 f'eval_single_scale_{timestamp}.json')
    elif rank == 0:
        work_dir = osp.join('./work_dirs',
                            osp.splitext(osp.basename(args.config))[0])
        mmcv.mkdir_or_exist(osp.abspath(work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        if args.aug_test:
            json_file = osp.join(work_dir,
                                 f'eval_multi_scale_{timestamp}.json')
        else:
            json_file = osp.join(work_dir,
                                 f'eval_single_scale_{timestamp}.json')

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    # The default loader config
    loader_cfg = dict(
        # cfg.gpus will be ignored if distributed
        num_gpus=len(cfg.gpu_ids),
        dist=distributed,
        shuffle=False)
    # The overall dataloader settings
    loader_cfg.update({
        k: v
        for k, v in cfg.data.items() if k not in [
            'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
            'test_dataloader'
        ]
    })
    test_loader_cfg = {
        **loader_cfg,
        'samples_per_gpu': 1,
        'shuffle': False,  # Not shuffle by default
        **cfg.data.get('test_dataloader', {})
    }
    # build the dataloader
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        print('"CLASSES" not found in meta, use dataset.CLASSES instead')
        model.CLASSES = dataset.CLASSES
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    else:
        print('"PALETTE" not found in meta, use dataset.PALETTE instead')
        model.PALETTE = dataset.PALETTE

    # clean gpu memory when starting a new evaluation.
    torch.cuda.empty_cache()
    eval_kwargs = {} if args.eval_options is None else args.eval_options

    # Deprecated
    efficient_test = eval_kwargs.get('efficient_test', False)
    if efficient_test:
        warnings.warn(
            '``efficient_test=True`` does not have effect in tools/test.py, '
            'the evaluation and format results are CPU memory efficient by '
            'default')

    eval_on_format_results = (
        args.eval is not None and 'cityscapes' in args.eval)
    if eval_on_format_results:
        assert len(args.eval) == 1, 'eval on format results is not ' \
                                    'applicable for metrics other than ' \
                                    'cityscapes'
    if args.format_only or eval_on_format_results:
        if 'imgfile_prefix' in eval_kwargs:
            tmpdir = eval_kwargs['imgfile_prefix']
        else:
            tmpdir = '.format_cityscapes'
            eval_kwargs.setdefault('imgfile_prefix', tmpdir)
        mmcv.mkdir_or_exist(tmpdir)
    else:
        tmpdir = None

    if not distributed:
        warnings.warn(
            'SyncBN is only supported with DDP. To be compatible with DP, '
            'we convert SyncBN to BN. Please use dist_train.sh which can '
            'avoid this error.')
        if not torch.cuda.is_available():
            assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \
                'Please use MMCV >= 1.4.4 for CPU training!'
        model = revert_sync_batchnorm(model)
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
        results = single_gpu_test(
            model,
            data_loader,
            args.show,
            args.show_dir,
            False,
            args.opacity,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        results = multi_gpu_test(
            model,
            data_loader,
            args.tmpdir,
            args.gpu_collect,
            False,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            warnings.warn(
                'The behavior of ``args.out`` has been changed since MMSeg '
                'v0.16, the pickled outputs could be seg map as type of '
                'np.array, pre-eval results or file paths for '
                '``dataset.format_results()``.')
            print(f'\nwriting results to {args.out}')
            mmcv.dump(results, args.out)
        if args.eval:
            eval_kwargs.update(metric=args.eval)
            metric = dataset.evaluate(results, **eval_kwargs)
            metric_dict = dict(config=args.config, metric=metric)
            mmcv.dump(metric_dict, json_file, indent=4)
        if tmpdir is not None and eval_on_format_results:
            # remove tmp dir when cityscapes evaluation is done
            shutil.rmtree(tmpdir)
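# A hypothetical multi-GPU launch for the script above, in the style of
# mmseg's tools/dist_test.sh (GPU count and paths are placeholders):
#
#   python -m torch.distributed.launch --nproc_per_node=4 tools/test.py \
#       configs/example_config.py checkpoints/example.pth \
#       --launcher pytorch --eval mIoU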