def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    torch.backends.cudnn.benchmark = False
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow, so skip them
    num_warmup = 5
    pure_inf_time = 0
    total_iters = 200

    # benchmark with 200 images and take the average
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {total_iters}], '
                      f'fps: {fps:.2f} img / s')

        if (i + 1) == total_iters:
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.2f} img / s')
            break
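# A minimal sketch of the parse_args() helper that the benchmark entry point
# above assumes. Only args.config, args.checkpoint and args.log_interval are
# read in main(), so this is inferred from that usage rather than copied from
# the original script; defaults are assumptions.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Benchmark a segmentor')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    return parser.parse_args()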
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    distributed = False

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # load onnx config and meta
    cfg.model.train_cfg = None
    model = ONNXRuntimeSegmentor(args.model, cfg=cfg, device_id=0)
    model.CLASSES = dataset.CLASSES
    model.PALETTE = dataset.PALETTE

    efficient_test = False
    if args.eval_options is not None:
        efficient_test = args.eval_options.get('efficient_test', False)

    model = MMDataParallel(model, device_ids=[0])
    outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
                              efficient_test, args.opacity)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, **kwargs)
def predict_rsImage_mmseg(config_file, trained_model, image_path,
                          img_save_dir, batch_size=1, gpuid=0,
                          tile_width=480, tile_height=480,
                          overlay_x=160, overlay_y=160):
    cfg = mmcv.Config.fromfile(config_file)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    cfg.data.test.test_mode = True
    distributed = False

    # test_mode=False, rsimage='', rsImg_id=0, tile_width=480, tile_height=480,
    # overlay_x=160, overlay_y=160
    data_args = {
        'rsImg_predict': True,
        'rsimage': image_path,
        'tile_width': tile_width,
        'tile_height': tile_height,
        'overlay_x': overlay_x,
        'overlay_y': overlay_y
    }
    dataset = build_dataset(cfg.data.test, default_args=data_args)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=batch_size,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, trained_model, map_location='cpu')
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        print('"CLASSES" not found in meta, use dataset.CLASSES instead')
        model.CLASSES = dataset.CLASSES
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    else:
        print('"PALETTE" not found in meta, use dataset.PALETTE instead')
        model.PALETTE = dataset.PALETTE

    # clean gpu memory when starting a new evaluation.
    torch.cuda.empty_cache()

    # no distributed
    model = MMDataParallel(model, device_ids=[gpuid])
    single_gpu_prediction_rsImage(model, data_loader, img_save_dir)
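# Example invocation of predict_rsImage_mmseg as defined above. The config,
# checkpoint and image paths are placeholders, not files from the original
# project; the tiling parameters simply repeat the function defaults.
if __name__ == '__main__':
    predict_rsImage_mmseg(
        config_file='configs/my_rs_config.py',        # hypothetical path
        trained_model='work_dirs/my_rs/latest.pth',   # hypothetical path
        image_path='data/scene.tif',                  # hypothetical path
        img_save_dir='results/scene',
        batch_size=1,
        gpuid=0,
        tile_width=480,
        tile_height=480,
        overlay_x=160,
        overlay_y=160)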
logger.info(f'Config:\n{cfg.pretty_text}')

model = build_segmentor(cfg.model,
                        train_cfg=cfg.train_cfg,
                        test_cfg=cfg.test_cfg)
# checkpoint = '/home/zhouzhigong/Documents/mmsegmentation/work_dirs/pspnet_r18-d8_512x512_40k_cityscapes_skd/iter_40000.pth'
# checkpoint = load_checkpoint(model, checkpoint)
model.cuda(0)
model_paral = MMDataParallel(model, device_ids=[0])

train_dataset = build_dataset(cfg.data.train)
train_data_loader = build_dataloader(
    train_dataset,
    cfg.data.samples_per_gpu,  # samples_per_gpu
    cfg.data.workers_per_gpu,  # workers_per_gpu
    len(cfg.gpu_ids),
    dist=False,
    seed=cfg.seed,
    drop_last=True)

val_dataset = build_dataset(cfg.data.test)
val_data_loader = build_dataloader(
    val_dataset,
    samples_per_gpu=4,
    workers_per_gpu=4,
    # workers_per_gpu=S_cfg.data.workers_per_gpu,
    dist=False,
    shuffle=False,
    drop_last=True,
)

# checkpoint = '/home/zhouzhigong/Documents/mmsegmentation/work_dirs/pspnet_r18-d8_512x512_40k_cityscapes_skd/iter_20000.pth'
cfg = seg_model.cfg
train_dataset = CityscapesDataset(data_root=cityspaces_path,
                                  pipeline=cfg.data.train.pipeline,
                                  img_dir=cfg.data.train.img_dir,
                                  ann_dir=cfg.data.train.ann_dir,
                                  test_mode=False)
val_dataset = CityscapesDataset(data_root=cityspaces_path,
                                pipeline=cfg.data.val.pipeline,
                                img_dir=cfg.data.val.img_dir,
                                ann_dir=cfg.data.val.ann_dir,
                                test_mode=False)
train_loader = build_dataloader(train_dataset,
                                samples_per_gpu=batch_size,
                                workers_per_gpu=0,
                                dataloader_type="DataLoader")
val_loader = build_dataloader(val_dataset,
                              samples_per_gpu=batch_size,
                              workers_per_gpu=0,
                              dataloader_type="DataLoader")

# Freeze Seg Network
for param in seg_model.parameters():
    param.requires_grad = False

optimizer = torch.optim.Adam(crf.parameters(), lr=1e-3)


def get_evaluations(log_proba, seg, n_classes, ignore_idx):
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    distributed = False

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # load onnx config and meta
    cfg.model.train_cfg = None
    if args.backend == 'onnxruntime':
        model = ONNXRuntimeSegmentor(args.model, cfg=cfg, device_id=0)
    elif args.backend == 'tensorrt':
        model = TensorRTSegmentor(args.model, cfg=cfg, device_id=0)

    model.CLASSES = dataset.CLASSES
    model.PALETTE = dataset.PALETTE

    # clean gpu memory when starting a new evaluation.
    torch.cuda.empty_cache()
    eval_kwargs = {} if args.eval_options is None else args.eval_options

    # Deprecated
    efficient_test = eval_kwargs.get('efficient_test', False)
    if efficient_test:
        warnings.warn(
            '``efficient_test=True`` does not have effect in tools/test.py, '
            'the evaluation and format results are CPU memory efficient by '
            'default')

    eval_on_format_results = (
        args.eval is not None and 'cityscapes' in args.eval)
    if eval_on_format_results:
        assert len(args.eval) == 1, 'eval on format results is not ' \
                                    'applicable for metrics other than ' \
                                    'cityscapes'
    if args.format_only or eval_on_format_results:
        if 'imgfile_prefix' in eval_kwargs:
            tmpdir = eval_kwargs['imgfile_prefix']
        else:
            tmpdir = '.format_cityscapes'
            eval_kwargs.setdefault('imgfile_prefix', tmpdir)
        mmcv.mkdir_or_exist(tmpdir)
    else:
        tmpdir = None

    model = MMDataParallel(model, device_ids=[0])
    results = single_gpu_test(
        model,
        data_loader,
        args.show,
        args.show_dir,
        False,
        args.opacity,
        pre_eval=args.eval is not None and not eval_on_format_results,
        format_only=args.format_only or eval_on_format_results,
        format_args=eval_kwargs)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            warnings.warn(
                'The behavior of ``args.out`` has been changed since MMSeg '
                'v0.16, the pickled outputs could be seg map as type of '
                'np.array, pre-eval results or file paths for '
                '``dataset.format_results()``.')
            print(f'\nwriting results to {args.out}')
            mmcv.dump(results, args.out)
        if args.eval:
            dataset.evaluate(results, args.eval, **eval_kwargs)
        if tmpdir is not None and eval_on_format_results:
            # remove tmp dir when cityscapes evaluation
            shutil.rmtree(tmpdir)
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    if args.work_dir is not None:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        json_file = osp.join(args.work_dir, f'fps_{timestamp}.json')
    else:
        # use config filename as default work_dir if cfg.work_dir is None
        work_dir = osp.join('./work_dirs',
                            osp.splitext(osp.basename(args.config))[0])
        mmcv.mkdir_or_exist(osp.abspath(work_dir))
        json_file = osp.join(work_dir, f'fps_{timestamp}.json')

    repeat_times = args.repeat_times
    # set cudnn_benchmark
    torch.backends.cudnn.benchmark = False
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    benchmark_dict = dict(config=args.config, unit='img / s')
    overall_fps_list = []
    for time_index in range(repeat_times):
        print(f'Run {time_index + 1}:')
        # build the dataloader
        # TODO: support multiple images per gpu (only minor changes are needed)
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

        # build the model and load checkpoint
        cfg.model.train_cfg = None
        model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(model)
        if 'checkpoint' in args and osp.exists(args.checkpoint):
            load_checkpoint(model, args.checkpoint, map_location='cpu')

        model = MMDataParallel(model, device_ids=[0])
        model.eval()

        # the first several iterations may be very slow, so skip them
        num_warmup = 5
        pure_inf_time = 0
        total_iters = 200

        # benchmark with 200 images and take the average
        for i, data in enumerate(data_loader):
            torch.cuda.synchronize()
            start_time = time.perf_counter()

            with torch.no_grad():
                model(return_loss=False, rescale=True, **data)

            torch.cuda.synchronize()
            elapsed = time.perf_counter() - start_time

            if i >= num_warmup:
                pure_inf_time += elapsed
                if (i + 1) % args.log_interval == 0:
                    fps = (i + 1 - num_warmup) / pure_inf_time
                    print(f'Done image [{i + 1:<3}/ {total_iters}], '
                          f'fps: {fps:.2f} img / s')

            if (i + 1) == total_iters:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Overall fps: {fps:.2f} img / s\n')
                benchmark_dict[f'overall_fps_{time_index + 1}'] = round(fps, 2)
                overall_fps_list.append(fps)
                break

    benchmark_dict['average_fps'] = round(np.mean(overall_fps_list), 2)
    benchmark_dict['fps_variance'] = round(np.var(overall_fps_list), 4)
    print(f'Average fps of {repeat_times} evaluations: '
          f'{benchmark_dict["average_fps"]}')
    print(f'The variance of {repeat_times} evaluations: '
          f'{benchmark_dict["fps_variance"]}')
    mmcv.dump(benchmark_dict, json_file, indent=4)
def train_segmentor(model,
                    dataset,
                    cfg,
                    distributed=False,
                    validate=False,
                    timestamp=None,
                    meta=None):
    """Launch segmentor training."""
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    # print('----------------------------')
    # print(type(dataset), len(dataset))  # <class 'list'> 1
    # print(type(dataset[0]))  # <class 'mmseg.datasets.cityscapes.CityscapesDataset'>
    # print(len(dataset[0]))  # 2975
    # print(dataset[0][0]['img'].size())
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            drop_last=True) for ds in dataset
    ]
    print('---------------------------')
    # print(data_loaders[0].next)
    print('before')
    print(cfg.gpu_ids)
    print(next(model.parameters()).device)
    # print(next(model.teacher.parameters()).device)

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        find_unused_parameters = True
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(model.cuda(cfg.gpu_ids[0]),
                               device_ids=cfg.gpu_ids)
    print('after')
    print(next(model.parameters()).device)
    # print(next(model.teacher.parameters()).device)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = IterBasedRunner(model=model,
                             batch_processor=None,
                             optimizer=optimizer,
                             work_dir=cfg.work_dir,
                             logger=logger,
                             meta=meta)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_iters)
model = build_segmentor(cfg.model,
                        train_cfg=cfg.train_cfg,
                        test_cfg=cfg.test_cfg)
checkpoint = '/home/zhouzhigong/Documents/mmsegmentation/work_dirs/pspnet_r18-d8_512x512_40k_cityscapes_skd/iter_40000.pth'
checkpoint = load_checkpoint(model, checkpoint)
model.eval()
# model.cuda(0)
model = MMDataParallel(model, device_ids=[1])

# dataset = build_dataset(cfg.data.train)
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(
    dataset,
    samples_per_gpu=4,
    workers_per_gpu=4,
    # workers_per_gpu=S_cfg.data.workers_per_gpu,
    dist=False,
    shuffle=False,
    drop_last=True,
)

outputs = single_gpu_test(model, data_loader, show=False, out_dir=None)
eval_results = dataset.evaluate(outputs, metric='mIoU', logger=None)
print(eval_results)
# {'mIoU': 0.5864396163552457, 'mAcc': 0.6625518273733926, 'aAcc': 0.9302386656807492}

# for i in range(5):
#     data_batch = next(iter_loaders[0])
#     print(data_batch.keys())
#     img = data_batch['img'].data
#     img_metas = data_batch['img_metas'].data
def test_build_dataloader():
    dataset = ToyDataset()
    samples_per_gpu = 3

    # dist=True, shuffle=True, 1GPU
    dataloader = build_dataloader(dataset,
                                  samples_per_gpu=samples_per_gpu,
                                  workers_per_gpu=2)
    assert dataloader.batch_size == samples_per_gpu
    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
    assert isinstance(dataloader.sampler, DistributedSampler)
    assert dataloader.sampler.shuffle

    # dist=True, shuffle=False, 1GPU
    dataloader = build_dataloader(dataset,
                                  samples_per_gpu=samples_per_gpu,
                                  workers_per_gpu=2,
                                  shuffle=False)
    assert dataloader.batch_size == samples_per_gpu
    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
    assert isinstance(dataloader.sampler, DistributedSampler)
    assert not dataloader.sampler.shuffle

    # dist=True, shuffle=True, 8GPU
    dataloader = build_dataloader(dataset,
                                  samples_per_gpu=samples_per_gpu,
                                  workers_per_gpu=2,
                                  num_gpus=8)
    assert dataloader.batch_size == samples_per_gpu
    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
    assert dataloader.num_workers == 2

    # dist=False, shuffle=True, 1GPU
    dataloader = build_dataloader(dataset,
                                  samples_per_gpu=samples_per_gpu,
                                  workers_per_gpu=2,
                                  dist=False)
    assert dataloader.batch_size == samples_per_gpu
    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
    assert isinstance(dataloader.sampler, RandomSampler)
    assert dataloader.num_workers == 2

    # dist=False, shuffle=False, 1GPU
    dataloader = build_dataloader(dataset,
                                  samples_per_gpu=3,
                                  workers_per_gpu=2,
                                  shuffle=False,
                                  dist=False)
    assert dataloader.batch_size == samples_per_gpu
    assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu))
    assert isinstance(dataloader.sampler, SequentialSampler)
    assert dataloader.num_workers == 2

    # dist=False, shuffle=True, 8GPU
    dataloader = build_dataloader(dataset,
                                  samples_per_gpu=3,
                                  workers_per_gpu=2,
                                  num_gpus=8,
                                  dist=False)
    assert dataloader.batch_size == samples_per_gpu * 8
    assert len(dataloader) == int(
        math.ceil(len(dataset) / samples_per_gpu / 8))
    assert isinstance(dataloader.sampler, RandomSampler)
    assert dataloader.num_workers == 16
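# The test above relies on a ToyDataset fixture that is not shown here. A
# minimal stand-in is sketched below, assuming build_dataloader only needs
# __len__ and __getitem__; the real fixture in the test suite may differ.
from torch.utils.data import Dataset


class ToyDataset(Dataset):

    def __init__(self, cnt=100):
        self.cnt = cnt

    def __len__(self):
        return self.cnt

    def __getitem__(self, index):
        return index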
        data_root=data_root,
        img_dir='train/image',
        ann_dir='train/label_cvt',
        split='train/split/val_mini.txt',
        mosaic_ratio=1,
        pipeline=train_pipeline),
)

dataset = [build_dataset(data['train'])]
bs = 8
data_loaders = [
    build_dataloader(
        ds,
        bs,
        0,
        # cfg.gpus will be ignored if distributed
        len(range(0, 1)),
        dist=False,
        seed=41) for ds in dataset
]
data_loader = data_loaders[0]
data_per_batch = next(iter(data_loader))
img_metas = data_per_batch['img_metas']
img_metas_data = img_metas.data[0]
images = data_per_batch['img'].data[0].numpy()
labels = data_per_batch['gt_semantic_seg'].data[0]
for image, label, data_info in zip(images, labels, img_metas_data):
def train_segmentor(model,
                    dataset,
                    cfg,
                    distributed=False,
                    validate=False,
                    timestamp=None,
                    meta=None):
    """Launch segmentor training."""
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if not torch.cuda.is_available():
        len_gpu_ids = 2  # needs to be changed
    else:
        len_gpu_ids = len(cfg.gpu_ids)
    data_loaders = [
        build_dataloader(
            ds,  # a PyTorch dataset
            cfg.data.samples_per_gpu,  # number of training samples on each GPU, i.e. batch size per GPU
            cfg.data.workers_per_gpu,  # number of data-loading subprocesses per GPU
            # cfg.gpus will be ignored if distributed
            len_gpu_ids,  # number of GPUs; only used in non-distributed training
            dist=distributed,  # distributed training/test or not; Default: True
            seed=cfg.seed,
            drop_last=True) for ds in dataset
    ]
    # About build_dataloader:
    #   shuffle (bool): Whether to shuffle the data at every epoch. Default: True.
    #   seed (int | None): Seed to be used. Default: None.
    #   drop_last (bool): Whether to drop the last incomplete batch in epoch. Default: False.
    #   pin_memory (bool): Whether to use pin_memory in DataLoader. Default: True.
    #   dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader'.
    #   kwargs: any keyword argument to be used to initialize DataLoader.

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        if torch.cuda.is_available():
            model = MMDataParallel(model.cuda(cfg.gpu_ids[0]),
                                   device_ids=cfg.gpu_ids)
        else:
            model = MMDataParallel(model.to('cpu'))

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if cfg.get('runner') is None:
        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)

    runner = build_runner(cfg.runner,
                          default_args=dict(model=model,
                                            batch_processor=None,
                                            optimizer=optimizer,
                                            work_dir=cfg.work_dir,
                                            logger=logger,
                                            meta=meta))

    # register hooks
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=len_gpu_ids,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    rank, _ = get_dist_info()
    # allows not to create
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        print('"CLASSES" not found in meta, use dataset.CLASSES instead')
        model.CLASSES = dataset.CLASSES
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    else:
        print('"PALETTE" not found in meta, use dataset.PALETTE instead')
        model.PALETTE = dataset.PALETTE

    # clean gpu memory when starting a new evaluation.
    torch.cuda.empty_cache()
    eval_kwargs = {} if args.eval_options is None else args.eval_options

    # Deprecated
    efficient_test = eval_kwargs.get('efficient_test', False)
    if efficient_test:
        warnings.warn(
            '``efficient_test=True`` does not have effect in tools/test.py, '
            'the evaluation and format results are CPU memory efficient by '
            'default')

    eval_on_format_results = (
        args.eval is not None and 'cityscapes' in args.eval)
    if eval_on_format_results:
        assert len(args.eval) == 1, 'eval on format results is not ' \
                                    'applicable for metrics other than ' \
                                    'cityscapes'
    if args.format_only or eval_on_format_results:
        if 'imgfile_prefix' in eval_kwargs:
            tmpdir = eval_kwargs['imgfile_prefix']
        else:
            tmpdir = '.format_cityscapes'
            eval_kwargs.setdefault('imgfile_prefix', tmpdir)
        mmcv.mkdir_or_exist(tmpdir)
    else:
        tmpdir = None

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        results = single_gpu_test(
            model,
            data_loader,
            args.show,
            args.show_dir,
            False,
            args.opacity,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        results = multi_gpu_test(
            model,
            data_loader,
            args.tmpdir,
            args.gpu_collect,
            False,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            warnings.warn(
                'The behavior of ``args.out`` has been changed since MMSeg '
                'v0.16, the pickled outputs could be seg map as type of '
                'np.array, pre-eval results or file paths for '
                '``dataset.format_results()``.')
            print(f'\nwriting results to {args.out}')
            mmcv.dump(results, args.out)
        if args.eval:
            eval_kwargs.update(metric=args.eval)
            metric = dataset.evaluate(results, **eval_kwargs)
            metric_dict = dict(config=args.config, metric=metric)
            if args.work_dir is not None and rank == 0:
                mmcv.dump(metric_dict, json_file, indent=4)
        if tmpdir is not None and eval_on_format_results:
            # remove tmp dir when cityscapes evaluation
            shutil.rmtree(tmpdir)
# mean_IU, IU_array = model1.evalute_model(model1.student, valloader,
#                                          gpu_id='0', input_size='512,512',
#                                          num_classes=19, whole=True)
# print('mean_IU:', mean_IU)
# print('IU_array:', IU_array)

## mmseg
checkpoint = load_checkpoint(model1.student, pretrain_model_1)
# model = model1.student

S_config = 'configs/pspnet/pspnet_r18-d8_512x512_40k_cityscapes_1gpu.py'
S_cfg = Config.fromfile(S_config)

dataset = build_dataset(S_cfg.data.train)
# dataset = build_dataset(S_cfg.data.test)
data_loader = build_dataloader(
    dataset,
    samples_per_gpu=1,
    workers_per_gpu=1,
    # workers_per_gpu=S_cfg.data.workers_per_gpu,
    dist=False,
    shuffle=False)

# model.CLASSES = checkpoint['meta']['CLASSES']
# model.PALETTE = checkpoint['meta']['PALETTE']

# distributed = False
# if not distributed:
#     model = MMDataParallel(model, device_ids=[0])
#     # outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
#     outputs = single_gpu_test(model, data_loader, show=False, out_dir=None)
# else:
#     model = MMDistributedDataParallel(
#         model.cuda(),
#         device_ids=[torch.cuda.current_device()],
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    if args.gpu_id is not None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        cfg.gpu_ids = [args.gpu_id]
        distributed = False
        if len(cfg.gpu_ids) > 1:
            warnings.warn(f'The gpu-ids is reset from {cfg.gpu_ids} to '
                          f'{cfg.gpu_ids[0:1]} to avoid potential error in '
                          'non-distribute testing time.')
            cfg.gpu_ids = cfg.gpu_ids[0:1]
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    rank, _ = get_dist_info()
    # allows not to create
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        if args.aug_test:
            json_file = osp.join(args.work_dir,
                                 f'eval_multi_scale_{timestamp}.json')
        else:
            json_file = osp.join(args.work_dir,
                                 f'eval_single_scale_{timestamp}.json')
    elif rank == 0:
        work_dir = osp.join('./work_dirs',
                            osp.splitext(osp.basename(args.config))[0])
        mmcv.mkdir_or_exist(osp.abspath(work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        if args.aug_test:
            json_file = osp.join(work_dir,
                                 f'eval_multi_scale_{timestamp}.json')
        else:
            json_file = osp.join(work_dir,
                                 f'eval_single_scale_{timestamp}.json')

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    # The default loader config
    loader_cfg = dict(
        # cfg.gpus will be ignored if distributed
        num_gpus=len(cfg.gpu_ids),
        dist=distributed,
        shuffle=False)
    # The overall dataloader settings
    loader_cfg.update({
        k: v
        for k, v in cfg.data.items() if k not in [
            'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
            'test_dataloader'
        ]
    })
    test_loader_cfg = {
        **loader_cfg,
        'samples_per_gpu': 1,
        'shuffle': False,  # Not shuffle by default
        **cfg.data.get('test_dataloader', {})
    }
    # build the dataloader
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        print('"CLASSES" not found in meta, use dataset.CLASSES instead')
        model.CLASSES = dataset.CLASSES
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    else:
        print('"PALETTE" not found in meta, use dataset.PALETTE instead')
        model.PALETTE = dataset.PALETTE

    # clean gpu memory when starting a new evaluation.
    torch.cuda.empty_cache()
    eval_kwargs = {} if args.eval_options is None else args.eval_options

    # Deprecated
    efficient_test = eval_kwargs.get('efficient_test', False)
    if efficient_test:
        warnings.warn(
            '``efficient_test=True`` does not have effect in tools/test.py, '
            'the evaluation and format results are CPU memory efficient by '
            'default')

    eval_on_format_results = (
        args.eval is not None and 'cityscapes' in args.eval)
    if eval_on_format_results:
        assert len(args.eval) == 1, 'eval on format results is not ' \
                                    'applicable for metrics other than ' \
                                    'cityscapes'
    if args.format_only or eval_on_format_results:
        if 'imgfile_prefix' in eval_kwargs:
            tmpdir = eval_kwargs['imgfile_prefix']
        else:
            tmpdir = '.format_cityscapes'
            eval_kwargs.setdefault('imgfile_prefix', tmpdir)
        mmcv.mkdir_or_exist(tmpdir)
    else:
        tmpdir = None

    if not distributed:
        warnings.warn(
            'SyncBN is only supported with DDP. To be compatible with DP, '
            'we convert SyncBN to BN. Please use dist_train.sh which can '
            'avoid this error.')
        if not torch.cuda.is_available():
            assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \
                'Please use MMCV >= 1.4.4 for CPU training!'
        model = revert_sync_batchnorm(model)
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
        results = single_gpu_test(
            model,
            data_loader,
            args.show,
            args.show_dir,
            False,
            args.opacity,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        results = multi_gpu_test(
            model,
            data_loader,
            args.tmpdir,
            args.gpu_collect,
            False,
            pre_eval=args.eval is not None and not eval_on_format_results,
            format_only=args.format_only or eval_on_format_results,
            format_args=eval_kwargs)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            warnings.warn(
                'The behavior of ``args.out`` has been changed since MMSeg '
                'v0.16, the pickled outputs could be seg map as type of '
                'np.array, pre-eval results or file paths for '
                '``dataset.format_results()``.')
            print(f'\nwriting results to {args.out}')
            mmcv.dump(results, args.out)
        if args.eval:
            eval_kwargs.update(metric=args.eval)
            metric = dataset.evaluate(results, **eval_kwargs)
            metric_dict = dict(config=args.config, metric=metric)
            mmcv.dump(metric_dict, json_file, indent=4)
        if tmpdir is not None and eval_on_format_results:
            # remove tmp dir when cityscapes evaluation
            shutil.rmtree(tmpdir)
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    model.CLASSES = checkpoint['meta']['CLASSES']
    model.PALETTE = checkpoint['meta']['PALETTE']

    efficient_test = False
    if args.eval_options is not None:
        efficient_test = args.eval_options.get('efficient_test', False)

    if not distributed:
        # for concatenated (multi-) image input
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test_multi(model, data_loader, args.show,
                                        args.show_dir,
                                        args.show_original_dir,
                                        efficient_test)
    else:
        # multi-image input is currently not supported in distributed mode
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect, efficient_test)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, **kwargs)
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.aug_test:
        # hard code index
        cfg.data.test.pipeline[1].img_ratios = [
            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
        ]
        cfg.data.test.pipeline[1].flip = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init the logger before other steps
    logger = None
    if args.eval:
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        log_file = osp.join(cfg.work_dir, f'test_{timestamp}.log')
        logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)
        if logger is not None:
            logger.info(f'Set random seed to {args.seed}, deterministic: '
                        f'{args.deterministic}')
        else:
            print(f'Set random seed to {args.seed}, deterministic: '
                  f'{args.deterministic}')

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.val, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_segmentor(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    model.CLASSES = checkpoint['meta']['CLASSES']
    model.PALETTE = checkpoint['meta']['PALETTE']

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show,
                                  args.show_dir)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, logger, **kwargs)
def train_segmentor(model,
                    dataset,
                    cfg,
                    distributed=False,
                    validate=False,
                    timestamp=None,
                    meta=None):
    """Launch segmentor training."""
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            drop_last=True) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(model.cuda(cfg.gpu_ids[0]),
                               device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if cfg.get('runner') is None:
        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)

    runner = build_runner(cfg.runner,
                          default_args=dict(model=model,
                                            batch_processor=None,
                                            optimizer=optimizer,
                                            work_dir=cfg.work_dir,
                                            logger=logger,
                                            meta=meta))

    # register hooks
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg),
                             priority='LOW')

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
def train_segmentor(model,
                    dataset,
                    cfg,
                    distributed=False,
                    validate=False,
                    timestamp=None,
                    meta=None):
    """Launch segmentor training."""
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            drop_last=True) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', True)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
        # model.ddp = model
    else:
        model = MMDataParallel(model.cuda(cfg.gpu_ids[0]),
                               device_ids=cfg.gpu_ids)
    # print(model)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if cfg.get('runner') is None:
        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)

    runner = build_runner(cfg.runner,
                          default_args=dict(model=model,
                                            batch_processor=None,
                                            optimizer=optimizer,
                                            work_dir=cfg.work_dir,
                                            logger=logger,
                                            meta=meta))

    # print(cfg.optimizer)
    # print(cfg.optimizer_config)
    optimizer_config = OptimizerHookLW(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
def train_segmentor(model,
                    dataset,
                    cfg,
                    distributed=False,
                    validate=False,
                    timestamp=None,
                    meta=None):
    """Launch segmentor training."""
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    # The default loader config
    loader_cfg = dict(
        # cfg.gpus will be ignored if distributed
        num_gpus=len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed,
        drop_last=True)
    # The overall dataloader settings
    loader_cfg.update({
        k: v
        for k, v in cfg.data.items() if k not in [
            'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
            'test_dataloader'
        ]
    })

    # The specific dataloader settings
    train_loader_cfg = {**loader_cfg, **cfg.data.get('train_dataloader', {})}
    data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        if not torch.cuda.is_available():
            assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \
                'Please use MMCV >= 1.4.4 for CPU training!'
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if cfg.get('runner') is None:
        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)

    runner = build_runner(cfg.runner,
                          default_args=dict(model=model,
                                            batch_processor=None,
                                            optimizer=optimizer,
                                            work_dir=cfg.work_dir,
                                            logger=logger,
                                            meta=meta))

    # register hooks
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    if distributed:
        # When training by epoch in distributed mode, use
        # `DistSamplerSeedHook` to set a different seed for the distributed
        # sampler each epoch; this shuffles the dataset every epoch and
        # helps avoid overfitting.
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        # The specific dataloader settings
        val_loader_cfg = {
            **loader_cfg,
            'samples_per_gpu': 1,
            'shuffle': False,  # Not shuffle by default
            **cfg.data.get('val_dataloader', {}),
        }
        val_dataloader = build_dataloader(val_dataset, **val_loader_cfg)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg),
                             priority='LOW')

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from is None and cfg.get('auto_resume'):
        resume_from = find_latest_checkpoint(cfg.work_dir)
        if resume_from is not None:
            cfg.resume_from = resume_from
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
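# A minimal sketch of how train_segmentor is typically driven, mirroring the
# general flow of MMSegmentation's tools/train.py. The config path is a real
# config name but the work_dir is a placeholder; distributed launch, logging
# and error handling are omitted, so treat this as an illustration rather
# than the original launcher.
import mmcv
from mmcv import Config
from mmseg.datasets import build_dataset
from mmseg.models import build_segmentor

cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')
cfg.work_dir = './work_dirs/pspnet_r50_demo'  # hypothetical output dir
cfg.gpu_ids = [0]
cfg.seed = None
mmcv.mkdir_or_exist(cfg.work_dir)

datasets = [build_dataset(cfg.data.train)]
model = build_segmentor(cfg.model,
                        train_cfg=cfg.get('train_cfg'),
                        test_cfg=cfg.get('test_cfg'))
model.CLASSES = datasets[0].CLASSES

train_segmentor(model, datasets, cfg, distributed=False, validate=True)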