def _latest_checkpoint(work_dir):
    """Return the file name of the highest-numbered checkpoint in a directory.

    Only ``*.pth`` files are considered and ``latest.pth`` is skipped. The
    number is read from the last ``_``-separated token of the file stem
    (``epoch_12.pth`` -> 12). Files without a numeric suffix are ignored
    instead of aborting the scan.

    Args:
        work_dir (str | None): Directory to scan.

    Returns:
        str: Checkpoint file name, or ``""`` when ``work_dir`` is unset, is
        not a directory, or contains no numbered checkpoint.
    """
    if not work_dir or not os.path.isdir(work_dir):
        return ""
    numbered = []
    for fname in os.listdir(work_dir):
        stem = fname.split('.')[0]
        if not fname.endswith(".pth") or stem == 'latest':
            continue
        try:
            numbered.append((int(stem.split('_')[-1]), fname))
        except ValueError:
            # No epoch/iteration number to rank by (e.g. "best.pth").
            continue
    if not numbered:
        return ""
    # max() keeps the first of equally-numbered files, like a stable
    # descending sort followed by [0].
    return max(numbered, key=lambda item: item[0])[1]


def main():
    """Train a detector, resuming automatically from the newest checkpoint.

    Reads the config named on the command line, applies CLI overrides
    (work dir, resume checkpoint, GPU count, LR auto-scaling), sets up the
    distributed environment and logger, builds the model and datasets and
    hands everything to ``train_detector``.

    When ``--resume_from`` is not given, the work directory is scanned for
    the highest-numbered ``*.pth`` file and training resumes from it.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    else:
        # Auto resume: scan the effective work dir (CLI value or the one from
        # the config file) so a missing --work_dir no longer raises TypeError.
        work_dir = cfg.get('work_dir', None)
        resume_file = _latest_checkpoint(work_dir)
        if resume_file:
            cfg.resume_from = os.path.join(work_dir, resume_file)
            print("Auto resume from {}".format(cfg.resume_from))

    cfg.gpus = args.gpus

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('MMDetection Version: {}'.format(__version__))
    logger.info('Config: {}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        datasets.append(build_dataset(cfg.data.val))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model,
                   datasets,
                   cfg,
                   distributed=distributed,
                   validate=args.validate,
                   logger=logger)
wx, wy = w / 2 * math.cos(ag), w / 2 * math.sin(ag) hx, hy = -h / 2 * math.sin(ag), h / 2 * math.cos(ag) rect = plt.Rectangle((xc - wx - hx, yc - wy - hy), w, h, angle=ag / math.pi * 180, linewidth=1, facecolor='none', edgecolor='r') ax.add_patch(rect) ax.plot([xc], [yc], '+', c='g', markersize=1) cfg = Config.fromfile('../configs/r3det/r3det_r50_fpn_2x.py') datasets = build_dataset(cfg.data.train) data_loader = build_dataloader(datasets, 6, 0, dist=False) for data in data_loader: print(data.keys()) img_metas = data['img_metas'].data[0] img = data['img'].data[0] gt_bboxes = data['gt_bboxes'].data[0] gt_bboxes_ignore = data['gt_bboxes_ignore'].data[0] for i in range(len(img_metas)): fig = plt.figure(dpi=300) mean = img_metas[i]['img_norm_cfg']['mean'] std = img_metas[i]['img_norm_cfg']['std'] img_show = img[i].permute(1, 2, 0) * std[None, None, :] + mean[None, None, :]
def main():
    """Run a trained detector on the test set and save/format/evaluate results.

    At least one of ``--out``, ``--eval``, ``--format_only`` or ``--show``
    must be given. The evaluation metric list is forwarded to the test loop
    only when its first entry is ``'panoptic'``; otherwise ``None`` is passed.

    Raises:
        ValueError: If ``--eval`` and ``--format_only`` are combined, or the
            ``--out`` path is not a ``.pkl``/``.pickle`` file.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results) with the argument "--out", "--eval", "--format_only" '
         'or "--show"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=cfg.data.imgs_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)
    # Old versions did not save class info in checkpoints; fall back to the
    # dataset's classes. ``.get`` also covers checkpoints without any 'meta'.
    checkpoint_meta = checkpoint.get('meta', {})
    if 'CLASSES' in checkpoint_meta:
        model.CLASSES = checkpoint_meta['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    # ``args.eval`` is None when --eval is not given, so guard before
    # indexing it.
    panoptic_eval = (args.eval
                     if args.eval and args.eval[0] == 'panoptic' else None)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show,
                                  panoptic_eval)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect, panoptic_eval)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print('\nwriting results to {}'.format(args.out))
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.options is None else args.options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, **kwargs)
def main():
    """Train a detector from a config file plus command-line overrides.

    Steps, in order: merge CLI options into the config, resolve the work
    directory and GPU ids, initialise the distributed environment, create
    the work directory and dump the config into it, set up the logger and
    the ``meta`` record (environment info, config text, seed, experiment
    name), build the model and datasets, then call ``train_detector``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # Import the modules listed under ``custom_imports``, if any.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])

    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir priority: CLI > value in the config file > config file name.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        config_stem = osp.splitext(osp.basename(args.config))[0]
        cfg.work_dir = osp.join('./work_dirs', config_stem)

    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    elif args.gpus is None:
        cfg.gpu_ids = range(1)
    else:
        cfg.gpu_ids = range(args.gpus)

    # The distributed environment comes first: the logger depends on it.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
        # In distributed mode the GPU ids follow the world size.
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    print('Outputs will be saved in: ', cfg.work_dir)
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

    # The same timestamp names the log file and is passed to the trainer.
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    logger = get_root_logger(
        log_file=osp.join(cfg.work_dir, f'{timestamp}.log'),
        log_level=cfg.log_level)

    # ``meta`` records environment info, config and seed for the run.
    env_lines = [f'{key}: {value}' for key, value in collect_env().items()]
    env_info = '\n'.join(env_lines)
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta = {'env_info': env_info, 'config': cfg.pretty_text}

    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(cfg.model,
                           train_cfg=cfg.get('train_cfg'),
                           test_cfg=cfg.get('test_cfg'))

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # The validation workflow reuses the training pipeline.
        val_cfg = copy.deepcopy(cfg.data.val)
        val_cfg.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_cfg))

    class_names = datasets[0].CLASSES
    if cfg.checkpoint_config is not None:
        # Store the mmdet version and class names as checkpoint meta data.
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__ + get_git_hash()[:7],
            CLASSES=class_names)
    # Attribute kept on the model for visualization convenience.
    model.CLASSES = class_names

    train_detector(model,
                   datasets,
                   cfg,
                   distributed=distributed,
                   validate=(not args.no_validate),
                   timestamp=timestamp,
                   meta=meta)
def main():
    """Benchmark single-GPU inference speed of a detector.

    Builds the test dataloader (one sample per batch) and the model from the
    config, loads the checkpoint, then times the forward pass image by image.
    The first ``num_warmup`` iterations are excluded from the timing, and at
    most ``max_iter`` images are processed. Each forward pass is bracketed by
    ``torch.cuda.synchronize()`` so the wall-clock time covers the GPU work.

    The overall fps is printed once at the end, also when the dataset holds
    fewer than ``max_iter`` images.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' to 'DefaultFormatBundle'
        cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    dataset = build_dataset(cfg.data.test)
    # The benchmark itself always runs with one image per batch.
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=False,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    # benchmark with at most 2000 images and take the average
    max_iter = 2000
    pure_inf_time = 0
    num_timed = 0

    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            # Each timed iteration is accumulated exactly once.
            pure_inf_time += elapsed
            num_timed += 1
            if (i + 1) % args.log_interval == 0:
                fps = num_timed / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {max_iter}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == max_iter:
            break

    if num_timed:
        fps = num_timed / pure_inf_time
        print(f'Overall fps: {fps:.1f} img / s')
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Build loaders, runner and hooks for ``model`` and run training.

    Args:
        model: Detector to train; it is moved to GPU and wrapped in a
            (distributed) data-parallel container here.
        dataset: A dataset or a list/tuple of datasets, one per workflow
            stage.
        cfg: Config object providing ``data``, ``optimizer``, ``lr_config``,
            ``workflow``, ``total_epochs`` and related fields.
        distributed (bool): Whether to use distributed training.
        validate (bool): Whether to register an evaluation hook on
            ``cfg.data.val``.
        timestamp: Value assigned to ``runner.timestamp``.
        meta: Passed to the runner as ``meta``.
    """
    logger = get_root_logger(cfg.log_level)

    # Normalise the input to a list of datasets.
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]

    # Deprecated option: when present, "imgs_per_gpu" wins.
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = []
    for ds in dataset:
        loader = build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # the number of gpus is ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed)
        data_loaders.append(loader)

    # Put the model on GPU(s).
    if not distributed:
        model = MMDataParallel(model.cuda(cfg.gpu_ids[0]),
                               device_ids=cfg.gpu_ids)
    else:
        # Forwarded to torch.nn.parallel.DistributedDataParallel.
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)

    # Build the runner.
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = EpochBasedRunner(model,
                              optimizer=optimizer,
                              work_dir=cfg.work_dir,
                              logger=logger,
                              meta=meta)
    # Workaround so the .log and .log.json file names match.
    runner.timestamp = timestamp

    # Choose the optimizer hook: fp16, plain distributed, or config as-is.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg,
                                             distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        runner.register_hook(DistSamplerSeedHook())

    # Evaluation hook on the validation set, one sample per GPU.
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        hook_cls = DistEvalHook if distributed else EvalHook
        runner.register_hook(hook_cls(val_dataloader, **eval_cfg))

    # Resuming takes precedence over loading plain weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def main():
    """Test an exported (ONNX Runtime or TensorRT) detector on the test set.

    At least one of ``--out``, ``--eval``, ``--format-only``, ``--show`` or
    ``--show-dir`` must be given. The backend is validated before the
    dataset is built, so an unsupported value fails early with a clear
    message rather than with an unbound ``model`` later on.

    Raises:
        ValueError: If ``--eval`` and ``--format_only`` are combined, the
            ``--out`` path is not a ``.pkl``/``.pickle`` file, or
            ``args.backend`` is neither ``'onnxruntime'`` nor ``'tensorrt'``.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    if args.backend not in ('onnxruntime', 'tensorrt'):
        raise ValueError(
            f'Unsupported backend {args.backend!r}; expected "onnxruntime" '
            'or "tensorrt".')

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    cfg = compat_cfg(cfg)

    # in case the test dataset is concatenated
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=samples_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=False,
                                   shuffle=False)

    # Wrapper imports stay local so only the selected backend is imported.
    if args.backend == 'onnxruntime':
        from mmdet.core.export.model_wrappers import ONNXRuntimeDetector
        model = ONNXRuntimeDetector(args.model,
                                    class_names=dataset.CLASSES,
                                    device_id=0)
    else:  # 'tensorrt', validated above
        from mmdet.core.export.model_wrappers import TensorRTDetector
        model = TensorRTDetector(args.model,
                                 class_names=dataset.CLASSES,
                                 device_id=0)

    model = MMDataParallel(model, device_ids=[0])
    outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
                              args.show_score_thr)

    if args.out:
        print(f'\nwriting results to {args.out}')
        mmcv.dump(outputs, args.out)
    kwargs = {} if args.eval_options is None else args.eval_options
    if args.format_only:
        dataset.format_results(outputs, **kwargs)
    if args.eval:
        eval_kwargs = cfg.get('evaluation', {}).copy()
        # hard-code way to remove EvalHook args
        for key in [
                'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
                'rule'
        ]:
            eval_kwargs.pop(key, None)
        eval_kwargs.update(dict(metric=args.eval, **kwargs))
        print(dataset.evaluate(outputs, **eval_kwargs))
def main():
    """Train a detector, or run one of the graph/profiler/speed diagnostics.

    Without ``--graph``, ``--profiler`` or ``--speed`` this resolves the
    work directory, scales the learning rate by GPU count and batch size,
    builds the model and training dataset(s) and calls ``train_detector``.

    With one of those flags it instead wraps the model in
    ``MMDataParallel`` and iterates a dataloader to
      * dump the autograd graph of one batch to ``graph.dot`` (``--graph``),
      * profile batches and export ``./trace.json`` at iteration 20
        (``--profiler``), or
      * log the per-iteration time of a forward(+backward) pass
        (``--speed``).

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm against the original layout.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.dir is not None:
        if args.dir.startswith('//'):
            # A leading '//' means "use the rest verbatim as work_dir".
            cfg.work_dir = args.dir[2:]
        else:
            localhost = get_localhost().split('.')[0]
            # results from server saved to /private
            if 'gpu' in localhost:
                output_dir = '/private/huangchenxi/mmdet/outputs'
            else:
                output_dir = 'work_dirs'

            if args.dir.endswith('-c'):
                # A trailing '-c' is stripped and a resume checkpoint is
                # looked up via search_and_delete.
                args.dir = args.dir[:-2]
                args.resume_from = search_and_delete(
                    os.path.join(output_dir, args.dir),
                    prefix=cfg.work_dir,
                    suffix=localhost)
            cfg.work_dir += time.strftime("_%m%d_%H%M") + '_' + localhost
            cfg.work_dir = os.path.join(output_dir, args.dir, cfg.work_dir)
    if args.workers_per_gpu != -1:
        cfg.data['workers_per_gpu'] = args.workers_per_gpu
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if args.profiler or args.speed:
        cfg.data.imgs_per_gpu = 1

    if cfg.resume_from or cfg.load_from:
        cfg.model['pretrained'] = None

    if args.test:
        # Train on the validation split.
        cfg.data.train['ann_file'] = cfg.data.val['ann_file']
        cfg.data.train['img_prefix'] = cfg.data.val['img_prefix']

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        num_gpus = args.gpus
        rank = 0
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        num_gpus = torch.cuda.device_count()
        rank, _ = get_dist_info()

    # Scale the learning rate with the total batch size.
    if cfg.optimizer['type'] == 'SGD':
        cfg.optimizer['lr'] *= num_gpus * cfg.data.imgs_per_gpu / 256
    else:
        cfg.optimizer['lr'] *= ((num_gpus / 8) * (cfg.data.imgs_per_gpu / 2))

    # init logger before other steps
    logger = get_root_logger(nlogger, cfg.log_level)
    if rank == 0:
        logger.set_logger_dir(cfg.work_dir, 'd')
    logger.info("Config: ------------------------------------------\n" +
                cfg.text)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)

    if rank == 0:
        writer = set_writer(cfg.work_dir)

    if not args.graph and not args.profiler and not args.speed:
        if distributed:
            model = MMDistributedDataParallel(model.cuda())
        else:
            model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

        if isinstance(cfg.data.train, list):
            for t in cfg.data.train:
                logger.info("loading training set: " + str(t.ann_file))
            train_dataset = [build_dataset(t) for t in cfg.data.train]
            CLASSES = train_dataset[0].CLASSES
        else:
            logger.info("loading training set: " +
                        str(cfg.data.train.ann_file))
            train_dataset = build_dataset(cfg.data.train)
            logger.info("{} images loaded!".format(len(train_dataset)))
            CLASSES = train_dataset.CLASSES
        if cfg.checkpoint_config is not None:
            # save mmdet version, config file content and class names in
            # checkpoints as meta data
            cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                              config=cfg.text,
                                              CLASSES=CLASSES)
        # add an attribute for visualization convenience
        if hasattr(model, 'module'):
            model.module.CLASSES = CLASSES
        else:
            model.CLASSES = CLASSES
        train_detector(model,
                       train_dataset,
                       cfg,
                       distributed=distributed,
                       validate=args.validate,
                       logger=logger,
                       runner_attr_dict={'task_name': args.dir})
    else:
        from mmcv.runner.checkpoint import load_checkpoint
        from mmdet.datasets import build_dataloader
        from mmdet.core.utils.model_utils import register_hooks
        from mmdet.apis.train import parse_losses

        model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
        if args.profiler == 'test' or args.speed == 'test':
            model.eval()
            dataset = build_dataset(cfg.data.test)
        else:
            model.train()
            dataset = build_dataset(cfg.data.train)

        if cfg.load_from and (args.profiler or args.speed):
            logger.info('load checkpoint from %s', cfg.load_from)
            load_checkpoint(model,
                            cfg.load_from,
                            map_location='cpu',
                            strict=True)

        data_loader = build_dataloader(dataset,
                                       cfg.data.imgs_per_gpu,
                                       cfg.data.workers_per_gpu,
                                       cfg.gpus,
                                       dist=False,
                                       shuffle=False)
        if args.graph:
            # Map parameter object ids to names for the graph hooks.
            id_dict = {}
            for name, parameter in model.named_parameters():
                id_dict[id(parameter)] = name

        for i, data_batch in enumerate(data_loader):
            if args.graph:
                outputs = model(**data_batch)
                loss, log_vars = parse_losses(outputs)
                get_dot = register_hooks(loss, id_dict)
                loss.backward()
                dot = get_dot()
                dot.save('graph.dot')
                break
            elif args.profiler:
                with torch.autograd.profiler.profile(use_cuda=True) as prof:
                    if args.profiler == 'train':
                        outputs = model(**data_batch)
                        loss, log_vars = parse_losses(outputs)
                        loss.backward()
                    else:
                        with torch.no_grad():
                            model(**data_batch, return_loss=False)
                if i == 20:
                    prof.export_chrome_trace('./trace.json')
                    logger.info(prof)
                    break
            elif args.speed:
                if args.speed == 'train':
                    start = time.perf_counter()
                    outputs = model(**data_batch)
                    loss, log_vars = parse_losses(outputs)
                    loss.backward()
                    torch.cuda.synchronize()
                    end = time.perf_counter()
                else:
                    start = time.perf_counter()
                    with torch.no_grad():
                        model(**data_batch, return_loss=False)
                    # CUDA kernels run asynchronously: wait for them to
                    # finish, as the 'train' branch does, so the interval
                    # covers the computation and not just the launch.
                    torch.cuda.synchronize()
                    end = time.perf_counter()
                logger.info("{:.3f} s/iter, {:.1f} iters/s".format(
                    end - start, 1. / (end - start)))
def main():
    """Train a detector from a config file plus command-line overrides.

    Steps, in order: update the data root, merge CLI options, optionally
    enable LR auto-scaling, resolve the work directory and GPU ids,
    initialise the distributed environment, create the work directory and
    dump the config, set up the logger and the ``meta`` record, pick the
    random seed, build and initialise the model, build the datasets, then
    call ``train_detector``.

    With ``--diff-seed`` each process offsets the seed by its rank. The rank
    comes from ``get_dist_info()``, so the flag also works when no process
    group was initialised (non-distributed launch).
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # update data root according to MMDET_DATASETS
    update_data_root(cfg)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    if args.auto_scale_lr:
        if 'auto_scale_lr' in cfg and \
                'enable' in cfg.auto_scale_lr and \
                'base_batch_size' in cfg.auto_scale_lr:
            cfg.auto_scale_lr.enable = True
        else:
            warnings.warn('Can not find "auto_scale_lr" or '
                          '"auto_scale_lr.enable" or '
                          '"auto_scale_lr.base_batch_size" in your'
                          ' configuration file. Please update all the '
                          'configuration files to mmdet >= 2.24.1.')

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.auto_resume = args.auto_resume
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    cfg.device = get_device()
    # set random seeds
    seed = init_random_seed(args.seed, device=cfg.device)
    if args.diff_seed:
        # get_dist_info() is used rather than dist.get_rank(), which needs
        # an initialised process group and so fails for launcher == 'none'.
        rank, _ = get_dist_info()
        seed = seed + rank
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(cfg.model,
                           train_cfg=cfg.get('train_cfg'),
                           test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # The validation workflow reuses the training pipeline.
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__ +
                                          get_git_hash()[:7],
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model,
                   datasets,
                   cfg,
                   distributed=distributed,
                   validate=(not args.no_validate),
                   timestamp=timestamp,
                   meta=meta)
def evaluate_model(model_name, paper_arxiv_id, file_id, weights_name,
                   paper_results, config):
    """Evaluate one detector on COCO val2017 and record it in the evaluator.

    Downloads the weights from Google Drive into ``~/.cache/torch/``, runs
    the model over the dataset, converts the outputs to COCO json and adds
    the ``bbox`` detections to a ``COCOEvaluator``. When the test loop
    reports that cached results exist, the evaluator is saved directly and
    the conversion is skipped.

    Args:
        model_name: Name reported to the evaluator.
        paper_arxiv_id: arXiv id passed to the evaluator.
        file_id: Google Drive id of the weights file.
        weights_name: File name for the downloaded weights.
        paper_results: Reference results passed to the evaluator.
        config: Path of the model config file.

    Raises:
        ValueError: If the (fixed) output path is not a pkl file.
    """
    print('---')
    print('Now Evaluating %s' % model_name)
    evaluator = COCOEvaluator(root='./.data/vision/coco',
                              model_name=model_name,
                              paper_arxiv_id=paper_arxiv_id,
                              paper_results=paper_results)

    out = 'results.pkl'
    launcher = 'none'

    if out is not None and not out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(config)
    cfg.data.test[
        'ann_file'] = './.data/vision/coco/annotations/instances_val2017.json'
    cfg.data.test['img_prefix'] = './.data/vision/coco/val2017/'

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)

    destination = '%s/.cache/torch/' % (str(Path.home()))
    download_file_from_google_drive(file_id,
                                    destination,
                                    filename=weights_name)
    local_checkpoint = os.path.join(destination, weights_name)
    print(local_checkpoint)

    checkpoint = load_checkpoint(model, local_checkpoint, map_location='cpu')

    # old versions did not save class info in checkpoints, this walkaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    evaluator.reset_time()

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs, cache_exists = single_gpu_test(model, data_loader, False,
                                                evaluator)
    else:
        model = MMDistributedDataParallel(model.cuda())
        # The original referenced an undefined ``args.tmpdir`` here; there is
        # no CLI in this function, so no temporary directory is passed.
        outputs = multi_gpu_test(model, data_loader, None)
        # The distributed test loop does not report a cache hit.
        cache_exists = False

    if cache_exists:
        print('Cache exists: %s' % (evaluator.batch_hash))
        evaluator.save()
        return

    rank, _ = get_dist_info()
    if not (out and rank == 0):
        # Only rank 0 writes and evaluates results.
        return

    print('\nwriting results to {}'.format(out))
    mmcv.dump(outputs, out)

    eval_types = ['bbox']
    print('Starting evaluate {}'.format(' and '.join(eval_types)))
    if not isinstance(outputs[0], dict):
        result_files = dataset.results2json(outputs, out)
    else:
        # One result file per named output; the last one is kept below.
        for name in outputs[0]:
            print('\nEvaluating {}'.format(name))
            outputs_ = [item[name] for item in outputs]
            result_file = out + '.{}'.format(name)
            result_files = dataset.results2json(outputs_, result_file)

    # Close the json file deterministically instead of leaking the handle.
    with open(result_files['bbox']) as ann_file:
        anns = json.load(ann_file)
    evaluator.detections = []
    evaluator.add(anns)
    evaluator.save()
def main():
    """Prune backbone channels by BatchNorm scale and test the pruned model.

    Steps, in order:

    1. Build the dataset, data loader and detector from ``args.config`` and
       load ``args.checkpoint``.
    2. Collect the absolute BatchNorm2d scale factors of the backbone and
       pick a global threshold at the ``args.percent`` quantile.
    3. Build one keep-mask per BatchNorm2d layer (layers followed by a
       ``channel_selection`` module keep every channel).
    4. Create a ``PResNet`` backbone with the reduced channel counts, copy
       the surviving weights into it and save the model to
       ``<args.save_path>/pruned.pth``.
    5. Run the pruned model over the test set and dump / format / evaluate
       the results according to the CLI flags.

    Raises:
        ValueError: If ``--eval`` and ``--format_only`` are combined, or if
            ``--out`` does not name a pickle file.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results) with the argument "--out", "--eval", "--format_only" '
         'or "--show"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    # Count the prunable variables: one per BatchNorm2d channel.
    model.cuda()
    total = 0
    for m in model.backbone.modules():
        if isinstance(m, nn.BatchNorm2d):
            total += m.weight.data.shape[0]

    # Gather every |gamma| into one vector to derive the global threshold.
    bn = torch.zeros(total)
    index = 0
    for m in model.backbone.modules():
        if isinstance(m, nn.BatchNorm2d):
            size = m.weight.data.shape[0]
            bn[index:(index + size)] = m.weight.data.abs().clone()
            index += size

    # Sort by magnitude; the value at the requested quantile is the threshold.
    y, _ = torch.sort(bn)
    thre_index = int(total * args.percent)
    thre = y[thre_index].cuda()

    # ------------------------------ pre-pruning ----------------------------
    pruned = 0
    # Remaining channel count per BatchNorm2d layer. Kept separate from the
    # mmcv ``cfg`` object so the loaded config is not shadowed.
    pruned_cfg = []
    cfg_mask = []
    model_backbone = list(model.backbone.modules())
    for layer_id, m in enumerate(model_backbone):
        if isinstance(m, nn.BatchNorm2d):
            weight_copy = m.weight.data.abs().clone()
            if isinstance(model_backbone[layer_id + 1], channel_selection):
                # BN layers feeding a channel_selection module are not pruned.
                mask = torch.ones(weight_copy.shape[0]).cuda()
            else:
                # Mask marking the channels to keep.
                mask = weight_copy.gt(thre).float().cuda()
            pruned = pruned + mask.shape[0] - torch.sum(mask)
            pruned_cfg.append(int(torch.sum(mask)))
            cfg_mask.append(mask.clone())
            print(
                'layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'
                .format(layer_id, mask.shape[0], int(torch.sum(mask))))

    pruned_ratio = pruned / total
    print("剪枝比例:")
    print(pruned_ratio)
    print('Pre-processing Successful!')
    print('cfg:')
    print(pruned_cfg)

    # ----------------------------- actual pruning --------------------------
    newmodel = copy.deepcopy(model)
    newmodel.backbone = PResNet(depth=101,
                                num_stages=4,
                                out_indices=(0, 1, 2, 3),
                                frozen_stages=1,
                                norm_cfg=dict(type='BN', requires_grad=True),
                                style='pytorch',
                                cfg=pruned_cfg)
    newmodel.cuda()

    old_modules = list(model.backbone.modules())
    new_modules = list(newmodel.backbone.modules())
    layer_id_in_cfg = 0
    # Input mask of the first conv: the three image channels.
    start_mask = torch.ones(3)
    end_mask = cfg_mask[layer_id_in_cfg]
    conv_count = 0
    # Module indices of the downsampling convolutions, which are copied as is.
    downsample_conv_list = [17, 49, 90, 302]
    for layer_id, m0 in enumerate(old_modules):
        m1 = new_modules[layer_id]
        if isinstance(m0, nn.BatchNorm2d):
            idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
            if idx1.size == 1:
                idx1 = np.resize(idx1, (1, ))

            if isinstance(old_modules[layer_id + 1], channel_selection):
                # If the next layer is the channel selection layer, then the
                # current batchnorm 2d layer won't be pruned.
                m1.weight.data = m0.weight.data.clone()
                m1.bias.data = m0.bias.data.clone()
                m1.running_mean = m0.running_mean.clone()
                m1.running_var = m0.running_var.clone()

                # We need to set the channel selection layer.
                m2 = new_modules[layer_id + 1]
                m2.indexes.data.zero_()
                m2.indexes.data[idx1.tolist()] = 1.0
            else:
                m1.weight.data = m0.weight.data[idx1.tolist()].clone()
                m1.bias.data = m0.bias.data[idx1.tolist()].clone()
                m1.running_mean = m0.running_mean[idx1.tolist()].clone()
                m1.running_var = m0.running_var[idx1.tolist()].clone()

            # Advance to the next mask; after the last BN the masks are
            # left unchanged.
            layer_id_in_cfg += 1
            start_mask = end_mask.clone()
            if layer_id_in_cfg < len(cfg_mask):
                end_mask = cfg_mask[layer_id_in_cfg]
        elif isinstance(m0, nn.Conv2d):
            if conv_count == 0:
                # The stem convolution is copied unchanged.
                m1.weight.data = m0.weight.data.clone()
                conv_count += 1
                continue
            if layer_id in downsample_conv_list:
                # We need to consider the case where there are downsampling
                # convolutions. For these convolutions, we just copy the
                # weights.
                m1.weight.data = m0.weight.data.clone()
                continue
            if isinstance(old_modules[layer_id + 1], nn.BatchNorm2d):
                # This covers the convolutions in the residual block.
                idx0 = np.squeeze(
                    np.argwhere(np.asarray(start_mask.cpu().numpy())))
                idx1 = np.squeeze(
                    np.argwhere(np.asarray(end_mask.cpu().numpy())))
                print('In shape: {:d}, Out shape {:d}.'.format(
                    idx0.size, idx1.size))
                if idx0.size == 1:
                    idx0 = np.resize(idx0, (1, ))
                if idx1.size == 1:
                    idx1 = np.resize(idx1, (1, ))
                # Select surviving input channels, then surviving outputs.
                w1 = m0.weight.data[:, idx0.tolist(), :, :].clone()
                w1 = w1[idx1.tolist(), :, :, :].clone()
                m1.weight.data = w1.clone()
                conv_count += 1
                continue
        elif isinstance(m0, nn.Linear):
            idx0 = np.squeeze(np.argwhere(np.asarray(
                start_mask.cpu().numpy())))
            if idx0.size == 1:
                idx0 = np.resize(idx0, (1, ))
            m1.weight.data = m0.weight.data[:, idx0].clone()
            m1.bias.data = m0.bias.data.clone()

    print(newmodel)
    torch.save(newmodel, os.path.join(args.save_path, 'pruned.pth'))

    if not distributed:
        newmodel = MMDataParallel(newmodel, device_ids=[0])
        # Bind the test results to ``outputs``; they were previously written
        # over ``newmodel``, leaving ``outputs`` undefined below.
        outputs = single_gpu_test(newmodel, data_loader, args.show)
    else:
        newmodel = MMDistributedDataParallel(
            newmodel.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(newmodel, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print('\nwriting results to {}'.format(args.out))
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.options is None else args.options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, args.eval, **kwargs)
# test_img = '/home/chen/ai-competition/global_wheat_detection/test/51b3e36ab.jpg'
# model = init_detector(config, checkpoint, device='cuda:0')
# result = inference_detector(model, test_img)
# show_result_pyplot(model,test_img,result,score_thr=0.3)

# Single-process run: the data loader is built without distributed sampling.
distributed = False

# Load the config and switch the test split into test mode.
cfg = Config.fromfile(config)
cfg.data.test.test_mode = True

# Pick the device: GPU when one is available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the test dataset and show the config it was built from.
dataset = build_dataset(cfg.data.test)
print(cfg.data.test)

# Build the data loader: one sample per GPU, one worker, fixed order.
data_loader = build_dataloader(
    dataset,
    samples_per_gpu=1,
    workers_per_gpu=1,
    dist=distributed,
    shuffle=False,
)

# Build the detector in inference configuration.
model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

# Load the weights on CPU; `checkpoint` is rebound from the path to the
# value returned by load_checkpoint.
checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
def main(**kwargs):
    """Test a detector and return COCO plus defect evaluation results.

    Command-line arguments are parsed first; every keyword argument then
    overrides the attribute of the same name on the parsed namespace, which
    lets the function be driven programmatically.

    Args:
        **kwargs: Overrides for the parsed CLI arguments. ``config`` may be
            either a config file path or an already loaded config object.

    Returns:
        dict: Empty unless ``json_out`` is set, ``eval`` is set, this is
        rank 0 and the outputs are not dicts. In that case it holds the keys
        ``log``, ``coco_result`` and ``defect_result``.

    Raises:
        ValueError: If ``out`` does not name a pickle file.
    """
    args = parse_args()
    for k, v in kwargs.items():
        setattr(args, k, v)

    assert args.out or args.show or args.json_out, \
        ('Please specify at least one operation (save or show the results) '
         'with the argument "--out" or "--show" or "--json_out"')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    # results2json receives a prefix, so drop a trailing ".json".
    if args.json_out is not None and args.json_out.endswith('.json'):
        args.json_out = args.json_out[:-5]

    if isinstance(args.config, str):
        cfg = mmcv.Config.fromfile(args.config)
    else:
        cfg = args.config

    # Optional first-stage model, handed to single_gpu_test below.
    first_model = None
    if cfg.get('first_model_cfg', None) is not None:
        first_code_py = import_module(cfg.first_code_py)
        first_model = first_code_py.Inference(cfg.first_model_cfg,
                                              cfg.first_model_path)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    if 'val' in cfg.data:
        cfg.data.val.test_mode = True
    if 'test' in cfg.data:
        cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    if args.mode == 'val':
        dataset = build_dataset(cfg.data.val)
    else:
        dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=args.imgs_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs, result_times = single_gpu_test(model,
                                                data_loader,
                                                args.show,
                                                first_model=first_model)
    else:
        model = MMDistributedDataParallel(model.cuda())
        outputs, result_times = multi_gpu_test(model, data_loader,
                                               args.tmpdir, args.gpu_collect)

    rank, _ = get_dist_info()
    if args.out and rank == 0:
        print('\nwriting results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = args.out
                coco_eval(result_file,
                          eval_types,
                          dataset.coco,
                          classwise=True)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json(dataset, outputs, args.out)
                    coco_eval(result_files,
                              eval_types,
                              dataset.coco,
                              classwise=True)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [output[name] for output in outputs]
                        result_file = args.out + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file)
                        coco_eval(result_files,
                                  eval_types,
                                  dataset.coco,
                                  classwise=True)

    # Save predictions in the COCO json format
    defect_test_results = {}
    if args.json_out and rank == 0:
        if not isinstance(outputs[0], dict):
            result_files = results2json(dataset, outputs, args.json_out)
            eval_types = args.eval
            if eval_types:
                print('Starting evaluate {}'.format(' and '.join(eval_types)))
                # Make sure the key exists so the lookup below cannot fail.
                if 'ignore_ids' not in cfg.data[args.mode]:
                    cfg.data[args.mode]['ignore_ids'] = None
                ignore_ids = cfg.data[args.mode]['ignore_ids']
                coco_result = coco_eval(result_files,
                                        eval_types,
                                        dataset.coco,
                                        classwise=True,
                                        ignore_ids=ignore_ids)
                threshold = cfg.test_cfg['rcnn']['score_thr']
                defect_rst = defect_eval(result_files['bbox'],
                                         dataset.coco.dataset,
                                         result_times,
                                         threshold=threshold)
                defect_test_results.update(log='\n'.join(
                    [coco_result['bbox']['log'], defect_rst['log']]))
                defect_test_results.update(
                    coco_result=coco_result['bbox']['data'])
                defect_test_results.update(defect_result=defect_rst['data'])
        else:
            for name in outputs[0]:
                outputs_ = [output[name] for output in outputs]
                result_file = args.json_out + '.{}'.format(name)
                results2json(dataset, outputs_, result_file)
    return defect_test_results
def main():
    """Test a detector, dump the results and score them with ``txt_eval``.

    The detector described by ``args.config`` is loaded from
    ``args.checkpoint`` and run over the test split. When ``--out`` is given,
    rank 0 writes the raw outputs to that pickle file and evaluates the
    written file with ``txt_eval`` at ``args.iou_thr``.

    Raises:
        ValueError: If ``--out`` does not name a pickle file.
    """
    # os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    args = parse_args()

    assert args.out or args.show, \
        ('Please specify at least one operation (save or show the results) '
         'with the argument "--out" or "--show"')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show)
    else:
        model = MMDistributedDataParallel(model.cuda())
        outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    if args.out and rank == 0:
        print('\nwriting results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        # Evaluate only the file that was just written: without --out (or on
        # a non-zero rank) there is no result file to read.
        txt_eval(args.out, dataset, iou_thr=args.iou_thr)
def main():
    """Test (and optionally evaluate) a detector from the command line.

    Loads ``args.config`` (merged with ``--cfg-options``), builds the test
    dataset, data loader and detector, runs inference on one or several
    GPUs and then, on rank 0, dumps / formats / evaluates the results
    according to the CLI flags.

    Raises:
        ValueError: If ``--eval`` and ``--format_only`` are combined, or if
            ``--out`` does not name a pickle file.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    if cfg.model.get('neck'):
        if isinstance(cfg.model.neck, list):
            for neck_cfg in cfg.model.neck:
                if neck_cfg.get('rfp_backbone'):
                    if neck_cfg.rfp_backbone.get('pretrained'):
                        neck_cfg.rfp_backbone.pretrained = None
        elif cfg.model.neck.get('rfp_backbone'):
            if cfg.model.neck.rfp_backbone.get('pretrained'):
                cfg.model.neck.rfp_backbone.pretrained = None

    # The test dataset may be a single config or a list of configs
    # (concatenated datasets). Handle both when enabling test mode and when
    # reading the batch size, so a list config no longer hits
    # ``list.pop('samples_per_gpu', 1)``.
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        # One loader serves all sub-datasets, so use the largest batch size.
        samples_per_gpu = max(
            (ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test),
            default=1)
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=samples_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if not distributed:
        model = MMDataParallel(model, device_ids=args.gpu_ids)
        outputs = single_gpu_test(model, data_loader, args.show,
                                  args.show_dir, args.show_score_thr)
    else:
        model = MMDistributedDataParallel(model.cuda(),
                                          device_ids=args.gpu_ids,
                                          broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            eval_kwargs = cfg.get('evaluation', {}).copy()
            # hard-code way to remove EvalHook args
            for key in ['interval', 'tmpdir', 'start', 'gpu_collect']:
                eval_kwargs.pop(key, None)
            eval_kwargs.update(dict(metric=args.eval, **kwargs))
            print(dataset.evaluate(outputs, **eval_kwargs))
def main():
    """Train a detector as configured by the command-line arguments.

    Restricts the visible GPUs to ``args.gpu``, loads the config, applies
    the CLI overrides, sets up the (optionally distributed) environment,
    the work directory and the timestamped log file, then builds the model
    and datasets and hands everything to ``train_detector``.
    """
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    cfg = Config.fromfile(args.config)

    # cudnn autotuner, when the config asks for it
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # CLI arguments take precedence over the config file
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    if args.autoscale_lr:
        # linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8

    # The distributed environment comes first because the logger depends on
    # the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # make sure the work directory exists
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # The logger is created before any other step; its file name carries
    # the start time of the run.
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # `meta` records information such as environment info and seed
    meta = {}

    # environment info
    env_info_dict = collect_env()
    env_lines = []
    for key, value in env_info_dict.items():
        env_lines.append('{}: {}'.format(key, value))
    env_info = '\n'.join(env_lines)
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # basic run info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))

    # random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # The validation workflow reuses the training pipeline.
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))

    train_classes = datasets[0].CLASSES
    if cfg.checkpoint_config is not None:
        # mmdet version, config file content and class names are stored in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__,
            config=cfg.text,
            CLASSES=train_classes)
    # attribute added for visualization convenience
    model.CLASSES = train_classes

    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        timestamp=timestamp,
        meta=meta)
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None,
                meta=None):
    """Run distributed training of ``model`` on ``dataset``.

    Args:
        model: Detector to train; it is moved to the current CUDA device
            and wrapped in ``MMDistributedDataParallel``.
        dataset: A dataset, or a list/tuple of datasets (one per workflow
            stage).
        cfg: Config providing data, optimizer, hook, checkpoint and
            workflow settings.
        validate: Whether to register a distributed evaluation hook on
            ``cfg.data.val``.
        logger: Logger passed to the runner.
        timestamp: Timestamp stored on the runner so log file names match.
        meta: Meta information passed to the runner.
    """
    # one data loader per dataset
    datasets = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = []
    for ds in datasets:
        loader = build_dataloader(ds,
                                  cfg.data.imgs_per_gpu,
                                  cfg.data.workers_per_gpu,
                                  dist=True,
                                  seed=cfg.seed)
        data_loaders.append(loader)

    # move the model to the GPU and wrap it for distributed execution
    current_device = torch.cuda.current_device()
    model = MMDistributedDataParallel(model.cuda(),
                                      device_ids=[current_device],
                                      broadcast_buffers=False)

    # runner and optimizer
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model,
                    batch_processor,
                    optimizer,
                    cfg.work_dir,
                    logger=logger,
                    meta=meta)
    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # optimizer hook: fp16 variant when the config has an `fp16` section
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is None:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)

    # training hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    # evaluation hook
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=True,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_hook = DistEvalHook(val_dataloader, **eval_cfg)
        runner.register_hook(eval_hook)

    # resuming takes precedence over loading plain weights
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
# config_file = 'configs/pascal_voc/faster_rcnn_r101_fpn_1x_voc0712.py'
# checkpoint_file = 'work_dirs/faster_rcnn_r101_fpn_1x_voc0712/epoch_4.pth'
# syn_weights = '/raid/mun/codes/zero_shot_detection/cvpr18xian_pascal_voc/checkpoints/classifier_best.pth'

# Score threshold; it is also part of one output directory name.
score_thr = 0.34

# Create every output directory independently. With a single shared
# try/except, an already existing first directory prevented the remaining
# ones from being created.
for result_dir in ('det_results/coco', f'det_results/coco_{score_thr}',
                   'det_results/voc'):
    os.makedirs(result_dir, exist_ok=True)

# Build the detector and the test dataset from the same config file.
model = init_detector(config_file, checkpoint_file, device='cuda:0')
cfg = Config.fromfile(config_file)
dataset = build_dataset(cfg.data.test, {'test_mode': True})

# Copy the synthesised weights into the detector (COCO, 65/15 split).
# copy_syn_weights(syn_weights, model)
copy_synthesised_weights(model, syn_weights, 'coco', split='65_15')

root = '/raid/mun/codes/data/coco2014/val2014'
# df = pd.read_csv('../MSCOCO/validation_coco_unseen_all.csv', header=None)
# file_names = np.unique(df.iloc[:, 0].values)
# files_path = [f"{root}{file_name}" for file_name in file_names]
# files_path = np.array(files_path)

import random

from splits import COCO_ALL_CLASSES

# One random colour triple per class label; the second and third components
# are drawn from narrower ranges (120-255 and 200-255).
color_map = {
    label: (random.randint(0, 255), random.randint(120, 255),
            random.randint(200, 255))
    for label in COCO_ALL_CLASSES
}
# det_results = mmcv.load('gen_coco_results.pkl')
def main():
    """Test a detector, report Pascal-style mAP and optionally write JSON.

    When ``--checkpoint`` is not given, the ``epoch_<N>.pth`` file with the
    highest ``N`` in ``cfg.work_dir`` is used. On rank 0 the outputs are
    scored with ``eval_map`` at IoU 0.5 and, if ``--json_out`` is set,
    written in COCO json format.

    Raises:
        ValueError: If no checkpoint is given and none is found in
            ``cfg.work_dir``.
    """
    args = parse_args()

    # results2json receives a prefix, so drop a trailing ".json".
    if args.json_out is not None and args.json_out.endswith('.json'):
        args.json_out = args.json_out[:-5]

    cfg = mmcv.Config.fromfile(args.config)

    checkpoint_file = args.checkpoint
    if not checkpoint_file:
        # Only files named exactly ``epoch_<digits>.pth`` take part; other
        # files matched by the glob (e.g. ``epoch_best.pth``) are skipped
        # instead of crashing the epoch parsing.
        epoch_pattern = re.compile(r'epoch_(\d+)\.pth')
        numbered = []
        for path in glob.glob(os.path.join(cfg.work_dir, 'epoch_*.pth')):
            match = epoch_pattern.fullmatch(os.path.basename(path))
            if match:
                numbered.append((int(match.group(1)), path))
        if numbered:
            checkpoint_file = max(numbered)[1]
            print("Found {}, use it as checkpoint by default.".format(
                checkpoint_file))
    if not checkpoint_file:
        raise ValueError("Checkpoints not found, check work_dir non empty.")

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=args.shuffle)  # TODO: hack shuffle True

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, checkpoint_file, map_location='cpu')
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    # A negative --num_evals means "use the full data loader length".
    num_evals = args.num_evals
    if num_evals < 0:
        num_evals = len(data_loader)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, num_evals, args.show)
    else:
        model = MMDistributedDataParallel(model.cuda())
        outputs = multi_gpu_test(model, data_loader, num_evals, args.tmpdir)

    rank, _ = get_dist_info()
    if rank == 0:
        gt_bboxes, gt_labels, gt_ignore, dataset_name = get_pascal_gts(
            dataset)
        print('\nStarting evaluate {}'.format(dataset_name))
        eval_map(outputs,
                 gt_bboxes,
                 gt_labels,
                 gt_ignore,
                 scale_ranges=None,
                 iou_thr=0.5,
                 dataset=dataset_name,
                 print_summary=True)

    # Save predictions in the COCO json format
    if args.json_out and rank == 0:
        if not isinstance(outputs[0], dict):
            results2json(dataset, outputs, args.json_out)
        else:
            for name in outputs[0]:
                outputs_ = [output[name] for output in outputs]
                result_file = args.json_out + '.{}'.format(name)
                results2json(dataset, outputs_, result_file)
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Set up training, then count positive RPN anchors per feature level.

    The function builds the data loaders, wraps the model, and builds the
    runner and hooks exactly as for training, but the actual
    ``runner.run`` call is disabled. Instead it walks every batch of every
    data loader, assigns anchors to the ground-truth boxes of the first
    image of the batch, and accumulates the number of positive anchors on
    each of the five pyramid levels. The running totals are printed.

    Args:
        model: Detector to wrap for (distributed) data-parallel execution.
        dataset: A dataset, or a list/tuple of datasets.
        cfg: Config with data, optimizer, runner, hook and model settings;
            ``cfg.model.rpn_head.anchor_generator`` and
            ``cfg.model.train_cfg.rpn.assigner`` are required.
        distributed: Whether to use distributed data loading / wrapping.
        validate: Whether to register an evaluation hook.
        timestamp: Timestamp stored on the runner so log file names match.
        meta: Meta information passed to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    # Training itself is disabled; only the anchor statistics below are run.
    # runner.run(data_loaders, cfg.workflow)

    anchor_generator = build_anchor_generator(
        cfg.model.rpn_head.anchor_generator)
    assigner = build_assigner(cfg.model.train_cfg.rpn.assigner)
    # One counter per pyramid level (five levels, strides 4 .. 64).
    total_num_targets = torch.tensor([0] * 5)
    for data_loader in data_loaders:
        for batch in data_loader:
            img_metas = batch['img_metas']._data
            num_imgs = len(img_metas)
            images = batch['img']._data
            gt_bboxes = batch['gt_bboxes']._data
            h, w = images[0].size()[-2:]

            # Feature map sizes for the levels with stride 2**2 .. 2**6.
            features_shape = [[int(h / (2**level)),
                               int(w / (2**level))] for level in range(2, 7)]
            multi_level_anchors = anchor_generator.grid_anchors(
                features_shape)
            anchor_list = [multi_level_anchors for _ in range(num_imgs)]

            # for each image, we compute valid flags of multi level anchors
            valid_flag_list = []
            for img_meta in img_metas:
                multi_level_flags = anchor_generator.valid_flags(
                    features_shape, img_meta[0]['pad_shape'])
                valid_flag_list.append(multi_level_flags)
            assert len(anchor_list) == len(valid_flag_list) == num_imgs

            # anchor number of multi levels
            num_level_anchors = [
                anchors.size(0) for anchors in anchor_list[0]
            ]
            # concat all level anchors to a single tensor
            concat_anchor_list = []
            concat_valid_flag_list = []
            for img_idx in range(num_imgs):
                assert len(anchor_list[img_idx]) == len(
                    valid_flag_list[img_idx])
                concat_anchor_list.append(torch.cat(anchor_list[img_idx]))
                concat_valid_flag_list.append(
                    torch.cat(valid_flag_list[img_idx]))

            # No ignore boxes are used for the assignment.
            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]

            # Only the first image of the batch is analysed.
            inside_flags = anchor_inside_flags(
                concat_anchor_list[0], concat_valid_flag_list[0],
                img_metas[0][0]['img_shape'][:2], 0)
            if not inside_flags.any():
                # Nothing to assign for this batch: skip it rather than
                # aborting the whole pass with a stray tuple return.
                continue

            # assign gt and sample anchors
            anchors = concat_anchor_list[0][inside_flags, :]
            assign_result = assigner.assign(anchors.cpu(), gt_bboxes[0][0],
                                            gt_bboxes_ignore_list[0], None)
            # NOTE(review): relies on the assign result exposing
            # `pos_gt_bboxes` -- confirm against the assigner in use.
            print(assign_result.pos_gt_bboxes)
            pos_inds = torch.nonzero(assign_result.gt_inds > 0,
                                     as_tuple=False)
            labels = anchors.new_full((anchors.shape[0], ),
                                      -1,
                                      dtype=torch.long)
            labels[pos_inds] = 1
            num_total_anchors = concat_anchor_list[0].size(0)
            # Map back onto the full anchor set; anchors outside get -1.
            labels = unmap(labels, num_total_anchors, inside_flags, fill=-1)
            match_results = images_to_levels([labels], num_level_anchors)
            for idx, match_result in enumerate(match_results):
                num = torch.where(match_result == 1)[0].numel()
                total_num_targets[idx] += num
        print(total_num_targets)
    print(total_num_targets)
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Build data loaders, a runner and its hooks from ``cfg``, then train.

    Args:
        model: Detector to train. It is moved to GPU and wrapped in
            ``MMDistributedDataParallel`` (distributed) or
            ``MMDataParallel`` (otherwise).
        dataset: One dataset, or a list/tuple of datasets; one data loader
            is built per dataset.
        cfg: Training config. It is modified in place:
            ``data.samples_per_gpu``, ``runner``, ``resume_from``,
            ``evaluation`` and ``data.val`` may be written to.
        distributed (bool): Use the distributed model wrapper, the sampler
            seed hook and ``DistEvalHook``.
        validate (bool): Register an evaluation hook on ``cfg.data.val``.
        timestamp: Stored on the runner as ``runner.timestamp``.
        meta: Forwarded to the runner as ``meta``.
    """
    logger = get_root_logger(log_level=cfg.log_level)

    # prepare data loaders
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]

    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        # the deprecated key wins in both cases
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    if 'runner' in cfg:
        runner_type = cfg.runner['type']
    else:
        runner_type = 'EpochBasedRunner'

    data_loaders = []
    for ds in dataset:
        loader = build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # `num_gpus` will be ignored if distributed
            num_gpus=len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            runner_type=runner_type,
            persistent_workers=cfg.data.get('persistent_workers', False))
        data_loaders.append(loader)

    # put model on gpus
    if not distributed:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
    else:
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    elif 'total_epochs' in cfg:
        assert cfg.total_epochs == cfg.runner.max_epochs

    runner_defaults = dict(
        model=model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger,
        meta=meta)
    runner = build_runner(cfg.runner, default_args=runner_defaults)

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(
        cfg.lr_config,
        optimizer_config,
        cfg.checkpoint_config,
        cfg.log_config,
        cfg.get('momentum_config', None),
        custom_hooks_config=cfg.get('custom_hooks', None))

    if distributed and isinstance(runner, EpochBasedRunner):
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook_cls = DistEvalHook if distributed else EvalHook
        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
        runner.register_hook(
            eval_hook_cls(val_dataloader, **eval_cfg), priority='LOW')

    # auto resume: only looked up when no checkpoint was given explicitly
    if cfg.resume_from is None and cfg.get('auto_resume'):
        latest_checkpoint = find_latest_checkpoint(cfg.work_dir)
        if latest_checkpoint is not None:
            cfg.resume_from = latest_checkpoint

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
def main():
    """Train a detector and copy the work directory to ``args.train_url``.

    Parses the CLI arguments, lets ``prepare_data_on_modelarts`` adjust
    them, builds the config, logger, model and datasets, and runs
    ``train_detector``. Whether or not training raised, the work directory
    is then copied to ``<args.train_url>/work_dir`` with
    ``mox.file.copy_parallel``.
    """
    args = parse_args()
    args = prepare_data_on_modelarts(args)

    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # the validation stage of the workflow reuses the training pipeline
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__,
            config=cfg.pretty_text,
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES

    try:
        train_detector(
            model,
            datasets,
            cfg,
            distributed=distributed,
            validate=(not args.no_validate),
            timestamp=timestamp,
            meta=meta)
    except Exception:
        # Best effort: do not re-raise, so the upload below still runs, but
        # keep the traceback in the log instead of dropping it.
        logger.exception('training interrupted in advance')
        print('training interrupted in advance')
    finally:
        # copy work_dir to OBS
        target_dir = os.path.join(args.train_url, 'work_dir')
        os.makedirs(target_dir, exist_ok=True)
        # cfg.work_dir is the directory actually written to; args.work_dir
        # can be None when the directory came from the config or its default.
        mox.file.copy_parallel(cfg.work_dir, target_dir)
def main():
    """Parse CLI arguments, build model and datasets, and start training.

    The config is loaded from ``args.config`` and overridden by
    ``--work_dir``/``--resume_from``/``--gpus`` style arguments, a log file
    named after the start timestamp is created in the work directory, and
    ``train_detector`` is called.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        base_lr = cfg.optimizer['lr']
        cfg.optimizer['lr'] = base_lr * cfg.gpus / 8

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_name = '{}.log'.format(timestamp)
    logger = get_root_logger(
        log_file=osp.join(cfg.work_dir, log_name), log_level=cfg.log_level)

    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('MMDetection Version: {}'.format(__version__))
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    # one dataset per workflow stage (train, and optionally val)
    train_dataset = build_dataset(cfg.data.train)
    datasets = [train_dataset]
    if len(cfg.workflow) == 2:
        datasets.append(build_dataset(cfg.data.val))

    class_names = datasets[0].CLASSES
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text, CLASSES=class_names)
    # add an attribute for visualization convenience
    model.CLASSES = class_names

    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        timestamp=timestamp)
def main():
    """Test a checkpoint and optionally dump, format or evaluate the results.

    Validates the CLI arguments, loads and patches the config for testing
    (pretrained weights disabled, test mode enabled), builds the test data
    loader and model, runs single- or multi-GPU inference, and on rank 0
    writes/evaluates the outputs as requested.

    Raises:
        ValueError: If ``--eval`` and ``--format_only`` are both given, or
            if ``--out`` does not end in ``.pkl``/``.pickle``.
    """
    args = parse_args()

    requested_ops = (args.out, args.eval, args.format_only, args.show,
                     args.show_dir)
    assert any(requested_ops), \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)

    # update data root according to MMDET_DATASETS
    update_data_root(cfg)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    cfg = compat_cfg(cfg)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # weights come from the checkpoint, so drop pretrained initialisation
    if 'pretrained' in cfg.model:
        cfg.model.pretrained = None
    elif 'init_cfg' in cfg.model.backbone:
        cfg.model.backbone.init_cfg = None

    if cfg.model.get('neck'):
        if isinstance(cfg.model.neck, list):
            neck_cfgs = cfg.model.neck
        else:
            neck_cfgs = [cfg.model.neck]
        for neck_cfg in neck_cfgs:
            if (neck_cfg.get('rfp_backbone')
                    and neck_cfg.rfp_backbone.get('pretrained')):
                neck_cfg.rfp_backbone.pretrained = None

    if args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]
    else:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed testing. Use the first GPU '
                      'in `gpu_ids` now.')
    cfg.device = get_device()

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)

    # in case the test dataset is concatenated
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    # explicit loader settings from the config override the defaults
    test_loader_cfg = dict(test_dataloader_default_args)
    test_loader_cfg.update(cfg.data.get('test_dataloader', {}))

    rank, _ = get_dist_info()
    # allows not to create
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    if cfg.get('fp16', None) is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    # old versions did not save class info in checkpoints, this walkaround is
    # for backward compatibility
    checkpoint_meta = checkpoint.get('meta', {})
    if 'CLASSES' in checkpoint_meta:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if distributed:
        model = build_ddp(
            model,
            cfg.device,
            device_ids=[int(os.environ['LOCAL_RANK'])],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)
    else:
        model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
        outputs = single_gpu_test(model, data_loader, args.show,
                                  args.show_dir, args.show_score_thr)

    rank, _ = get_dist_info()
    if rank != 0:
        # only rank 0 writes and evaluates results
        return

    if args.out:
        print(f'\nwriting results to {args.out}')
        mmcv.dump(outputs, args.out)

    kwargs = args.eval_options if args.eval_options is not None else {}
    if args.format_only:
        dataset.format_results(outputs, **kwargs)
    if args.eval:
        eval_kwargs = cfg.get('evaluation', {}).copy()
        # hard-code way to remove EvalHook args
        hook_only_keys = [
            'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
            'rule', 'dynamic_intervals'
        ]
        for key in hook_only_keys:
            eval_kwargs.pop(key, None)
        eval_kwargs.update(dict(metric=args.eval, **kwargs))
        metric = dataset.evaluate(outputs, **eval_kwargs)
        print(metric)
        metric_dict = dict(config=args.config, metric=metric)
        if args.work_dir is not None and rank == 0:
            mmcv.dump(metric_dict, json_file)
def main():
    """Test a checkpoint; optionally dump, COCO-evaluate and export results.

    Runs single- or multi-GPU inference on ``cfg.data.test``. On rank 0 the
    raw outputs are dumped to ``--out`` (and evaluated with ``coco_eval``
    when ``--eval`` is given), and/or exported through ``results2json`` to
    ``--json_out``. Evaluation only happens when ``--out`` is set.

    Raises:
        ValueError: If ``--out`` does not end in ``.pkl``/``.pickle``.
    """
    args = parse_args()

    assert args.out or args.show or args.json_out, \
        ('Please specify at least one operation (save or show the results) '
         'with the argument "--out" or "--show" or "--json_out"')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    # the '.json' suffix is stripped here; the prefix is what gets passed to
    # results2json below
    if args.json_out is not None and args.json_out.endswith('.json'):
        args.json_out = args.json_out[:-5]

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    # Old versions did not save class info in checkpoints, and a checkpoint
    # may carry no 'meta' entry at all; fall back to the dataset's classes
    # in both cases instead of raising KeyError.
    checkpoint_meta = checkpoint.get('meta', {})
    if 'CLASSES' in checkpoint_meta:
        model.CLASSES = checkpoint_meta['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show)
    else:
        model = MMDistributedDataParallel(model.cuda())
        outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    if args.out and rank == 0:
        print('\nwriting results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                # evaluated directly from the dumped pickle
                result_file = args.out
                coco_eval(result_file, eval_types, dataset.coco)
            elif not isinstance(outputs[0], dict):
                result_files = results2json(dataset, outputs, args.out)
                coco_eval(result_files, eval_types, dataset.coco)
            else:
                # dict outputs: evaluate each named output separately
                for name in outputs[0]:
                    print('\nEvaluating {}'.format(name))
                    outputs_ = [out[name] for out in outputs]
                    result_file = args.out + '.{}'.format(name)
                    result_files = results2json(dataset, outputs_,
                                                result_file)
                    coco_eval(result_files, eval_types, dataset.coco)

    # Save predictions in the COCO json format
    if args.json_out and rank == 0:
        if not isinstance(outputs[0], dict):
            results2json(dataset, outputs, args.json_out)
        else:
            for name in outputs[0]:
                outputs_ = [out[name] for out in outputs]
                result_file = args.json_out + '.{}'.format(name)
                results2json(dataset, outputs_, result_file)
def main(args: argparse.Namespace, wandb_init_cfg: dict) -> int:
    """Configure an MMDetection run from CLI args and W&B settings and train.

    Args:
        args: Parsed arguments; ``seed``, ``config``, ``save_dir`` and
            ``debug`` are read.
        wandb_init_cfg: W&B settings; the keys ``name``, ``use_sweep``,
            ``project``, ``entity``, ``group``, ``job_type``, ``tags`` and
            ``notes`` are read.

    Returns:
        ``1`` if the experiment directory could not be created, ``0`` once
        training has finished.
    """
    # ---- Configuration ----
    # Random seed
    set_random_seed(args.seed)

    # MMDetection config
    cfg = Config.fromfile(args.config)

    # Paths and directories to save results
    if cfg.resume_from is not None:
        # keep writing next to the checkpoint being resumed
        exp_dir = os.path.dirname(cfg.resume_from)
    elif wandb_init_cfg['name'] is None or wandb_init_cfg['use_sweep']:
        exp_dir = os.path.join(args.save_dir, r'_NEW_EXPERIMENT')
        os.makedirs(exp_dir, exist_ok=True)
    else:
        exp_dir = os.path.join(args.save_dir, wandb_init_cfg['name'])
        try:
            # refuse to reuse the directory of an existing named run
            os.makedirs(exp_dir, exist_ok=False)
        except OSError as err:
            print(
                f'[ERROR: {err}] The directory exists. Check the directory to save checkpoints.'
            )
            return 1

    # ---- MMDetection ----
    cfg.seed = args.seed
    cfg.gpu_ids = [0]

    # W&B
    # NOTE(review): assumes hooks[1] is the W&B logger hook -- confirm
    # against the config files used with this script.
    wandb_kwargs = cfg.log_config.hooks[1].init_kwargs
    for key in ('project', 'entity', 'group', 'job_type', 'tags', 'name',
                'notes'):
        setattr(wandb_kwargs, key, wandb_init_cfg[key])

    if wandb_init_cfg['use_sweep']:
        cfg.checkpoint_config.max_keep_ckpts = 1
        cfg.checkpoint_config.interval = 99999999
        cfg.evaluation.save_best = None

    cfg.work_dir = exp_dir

    # DEBUG: Training for debugging
    if args.debug:
        # switch every split to its '_dev' annotation file
        for split_cfg in (cfg.data.train, cfg.data.val, cfg.data.test):
            path, ext = os.path.splitext(split_cfg.ann_file)
            split_cfg.ann_file = path + r'_dev' + ext

    # Experiments using a smaller dataset
    if cfg.n_train_data == 3272:
        cfg.checkpoint_config.max_keep_ckpts = 10
        cfg.checkpoint_config.interval = 1
        cfg.evaluation.save_best = None
    elif cfg.n_train_data != 2617:
        path, ext = os.path.splitext(cfg.data.train.ann_file)
        cfg.data.train.ann_file = path + f'_{cfg.n_train_data}' + ext

        if cfg.n_train_data == 1024:
            cfg.log_config.interval = cfg.steps_per_epoch // 4
        elif cfg.n_train_data == 512:
            cfg.log_config.interval = cfg.steps_per_epoch // 2

        # NOTE(review): the original layout was lost; these three settings
        # are assumed to apply to every size handled by this branch, not
        # only to 512 -- confirm.
        cfg.checkpoint_config.max_keep_ckpts = 1
        cfg.checkpoint_config.interval = 99999999
        cfg.evaluation.save_best = None

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        datasets.append(build_dataset(cfg.data.val))

    model = build_detector(cfg.model)
    train_detector(model, datasets, cfg, distributed=False, validate=True)

    # honour the declared ``-> int`` contract on the success path as well
    return 0
def main():
    """Evaluate a checkpoint under image corruptions at several severities.

    For every selected corruption and every severity in ``args.severities``
    a ``Corrupt`` transform is inserted into a copy of the test pipeline,
    the model is rebuilt and run, and (on rank 0, when ``--out`` is given)
    the outputs are dumped and evaluated. The aggregated results are written
    to ``<out stem>_results<out ext>`` after each evaluation and summarised
    at the end with ``get_results``.

    Raises:
        ValueError: If ``--out`` does not end in ``.pkl``/``.pickle``.
    """
    args = parse_args()

    assert args.out or args.show, \
        ('Please specify at least one operation (save or show the results) '
         'with the argument "--out" or "--show"')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    if args.workers == 0:
        args.workers = cfg.data.workers_per_gpu

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed)

    # Named corruption groups, checked in this priority order; the first
    # keyword found in args.corruptions wins.
    noise = ['gaussian_noise', 'shot_noise', 'impulse_noise']
    blur = ['defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur']
    weather = ['snow', 'frost', 'fog', 'brightness']
    digital = [
        'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression'
    ]
    holdout = ['speckle_noise', 'gaussian_blur', 'spatter', 'saturate']
    benchmark = noise + blur + weather + digital
    corruption_groups = (
        ('all', benchmark + holdout),
        ('benchmark', benchmark),
        ('noise', noise),
        ('blur', blur),
        ('weather', weather),
        ('digital', digital),
        ('holdout', holdout),
        ('None', ['None']),
    )
    corruptions = args.corruptions
    for keyword, members in corruption_groups:
        if keyword in args.corruptions:
            corruptions = members
            if keyword == 'None':
                # no corruption: only the clean severity is meaningful
                args.severities = [0]
            break

    # Derive the results path up front: it is needed after the loop even if
    # this process never dumps anything (no --out, or rank != 0).
    eval_results_filename = None
    if args.out:
        out_stem, out_ext = osp.splitext(args.out)
        eval_results_filename = out_stem + '_results' + out_ext
    results_written = False

    aggregated_results = {}
    for corr_i, corruption in enumerate(corruptions):
        aggregated_results[corruption] = {}
        for corruption_severity in args.severities:
            # evaluate severity 0 (= no corruption) only once
            if corr_i > 0 and corruption_severity == 0:
                aggregated_results[corruption][0] = \
                    aggregated_results[corruptions[0]][0]
                continue

            test_data_cfg = copy.deepcopy(cfg.data.test)
            # assign corruption and severity
            if corruption_severity > 0:
                corruption_trans = dict(
                    type='Corrupt',
                    corruption=corruption,
                    severity=corruption_severity)
                # TODO: hard coded "1", we assume that the first step is
                # loading images, which needs to be fixed in the future
                test_data_cfg['pipeline'].insert(1, corruption_trans)

            # print info
            print('\nTesting {} at severity {}'.format(corruption,
                                                       corruption_severity))

            # build the dataloader
            # TODO: support multiple images per gpu
            #       (only minor changes are needed)
            dataset = build_dataset(test_data_cfg)
            data_loader = build_dataloader(
                dataset,
                imgs_per_gpu=1,
                workers_per_gpu=args.workers,
                dist=distributed,
                shuffle=False)

            # build the model and load checkpoint
            model = build_detector(
                cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
            fp16_cfg = cfg.get('fp16', None)
            if fp16_cfg is not None:
                wrap_fp16_model(model)
            checkpoint = load_checkpoint(
                model, args.checkpoint, map_location='cpu')
            # Old versions did not save class info in checkpoints, and a
            # checkpoint may carry no 'meta' entry at all; fall back to the
            # dataset's classes instead of raising KeyError.
            checkpoint_meta = checkpoint.get('meta', {})
            if 'CLASSES' in checkpoint_meta:
                model.CLASSES = checkpoint_meta['CLASSES']
            else:
                model.CLASSES = dataset.CLASSES

            if not distributed:
                model = MMDataParallel(model, device_ids=[0])
                outputs = single_gpu_test(model, data_loader, args.show)
            else:
                model = MMDistributedDataParallel(model.cuda())
                outputs = multi_gpu_test(model, data_loader, args.tmpdir)

            rank, _ = get_dist_info()
            if args.out and rank == 0:
                mmcv.dump(outputs, args.out)
                eval_types = args.eval
                if cfg.dataset_type == 'VOCDataset':
                    if eval_types:
                        for eval_type in eval_types:
                            if eval_type == 'bbox':
                                test_dataset = mmcv.runner.obj_from_dict(
                                    cfg.data.test, datasets)
                                _, eval_results = voc_eval_with_return(
                                    args.out, test_dataset, args.iou_thr,
                                    args.summaries)
                                aggregated_results[corruption][
                                    corruption_severity] = eval_results
                            else:
                                print('\nOnly "bbox" evaluation '
                                      'is supported for pascal voc')
                else:
                    if eval_types:
                        print('Starting evaluate {}'.format(
                            ' and '.join(eval_types)))
                        if eval_types == ['proposal_fast']:
                            # NOTE(review): `result_files` is never assigned
                            # on this path, so the call below fails for
                            # 'proposal_fast'. The input expected by
                            # coco_eval_with_return is not visible here;
                            # left unchanged.
                            result_file = args.out
                        else:
                            if not isinstance(outputs[0], dict):
                                result_files = results2json(
                                    dataset, outputs, args.out)
                            else:
                                # only the last named output's files remain
                                # in `result_files` after this loop
                                for name in outputs[0]:
                                    print('\nEvaluating {}'.format(name))
                                    outputs_ = [out[name] for out in outputs]
                                    result_file = args.out + '.{}'.format(name)
                                    result_files = results2json(
                                        dataset, outputs_, result_file)
                        eval_results = coco_eval_with_return(
                            result_files, eval_types, dataset.coco)
                        aggregated_results[corruption][
                            corruption_severity] = eval_results
                    else:
                        print('\nNo task was selected for evaluation;'
                              '\nUse --eval to select a task')

                # save results after each evaluation
                mmcv.dump(aggregated_results, eval_results_filename)
                results_written = True

    if not results_written:
        # Nothing was dumped by this process, so there is no results file
        # to summarise.
        return

    # print final results
    print('\nAggregated results:')
    prints = args.final_prints
    aggregate = args.final_prints_aggregate

    dataset_name = 'voc' if cfg.dataset_type == 'VOCDataset' else 'coco'
    get_results(
        eval_results_filename,
        dataset=dataset_name,
        prints=prints,
        aggregate=aggregate)
def main():
    """Run the MI-AOD active-learning cycles: train, then select new images.

    After the usual config/logger setup, the initial labeled/unlabeled index
    sets come from ``get_X_L_0`` and are saved under a timestamped work
    directory. For each cycle in ``cfg.cycles`` a fresh detector is built
    and, for ``cfg.epoch`` rounds, trained in stages (labeled-set training,
    two re-weighting stages on labeled + unlabeled data, labeled-set
    training again). Except after the last cycle, ``calculate_uncertainty``
    and ``update_X_L`` then produce the next labeled/unlabeled sets, which
    are saved as ``X_L_<cycle+1>.npy`` / ``X_U_<cycle+1>.npy``.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_directory is determined in this priority:
    # CLI > segment in file > filename
    if args.work_directory is not None:
        cfg.work_directory = args.work_directory
    elif cfg.get('work_directory', None) is None:
        config_stem = osp.splitext(osp.basename(args.config))[0]
        cfg.work_directory = osp.join('./work_directory/retina', config_stem)
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    elif args.gpus is None:
        cfg.gpu_ids = range(1)
    else:
        cfg.gpu_ids = range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # create work_directory
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_directory))
    # dump config
    cfg.dump(osp.join(cfg.work_directory, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_directory, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    def _reseed(current_cfg):
        # Re-apply the CLI seed (if any) and record it in the config/meta.
        if args.seed is not None:
            logger.info(
                f'Set random seed to {args.seed}, deterministic: {args.deterministic}'
            )
            set_random_seed(args.seed, deterministic=args.deterministic)
        current_cfg.seed = args.seed
        meta['seed'] = args.seed

    def _set_trainable(net, is_trainable):
        # Set requires_grad on every parameter from a name predicate.
        for param_name, param in net.named_parameters():
            param.requires_grad = is_trainable(param_name)

    # ---------- MI-AOD Training and Test Start Here ---------- #

    # set random seeds
    _reseed(cfg)
    X_L, X_U, X_all, all_anns = get_X_L_0(cfg)

    # # load set and model
    # # Please change it to the timestamp directory which you want to load data from.
    # last_timestamp = '/20201013_154728'
    # # Please change it to the cycle which you want to load data from.
    # load_cycle = 0
    # X_L = np.load(cfg.work_directory + last_timestamp +'/X_L_' + str(load_cycle) + '.npy')
    # X_U = np.load(cfg.work_directory + last_timestamp +'/X_U_' + str(load_cycle) + '.npy')
    # cfg.cycles = list(range(load_cycle, 7))

    cfg.work_directory = cfg.work_directory + '/' + timestamp
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_directory))
    np.save(cfg.work_directory + '/X_L_' + '0' + '.npy', X_L)
    np.save(cfg.work_directory + '/X_U_' + '0' + '.npy', X_U)
    initial_step = cfg.lr_config.step

    # parameter names of the two `f_1` / `f_2` branches of the bbox head
    theta_f_1 = [
        'bbox_head.f_1_convs.0.conv.weight',
        'bbox_head.f_1_convs.0.conv.bias',
        'bbox_head.f_1_convs.1.conv.weight',
        'bbox_head.f_1_convs.1.conv.bias',
        'bbox_head.f_1_convs.2.conv.weight',
        'bbox_head.f_1_convs.2.conv.bias',
        'bbox_head.f_1_convs.3.conv.weight',
        'bbox_head.f_1_convs.3.conv.bias',
        'bbox_head.f_1_retina.weight',
        'bbox_head.f_1_retina.bias',
    ]
    theta_f_2 = [
        'bbox_head.f_2_convs.0.conv.weight',
        'bbox_head.f_2_convs.0.conv.bias',
        'bbox_head.f_2_convs.1.conv.weight',
        'bbox_head.f_2_convs.1.conv.bias',
        'bbox_head.f_2_convs.2.conv.weight',
        'bbox_head.f_2_convs.2.conv.bias',
        'bbox_head.f_2_convs.3.conv.weight',
        'bbox_head.f_2_convs.3.conv.bias',
        'bbox_head.f_2_retina.weight',
        'bbox_head.f_2_retina.bias',
    ]
    branch_params = frozenset(theta_f_1) | frozenset(theta_f_2)

    for cycle in cfg.cycles:
        # set random seeds
        _reseed(cfg)
        # get the config of the labeled dataset
        cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
        # load model
        model = build_detector(
            cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

        # # Please change it to the epoch which you want to load model at.
        # model_file_name = '/latest.pth'
        # model.load_state_dict(torch.load(cfg.work_directory[:16] + last_timestamp + model_file_name)['state_dict'])

        # load dataset
        datasets = [build_dataset(cfg.data.train)]
        if len(cfg.workflow) == 2:
            val_dataset = copy.deepcopy(cfg.data.val)
            val_dataset.pipeline = cfg.data.train.pipeline
            datasets.append(build_dataset(val_dataset))
        if cfg.checkpoint_config is not None and cycle == 0:
            # save mmdet version, config file content and class names in
            # checkpoints as meta data
            cfg.checkpoint_config.meta = dict(
                mmdet_version=__version__ + get_git_hash()[:7],
                config=cfg.pretty_text,
                CLASSES=datasets[0].CLASSES)
        model.CLASSES = datasets[0].CLASSES

        for epoch in range(cfg.epoch):
            # Only the last round restores the configured lr steps and an
            # evaluation interval that can actually trigger.
            is_last_round = epoch == cfg.epoch - 1
            if is_last_round:
                cfg.lr_config.step = initial_step
                cfg.evaluation.interval = cfg.epoch_ratio[0]
            else:
                cfg.lr_config.step = [1000]
                cfg.evaluation.interval = 100

            # ---------- Label Set Training ----------
            if epoch == 0:
                cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
                datasets = [build_dataset(cfg.data.train)]
                losstype.update_vars(0)
                cfg.total_epochs = cfg.epoch_ratio[0]
                # restored after training, so changes made to cfg during
                # the call do not carry over
                cfg_bak = cfg.deepcopy()
                time.sleep(2)
                _set_trainable(model, lambda param_name: True)
                train_detector(
                    model,
                    datasets,
                    cfg,
                    distributed=distributed,
                    validate=(not args.no_validate),
                    timestamp=timestamp,
                    meta=meta)
                cfg = cfg_bak

            # ---------- Re-weighting and Minimizing Instance Uncertainty,
            # ---------- then Re-weighting and Maximizing Instance Uncertainty
            # Stage 1 freezes the f_1/f_2 branch parameters and trains the
            # rest; stage 2 trains only the f_1/f_2 branch parameters.
            for stage, train_branches in ((1, False), (2, True)):
                cfg_u = create_X_U_file(cfg.deepcopy(), X_U, all_anns, cycle)
                cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
                datasets_u = [build_dataset(cfg_u.data.train)]
                datasets = [build_dataset(cfg.data.train)]
                losstype.update_vars(stage)
                cfg_u.total_epochs = cfg_u.epoch_ratio[1]
                cfg.total_epochs = cfg.epoch_ratio[1]
                cfg_u_bak = cfg_u.deepcopy()
                cfg_bak = cfg.deepcopy()
                time.sleep(2)
                _set_trainable(
                    model,
                    lambda param_name, flag=train_branches:
                    (param_name in branch_params) == flag)
                train_detector(
                    model, [datasets, datasets_u], [cfg, cfg_u],
                    distributed=distributed,
                    validate=(not args.no_validate),
                    timestamp=timestamp,
                    meta=meta)
                cfg_u = cfg_u_bak
                cfg = cfg_bak

            # ---------- Label Set Training ----------
            cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
            datasets = [build_dataset(cfg.data.train)]
            losstype.update_vars(0)
            cfg.total_epochs = cfg.epoch_ratio[0]
            cfg_bak = cfg.deepcopy()
            _set_trainable(model, lambda param_name: True)
            time.sleep(2)
            # NOTE(review): this stage passes args.no_validate without the
            # negation used by the other stages; kept exactly as written --
            # confirm against how --no-validate is declared in parse_args.
            train_detector(
                model,
                datasets,
                cfg,
                distributed=distributed,
                validate=args.no_validate,
                timestamp=timestamp,
                meta=meta)
            cfg = cfg_bak

        # ---------- Informative Image Selection ----------
        if cycle == cfg.cycles[-1]:
            # nothing to select after the final cycle
            continue

        # get new labeled data
        dataset_al = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset_al,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)
        # set random seeds
        _reseed(cfg)
        uncertainty = calculate_uncertainty(
            cfg, model, data_loader, return_box=False)
        # update labeled set
        X_L, X_U = update_X_L(uncertainty, X_all, X_L, cfg.X_S_size)
        # save set and model
        np.save(cfg.work_directory + '/X_L_' + str(cycle + 1) + '.npy', X_L)
        np.save(cfg.work_directory + '/X_U_' + str(cycle + 1) + '.npy', X_U)
def main():
    """Run single-GPU testing of a detector and format/evaluate the results.

    The function parses the command-line arguments with ``parse_args()``,
    loads the config file, builds the test dataset, dataloader and detector,
    restores the checkpoint, runs ``single_gpu_test`` and finally formats
    and/or evaluates the outputs, depending on the arguments.

    Raises:
        AssertionError: If none of ``args.eval``, ``args.format_only``,
            ``args.show`` or ``args.show_dir`` is set.
        ValueError: If both ``args.eval`` and ``args.format_only`` are set.
    """
    args = parse_args()

    assert args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (eval/format/show the '
         'results / save the results) with the argument , "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # Weights come from the checkpoint, so drop every "pretrained" entry of
    # the model config (including the ones nested in an rfp_backbone neck).
    cfg.model.pretrained = None
    if cfg.model.get('neck'):
        if isinstance(cfg.model.neck, list):
            for neck_cfg in cfg.model.neck:
                if (neck_cfg.get('rfp_backbone')
                        and neck_cfg.rfp_backbone.get('pretrained')):
                    neck_cfg.rfp_backbone.pretrained = None
        elif (cfg.model.neck.get('rfp_backbone')
              and cfg.model.neck.rfp_backbone.get('pretrained')):
            cfg.model.neck.rfp_backbone.pretrained = None

    # in case the test dataset is concatenated
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True

    # build the dataloader
    samples_per_gpu = cfg.data.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' to 'DefaultFormatBundle'.
        # A concatenated test set is a list of dataset configs, which has no
        # ``pipeline`` attribute of its own: patch each entry instead.
        if isinstance(cfg.data.test, list):
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
        else:
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    # Checkpoints saved without meta data have no 'meta' key; fall back to
    # the dataset's class names instead of failing with a KeyError.
    checkpoint_meta = checkpoint.get('meta', {})
    if 'CLASSES' in checkpoint_meta:
        model.CLASSES = checkpoint_meta['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    model = MMDataParallel(model, device_ids=[0])
    outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
                              args.show_score_thr)

    kwargs = {} if args.eval_options is None else args.eval_options
    if args.format_only:
        dataset.format_results(outputs, **kwargs)
    if args.eval:
        eval_kwargs = cfg.get('evaluation', {}).copy()
        # hard-code way to remove EvalHook args
        for key in ['interval', 'tmpdir', 'start', 'gpu_collect']:
            eval_kwargs.pop(key, None)
        eval_kwargs.update(dict(metric=args.eval, **kwargs))
        print(dataset.evaluate(outputs, **eval_kwargs))
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Build the data loaders, runner and hooks, then run training.

    Args:
        model: Detector to train; it is moved to GPU and wrapped in
            ``MMDistributedDataParallel`` or ``MMDataParallel``.
        dataset: A single dataset or a list/tuple of datasets; one data
            loader is built per dataset.
        cfg: Config object providing (among others) ``data``, ``optimizer``,
            ``optimizer_config``, ``lr_config``, ``checkpoint_config``,
            ``log_config``, ``workflow`` and ``total_epochs``.
        distributed: Whether to use the distributed model wrapper,
            data loaders and evaluation hook.
        validate: Whether to register an evaluation hook on ``cfg.data.val``.
        timestamp: Value stored on ``runner.timestamp``.
        meta: Meta information passed to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders: always work on a sequence of datasets
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]

    if 'imgs_per_gpu' in cfg.data:
        legacy_batch = cfg.data.imgs_per_gpu
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' not in cfg.data:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{legacy_batch} in this experiments')
        else:
            logger.warning(
                f'Got "imgs_per_gpu"={legacy_batch} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={legacy_batch} is used in this experiments')
        # the deprecated key takes precedence in both cases
        cfg.data.samples_per_gpu = legacy_batch

    data_loaders = []
    for single_dataset in dataset:
        loader = build_dataloader(
            single_dataset,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed)
        data_loaders.append(loader)

    # put model on gpus
    if not distributed:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
    else:
        # Forwarded to the `find_unused_parameters` parameter of
        # torch.nn.parallel.DistributedDataParallel
        find_unused = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = EpochBasedRunner(
        model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger,
        meta=meta)
    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting: choose the optimizer hook (or plain config) to register
    fp16_cfg = cfg.get('fp16', None)
    raw_optimizer_config = cfg.optimizer_config
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **raw_optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in raw_optimizer_config:
        optimizer_config = OptimizerHook(**raw_optimizer_config)
    else:
        optimizer_config = raw_optimizer_config

    # register hooks
    momentum_config = cfg.get('momentum_config', None)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   momentum_config)
    if distributed:
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        if distributed:
            hook_cls = DistEvalHook
        else:
            hook_cls = EvalHook
        runner.register_hook(hook_cls(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            # work on a copy so the config entry keeps its 'priority' key
            hook_cfg = hook_cfg.copy()
            hook_priority = hook_cfg.pop('priority', 'NORMAL')
            runner.register_hook(
                build_from_cfg(hook_cfg, HOOKS), priority=hook_priority)

    # resuming takes precedence over merely loading weights
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)