def main():
    """Entry point: evaluate a pose model on the test split.

    Parses CLI args, builds the test dataloader and model from the config,
    loads the checkpoint, runs (multi-)GPU inference and evaluates on rank 0.
    """
    args = parse_args()
    cfg = mmcv.Config.fromfile(args.config)
    # Honour cudnn_benchmark from the config for fixed-size inputs.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    args.work_dir = osp.join('./work_dirs',
                             osp.splitext(osp.basename(args.config))[0])
    mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
    # Init the distributed env first, since the logger depends on dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
    # Build the dataloader.
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)
    # Build the model and load the checkpoint.
    model = build_posenet(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    # Return value unused; kept for backward compatibility with old mmcv.
    _ = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if distributed:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)
    else:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    rank, _ = get_dist_info()
    eval_config = merge_configs(cfg.get('eval_config', {}),
                                dict(metrics=args.eval))
    # Only rank 0 dumps results and runs evaluation.
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        dataset.evaluate(outputs, args.work_dir, **eval_config)
def get_pose_net(device='cuda:0'):
    """Build the HRNet pose model from ``./configs/hrnet.py`` and load its
    checkpoint, wrapped for single- or multi-GPU inference.

    Args:
        device (str): CUDA device string (e.g. ``'cuda:0'``) used for the
            single-GPU ``MMDataParallel`` wrapper.

    Returns:
        The checkpoint-loaded model, wrapped in ``MMDataParallel`` or
        ``MMDistributedDataParallel``.
    """
    config = Config.fromfile('./configs/hrnet.py')
    # Fix: the original referenced the undefined names ``args`` and ``cfg``
    # here (``args.cfg_options`` / ``cfg.merge_from_dict``), which raised
    # NameError at runtime; the cfg-options merge block has been removed.
    model = pose_builder.build_posenet(config.model)
    # Optionally wrap the model for fp16 inference when the config asks.
    fp16_config = config.get('fp16', None)
    if fp16_config is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, config.checkpoint_path, map_location='cpu')
    # if args.fuse_conv_bn:
    #     model = fuse_conv_bn(model)
    # Fix: ``distributed`` was also undefined in the original; derive it
    # from the torch.distributed process-group state instead.
    distributed = (torch.distributed.is_available()
                   and torch.distributed.is_initialized())
    if distributed:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
    else:
        # Fix: honour the previously-unused ``device`` parameter; a bare
        # 'cuda' device has index None, so fall back to GPU 0.
        device_id = torch.device(device).index
        model = MMDataParallel(model,
                               device_ids=[0 if device_id is None else device_id])
    return model
def main():
    """Benchmark single-GPU inference speed of a pose model.

    Builds the validation dataloader and model from the config, then times
    forward passes, skipping a few warm-up iterations, and reports the
    throughput in items per second.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # Honour cudnn_benchmark from the config for fixed-size inputs.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # Build the dataloader.
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
    # Build the model and load the checkpoint.
    model = build_posenet(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    model = MMDataParallel(model, device_ids=[0])
    # The first several iterations may be very slow, so skip them.
    num_warmup = 5
    pure_inf_time = 0
    num_timed = 0  # number of iterations actually counted (post-warm-up)
    # Benchmark with the whole set and take the average.
    for i, data in enumerate(data_loader):
        # Synchronize around the forward pass so CUDA kernel time is
        # included in the wall-clock measurement.
        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(return_loss=False, **data)
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time
        if i >= num_warmup:
            pure_inf_time += elapsed
            num_timed = i + 1 - num_warmup
            if (i + 1) % args.log_interval == 0:
                its = num_timed / pure_inf_time
                print(f'Done item [{i + 1:<3}], {its:.2f} items / s')
    # Fix: the original printed the rate from the *last log interval* as the
    # overall average, and raised NameError when fewer than ``log_interval``
    # items were processed; recompute the true overall average here.
    if num_timed > 0 and pure_inf_time > 0:
        its = num_timed / pure_inf_time
        print(f'Overall average: {its:.2f} items / s')
    print(f'Total time: {pure_inf_time:.2f} s')
def posenet(pretrained=False, arch='litehrnet_30_coco_384x288', model_dir=None,
            force_reload=False, unload_after=False, **kwargs):
    """Load a Lite-HRNet pose model through torch.hub, optionally with
    pretrained weights, fp16 wrapping and conv/bn fusion.

    Args:
        pretrained (bool | str): True for the official checkpoint, or a
            path (str) to load a custom checkpoint.
        arch (str): architecture key; must be one of the ``ARCH`` entries.
        model_dir: unused; kept for interface compatibility.
        force_reload (bool): force torch.hub to re-download the repo/weights.
        unload_after (bool): remove modules imported during the load, in
            case of conflict with the next hub load.

    Kwargs:
        tag(str): git repo tag to explicitly specify a particular commit
        url(str): direct url to download checkpoint
        s3(dict): S3 source containing bucket and key to download a checkpoint from
        threshold(float):
        fp16(bool): wrap the model for fp16 inference (default False)
        fuse_conv_bn(bool): fuse conv and bn layers (default True)

    Returns:
        The loaded model, moved to CPU.
    """
    # Google-Drive file ids of the official checkpoints, keyed by arch.
    ARCH = dict(litehrnet_18_coco_256x192='1ZewlvpncTvahbqcCFb-95C3NHet30mk5',
                litehrnet_18_coco_384x288='1E3S18YbUfBm7YtxYOV7I9FmrntnlFKCp',
                litehrnet_30_coco_256x192='1KLjNInzFfmZWSbEQwx-zbyaBiLB7SnEj',
                litehrnet_30_coco_384x288='1BcHnLka4FWiXRmPnJgJKmsSuXXqN4dgn',
                litehrnet_18_mpii_256x256='1bcnn5Ic2-FiSNqYOqLd1mOfQchAz_oCf',
                litehrnet_30_mpii_256x256='1JB9LOwkuz5OUtry0IQqXammFuCrGvlEd')
    tag = kwargs.get('tag', GITHUB['tag'])
    # Snapshot loaded modules so newly imported ones can be unloaded later.
    modules = sys.modules.copy()
    entry = 'posenet'
    m = None
    try:
        logging.info(f"Creating '{entry}(arch={arch})'")
        m = hub.load(github(tag=tag), entry, arch, force_reload=force_reload)
        m.tag = tag
        if pretrained:
            if isinstance(pretrained, bool):
                # Official pretrained checkpoint from Google Drive.
                state_dict = from_pretrained(f"{entry}.pt",
                                             force_reload=force_reload,
                                             gdrive=dict(id=ARCH[arch]))
            else:
                # Custom checkpoint: absolute path, or relative to hub dir.
                path = Path(pretrained)
                if not path.exists():
                    path = f"{hub.get_dir()}/{pretrained}"
                state_dict = io.load(path, map_location='cpu')
            # NOTE(review): shape-mismatched keys are filtered out, yet the
            # load below uses strict=True, which fails if anything was
            # actually filtered — presumably the filter is a no-op safeguard.
            state_dict = {
                k: v
                for k, v in state_dict.items()
                if m.state_dict()[k].shape == v.shape
            }
            m.load_state_dict(state_dict, strict=True)
        logging.info(f"kwargs={kwargs}")
        if kwargs.get('fp16', False):
            from mmpose.core import wrap_fp16_model
            wrap_fp16_model(m)
            # Fix: corrected "[posnet]" typo in the log message.
            logging.info(f"[posenet] wrapped in fp16")
        if kwargs.get('fuse_conv_bn', True):
            from mmcv.cnn import fuse_conv_bn
            m = fuse_conv_bn(m)
            logging.info(f"[posenet] fused conv and bn")
    except Exception as e:
        logging.error(f"Failed to load '{entry}': {e}")
        # Fix: bare ``raise`` preserves the original traceback
        # (``raise e`` re-raises from this frame).
        raise
    finally:
        # XXX Remove newly imported modules in case of conflict with next load
        if unload_after:
            for module in sys.modules.keys() - modules.keys():
                del sys.modules[module]
    # Fix: moved out of ``finally`` — when loading failed, ``m`` was still
    # None and ``m.to('cpu')`` raised AttributeError, masking the original
    # exception.
    m.to('cpu')
    return m
def main():
    """Entry point: evaluate a pose model on the test split and print the
    per-metric results on rank 0."""
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # Honour cudnn_benchmark from the config for fixed-size inputs.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    args.work_dir = osp.join('./work_dirs',
                             osp.splitext(osp.basename(args.config))[0])
    mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
    # Init the distributed env first, since the logger depends on dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
    # Build the dataloader; defaults may be overridden by
    # ``cfg.data.test_dataloader``.
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    loader_cfg = dict(
        samples_per_gpu=512,  # deliberately large test batch (was 1)
        workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
        dist=distributed,
        shuffle=False,
        drop_last=False)
    loader_cfg = dict(loader_cfg, **cfg.data.get('test_dataloader', {}))
    data_loader = build_dataloader(dataset, **loader_cfg)
    # Build the model and load the checkpoint.
    model = build_posenet(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    # NOTE: a past failure here turned out to be a mistyped checkpoint file
    # name; mmcv's error message for that case is not very informative.
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    if distributed:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)
    else:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    rank, _ = get_dist_info()
    eval_config = merge_configs(cfg.get('evaluation', {}),
                                dict(metric=args.eval))
    # Only rank 0 dumps results and runs evaluation.
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        results = dataset.evaluate(outputs, args.work_dir, **eval_config)
        for k, v in sorted(results.items()):
            print(f'{k}: {v}')