def main():
    """Train a text detection/recognition model from a config file.

    Parses CLI args, resolves the config (work dir, GPUs, seed), sets up
    logging and the (optionally distributed) environment, then builds the
    model and dataset(s) and hands off to ``train_detector``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    setup_multi_processes(cfg)

    # Enable the cudnn autotuner when the config asks for it.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir priority: CLI > value in config > derived from config filename.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    if args.load_from is not None:
        cfg.load_from = args.load_from
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    # Only single-GPU non-distributed training is supported; the legacy
    # --gpus/--gpu-ids options collapse onto one device with a warning.
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    # Init the distributed env first, since the logger depends on dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
        # Re-set gpu_ids for distributed training mode.
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # Create the work dir and snapshot the resolved config into it.
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

    # Initialize the logger before any other step that may log.
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # Meta dict records environment info and seed; it is logged and kept.
    meta = dict()
    env_info = '\n'.join(f'{k}: {v}' for k, v in collect_env().items())
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # Seed every process; optionally offset by rank so ranks differ.
    seed = init_random_seed(args.seed)
    if args.diff_seed:
        seed = seed + dist.get_rank()
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # Validation reuses the training pipeline; dig it out of whatever
        # (possibly nested) dataset structure the config uses.
        val_dataset = copy.deepcopy(cfg.data.val)
        if cfg.data.train.get('pipeline', None) is None:
            if is_2dlist(cfg.data.train.datasets):
                train_pipeline = cfg.data.train.datasets[0][0].pipeline
            else:
                train_pipeline = cfg.data.train.datasets[0].pipeline
        elif is_2dlist(cfg.data.train.pipeline):
            train_pipeline = cfg.data.train.pipeline[0]
        else:
            train_pipeline = cfg.data.train.pipeline

        if val_dataset['type'] in ['ConcatDataset', 'UniformConcatDataset']:
            for dataset in val_dataset['datasets']:
                dataset.pipeline = train_pipeline
        else:
            val_dataset.pipeline = train_pipeline
        datasets.append(build_dataset(val_dataset))

    if cfg.checkpoint_config is not None:
        # Save mmocr version, config content and class names in
        # checkpoints as meta data.
        cfg.checkpoint_config.meta = dict(
            mmocr_version=__version__ + get_git_hash()[:7],
            CLASSES=datasets[0].CLASSES)
    # Add an attribute for visualization convenience.
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)
def main():
    """Train a model from a config file (legacy entry point).

    Parses CLI args, resolves the config (memcached options, work dir,
    GPUs, seed), initializes logging and the optional distributed
    environment, then builds the model/datasets and calls
    ``train_detector``.

    Fix: removed a leftover debug ``print`` of
    ``torch.backends.cudnn.benchmark`` that polluted stdout on every run.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # Update memcached file-client config on every train pipeline, if given.
    if args.mc_config:
        mc = Config.fromfile(args.mc_config)
        if isinstance(cfg.data.train, list):
            for i in range(len(cfg.data.train)):
                cfg.data.train[i].pipeline[0].update(
                    file_client_args=mc['mc_file_client_args'])
        else:
            cfg.data.train.pipeline[0].update(
                file_client_args=mc['mc_file_client_args'])

    # Import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])

    # Enable the cudnn autotuner when the config asks for it.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir priority: CLI > value in config > derived from config filename.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # Init the distributed env first, since the logger depends on dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # Re-set gpu_ids for distributed training mode.
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # Create the work dir and snapshot the resolved config into it.
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

    # Initialize the logger before any other step that may log.
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # Meta dict records environment info and seed; it is logged and kept.
    meta = dict()
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # Set random seeds (only when a seed was requested on the CLI).
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # Validation shares the training pipeline.
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # Save mmocr version, config content and class names in
        # checkpoints as meta data.
        cfg.checkpoint_config.meta = dict(
            mmocr_version=__version__ + get_git_hash()[:7],
            CLASSES=datasets[0].CLASSES)
    # Add an attribute for visualization convenience.
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)
def main():
    """Evaluate a trained model on the test split.

    Validates the CLI combination, prepares the config (disables
    pretrained weights, adjusts the test pipeline for batched inference),
    builds the dataloader and model, runs single- or multi-GPU inference,
    and finally saves/formats/evaluates the collected outputs on rank 0.
    """
    args = parse_args()

    assert (
        args.out or args.eval or args.format_only or args.show
        or args.show_dir), (
            'Please specify at least one operation (save/eval/format/show the '
            'results / save the results) with the argument "--out", "--eval"'
            ', "--format-only", "--show" or "--show-dir".')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified.')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    setup_multi_processes(cfg)

    # Enable the cudnn autotuner when the config asks for it.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # Pretrained weights are irrelevant at test time; the checkpoint wins.
    if cfg.model.get('pretrained'):
        cfg.model.pretrained = None
    if cfg.model.get('neck'):
        if isinstance(cfg.model.neck, list):
            for neck_cfg in cfg.model.neck:
                if neck_cfg.get('rfp_backbone'):
                    if neck_cfg.rfp_backbone.get('pretrained'):
                        neck_cfg.rfp_backbone.pretrained = None
        elif cfg.model.neck.get('rfp_backbone'):
            if cfg.model.neck.rfp_backbone.get('pretrained'):
                cfg.model.neck.rfp_backbone.pretrained = None

    # In case the test dataset is concatenated: batched inference needs a
    # tensor-producing pipeline without recog-style test-time augmentation.
    samples_per_gpu = (cfg.data.get('test_dataloader', {})).get(
        'samples_per_gpu', cfg.data.get('samples_per_gpu', 1))
    if samples_per_gpu > 1:
        cfg = disable_text_recog_aug_test(cfg)
        cfg = replace_image_to_tensor(cfg)

    # Init the distributed env first, since the logger depends on dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
    else:
        cfg.gpu_ids = [args.gpu_id]

    # Build the dataloader: defaults first, then cfg.data-level overrides,
    # then test_dataloader-specific settings.
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    default_loader_cfg = {
        **dict(seed=cfg.get('seed'), drop_last=False, dist=distributed),
        **({} if torch.__version__ != 'parrots' else dict(
            prefetch_num=2,
            pin_memory=False,
        ))
    }
    default_loader_cfg.update({
        k: v
        for k, v in cfg.data.items() if k not in [
            'train', 'val', 'test', 'train_dataloader', 'val_dataloader',
            'test_dataloader'
        ]
    })

    test_loader_cfg = {
        **default_loader_cfg,
        **dict(shuffle=False, drop_last=False),
        **cfg.data.get('test_dataloader', {}),
        **dict(samples_per_gpu=samples_per_gpu)
    }

    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # Build the model and load the checkpoint.
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    model = revert_sync_batchnorm(model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
        is_kie = cfg.model.type in ['SDMGR']
        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
                                  is_kie, args.show_score_thr)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            eval_kwargs = cfg.get('evaluation', {}).copy()
            # Hard-coded removal of EvalHook-only arguments.
            for key in [
                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
                    'rule'
            ]:
                eval_kwargs.pop(key, None)
            eval_kwargs.update(dict(metric=args.eval, **kwargs))
            print(dataset.evaluate(outputs, **eval_kwargs))
def main():
    """Evaluate a trained model on the test split (legacy entry point).

    Validates the CLI combination, prepares the config (pretrained weights
    off, ImageToTensor replaced for batched inference), builds the
    dataloader and model, runs single- or multi-GPU inference, and on
    rank 0 saves/formats/evaluates the outputs.
    """
    args = parse_args()

    assert (
        args.out or args.eval or args.format_only or args.show
        or args.show_dir), (
            'Please specify at least one operation (save/eval/format/show the '
            'results / save the results) with the argument "--out", "--eval"'
            ', "--format-only", "--show" or "--show-dir".')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified.')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # Import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])

    # Enable the cudnn autotuner when the config asks for it.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # Pretrained weights are irrelevant at test time; the checkpoint wins.
    cfg.model.pretrained = None
    if cfg.model.get('neck'):
        if isinstance(cfg.model.neck, list):
            for neck_cfg in cfg.model.neck:
                if neck_cfg.get('rfp_backbone'):
                    if neck_cfg.rfp_backbone.get('pretrained'):
                        neck_cfg.rfp_backbone.pretrained = None
        elif cfg.model.neck.get('rfp_backbone'):
            if cfg.model.neck.rfp_backbone.get('pretrained'):
                cfg.model.neck.rfp_backbone.pretrained = None

    # In case the test dataset is concatenated.
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' with 'DefaultFormatBundle'.
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    # Init the distributed env first, since the logger depends on dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # Build the dataloader.
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # Build the model and load the checkpoint.
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
                                  args.show_score_thr)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            eval_kwargs = cfg.get('evaluation', {}).copy()
            # Hard-coded removal of EvalHook-only arguments.
            for key in [
                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
                    'rule'
            ]:
                eval_kwargs.pop(key, None)
            eval_kwargs.update(dict(metric=args.eval, **kwargs))
            print(dataset.evaluate(outputs, **eval_kwargs))
def test_drrg(cfg_file):
    """Smoke-test DRRG: forward-train loss, single-image test, show_result.

    Builds the detector from ``cfg_file``, feeds synthetic training targets
    and checks a loss dict comes back, then rebuilds a 6-channel test head,
    pushes hand-crafted score maps through it via an identity out_conv, and
    finally exercises visualization.
    """
    model = _get_detector_cfg(cfg_file)
    model['pretrained'] = None

    from mmocr.models import build_detector
    detector = build_detector(model)
    detector = revert_sync_batchnorm(detector)

    input_shape = (1, 3, 224, 224)
    num_kernels = 1
    mm_inputs = _demo_mm_inputs(num_kernels, input_shape)

    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_text_mask = mm_inputs.pop('gt_text_mask')
    gt_center_region_mask = mm_inputs.pop('gt_center_region_mask')
    gt_mask = mm_inputs.pop('gt_mask')
    gt_top_height_map = mm_inputs.pop('gt_radius_map')
    gt_bot_height_map = gt_top_height_map.copy()
    gt_sin_map = mm_inputs.pop('gt_sin_map')
    gt_cos_map = mm_inputs.pop('gt_cos_map')

    # Fabricate random text-component attributes: (num, x, y, h, w, cos,
    # sin, label) stacked column-wise for a single batch element.
    num_rois = 32
    x = np.random.randint(4, 224, (num_rois, 1))
    y = np.random.randint(4, 224, (num_rois, 1))
    h = 4 * np.ones((num_rois, 1))
    w = 4 * np.ones((num_rois, 1))
    angle = (np.random.random_sample((num_rois, 1)) * 2 - 1) * np.pi / 2
    cos, sin = np.cos(angle), np.sin(angle)
    comp_labels = np.random.randint(1, 3, (num_rois, 1))
    num_rois = num_rois * np.ones((num_rois, 1))
    comp_attribs = np.hstack([num_rois, x, y, h, w, cos, sin, comp_labels])
    gt_comp_attribs = np.expand_dims(comp_attribs.astype(np.float32), axis=0)

    # Forward in training mode must produce a dict of losses.
    losses = detector.forward(
        imgs,
        img_metas,
        gt_text_mask=gt_text_mask,
        gt_center_region_mask=gt_center_region_mask,
        gt_mask=gt_mask,
        gt_top_height_map=gt_top_height_map,
        gt_bot_height_map=gt_bot_height_map,
        gt_sin_map=gt_sin_map,
        gt_cos_map=gt_cos_map,
        gt_comp_attribs=gt_comp_attribs)
    assert isinstance(losses, dict)

    # Forward in test mode: rebuild the head to take the 6 score maps
    # directly and carve out a synthetic text region.
    model['bbox_head']['in_channels'] = 6
    model['bbox_head']['text_region_thr'] = 0.8
    model['bbox_head']['center_region_thr'] = 0.8
    detector = build_detector(model)
    maps = torch.zeros((1, 6, 224, 224), dtype=torch.float)
    maps[:, 0:2, :, :] = -10.
    maps[:, 0, 60:100, 50:170] = 10.
    maps[:, 1, 75:85, 60:160] = 10.
    maps[:, 2, 75:85, 60:160] = 0.
    maps[:, 3, 75:85, 60:160] = 1.
    maps[:, 4, 75:85, 60:160] = 10.
    maps[:, 5, 75:85, 60:160] = 10.

    with torch.no_grad():
        # Make out_conv a per-channel identity so the maps pass through
        # the head unchanged.
        detector.bbox_head.out_conv.weight.data = torch.eye(6).view(
            6, 6, 1, 1)
        detector.bbox_head.out_conv.bias.data.fill_(0.)
        outs = detector.bbox_head.single_test(maps)
        boundaries = detector.bbox_head.get_boundary(*outs, img_metas, True)
    assert len(boundaries) == 1

    # Visualization must accept a boundary result without error.
    results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
    img = np.random.rand(5, 5)
    detector.show_result(img, results)
def test_detector_wrapper():
    """Check ONNXRuntime and TensorRT detector wrappers agree in shape.

    Exports a small DBNet to ONNX, converts it to a TensorRT engine, wraps
    both, and asserts each wrapper's ``simple_test`` returns a dict with a
    ``boundary_result``.

    Fix: ``trt_outputs`` was previously computed from ``wrap_onnx`` too, so
    the TensorRT wrapper was constructed but never exercised.
    """
    try:
        import onnxruntime as ort  # noqa: F401
        import tensorrt as trt
        from mmcv.tensorrt import onnx2trt, save_trt_engine
    except ImportError:
        pytest.skip('ONNXRuntime or TensorRT is not available.')

    cfg = dict(
        model=dict(
            type='DBNet',
            backbone=dict(
                type='ResNet',
                depth=18,
                num_stages=4,
                out_indices=(0, 1, 2, 3),
                frozen_stages=-1,
                norm_cfg=dict(type='BN', requires_grad=True),
                init_cfg=dict(
                    type='Pretrained', checkpoint='torchvision://resnet18'),
                norm_eval=False,
                style='caffe'),
            neck=dict(
                type='FPNC',
                in_channels=[64, 128, 256, 512],
                lateral_channels=256),
            bbox_head=dict(
                type='DBHead',
                text_repr_type='quad',
                in_channels=256,
                loss=dict(type='DBLoss', alpha=5.0, beta=10.0,
                          bbce_loss=True)),
            train_cfg=None,
            test_cfg=None))
    cfg = mmcv.Config(cfg)

    pytorch_model = build_detector(cfg.model, None, None)

    # Prepare data.
    inputs = torch.rand(1, 3, 224, 224)
    img_metas = [{
        'img_shape': [1, 3, 224, 224],
        'ori_shape': [1, 3, 224, 224],
        'pad_shape': [1, 3, 224, 224],
        'filename': None,
        'scale_factor': np.array([1, 1, 1, 1])
    }]

    pytorch_model.forward = pytorch_model.forward_dummy
    with tempfile.TemporaryDirectory() as tmpdirname:
        onnx_path = f'{tmpdirname}/tmp.onnx'
        with torch.no_grad():
            torch.onnx.export(
                pytorch_model,
                inputs,
                onnx_path,
                input_names=['input'],
                output_names=['output'],
                export_params=True,
                keep_initializers_as_inputs=False,
                verbose=False,
                opset_version=11)

        # TensorRT part.
        def get_GiB(x: int):
            """return x GiB."""
            return x * (1 << 30)

        trt_path = onnx_path.replace('.onnx', '.trt')
        min_shape = [1, 3, 224, 224]
        max_shape = [1, 3, 224, 224]
        # Create the trt engine and wrappers.
        opt_shape_dict = {'input': [min_shape, min_shape, max_shape]}
        max_workspace_size = get_GiB(1)
        trt_engine = onnx2trt(
            onnx_path,
            opt_shape_dict,
            log_level=trt.Logger.ERROR,
            fp16_mode=False,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_path)
        print(f'Successfully created TensorRT engine: {trt_path}')

        wrap_onnx = ONNXRuntimeDetector(onnx_path, cfg, 0)
        wrap_trt = TensorRTDetector(trt_path, cfg, 0)

        assert isinstance(wrap_onnx, ONNXRuntimeDetector)
        assert isinstance(wrap_trt, TensorRTDetector)

        with torch.no_grad():
            onnx_outputs = wrap_onnx.simple_test(
                inputs, img_metas, rescale=False)
            # Previously this also called wrap_onnx, leaving the TensorRT
            # path untested; it must go through wrap_trt.
            trt_outputs = wrap_trt.simple_test(
                inputs, img_metas, rescale=False)

        assert isinstance(onnx_outputs[0], dict)
        assert isinstance(trt_outputs[0], dict)
        assert 'boundary_result' in onnx_outputs[0]
        assert 'boundary_result' in trt_outputs[0]
def test_recognizer_wrapper():
    """Check ONNXRuntime and TensorRT recognizer wrappers agree in shape.

    Exports a small CRNN to ONNX, converts it to a TensorRT engine, wraps
    both, and asserts each wrapper's ``simple_test`` returns a dict
    containing ``text``.

    Fix: ``trt_outputs`` was previously computed from ``wrap_onnx`` too, so
    the TensorRT wrapper was constructed but never exercised.
    """
    try:
        import onnxruntime as ort  # noqa: F401
        import tensorrt as trt
        from mmcv.tensorrt import onnx2trt, save_trt_engine
    except ImportError:
        pytest.skip('ONNXRuntime or TensorRT is not available.')

    cfg = dict(
        label_convertor=dict(
            type='CTCConvertor',
            dict_type='DICT36',
            with_unknown=False,
            lower=True),
        model=dict(
            type='CRNNNet',
            preprocessor=None,
            backbone=dict(
                type='VeryDeepVgg', leaky_relu=False, input_channels=1),
            encoder=None,
            decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
            loss=dict(type='CTCLoss'),
            label_convertor=dict(
                type='CTCConvertor',
                dict_type='DICT36',
                with_unknown=False,
                lower=True),
            pretrained=None),
        train_cfg=None,
        test_cfg=None)
    cfg = mmcv.Config(cfg)

    pytorch_model = build_detector(cfg.model, None, None)

    # Prepare data.
    inputs = torch.rand(1, 1, 32, 32)
    img_metas = [{
        'img_shape': [1, 1, 32, 32],
        'ori_shape': [1, 1, 32, 32],
        'pad_shape': [1, 1, 32, 32],
        'filename': None,
        'scale_factor': np.array([1, 1, 1, 1])
    }]

    # Freeze the extra forward arguments so ONNX export sees a single
    # tensor input.
    pytorch_model.forward = partial(
        pytorch_model.forward,
        img_metas=img_metas,
        return_loss=False,
        rescale=True)
    with tempfile.TemporaryDirectory() as tmpdirname:
        onnx_path = f'{tmpdirname}/tmp.onnx'
        with torch.no_grad():
            torch.onnx.export(
                pytorch_model,
                inputs,
                onnx_path,
                input_names=['input'],
                output_names=['output'],
                export_params=True,
                keep_initializers_as_inputs=False,
                verbose=False,
                opset_version=11)

        # TensorRT part.
        def get_GiB(x: int):
            """return x GiB."""
            return x * (1 << 30)

        trt_path = onnx_path.replace('.onnx', '.trt')
        min_shape = [1, 1, 32, 32]
        max_shape = [1, 1, 32, 32]
        # Create the trt engine and wrappers.
        opt_shape_dict = {'input': [min_shape, min_shape, max_shape]}
        max_workspace_size = get_GiB(1)
        trt_engine = onnx2trt(
            onnx_path,
            opt_shape_dict,
            log_level=trt.Logger.ERROR,
            fp16_mode=False,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_path)
        print(f'Successfully created TensorRT engine: {trt_path}')

        wrap_onnx = ONNXRuntimeRecognizer(onnx_path, cfg, 0)
        wrap_trt = TensorRTRecognizer(trt_path, cfg, 0)

        assert isinstance(wrap_onnx, ONNXRuntimeRecognizer)
        assert isinstance(wrap_trt, TensorRTRecognizer)

        with torch.no_grad():
            onnx_outputs = wrap_onnx.simple_test(
                inputs, img_metas, rescale=False)
            # Previously this also called wrap_onnx, leaving the TensorRT
            # path untested; it must go through wrap_trt.
            trt_outputs = wrap_trt.simple_test(
                inputs, img_metas, rescale=False)

        assert isinstance(onnx_outputs[0], dict)
        assert isinstance(trt_outputs[0], dict)
        assert 'text' in onnx_outputs[0]
        assert 'text' in trt_outputs[0]
def __init__(self,
             det='PANet_IC15',
             det_config='',
             det_ckpt='',
             recog='SEG',
             recog_config='',
             recog_ckpt='',
             kie='',
             kie_config='',
             kie_ckpt='',
             config_dir=os.path.join(str(Path.cwd()), 'configs/'),
             device=None,
             **kwargs):
    """Build the end-to-end OCR pipeline (detection / recognition / KIE).

    Args:
        det: Name of the text detection model, or '' to disable.
        det_config / det_ckpt: Optional explicit config/checkpoint paths;
            when empty they are derived from the model-zoo tables below.
        recog: Name of the text recognition model, or '' to disable.
        recog_config / recog_ckpt: Same as above, for recognition.
        kie: Name of the key-information-extraction model, or ''.
            KIE requires both detection and recognition to be enabled.
        kie_config / kie_ckpt: Same as above, for KIE.
        config_dir: Root directory containing the mmocr config tree.
        device: Torch device; defaults to cuda when available, else cpu.

    Raises:
        ValueError: If a requested model name is not in the zoo tables.
        NotImplementedError: If KIE is requested without det + recog.
        ImportError: If a Tesseract backend is requested but tesserocr is
            not installed.

    Fix: corrected the typo 'algorthm' in the detection error message.
    """
    # Model-zoo tables mapping user-facing names to config/checkpoint
    # locations relative to config_dir and the openmmlab download root.
    textdet_models = {
        'DB_r18': {
            'config': 'dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth'
        },
        'DB_r50': {
            'config': 'dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20211025-9fe3b590.pth'
        },
        'DBPP_r50': {
            'config': 'dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnetpp_r50dcnv2_fpnc_1200e_icdar2015-20220502-d7a76fff.pth'
        },
        'DRRG': {
            'config': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt':
            'drrg/drrg_r50_fpn_unet_1200e_ctw1500_20211022-fb30b001.pth'
        },
        'FCE_IC15': {
            'config': 'fcenet/fcenet_r50_fpn_1500e_icdar2015.py',
            'ckpt':
            'fcenet/fcenet_r50_fpn_1500e_icdar2015_20211022-daefb6ed.pth'
        },
        'FCE_CTW_DCNv2': {
            'config': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
            'ckpt': 'fcenet/'
            'fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth'
        },
        'MaskRCNN_CTW': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth'
        },
        'MaskRCNN_IC15': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth'
        },
        'MaskRCNN_IC17': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth'
        },
        'PANet_CTW': {
            'config': 'panet/panet_r18_fpem_ffm_600e_ctw1500.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth'
        },
        'PANet_IC15': {
            'config': 'panet/panet_r18_fpem_ffm_600e_icdar2015.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth'
        },
        'PS_CTW': {
            'config': 'psenet/psenet_r50_fpnf_600e_ctw1500.py',
            'ckpt':
            'psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth'
        },
        'PS_IC15': {
            'config': 'psenet/psenet_r50_fpnf_600e_icdar2015.py',
            'ckpt':
            'psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth'
        },
        'TextSnake': {
            'config': 'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt':
            'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth'
        },
        'Tesseract': {}
    }

    textrecog_models = {
        'CRNN': {
            'config': 'crnn/crnn_academic_dataset.py',
            'ckpt': 'crnn/crnn_academic-a723a1c5.pth'
        },
        'SAR': {
            'config': 'sar/sar_r31_parallel_decoder_academic.py',
            'ckpt': 'sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth'
        },
        'SAR_CN': {
            'config': 'sar/sar_r31_parallel_decoder_chinese.py',
            'ckpt':
            'sar/sar_r31_parallel_decoder_chineseocr_20210507-b4be8214.pth'
        },
        'NRTR_1/16-1/8': {
            'config': 'nrtr/nrtr_r31_1by16_1by8_academic.py',
            'ckpt': 'nrtr/nrtr_r31_1by16_1by8_academic_20211124-f60cebf4.pth'
        },
        'NRTR_1/8-1/4': {
            'config': 'nrtr/nrtr_r31_1by8_1by4_academic.py',
            'ckpt': 'nrtr/nrtr_r31_1by8_1by4_academic_20211123-e1fdb322.pth'
        },
        'RobustScanner': {
            'config': 'robust_scanner/robustscanner_r31_academic.py',
            'ckpt': 'robustscanner/robustscanner_r31_academic-5f05874f.pth'
        },
        'SATRN': {
            'config': 'satrn/satrn_academic.py',
            'ckpt': 'satrn/satrn_academic_20211009-cb8b1580.pth'
        },
        'SATRN_sm': {
            'config': 'satrn/satrn_small.py',
            'ckpt': 'satrn/satrn_small_20211009-2cf13355.pth'
        },
        'ABINet': {
            'config': 'abinet/abinet_academic.py',
            'ckpt': 'abinet/abinet_academic-f718abf6.pth'
        },
        'SEG': {
            'config': 'seg/seg_r31_1by16_fpnocr_academic.py',
            'ckpt': 'seg/seg_r31_1by16_fpnocr_academic-72235b11.pth'
        },
        'CRNN_TPS': {
            'config': 'tps/crnn_tps_academic_dataset.py',
            'ckpt': 'tps/crnn_tps_academic_dataset_20210510-d221a905.pth'
        },
        'Tesseract': {},
        'MASTER': {
            'config': 'master/master_academic.py',
            'ckpt': 'master/master_r31_12e_ST_MJ_SA-787edd36.pth'
        }
    }

    kie_models = {
        'SDMGR': {
            'config': 'sdmgr/sdmgr_unet16_60e_wildreceipt.py',
            'ckpt':
            'sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth'
        }
    }

    self.td = det
    self.tr = recog
    self.kie = kie
    self.device = device
    if self.device is None:
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

    # Check if the det/recog/kie model choice is valid.
    if self.td and self.td not in textdet_models:
        raise ValueError(self.td,
                         'is not a supported text detection algorithm')
    elif self.tr and self.tr not in textrecog_models:
        raise ValueError(self.tr,
                         'is not a supported text recognition algorithm')
    elif self.kie:
        if self.kie not in kie_models:
            raise ValueError(
                self.kie, 'is not a supported key information extraction'
                ' algorithm')
        elif not (self.td and self.tr):
            raise NotImplementedError(
                self.kie, 'has to run together'
                ' with text detection and recognition algorithms.')

    self.detect_model = None
    if self.td and self.td == 'Tesseract':
        if tesserocr is None:
            raise ImportError('Please install tesserocr first. '
                              'Check out the installation guide at '
                              'https://github.com/sirfz/tesserocr')
        self.detect_model = 'Tesseract_det'
    elif self.td:
        # Build detection model.
        if not det_config:
            det_config = os.path.join(config_dir, 'textdet/',
                                      textdet_models[self.td]['config'])
        if not det_ckpt:
            det_ckpt = 'https://download.openmmlab.com/mmocr/textdet/' + \
                textdet_models[self.td]['ckpt']

        self.detect_model = init_detector(
            det_config, det_ckpt, device=self.device)
        self.detect_model = revert_sync_batchnorm(self.detect_model)

    self.recog_model = None
    if self.tr and self.tr == 'Tesseract':
        if tesserocr is None:
            raise ImportError('Please install tesserocr first. '
                              'Check out the installation guide at '
                              'https://github.com/sirfz/tesserocr')
        self.recog_model = 'Tesseract_recog'
    elif self.tr:
        # Build recognition model.
        if not recog_config:
            recog_config = os.path.join(
                config_dir, 'textrecog/',
                textrecog_models[self.tr]['config'])
        if not recog_ckpt:
            recog_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'textrecog/' + textrecog_models[self.tr]['ckpt']

        self.recog_model = init_detector(
            recog_config, recog_ckpt, device=self.device)
        self.recog_model = revert_sync_batchnorm(self.recog_model)

    self.kie_model = None
    if self.kie:
        # Build key information extraction model.
        if not kie_config:
            kie_config = os.path.join(config_dir, 'kie/',
                                      kie_models[self.kie]['config'])
        if not kie_ckpt:
            kie_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'kie/' + kie_models[self.kie]['ckpt']

        kie_cfg = Config.fromfile(kie_config)
        self.kie_model = build_detector(
            kie_cfg.model, test_cfg=kie_cfg.get('test_cfg'))
        self.kie_model = revert_sync_batchnorm(self.kie_model)
        self.kie_model.cfg = kie_cfg
        load_checkpoint(self.kie_model, kie_ckpt, map_location=self.device)

    # Attribute check: unwrap any DataParallel-style wrappers.
    for model in list(filter(None, [self.recog_model, self.detect_model])):
        if hasattr(model, 'module'):
            model = model.module
def build_model(cfg):
    """Build an inference-ready detector from a parsed config.

    The detector is constructed in test mode, its SyncBN layers are
    converted to plain BN, and the result is wrapped in MMDataParallel.
    """
    detector = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    detector = revert_sync_batchnorm(detector)
    return MMDataParallel(detector)
def __init__(self,
             det='PANet_IC15',
             det_config='',
             det_ckpt='',
             recog='SEG',
             recog_config='',
             recog_ckpt='',
             kie='',
             kie_config='',
             kie_ckpt='',
             config_dir=os.path.join(str(Path.cwd()), 'configs/'),
             device='cuda:0',
             **kwargs):
    """Build the end-to-end OCR pipeline (legacy version).

    Args:
        det / recog / kie: Model names from the zoo tables below, or ''
            to disable that stage.
        det_config, det_ckpt, recog_config, recog_ckpt, kie_config,
        kie_ckpt: Optional explicit config/checkpoint paths; when empty
            they are derived from the zoo tables.
        config_dir: Root directory containing the mmocr config tree.
        device: Torch device string; defaults to 'cuda:0'.

    Raises:
        ValueError: If a requested model name is not in the zoo tables.

    Fix: corrected the typo 'algorthm' in the detection error message.
    """
    textdet_models = {
        'DB_r18': {
            'config': 'dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth'
        },
        'DB_r50': {
            'config': 'dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20210325-91cef9af.pth'
        },
        'DRRG': {
            'config': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500-1abf4f67.pth'
        },
        'FCE_IC15': {
            'config': 'fcenet/fcenet_r50_fpn_1500e_icdar2015.py',
            'ckpt': 'fcenet/fcenet_r50_fpn_1500e_icdar2015-d435c061.pth'
        },
        'FCE_CTW_DCNv2': {
            'config': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
            'ckpt': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500-05d740bb.pth'
        },
        'MaskRCNN_CTW': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth'
        },
        'MaskRCNN_IC15': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth'
        },
        'MaskRCNN_IC17': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth'
        },
        'PANet_CTW': {
            'config': 'panet/panet_r18_fpem_ffm_600e_ctw1500.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth'
        },
        'PANet_IC15': {
            'config': 'panet/panet_r18_fpem_ffm_600e_icdar2015.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth'
        },
        'PS_CTW': {
            'config': 'psenet/psenet_r50_fpnf_600e_ctw1500.py',
            'ckpt':
            'psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth'
        },
        'PS_IC15': {
            'config': 'psenet/psenet_r50_fpnf_600e_icdar2015.py',
            'ckpt':
            'psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth'
        },
        'TextSnake': {
            'config': 'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt':
            'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth'
        }
    }

    textrecog_models = {
        'CRNN': {
            'config': 'crnn/crnn_academic_dataset.py',
            'ckpt': 'crnn/crnn_academic-a723a1c5.pth'
        },
        'SAR': {
            'config': 'sar/sar_r31_parallel_decoder_academic.py',
            'ckpt': 'sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth'
        },
        'NRTR_1/16-1/8': {
            'config': 'nrtr/nrtr_r31_1by16_1by8_academic.py',
            'ckpt': 'nrtr/nrtr_r31_academic_20210406-954db95e.pth'
        },
        'NRTR_1/8-1/4': {
            'config': 'nrtr/nrtr_r31_1by8_1by4_academic.py',
            'ckpt': 'nrtr/nrtr_r31_1by8_1by4_academic_20210406-ce16e7cc.pth'
        },
        'RobustScanner': {
            'config': 'robust_scanner/robustscanner_r31_academic.py',
            'ckpt': 'robustscanner/robustscanner_r31_academic-5f05874f.pth'
        },
        'SEG': {
            'config': 'seg/seg_r31_1by16_fpnocr_academic.py',
            'ckpt': 'seg/seg_r31_1by16_fpnocr_academic-72235b11.pth'
        },
        'CRNN_TPS': {
            'config': 'tps/crnn_tps_academic_dataset.py',
            'ckpt': 'tps/crnn_tps_academic_dataset_20210510-d221a905.pth'
        }
    }

    kie_models = {
        'SDMGR': {
            'config': 'sdmgr/sdmgr_unet16_60e_wildreceipt.py',
            'ckpt':
            'sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth'
        }
    }

    self.td = det
    self.tr = recog
    self.kie = kie
    self.device = device

    # Check if the det/recog/kie model choice is valid.
    if self.td and self.td not in textdet_models:
        raise ValueError(self.td,
                         'is not a supported text detection algorithm')
    elif self.tr and self.tr not in textrecog_models:
        raise ValueError(self.tr,
                         'is not a supported text recognition algorithm')
    elif self.kie and self.kie not in kie_models:
        raise ValueError(
            self.kie, 'is not a supported key information extraction'
            ' algorithm')

    self.detect_model = None
    if self.td:
        # Build detection model.
        if not det_config:
            det_config = os.path.join(config_dir, 'textdet/',
                                      textdet_models[self.td]['config'])
        if not det_ckpt:
            det_ckpt = 'https://download.openmmlab.com/mmocr/textdet/' + \
                textdet_models[self.td]['ckpt']

        self.detect_model = init_detector(
            det_config, det_ckpt, device=self.device)

    self.recog_model = None
    if self.tr:
        # Build recognition model.
        if not recog_config:
            recog_config = os.path.join(
                config_dir, 'textrecog/',
                textrecog_models[self.tr]['config'])
        if not recog_ckpt:
            recog_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'textrecog/' + textrecog_models[self.tr]['ckpt']

        self.recog_model = init_detector(
            recog_config, recog_ckpt, device=self.device)

    self.kie_model = None
    if self.kie:
        # Build key information extraction model.
        if not kie_config:
            kie_config = os.path.join(config_dir, 'kie/',
                                      kie_models[self.kie]['config'])
        if not kie_ckpt:
            kie_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'kie/' + kie_models[self.kie]['ckpt']

        kie_cfg = Config.fromfile(kie_config)
        self.kie_model = build_detector(
            kie_cfg.model, test_cfg=kie_cfg.get('test_cfg'))
        self.kie_model.cfg = kie_cfg
        load_checkpoint(self.kie_model, kie_ckpt, map_location=self.device)

    # Attribute check: unwrap wrappers and flatten concatenated test
    # pipelines so inference uses the first sub-dataset's pipeline.
    for model in list(filter(None, [self.recog_model, self.detect_model])):
        if hasattr(model, 'module'):
            model = model.module
        if model.cfg.data.test['type'] == 'ConcatDataset':
            model.cfg.data.test.pipeline = \
                model.cfg.data.test['datasets'][0].pipeline