def init_predictor(args):
    """Create a Paddle inference predictor from parsed CLI arguments.

    Args:
        args: argparse namespace providing model_dir (or model_file +
            params_file), use_gpu, enable_tensorRT, batchsize,
            cpu_threads and enable_mkldnn.

    Returns:
        tuple: (predictor, config) — the AnalysisPredictor and the
        Config it was built from.
    """
    # Prefer a combined model directory; otherwise use explicit files.
    if args.model_dir:
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        # 1000 MB initial GPU memory pool on device 0.
        config.enable_use_gpu(1000, 0)
        if args.enable_tensorRT:
            config.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=1,
                precision_mode=paddle.inference.PrecisionType.Float32)
    else:
        config.disable_gpu()
        # NOTE(review): this pass is removed on the CPU path —
        # presumably a workaround for a known fuse-pass issue; confirm.
        config.delete_pass("repeated_fc_relu_fuse_pass")
        config.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            config.enable_mkldnn()

    return create_predictor(config), config
def init_predictor(args):
    """Create a Paddle inference predictor, auto-detecting the model format.

    When ``args.model_dir`` is set, the directory is scanned for either a
    legacy ``__model__`` file or a ``.pdmodel``/``.pdiparams`` pair and the
    Config is built accordingly; otherwise explicit ``args.model_file`` and
    ``args.params_file`` paths are used.

    Args:
        args: argparse namespace providing model_dir (or model_file +
            params_file), use_gpu, enable_tensorRT, batchsize,
            cpu_threads and enable_mkldnn.

    Returns:
        tuple: (predictor, config) — the AnalysisPredictor and the
        Config it was built from.

    Raises:
        ValueError: if model_dir contains neither a legacy model nor a
            complete pdmodel/pdiparams pair.
    """
    if args.model_dir:
        # Booleans/None sentinels instead of the 0/1 integer flags the
        # original used — clearer intent, same truthiness behavior.
        has_model = False
        pdmodel_name = None
        pdiparams_name = None
        for file_name in os.listdir(args.model_dir):
            if re.search("__model__", file_name):
                has_model = True
            if file_name.endswith(".pdmodel"):
                pdmodel_name = os.path.join(args.model_dir, file_name)
            if file_name.endswith(".pdiparams"):
                pdiparams_name = os.path.join(args.model_dir, file_name)
        if has_model:
            # Legacy combined-model directory layout.
            config = Config(args.model_dir)
        elif pdmodel_name and pdiparams_name:
            config = Config(pdmodel_name, pdiparams_name)
        else:
            raise ValueError(
                "config setting error, please check your model path")
    else:
        config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        # 1000 MB initial GPU memory pool on device 0.
        config.enable_use_gpu(1000, 0)
        if args.enable_tensorRT:
            config.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=9,
                precision_mode=paddle.inference.PrecisionType.Float32)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            config.enable_mkldnn()
            # NOTE(review): pass deleted only when MKL-DNN is on —
            # presumably a known MKL-DNN fuse-pass issue; confirm.
            config.delete_pass("scale_matmul_fuse_pass")

    predictor = create_predictor(config)
    return predictor, config
def load_predictor(model_dir,
                   run_mode='paddle',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False,
                   enable_mkldnn_bfloat16=False,
                   delete_shuffle_pass=False):
    """set AnalysisConfig, generate AnalysisPredictor

    Args:
        model_dir (str): root path of model.pdmodel and model.pdiparams
        run_mode (str): mode of running (paddle/trt_fp32/trt_fp16/trt_int8)
        batch_size (int): max batch size for the TensorRT engine
        device (str): Choose the device you want to run,
            it can be: CPU/GPU/XPU, default is CPU
        min_subgraph_size (int): min subgraph size handed to TensorRT
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline
            quantitative calibration, trt_calib_mode need to set True
        cpu_threads (int): math-library thread count on the CPU path
        enable_mkldnn (bool): enable MKL-DNN acceleration (CPU only)
        enable_mkldnn_bfloat16 (bool): enable MKL-DNN bfloat16 kernels
        delete_shuffle_pass (bool): whether to remove
            shuffle_channel_detect_pass in TensorRT. Used by action model.

    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
        config (Config): the AnalysisConfig the predictor was built from

    Raises:
        ValueError: predict by TensorRT need device == 'GPU'.
    """
    # Any TensorRT run_mode requires a GPU device.
    if device != 'GPU' and run_mode != 'paddle':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            # Deliberate best-effort: MKL-DNN may be unavailable in this
            # build, in which case we warn and continue without it.
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
                if enable_mkldnn_bfloat16:
                    config.enable_mkldnn_bfloat16()
            except Exception:
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map:
        config.enable_tensorrt_engine(workspace_size=(1 << 25) * batch_size,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape,
                                              max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    if delete_shuffle_pass:
        config.delete_pass("shuffle_channel_detect_pass")
    predictor = create_predictor(config)
    return predictor, config