def create_paddle_predictor(args):
    """Build a Paddle inference predictor configured from ``args``.

    Args:
        args: Options object. The attributes read here are ``model_file``,
            ``params_file``, ``use_gpu``, ``gpu_mem``, ``enable_mkldnn``,
            ``cpu_num_threads``, ``enable_profile``, ``ir_optim``,
            ``use_tensorrt``, ``use_fp16`` and ``batch_size``.

    Returns:
        The object returned by ``create_predictor`` for the assembled config.
    """
    cfg = Config(args.model_file, args.params_file)

    # Device selection: MKL-DNN is only considered on the CPU path.
    if not args.use_gpu:
        cfg.disable_gpu()
        if args.enable_mkldnn:
            # Cache 10 different shapes for mkldnn to avoid a memory leak.
            cfg.set_mkldnn_cache_capacity(10)
            cfg.enable_mkldnn()
    else:
        cfg.enable_use_gpu(args.gpu_mem, 0)
    cfg.set_cpu_math_library_num_threads(args.cpu_num_threads)

    if args.enable_profile:
        cfg.enable_profile()
    cfg.disable_glog_info()
    cfg.switch_ir_optim(args.ir_optim)  # default true

    if args.use_tensorrt:
        # Half precision only when fp16 was requested; Float32 otherwise.
        if args.use_fp16:
            trt_precision = Config.Precision.Half
        else:
            trt_precision = Config.Precision.Float32
        cfg.enable_tensorrt_engine(
            precision_mode=trt_precision,
            max_batch_size=args.batch_size,
        )

    cfg.enable_memory_optim()
    # Use zero copy: turn the feed/fetch ops off.
    cfg.switch_use_feed_fetch_ops(False)

    return create_predictor(cfg)
def create_paddle_predictor(self, args, inference_model_dir=None):
    """Build a Paddle inference predictor from an exported model directory.

    The model and parameter files are expected at
    ``<dir>/inference.pdmodel`` and ``<dir>/inference.pdiparams``.

    Args:
        args: Options object. The attributes read here are
            ``inference_model_dir`` (only when the explicit argument is
            ``None``), ``use_gpu``, ``gpu_mem``, ``enable_mkldnn``,
            ``cpu_num_threads``, ``enable_profile``, ``ir_optim``,
            ``use_tensorrt``, ``use_fp16`` and ``batch_size``.
        inference_model_dir: Directory holding the exported model. Falls
            back to ``args.inference_model_dir`` when ``None``.

    Returns:
        A ``(predictor, config)`` tuple: the result of ``create_predictor``
        and the ``Config`` it was created from.
    """
    model_dir = (args.inference_model_dir
                 if inference_model_dir is None else inference_model_dir)
    params_path = os.path.join(model_dir, "inference.pdiparams")
    model_path = os.path.join(model_dir, "inference.pdmodel")
    cfg = Config(model_path, params_path)

    # Device selection: MKL-DNN is only considered on the CPU path.
    if not args.use_gpu:
        cfg.disable_gpu()
        if args.enable_mkldnn:
            # Cache 10 different shapes for mkldnn to avoid a memory leak.
            cfg.set_mkldnn_cache_capacity(10)
            cfg.enable_mkldnn()
    else:
        cfg.enable_use_gpu(args.gpu_mem, 0)
    cfg.set_cpu_math_library_num_threads(args.cpu_num_threads)

    if args.enable_profile:
        cfg.enable_profile()
    cfg.disable_glog_info()
    cfg.switch_ir_optim(args.ir_optim)  # default true

    if args.use_tensorrt:
        # Half precision only when fp16 was requested; Float32 otherwise.
        if args.use_fp16:
            trt_precision = Config.Precision.Half
        else:
            trt_precision = Config.Precision.Float32
        cfg.enable_tensorrt_engine(
            precision_mode=trt_precision,
            max_batch_size=args.batch_size,
            min_subgraph_size=30,
        )

    cfg.enable_memory_optim()
    # Use zero copy: turn the feed/fetch ops off.
    cfg.switch_use_feed_fetch_ops(False)

    return create_predictor(cfg), cfg
def init_model(self):
    """Create the TensorRT-backed YOLOv3 predictor and store it on ``self``.

    Reads ``self.param_type``, ``self.model_dir``, ``self.batch_size`` and
    ``self.debug``; assigns the resulting predictor to ``self.predictor``.
    ``param_type`` values ``"fp16"`` and ``"int8"`` select Half and Int8
    precision respectively (Int8 also turns calibration mode on); any other
    value falls back to Float32 without calibration.
    """
    # Imported lazily so paddle is only required when a model is built.
    from paddle.inference import Config, PrecisionType, create_predictor

    param_type = self.param_type
    if param_type == "int8":
        trt_precision, calibrate = PrecisionType.Int8, True
    elif param_type == "fp16":
        trt_precision, calibrate = PrecisionType.Half, False
    else:
        trt_precision, calibrate = PrecisionType.Float32, False

    model_path = os.path.join(self.model_dir, "yolov3/__model__")
    params_path = os.path.join(self.model_dir, "yolov3/__params__")

    cfg = Config(model_path, params_path)
    cfg.enable_use_gpu(100, 0)
    cfg.switch_ir_optim(True)

    # Workspace is 101 * 2**20 bytes for every item in the batch.
    workspace_bytes = (self.batch_size * 101) << 20
    cfg.enable_tensorrt_engine(
        workspace_size=workspace_bytes,
        max_batch_size=self.batch_size,
        min_subgraph_size=3,
        precision_mode=trt_precision,
        use_static=False,
        use_calib_mode=calibrate,
    )

    # Debug runs keep glog output and turn profiling on; normal runs are quiet.
    if self.debug:
        cfg.enable_profile()
    else:
        cfg.disable_glog_info()

    cfg.enable_memory_optim()
    cfg.switch_use_feed_fetch_ops(False)
    # NOTE(review): MKL-DNN is enabled although the config targets the GPU;
    # kept exactly as in the original -- confirm whether this is intended.
    cfg.enable_mkldnn()

    self.predictor = create_predictor(cfg)