def init_predictor(args):
    """Build an inference predictor from parsed command-line arguments.

    Args:
        args: Parsed arguments object. This function reads the attributes
            ``model_dir``, ``model_file``, ``params_file``, ``tune``,
            ``use_gpu``, ``use_trt`` and ``tuned_dynamic_shape``.

    Returns:
        The object returned by ``create_predictor(config)``.
    """
    # Compare by value. The previous ``is not ""`` tested object identity,
    # which is implementation-dependent for strings and triggers a
    # SyntaxWarning on Python 3.8+.
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    config.enable_memory_optim()

    if args.tune:
        # ``shape_file`` is not a local; it is resolved from the enclosing
        # (module) scope.
        config.collect_shape_range_info(shape_file)

    if args.use_gpu:
        # NOTE(review): arguments are presumably (initial GPU memory pool
        # size in MB, device id) -- confirm against the Paddle Inference docs.
        config.enable_use_gpu(1000, 0)
        if args.use_trt:
            # When using dynamic shape mode, max_batch_size is ignored.
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=1,
                min_subgraph_size=5,
                precision_mode=PrecisionType.Float32,
                use_static=False,
                use_calib_mode=False)
            if args.tuned_dynamic_shape:
                config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
    else:
        # CPU path: set the math-library (BLAS) thread count. The thread
        # count should not exceed the number of CPU cores.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()

    predictor = create_predictor(config)
    return predictor
def auto_tune(args, dataset, img_nums):
    """
    Use images to auto tune the dynamic shape for trt sub graph.
    The tuned shape is saved in args.auto_tuned_shape_file.

    Args:
        args: input args. The attributes ``cfg``, ``print_detail`` and
            ``auto_tuned_shape_file`` are read here.
        dataset(dataset): a sized iterable yielding ``(img, _)`` pairs; only
            the first element of each pair is used.
        img_nums(int): the nums of images used for auto tune. The limit is
            checked after each run, so one sample is always processed when
            the dataset is non-empty.

    Returns:
        None

    Raises:
        AssertionError: if ``use_auto_tune(args)`` is falsy.
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    num = min(len(dataset), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for idx, (img, _) in enumerate(dataset):
        # Wrap the single image in a leading axis before feeding it.
        data = np.array([img])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Only ordinary errors are treated as a tuning failure;
            # KeyboardInterrupt / SystemExit now propagate instead of being
            # reported as an out-of-memory problem.
            logger.info(
                "Auto tune fail. Usually, the error is out of GPU memory, "
                "because the model and image is too large. \n")
            # Drop the predictor BEFORE removing the file.
            # NOTE(review): presumably the shape file is written when the
            # predictor is destroyed -- confirm; keep this ordering.
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

        if idx + 1 >= num:
            break

    logger.info("Auto tune success.\n")
def auto_tune(args, imgs, img_nums):
    """
    Use images to auto tune the dynamic shape for trt sub graph.
    The tuned shape is saved in args.auto_tuned_shape_file.

    Args:
        args: input args. The attributes ``cfg``, ``print_detail`` and
            ``auto_tuned_shape_file`` are read here.
        imgs(str, list[str]): the path for images. A single value is wrapped
            into a one-element list.
        img_nums(int): the nums of images used for auto tune; at most
            ``min(len(imgs), img_nums)`` images are run.

    Returns:
        None

    Raises:
        AssertionError: if ``use_auto_tune(args)`` is falsy.
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    num = min(len(imgs), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for i in range(num):
        # The first element returned by the transforms is used as the image;
        # it is wrapped in a leading axis before feeding it.
        data = np.array([cfg.transforms(imgs[i])[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Only ordinary errors are treated as a tuning failure;
            # KeyboardInterrupt / SystemExit now propagate instead of being
            # reported as an out-of-memory problem.
            logger.info(
                "Auto tune fail. Usually, the error is out of GPU memory, "
                "because the model and image is too large. \n")
            # Drop the predictor BEFORE removing the file.
            # NOTE(review): presumably the shape file is written when the
            # predictor is destroyed -- confirm; keep this ordering.
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

    logger.info("Auto tune success.\n")
def collect_dynamic_shape(args):
    """Run every input image through a predictor to collect shape ranges.

    The predictor is configured with
    ``collect_shape_range_info(args.dynamic_shape_path)``. If any run fails,
    the predictor is dropped, the shape file is removed if present, and the
    function returns without processing the remaining images.

    Args:
        args: input args. The attributes ``config``, ``dynamic_shape_path``
            and ``image_path`` are read here.

    Returns:
        None
    """
    if not is_support_collecting():
        logger.error(
            "The Paddle does not support collecting dynamic shape, "
            "please reinstall the PaddlePaddle (latest gpu version).")
        # Stop here instead of falling through into calls that the check
        # above just reported as unsupported.
        return

    # prepare config
    cfg = DeployConfig(args.config)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

    # create predictor
    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    # get images
    img_path_list, _ = get_image_list(args.image_path)
    if not isinstance(img_path_list, (list, tuple)):
        img_path_list = [img_path_list]
    logger.info(f"The num of images is {len(img_path_list)} \n")

    # collect
    progbar_val = progbar.Progbar(target=len(img_path_list))
    for idx, img_path in enumerate(img_path_list):
        # The first element returned by the transforms is used as the image;
        # it is wrapped in a leading axis before feeding it.
        data = np.array([cfg.transforms(img_path)[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)

        try:
            predictor.run()
        except Exception:
            logger.info(
                "Fail to collect dynamic shape. Usually, the error is out of "
                "GPU memory, for the model and image are too large.\n")
            # Drop the predictor BEFORE removing the file.
            # NOTE(review): presumably the shape file is written when the
            # predictor is destroyed -- confirm; keep this ordering.
            del predictor
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)
            # Abort: ``predictor`` no longer exists, so continuing the loop
            # would raise UnboundLocalError, and the success message below
            # would be wrong because the shape file has just been removed.
            return

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shape is save in {args.dynamic_shape_path}")
def get_config(self, model, params, tuned=False):
    """Create a GPU inference config from in-memory model and params buffers.

    Args:
        model: model buffer; passed to ``set_model_buffer`` with its length.
        params: params buffer; passed to ``set_model_buffer`` with its length.
        tuned: when truthy, the config only collects shape-range info into
            ``shape_range.pbtxt``. Otherwise the TensorRT engine is enabled
            and tuned dynamic shape is enabled with that same file.

    Returns:
        The configured ``Config`` instance.
    """
    shape_range_path = 'shape_range.pbtxt'

    # Settings shared by both modes.
    infer_config = Config()
    infer_config.set_model_buffer(model, len(model), params, len(params))
    infer_config.enable_use_gpu(100, 0)
    infer_config.set_optim_cache_dir('tuned_test')

    if tuned:
        # Collection mode: no TensorRT engine is enabled on this path.
        infer_config.collect_shape_range_info(shape_range_path)
        return infer_config

    # TensorRT mode, using the shape-range file named above.
    trt_options = {
        'workspace_size': 1024,
        'max_batch_size': 1,
        'min_subgraph_size': 0,
        'precision_mode': paddle.inference.PrecisionType.Float32,
        'use_static': True,
        'use_calib_mode': False,
    }
    infer_config.enable_tensorrt_engine(**trt_options)
    infer_config.enable_tuned_tensorrt_dynamic_shape(shape_range_path, True)
    return infer_config