def load_model(self, modelpath, use_gpu):
    # Configure the execution device.
    if use_gpu:
        try:
            places = os.environ["CUDA_VISIBLE_DEVICES"]
            places = int(places[0])
        except Exception as e:
            print(
                'Error: %s. Please set the environment variable "CUDA_VISIBLE_DEVICES".'
                % e)
            use_gpu = False

    # Load the model parameters.
    config = AnalysisConfig(modelpath)

    # Set the inference options.
    if use_gpu:
        config.enable_use_gpu(100, places)
    else:
        config.disable_gpu()
        config.enable_mkldnn()
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    # Create the predictor from the config.
    predictor = create_paddle_predictor(config)

    # Return the predictor.
    return predictor
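# Hypothetical invocation sketch (not from the source): device selection in
# load_model() above keys off CUDA_VISIBLE_DEVICES, so the same use_gpu=True
# call silently falls back to CPU + MKL-DNN when the variable is unset.
# `module` and "inference_model_dir" are placeholder names.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # GPU path: enable_use_gpu(100, 0)
gpu_predictor = module.load_model("inference_model_dir", use_gpu=True)

os.environ.pop("CUDA_VISIBLE_DEVICES", None)  # CPU fallback path
cpu_predictor = module.load_model("inference_model_dir", use_gpu=True)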
def _get_analysis_config(self, use_gpu=False, use_trt=False, use_mkldnn=False):
    '''
    Return a new object of AnalysisConfig.
    '''
    config = AnalysisConfig(
        os.path.join(self.path, "model"), os.path.join(self.path, "params"))
    config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_ir_optim(True)
    config.switch_use_feed_fetch_ops(False)
    if use_gpu:
        config.enable_use_gpu(100, 0)
        if use_trt:
            config.enable_tensorrt_engine(
                self.trt_parameters.workspace_size,
                self.trt_parameters.max_batch_size,
                self.trt_parameters.min_subgraph_size,
                self.trt_parameters.precision, self.trt_parameters.use_static,
                self.trt_parameters.use_calib_mode)
    elif use_mkldnn:
        config.enable_mkldnn()
    return config
def _set_config(self):
    # predictor config setting.
    cpu_config = AnalysisConfig(self.default_pretrained_model_path)
    cpu_config.disable_glog_info()
    cpu_config.disable_gpu()
    cpu_config.switch_ir_optim(False)
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.default_pretrained_model_path)
        gpu_config.disable_glog_info()
        gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)

    # model config setting.
    if not self.model_config:
        with open(os.path.join(self.directory, 'config.yml')) as fp:
            self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)

    self.multi_box_head_config = self.model_config['MultiBoxHead']
    self.output_decoder_config = self.model_config['SSDOutputDecoder']
    self.mobilenet_config = self.model_config['MobileNet']
def _set_config(self):
    """
    predictor config setting
    """
    self.model_file_path = self.default_pretrained_model_path
    cpu_config = AnalysisConfig(self.model_file_path)
    cpu_config.disable_glog_info()
    cpu_config.switch_ir_optim(True)
    cpu_config.enable_memory_optim()
    cpu_config.switch_use_feed_fetch_ops(False)
    cpu_config.switch_specify_input_names(True)
    cpu_config.disable_gpu()
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.model_file_path)
        gpu_config.disable_glog_info()
        gpu_config.switch_ir_optim(True)
        gpu_config.enable_memory_optim()
        gpu_config.switch_use_feed_fetch_ops(False)
        gpu_config.switch_specify_input_names(True)
        gpu_config.enable_use_gpu(100, 0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)
def set_config(self, model_file, params_file, use_feed_fetch_ops,
               specify_input_names):
    config = AnalysisConfig(model_file, params_file)
    config.disable_gpu()
    config.enable_mkldnn()
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
    config.switch_specify_input_names(specify_input_names)
    return config
def set_config(model_path):
    config = None
    if os.path.exists(os.path.join(model_path, '__model__')):
        config = AnalysisConfig(model_path)
    else:
        config = AnalysisConfig(model_path + '/model', model_path + '/params')
    if test_args.use_analysis:
        config.switch_ir_optim(True)
        config.enable_mkldnn()
        config.set_mkldnn_cache_capacity(test_args.mkldnn_cache_capacity)
        config.set_cpu_math_library_num_threads(test_args.num_threads)
    else:
        config.to_native_config()
    return config
def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("could not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("could not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("could not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)
    # if args.use_tensorrt:
    #     config.enable_tensorrt_engine(
    #         precision_mode=AnalysisConfig.Precision.Half
    #         if args.use_fp16 else AnalysisConfig.Precision.Float32,
    #         max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
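# A minimal driving sketch for the tuple returned by create_predictor() above
# (illustration only): with feed/fetch ops disabled, data moves through the
# zero-copy tensor API. `args` and the 1x3x640x640 input shape are assumptions.
import numpy as np

predictor, input_tensor, output_tensors = create_predictor(args, mode="det")
input_tensor.copy_from_cpu(np.zeros((1, 3, 640, 640), dtype="float32"))
predictor.zero_copy_run()
results = [t.copy_to_cpu() for t in output_tensors]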
def _get_analysis_config(self, use_gpu=False, use_trt=False, use_mkldnn=False):
    '''
    Return a new object of AnalysisConfig.
    '''
    config = AnalysisConfig(self.path)
    config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_ir_optim(True)
    config.switch_use_feed_fetch_ops(False)
    if use_gpu:
        config.enable_use_gpu(100, 0)
        if use_trt:
            config.enable_tensorrt_engine(
                self.trt_parameters.workspace_size,
                self.trt_parameters.max_batch_size,
                self.trt_parameters.min_subgraph_size,
                self.trt_parameters.precision, self.trt_parameters.use_static,
                self.trt_parameters.use_calib_mode)
            if self.trt_parameters.use_inspector:
                config.enable_tensorrt_inspector()
                self.assertTrue(
                    config.tensorrt_inspector_enabled(),
                    "The inspector option is not set correctly.")

            if self.dynamic_shape_params:
                config.set_trt_dynamic_shape_info(
                    self.dynamic_shape_params.min_input_shape,
                    self.dynamic_shape_params.max_input_shape,
                    self.dynamic_shape_params.optim_input_shape,
                    self.dynamic_shape_params.disable_trt_plugin_fp16)
            if self.enable_tensorrt_varseqlen:
                config.enable_tensorrt_varseqlen()
    elif use_mkldnn:
        config.enable_mkldnn()
        if self.enable_mkldnn_bfloat16:
            config.enable_mkldnn_bfloat16()
    print('config summary:', config.summary())
    return config
def _set_config(self):
    """
    predictor config setting.
    """
    cpu_config = AnalysisConfig(self.default_pretrained_model_path)
    cpu_config.disable_glog_info()
    cpu_config.disable_gpu()
    cpu_config.switch_ir_optim(False)
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.default_pretrained_model_path)
        gpu_config.disable_glog_info()
        gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)
def create_predictor(args):
    config = AnalysisConfig(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=AnalysisConfig.Precision.Half
            if args.use_fp16 else AnalysisConfig.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)

    return predictor
def _load_model_and_set_config(self):
    '''
    load model from file and set analysis config
    '''
    if os.path.exists(os.path.join(self.model_path, self.params_file)):
        config = AnalysisConfig(
            os.path.join(self.model_path, "__model__"),
            os.path.join(self.model_path, self.params_file))
    else:
        config = AnalysisConfig(os.path.join(self.model_path))

    if fluid.is_compiled_with_cuda():
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_use_feed_fetch_ops(False)
    config.enable_memory_optim()
    config.disable_glog_info()
    config.switch_ir_optim(True)

    return config
def set_config(self,
               model_path,
               num_threads,
               mkldnn_cache_capacity,
               warmup_data=None,
               use_analysis=False,
               enable_ptq=False):
    config = AnalysisConfig(model_path)
    config.set_cpu_math_library_num_threads(num_threads)
    if use_analysis:
        config.disable_gpu()
        config.switch_use_feed_fetch_ops(True)
        config.switch_ir_optim(True)
        config.enable_mkldnn()
        config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
        if enable_ptq:
            # This pass must be added before fc_fuse_pass to work properly.
            config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
            config.enable_quantizer()
            config.quantizer_config().set_quant_data(warmup_data)
            config.quantizer_config().set_quant_batch_size(1)
    return config
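# Hedged call sketch for set_config() above with post-training quantization
# enabled; `tester`, the paths, and the shape of `warmup_data` (assumed to be
# a list of PaddleTensor batches for set_quant_data()) are placeholders.
config = tester.set_config(
    model_path="quant_model_dir",
    num_threads=1,
    mkldnn_cache_capacity=100,
    warmup_data=warmup_data,
    use_analysis=True,
    enable_ptq=True)
predictor = create_paddle_predictor(config)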
def _load_model_and_set_config(self):
    '''
    load model from file and set analysis config
    '''
    if os.path.exists(os.path.join(self.model_path, self.params_file)):
        config = AnalysisConfig(
            os.path.join(self.model_path, self.model_file),
            os.path.join(self.model_path, self.params_file))
    else:
        config = AnalysisConfig(os.path.join(self.model_path))

    if fluid.is_compiled_with_cuda():
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_use_feed_fetch_ops(False)
    config.enable_memory_optim()
    config.disable_glog_info()
    # TODO: set it to True after PaddleInference fixes the precision error
    # in CUDA11
    config.switch_ir_optim(False)

    return config
def set_config_ptq(model_path, warmup_data):
    config = None
    if os.path.exists(os.path.join(model_path, '__model__')):
        config = AnalysisConfig(model_path)
    else:
        config = AnalysisConfig(model_path + '/model', model_path + '/params')
    config.switch_ir_optim(True)
    # This pass must be added before fc_fuse_pass to work properly
    config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
    config.pass_builder().append_pass("fc_mkldnn_pass")
    config.enable_mkldnn()
    config.set_mkldnn_cache_capacity(test_args.mkldnn_cache_capacity)
    config.set_cpu_math_library_num_threads(test_args.num_threads)

    config.enable_quantizer()
    config.quantizer_config().set_quant_data(warmup_data)
    config.quantizer_config().set_quant_batch_size(1)
    ops_to_quantize = set()
    if len(test_args.ops_to_quantize) > 0:
        ops_to_quantize = set(test_args.ops_to_quantize.split(','))
    config.quantizer_config().set_enabled_op_types(ops_to_quantize)
    return config
def main(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    # Set config
    # config = AnalysisConfig(args.model_dir)
    # config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        task_ids = sample[3]
        input_mask = sample[4]

        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        output = outputs[0]
        batch_result = output.as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        index / total_time, total_time, index, args.batch_size))
def main(args):
    reader = XWReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)

    # If the inference model already exists, load it directly instead of saving it again.
    if os.path.isdir(model_path):
        log.info("{} already exists, loading it directly".format(model_path))
    else:
        ernie_config = ErnieConfig(args.ernie_config_path)
        ernie_config.print_config()

        predict_prog = fluid.Program()
        predict_startup = fluid.Program()
        with fluid.program_guard(predict_prog, predict_startup):
            with fluid.unique_name.guard():
                predict_pyreader, probs, feed_target_names = create_model(
                    args,
                    pyreader_name='predict_reader',
                    ernie_config=ernie_config,
                    is_classify=True,
                    is_prediction=True)

        predict_prog = predict_prog.clone(for_test=True)

        if args.use_cuda:
            place = fluid.CUDAPlace(0)
            dev_count = fluid.core.get_cuda_device_count()
        else:
            place = fluid.CPUPlace()
            dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

        exe = fluid.Executor(place)
        exe.run(predict_startup)

        if args.init_checkpoint:
            init_pretraining_params(exe, args.init_checkpoint, predict_prog)
        else:
            raise ValueError("args 'init_checkpoint' should be set for prediction!")

        log.info("save inference model to %s" % model_path)
        fluid.io.save_inference_model(
            model_path,
            feed_target_names, [probs],
            exe,
            main_program=predict_prog)

    # Set config
    # config = AnalysisConfig(args.model_dir)
    # config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)
    return reader, predictor