Example #1
    def load_model(self, modelpath, use_gpu):
        # Configure whether to run on GPU or CPU
        if use_gpu:
            try:
                places = os.environ["CUDA_VISIBLE_DEVICES"]
                places = int(places[0])
            except Exception as e:
                print(
                    'Error: %s. Please set the environment variable "CUDA_VISIBLE_DEVICES".'
                    % e)
                use_gpu = False

        # Load the model parameters
        config = AnalysisConfig(modelpath)

        # Set config options
        if use_gpu:
            config.enable_use_gpu(100, places)
        else:
            config.disable_gpu()
            config.enable_mkldnn()
        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.switch_specify_input_names(True)

        # Create the predictor from the config
        predictor = create_paddle_predictor(config)

        # Return the predictor
        return predictor
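Because this config turns off the feed/fetch ops (switch_use_feed_fetch_ops(False)), the returned predictor has to be driven through Paddle's zero-copy tensor API. Below is a minimal sketch of that calling pattern, assuming the legacy paddle.fluid inference API; the input shape [1, 3, 224, 224] is a placeholder, not something the example above defines.

import numpy as np

# `predictor` as returned by load_model() above; the shape below is a
# hypothetical stand-in for the model's real input shape.
input_names = predictor.get_input_names()
input_tensor = predictor.get_input_tensor(input_names[0])
input_tensor.copy_from_cpu(np.random.rand(1, 3, 224, 224).astype("float32"))

predictor.zero_copy_run()  # run without feed/fetch ops

output_names = predictor.get_output_names()
output_tensor = predictor.get_output_tensor(output_names[0])
result = output_tensor.copy_to_cpu()  # numpy ndarray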
Example #2
    def _get_analysis_config(self,
                             use_gpu=False,
                             use_trt=False,
                             use_mkldnn=False):
        '''
        Return a new AnalysisConfig object.
        '''
        config = AnalysisConfig(os.path.join(self.path, "model"),
                                os.path.join(self.path, "params"))
        config.disable_gpu()
        config.switch_specify_input_names(True)
        config.switch_ir_optim(True)
        config.switch_use_feed_fetch_ops(False)
        if use_gpu:
            config.enable_use_gpu(100, 0)
            if use_trt:
                config.enable_tensorrt_engine(
                    self.trt_parameters.workspace_size,
                    self.trt_parameters.max_batch_size,
                    self.trt_parameters.min_subgraph_size,
                    self.trt_parameters.precision,
                    self.trt_parameters.use_static,
                    self.trt_parameters.use_calib_mode)
        elif use_mkldnn:
            config.enable_mkldnn()

        return config
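The self.trt_parameters object used above belongs to the surrounding test class and is not shown here. A hypothetical sketch of a compatible container, with placeholder values, to make the attribute names concrete:

from collections import namedtuple
from paddle.fluid.core import AnalysisConfig

# Hypothetical container mirroring the attributes read above.
TrtParameters = namedtuple("TrtParameters", [
    "workspace_size", "max_batch_size", "min_subgraph_size",
    "precision", "use_static", "use_calib_mode"
])

trt_parameters = TrtParameters(
    workspace_size=1 << 30,  # 1 GiB of TensorRT workspace
    max_batch_size=1,
    min_subgraph_size=3,
    precision=AnalysisConfig.Precision.Float32,
    use_static=False,
    use_calib_mode=False)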
Example #3
    def _set_config(self):
        # predictor config setting.
        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        cpu_config.switch_ir_optim(False)
        self.cpu_predictor = create_paddle_predictor(cpu_config)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500,
                                      device_id=0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)

        # model config setting.
        if not self.model_config:
            with open(os.path.join(self.directory, 'config.yml')) as fp:
                self.model_config = yaml.load(fp.read(),
                                              Loader=yaml.FullLoader)

        self.multi_box_head_config = self.model_config['MultiBoxHead']
        self.output_decoder_config = self.model_config['SSDOutputDecoder']
        self.mobilenet_config = self.model_config['MobileNet']
Example #4
    def _set_config(self):
        """
        predictor config setting
        """
        self.model_file_path = self.default_pretrained_model_path
        cpu_config = AnalysisConfig(self.model_file_path)
        cpu_config.disable_glog_info()
        cpu_config.switch_ir_optim(True)
        cpu_config.enable_memory_optim()
        cpu_config.switch_use_feed_fetch_ops(False)
        cpu_config.switch_specify_input_names(True)
        cpu_config.disable_gpu()
        self.cpu_predictor = create_paddle_predictor(cpu_config)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.model_file_path)
            gpu_config.disable_glog_info()
            gpu_config.switch_ir_optim(True)
            gpu_config.enable_memory_optim()
            gpu_config.switch_use_feed_fetch_ops(False)
            gpu_config.switch_specify_input_names(True)
            gpu_config.enable_use_gpu(100, 0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)
Example #5
    def set_config(self, model_file, params_file, use_feed_fetch_ops,
                   specify_input_names):
        config = AnalysisConfig(model_file, params_file)
        config.disable_gpu()
        config.enable_mkldnn()
        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
        config.switch_specify_input_names(specify_input_names)
        return config
Example #6
def set_config(model_path):
    config = None
    if os.path.exists(os.path.join(model_path, '__model__')):
        config = AnalysisConfig(model_path)
    else:
        config = AnalysisConfig(model_path + '/model', model_path + '/params')
    if test_args.use_analysis:
        config.switch_ir_optim(True)
        config.enable_mkldnn()
        config.set_mkldnn_cache_capacity(test_args.mkldnn_cache_capacity)
        config.set_cpu_math_library_num_threads(test_args.num_threads)
    else:
        config.to_native_config()

    return config
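set_config here reads a module-level test_args instead of taking parameters. A hedged sketch of a stand-in with hypothetical flag values, just so the function can be exercised on its own:

from types import SimpleNamespace

# Hypothetical stand-in for the module-level test_args read above.
test_args = SimpleNamespace(
    use_analysis=True,        # take the MKL-DNN / IR-optimization branch
    mkldnn_cache_capacity=10,
    num_threads=4)

config = set_config("./inference_model")  # placeholder model directory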
Example #7
def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)
    #     if args.use_tensorrt:
    #         config.enable_tensorrt_engine(
    #             precision_mode=AnalysisConfig.Precision.Half
    #             if args.use_fp16 else AnalysisConfig.Precision.Float32,
    #             max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
Example #8
    def _get_analysis_config(self,
                             use_gpu=False,
                             use_trt=False,
                             use_mkldnn=False):
        '''
        Return a new AnalysisConfig object.
        '''
        config = AnalysisConfig(self.path)
        config.disable_gpu()
        config.switch_specify_input_names(True)
        config.switch_ir_optim(True)
        config.switch_use_feed_fetch_ops(False)
        if use_gpu:
            config.enable_use_gpu(100, 0)
            if use_trt:
                config.enable_tensorrt_engine(
                    self.trt_parameters.workspace_size,
                    self.trt_parameters.max_batch_size,
                    self.trt_parameters.min_subgraph_size,
                    self.trt_parameters.precision,
                    self.trt_parameters.use_static,
                    self.trt_parameters.use_calib_mode)
                if self.trt_parameters.use_inspector:
                    config.enable_tensorrt_inspector()
                    self.assertTrue(
                        config.tensorrt_inspector_enabled(),
                        "The inspector option is not set correctly.")

                if self.dynamic_shape_params:
                    config.set_trt_dynamic_shape_info(
                        self.dynamic_shape_params.min_input_shape,
                        self.dynamic_shape_params.max_input_shape,
                        self.dynamic_shape_params.optim_input_shape,
                        self.dynamic_shape_params.disable_trt_plugin_fp16)
                if self.enable_tensorrt_varseqlen:
                    config.enable_tensorrt_varseqlen()

        elif use_mkldnn:
            config.enable_mkldnn()
            if self.enable_mkldnn_bfloat16:
                config.enable_mkldnn_bfloat16()
        print('config summary:', config.summary())
        return config
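The dynamic-shape parameters passed to set_trt_dynamic_shape_info are dictionaries keyed by input-tensor name, plus a flag that disables the TensorRT plugin FP16 path. A short sketch with a hypothetical input named "x":

# Hypothetical shape ranges for a single input tensor named "x".
min_input_shape = {"x": [1, 3, 112, 112]}
max_input_shape = {"x": [4, 3, 448, 448]}
optim_input_shape = {"x": [1, 3, 224, 224]}

config.set_trt_dynamic_shape_info(
    min_input_shape, max_input_shape, optim_input_shape,
    False)  # False keeps the TensorRT plugin fp16 path enabled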
Example #9
    def _set_config(self):
        """
        predictor config setting.
        """
        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        cpu_config.switch_ir_optim(False)
        self.cpu_predictor = create_paddle_predictor(cpu_config)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)
Example #10
def create_predictor(args):
    config = AnalysisConfig(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=AnalysisConfig.Precision.Half
            if args.use_fp16 else AnalysisConfig.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)

    return predictor
Example #11
    def _load_model_and_set_config(self):
        '''
        Load the model from file and set the analysis config.
        '''
        if os.path.exists(os.path.join(self.model_path, self.params_file)):
            config = AnalysisConfig(
                os.path.join(self.model_path, "__model__"),
                os.path.join(self.model_path, self.params_file))
        else:
            config = AnalysisConfig(os.path.join(self.model_path))

        if fluid.is_compiled_with_cuda():
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
        config.switch_specify_input_names(True)
        config.switch_use_feed_fetch_ops(False)
        config.enable_memory_optim()
        config.disable_glog_info()
        config.switch_ir_optim(True)

        return config
Example #12
    def set_config(self,
                   model_path,
                   num_threads,
                   mkldnn_cache_capacity,
                   warmup_data=None,
                   use_analysis=False,
                   enable_ptq=False):
        config = AnalysisConfig(model_path)
        config.set_cpu_math_library_num_threads(num_threads)
        if use_analysis:
            config.disable_gpu()
            config.switch_use_feed_fetch_ops(True)
            config.switch_ir_optim(True)
            config.enable_mkldnn()
            config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
            if enable_ptq:
                # This pass must be added before fc_fuse_pass to work properly
                config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
                config.enable_quantizer()
                config.quantizer_config().set_quant_data(warmup_data)
                config.quantizer_config().set_quant_batch_size(1)
        return config
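The warmup_data handed to set_quant_data is, in the legacy inference API, a list of PaddleTensor objects built from representative inputs. A minimal sketch under that assumption; the shape is a placeholder:

import numpy as np
from paddle.fluid.core import PaddleTensor

# Hypothetical warm-up batch; replace the shape with the model's real input.
warmup_batch = np.random.rand(1, 3, 224, 224).astype("float32")
warmup_data = [PaddleTensor(warmup_batch)]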
Example #13
    def _load_model_and_set_config(self):
        '''
        Load the model from file and set the analysis config.
        '''
        if os.path.exists(os.path.join(self.model_path, self.params_file)):
            config = AnalysisConfig(
                os.path.join(self.model_path, self.model_file),
                os.path.join(self.model_path, self.params_file))
        else:
            config = AnalysisConfig(os.path.join(self.model_path))

        if fluid.is_compiled_with_cuda():
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
        config.switch_specify_input_names(True)
        config.switch_use_feed_fetch_ops(False)
        config.enable_memory_optim()
        config.disable_glog_info()
        # TODO: set it to True after PaddleInference fix the precision error
        # in CUDA11
        config.switch_ir_optim(False)

        return config
Example #14
def set_config_ptq(model_path, warmup_data):
    config = None
    if os.path.exists(os.path.join(model_path, '__model__')):
        config = AnalysisConfig(model_path)
    else:
        config = AnalysisConfig(model_path + '/model', model_path + '/params')
    config.switch_ir_optim(True)
    # This pass must be added before fc_fuse_pass to work properly
    config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
    config.pass_builder().append_pass("fc_mkldnn_pass")

    config.enable_mkldnn()
    config.set_mkldnn_cache_capacity(test_args.mkldnn_cache_capacity)
    config.set_cpu_math_library_num_threads(test_args.num_threads)

    config.enable_quantizer()
    config.quantizer_config().set_quant_data(warmup_data)
    config.quantizer_config().set_quant_batch_size(1)
    ops_to_quantize = set()
    if len(test_args.ops_to_quantize) > 0:
        ops_to_quantize = set(test_args.ops_to_quantize.split(','))
    config.quantizer_config().set_enabled_op_types(ops_to_quantize)

    return config
Example #15
def main(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    # Set config
    #config = AnalysisConfig(args.model_dir)
    #config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True) 
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        src_ids    = sample[0]
        sent_ids   = sample[1]
        pos_ids    = sample[2]
        task_ids   = sample[3]
        input_mask = sample[4]

        inputs = [array2tensor(ndarray) for ndarray in [src_ids, sent_ids, pos_ids, input_mask]]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        output = outputs[0]
        batch_result  = output.as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(index/total_time, total_time, index, args.batch_size))
Example #16
def main(args):
    reader = XWReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)

    # If the inference model already exists, skip saving it again
    if os.path.isdir(model_path):
        log.info("{} already exists, loading it directly".format(model_path))
    else:
        ernie_config = ErnieConfig(args.ernie_config_path)
        ernie_config.print_config()
        predict_prog = fluid.Program()
        predict_startup = fluid.Program()
        with fluid.program_guard(predict_prog, predict_startup):
            with fluid.unique_name.guard():
                predict_pyreader, probs, feed_target_names = create_model(
                    args,
                    pyreader_name='predict_reader',
                    ernie_config=ernie_config,
                    is_classify=True,
                    is_prediction=True)

        predict_prog = predict_prog.clone(for_test=True)

        if args.use_cuda:
            place = fluid.CUDAPlace(0)
            dev_count = fluid.core.get_cuda_device_count()
        else:
            place = fluid.CPUPlace()
            dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

        exe = fluid.Executor(place)
        exe.run(predict_startup)

        if args.init_checkpoint:
            init_pretraining_params(exe, args.init_checkpoint, predict_prog)
        else:
            raise ValueError("args 'init_checkpoint' should be set for prediction!")

        log.info("save inference model to %s" % model_path)
        fluid.io.save_inference_model(
            model_path,
            feed_target_names, [probs],
            exe,
            main_program=predict_prog)

    # Set config
    # config = AnalysisConfig(args.model_dir)
    # config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    return reader, predictor