Example #1
    def __init__(self, cfg, name='BMN'):
        name = name.upper()
        self.name = name
        model_file = cfg[name]['model_file']
        params_file = cfg[name]['params_file']
        gpu_mem = cfg[name]['gpu_mem']
        device_id = cfg[name]['device_id']

        self.nms_thread = cfg[name]['nms_thread']
        self.min_pred_score = cfg[name]['score_thread']
        self.min_frame_thread = cfg['COMMON']['fps']

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        self.output1_tensor = self.predictor.get_output_handle(output_names[0])
        self.output2_tensor = self.predictor.get_output_handle(output_names[1])
        self.output3_tensor = self.predictor.get_output_handle(output_names[2])
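# A minimal usage sketch (not part of the original class): with the zero-copy
# handles above, inference is typically driven by reshape / copy_from_cpu /
# run / copy_to_cpu. The helper name and feature layout are illustrative.
import numpy as np

def run_bmn(predictor, input_tensor, output_tensors, feature):
    feature = np.asarray(feature, dtype='float32')
    input_tensor.reshape(feature.shape)
    input_tensor.copy_from_cpu(feature)      # zero-copy feed
    predictor.run()
    # fetch all configured outputs back to host memory
    return [t.copy_to_cpu() for t in output_tensors]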
Example #2
    def load_predictor(self, model_file, params_file):
        config = Config(model_file, params_file)
        if self.predictor_config["use_gpu"]:
            config.enable_use_gpu(200, 0)
            config.switch_ir_optim(True)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(
                self.predictor_config["cpu_threads"])

            if self.predictor_config["enable_mkldnn"]:
                try:
                    # cache 10 different shapes for mkldnn to avoid memory leak
                    config.set_mkldnn_cache_capacity(10)
                    config.enable_mkldnn()
                except Exception as e:
                    logging.error(
                        "The current environment does not support `mkldnn`, so disable mkldnn."
                    )
        config.disable_glog_info()
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)
        input_names = predictor.get_input_names()
        output_names = predictor.get_output_names()
        return predictor, input_names, output_names
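# Assumed shape of self.predictor_config consumed by load_predictor above;
# the key names mirror the lookups in the code, the values are placeholders.
predictor_config = {
    "use_gpu": False,       # GPU branch uses enable_use_gpu(200, 0)
    "cpu_threads": 4,       # passed to set_cpu_math_library_num_threads on CPU
    "enable_mkldnn": True,  # best-effort oneDNN (MKL-DNN) acceleration on CPU
}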
Example #3
    def __init__(self, cfg, name='ACTION'):
        name = name.upper()
        self.name = name
        model_file = cfg[name]['model_file']
        params_file = cfg[name]['params_file']
        gpu_mem = cfg[name]['gpu_mem']
        device_id = cfg[name]['device_id']

        self.topk = cfg[name]['topk']
        self.frame_offset = cfg[name]['nms_offset']
        self.nms_thread = cfg[name]['nms_thread']
        self.cls_thread = cfg[name]['classify_score_thread']
        self.iou_thread = cfg[name]['iou_score_thread']

        self.label_map_file = cfg['COMMON']['label_dic']
        self.fps = cfg['COMMON']['fps']
        self.nms_id = 5

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input1_tensor = self.predictor.get_input_handle(input_names[0])
        #self.input2_tensor = self.predictor.get_input_handle(input_names[1])

        output_names = self.predictor.get_output_names()
        self.output1_tensor = self.predictor.get_output_handle(output_names[0])
        self.output2_tensor = self.predictor.get_output_handle(output_names[1])
Example #4
def create_paddle_predictor(args):
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    #config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=Config.Precision.Half
            if args.use_fp16 else Config.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)

    return predictor
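# Illustrative args object for create_paddle_predictor above; the attribute
# names mirror what the function reads, the file paths and values are placeholders.
from types import SimpleNamespace

args = SimpleNamespace(
    model_file="inference.pdmodel",
    params_file="inference.pdiparams",
    use_gpu=True,
    gpu_mem=8000,          # initial GPU memory pool (MB)
    enable_mkldnn=False,   # only consulted on the CPU path
    ir_optim=True,         # graph/IR fusion passes
    use_tensorrt=False,
    use_fp16=False,
    batch_size=1,
)
predictor = create_paddle_predictor(args)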
Example #5
    def __init__(self, cfg, name='PPTSM'): 
        name = name.upper()
        self.name           = name
        model_file          = cfg[name]['model_file']
        params_file         = cfg[name]['params_file']
        gpu_mem             = cfg[name]['gpu_mem']
        device_id           = cfg[name]['device_id']

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()

        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        print("output_names = ", output_names)
        #self.output_tensor = self.predictor.get_output_handle(output_names[1])
        self.output_tensor = self.predictor.get_output_handle(output_names[0])
Example #6
    def create_paddle_predictor(self, args, inference_model_dir=None):
        if inference_model_dir is None:
            inference_model_dir = args.inference_model_dir
        params_file = os.path.join(inference_model_dir, "inference.pdiparams")
        model_file = os.path.join(inference_model_dir, "inference.pdmodel")
        config = Config(model_file, params_file)

        if args.use_gpu:
            config.enable_use_gpu(args.gpu_mem, 0)
        else:
            config.disable_gpu()
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
        config.set_cpu_math_library_num_threads(args.cpu_num_threads)

        if args.enable_profile:
            config.enable_profile()
        config.disable_glog_info()
        config.switch_ir_optim(args.ir_optim)  # default true
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                precision_mode=Config.Precision.Half
                if args.use_fp16 else Config.Precision.Float32,
                max_batch_size=args.batch_size,
                min_subgraph_size=30)

        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)

        return predictor, config
Example #7
    def load_model(self,
                   model_dir,
                   use_gpu=False,
                   enable_mkldnn=False,
                   cpu_threads=1):
        model = os.path.join(model_dir, '__model__')
        params = os.path.join(model_dir, '__params__')
        config = Config(model, params)

        # set config options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_threads)
            if enable_mkldnn:
                config.enable_mkldnn()
                config.set_mkldnn_cache_capacity(10)

        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.switch_specify_input_names(True)

        # create the predictor from the config
        predictor = create_predictor(config)

        # get the model's input and output handles
        input_names = predictor.get_input_names()
        output_names = predictor.get_output_names()
        input_handle = predictor.get_input_handle(input_names[0])
        output_handle = predictor.get_output_handle(output_names[0])

        return predictor, input_handle, output_handle
Example #8
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether use gpu
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predict by TensorRT need use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        # FIXME(dkp): ir optimize may prune variable inside graph
        #             and incur error in Paddle 2.0, e.g. in SSDLite
        #             FCOS model, set as False currently and should
        #             be set as True after switch_ir_optim fixed
        config.switch_ir_optim(False)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=False)

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
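# Illustrative call of load_predictor above (the path is a placeholder);
# model_dir must contain model.pdmodel / model.pdiparams. 'fluid' run_mode
# skips the TensorRT branch, and TRT modes additionally require use_gpu=True.
predictor = load_predictor('output/inference_model',
                           run_mode='fluid',
                           batch_size=1,
                           use_gpu=True)
input_names = predictor.get_input_names()
image_tensor = predictor.get_input_handle(input_names[0])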
Example #9
def create_paddle_predictor(args):
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    if args.use_mkldnn:
        config.enable_mkldnn()
        config.set_cpu_math_library_num_threads(args.cpu_num_threads)
        config.set_mkldnn_cache_capacity(100)

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)

    return predictor
Example #10
    def init_model(self):
        from paddle.inference import Config
        from paddle.inference import PrecisionType
        from paddle.inference import create_predictor

        precision_mode = PrecisionType.Float32
        use_calib_mode = False

        if self.param_type == "fp16":
            precision_mode = PrecisionType.Half
        elif self.param_type == "int8":
            precision_mode = PrecisionType.Int8
            use_calib_mode = True

        mode_path = os.path.join(self.model_dir, "yolov3/__model__")
        param_path = os.path.join(self.model_dir, "yolov3/__params__")
        config = Config(mode_path, param_path)
        config.enable_use_gpu(100, 0)
        config.switch_ir_optim(True)
        size = (self.batch_size * 101) << 20
        config.enable_tensorrt_engine(
            workspace_size=size,
            max_batch_size=self.batch_size,
            min_subgraph_size=3,
            precision_mode=precision_mode,
            use_static=False,
            use_calib_mode=use_calib_mode)
        if not self.debug:
            config.disable_glog_info()
        else:
            config.enable_profile()

        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.enable_mkldnn()
        #exit(1)
        self.predictor = create_predictor(config)
Example #11
    def load_model(self, modelpath, use_gpu, use_mkldnn, combined):
        # configure the execution device
        if use_gpu:
            try:
                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
            except Exception:
                print(
                    'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
                )
                use_gpu = False

        # load the model and parameters
        if combined:
            model = os.path.join(modelpath, "__model__")
            params = os.path.join(modelpath, "__params__")
            config = Config(model, params)
        else:
            config = Config(modelpath)

        # set config options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            if use_mkldnn:
                config.enable_mkldnn()
        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.switch_specify_input_names(True)

        # create the predictor from the config
        predictor = create_predictor(config)

        # return the predictor
        return predictor
Example #12
    return parser.parse_args()

if __name__ == '__main__':
    args = parse_args()
    assert (args.model_dir != "") or \
            (args.model_file != "" and args.params_file != ""), \
            "Set model path error."
    assert args.img_path != "", "Set img_path error."
    
    # Init config
    if args.model_dir == "":
        config = Config(args.model_file, args.params_file)
    else:
        config = Config(args.model_dir)
    config.enable_use_gpu(500, 0)
    config.switch_ir_optim()
    config.enable_memory_optim()
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  precision_mode=PrecisionType.Float32,
                                  max_batch_size=1,
                                  min_subgraph_size=5,
                                  use_static=False,
                                  use_calib_mode=False)
        
    # Create predictor
    predictor = create_predictor(config)

    # Set input
    img = cv2.imread(args.img_path)
    img = preprocess(img)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_handle(input_names[0])
    input_tensor.reshape(img.shape)
    input_tensor.copy_from_cpu(img.copy())

    # Run
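    # Continuation sketch (not part of the original snippet): run the predictor
    # and fetch the first output; post-processing is model-specific.
    predictor.run()

    output_names = predictor.get_output_names()
    output_tensor = predictor.get_output_handle(output_names[0])
    output_data = output_tensor.copy_to_cpu()
    print("output shape:", output_data.shape)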
Example #13
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        self._init_cpu_config()

        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def run(self, imgs):
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]

        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_seg_handle = self.predictor.get_output_handle(output_names[0])

        args = self.args
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        cut_height = 160
        num_classes = 7
        postprocessor = tusimple_processor.TusimpleProcessor(
            num_classes=num_classes,
            cut_height=cut_height,
            save_dir=args.save_dir)

        for i, im_path in enumerate(imgs):
            im = cv2.imread(im_path)
            im = im[cut_height:, :, :]
            im = im.astype('float32')
            im, _ = self.cfg.transforms(im)
            im = im[np.newaxis, ...]

            input_handle.reshape(im.shape)
            input_handle.copy_from_cpu(im)

            self.predictor.run()

            seg_results = output_seg_handle.copy_to_cpu()

            # get lane points
            seg_results = paddle.to_tensor([seg_results])
            postprocessor.predict(seg_results, im_path)
        logger.info("Finish")
Example #14
root_path = "/usr/local/quake/datas/benchmark"

images_dir = os.path.join(root_path, './dataset/images')
# mean and std for per-channel normalization
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
batch_size = 8
input_h = 608
input_w = 608

mode_path = os.path.join(root_path, "./model/yolov3/__model__")
param_path = os.path.join(root_path, "./model/yolov3/__params__")
config = Config(mode_path, param_path)
config.enable_use_gpu(100, 0)
config.switch_ir_optim(True)
config.enable_tensorrt_engine(workspace_size=1 << 10,
                              max_batch_size=batch_size,
                              min_subgraph_size=3,
                              precision_mode=PrecisionType.Int8,
                              use_static=False,
                              use_calib_mode=True)

config.enable_memory_optim()
config.switch_use_feed_fetch_ops(False)
config.enable_mkldnn()
#config.enable_profile()

predictor = create_predictor(config)
input_names = predictor.get_input_names()
input_img = predictor.get_input_handle(input_names[0])
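# Continuation sketch (not part of the original snippet): normalize one image to
# the NCHW float32 layout implied by mean/std/input_h/input_w above and feed it
# through the zero-copy input handle. cv2 may already be imported elsewhere, the
# image path is a placeholder, and a real YOLOv3 export may expect extra inputs
# such as im_size before predictor.run() can be called.
import cv2

def preprocess(img_path):
    img = cv2.imread(img_path)
    img = cv2.resize(img, (input_w, input_h))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype('float32') / 255.0
    img = (img - mean) / std                        # per-channel normalization
    return img.transpose((2, 0, 1))[np.newaxis, ...].astype('float32')  # NCHW

batch = preprocess(os.path.join(images_dir, 'demo.jpg'))
input_img.reshape(batch.shape)
input_img.copy_from_cpu(batch)
# predictor.run() would follow once all required inputs are set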
Example #15
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()

        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(model_name=args.model_name,
                                               model_precision=args.precision,
                                               batch_size=args.batch_size,
                                               data_shape="dynamic",
                                               save_path=None,
                                               inference_config=self.pred_cfg,
                                               pids=pid,
                                               process_name=None,
                                               gpu_ids=0,
                                               time_keys=[
                                                   'preprocess_time',
                                                   'inference_time',
                                                   'postprocess_time'
                                               ],
                                               warmup=0,
                                               logger=logger)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]

        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                 max_batch_size=1,
                                                 min_subgraph_size=300,
                                                 precision_mode=precision_mode,
                                                 use_static=False,
                                                 use_calib_mode=False)

            if use_auto_tune(self.args) and \
                os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manual set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}

        for i in range(len(input_names)):
            input_handle[input_names[i]] = self.predictor.get_input_handle(
                input_names[i])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            # warm up
            if i == 0 and args.benchmark:
                for _ in range(5):
                    img_inputs = []
                    if trimaps is not None:
                        trimap_inputs = []
                    trans_info = []
                    for j in range(i, i + args.batch_size):
                        img = imgs[j]
                        trimap = trimaps[j] if trimaps is not None else None
                        data = self._preprocess(img=img, trimap=trimap)
                        img_inputs.append(data['img'])
                        if trimaps is not None:
                            trimap_inputs.append(
                                data['trimap'][np.newaxis, :, :])
                        trans_info.append(data['trans_info'])
                    img_inputs = np.array(img_inputs)
                    if trimaps is not None:
                        trimap_inputs = (
                            np.array(trimap_inputs)).astype('float32')

                    input_handle['img'].copy_from_cpu(img_inputs)
                    if trimaps is not None:
                        input_handle['trimap'].copy_from_cpu(trimap_inputs)
                    self.predictor.run()
                    results = output_handle.copy_to_cpu()

                    results = results.squeeze(1)
                    for j in range(args.batch_size):
                        trimap = trimap_inputs[
                            j] if trimaps is not None else None
                        result = self._postprocess(results[j],
                                                   trans_info[j],
                                                   trimap=trimap)

            # inference
            if args.benchmark:
                self.autolog.times.start()

            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []
            for j in range(i, i + args.batch_size):
                img = imgs[j]
                trimap = trimaps[j] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)

            if args.benchmark:
                self.autolog.times.stamp()

            self.predictor.run()

            if args.benchmark:
                self.autolog.times.stamp()

            results = output_handle.copy_to_cpu()

            results = results.squeeze(1)
            for j in range(args.batch_size):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(results[j],
                                           trans_info[j],
                                           trimap=trimap)
                self._save_imgs(result, imgs[i + j])

            if args.benchmark:
                self.autolog.times.end(stamp=True)
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(alpha, (w, h),
                                   interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')

        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        alpha_save_path = os.path.join(args.save_dir, 'alpha/', name + '.png')
        clip_save_path = os.path.join(args.save_dir, 'clip/', name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
Example #16
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether use gpu
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predict by TensorRT need use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    use_calib_mode = True if run_mode == 'trt_int8' else False
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=use_calib_mode)

        if use_dynamic_shape:
            print('use_dynamic_shape')
            min_input_shape = {'image': [1, 3, trt_min_shape, trt_min_shape]}
            max_input_shape = {'image': [1, 3, trt_max_shape, trt_max_shape]}
            opt_input_shape = {'image': [1, 3, trt_opt_shape, trt_opt_shape]}
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
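# Illustrative call of the loader above: TensorRT FP16 with dynamic input shapes;
# the model path and the shape bounds are placeholders.
predictor = load_predictor('output/inference_model',
                           run_mode='trt_fp16',
                           batch_size=1,
                           use_gpu=True,
                           use_dynamic_shape=True,
                           trt_min_shape=320,
                           trt_max_shape=1280,
                           trt_opt_shape=640)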
Example #17
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()

        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}

        for i in range(len(input_names)):
            input_handle[input_names[i]] = self.predictor.get_input_handle(
                input_names[i])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []
            for j in range(i, i + args.batch_size):
                img = imgs[j]
                trimap = trimaps[j] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)
            self.predictor.run()
            results = output_handle.copy_to_cpu()

            results = results.squeeze(1)
            for j in range(args.batch_size):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(
                    results[j], trans_info[j], trimap=trimap)
                self._save_imgs(result, imgs[i + j])
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(
                    alpha, (w, h), interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')

        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        alpha_save_path = os.path.join(args.save_dir, 'alpha/', name + '.png')
        clip_save_path = os.path.join(args.save_dir, 'clip/', name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
Example #18
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode need to set True
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predict by TensorRT need device == 'GPU'.
    """
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
                pass

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=trt_calib_mode)

        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
Example #19
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()

        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(model_name=args.model_name,
                                               model_precision=args.precision,
                                               batch_size=args.batch_size,
                                               data_shape="dynamic",
                                               save_path=None,
                                               inference_config=self.pred_cfg,
                                               pids=pid,
                                               process_name=None,
                                               gpu_ids=0,
                                               time_keys=[
                                                   'preprocess_time',
                                                   'inference_time',
                                                   'postprocess_time'
                                               ],
                                               warmup=0,
                                               logger=logger)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("Use GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]

        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                 max_batch_size=1,
                                                 min_subgraph_size=50,
                                                 precision_mode=precision_mode,
                                                 use_static=False,
                                                 use_calib_mode=False)

            if use_auto_tune(self.args) and \
                os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manual set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs):
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]

        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        results = []
        args = self.args

        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        for i in range(0, num, args.batch_size):
            if args.benchmark:
                self.autolog.times.start()
            data = np.array(
                [self._preprocess(img) for img in imgs[i:i + args.batch_size]])

            input_handle.reshape(data.shape)
            input_handle.copy_from_cpu(data)
            if args.benchmark:
                self.autolog.times.stamp()

            self.predictor.run()

            results = output_handle.copy_to_cpu()
            if args.benchmark:
                self.autolog.times.stamp()

            results = self._postprocess(results)

            if args.benchmark:
                self.autolog.times.end(stamp=True)
            self._save_imgs(results, imgs[i:i + args.batch_size])

        logger.info("Finish")

    def _preprocess(self, img):
        return self.cfg.transforms(img)[0]

    def _postprocess(self, results):
        if self.args.with_argmax:
            results = np.argmax(results, axis=1)
        return results

    def _save_imgs(self, results, imgs):
        for i in range(results.shape[0]):
            result = get_pseudo_color_map(results[i])
            basename = os.path.basename(imgs[i])
            basename, _ = os.path.splitext(basename)
            basename = f'{basename}.png'
            result.save(os.path.join(self.args.save_dir, basename))