# Example #1
# 0
def build_dynamic_engine(onnx_file_path, engine_file_path, trt_type):
    """Build and serialize a TensorRT engine with a dynamic-shape input.

    Args:
        onnx_file_path: Path to the source ONNX model file.
        engine_file_path: Path where the serialized engine is written.
        trt_type: Precision selector; '_FP16' enables FP16, '_INT8' enables
            INT8 with entropy calibration, anything else builds in FP32.

    Returns:
        None. On parse or build failure the function prints the errors and
        returns early without writing an engine file.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH) as network, trt.OnnxParser(network,
                                                       TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        # Upper bound on the scratch memory any single layer may use (1 GiB).
        config.max_workspace_size = 1 << 30
        builder.max_batch_size = 1  # change this if the final batch dim is not 1
        profile = builder.create_optimization_profile()
        # Static input is the default; for dynamic input see
        # https://blog.csdn.net/qq_36276587/article/details/113175314
        # Enabling dynamic shapes requires rewriting the network post-processing
        # and exporting the ONNX model with dynamic axes, which touches many
        # places. With a static input no extra shape setup is needed; the YOLO
        # layer then returns x directly and post-processing (sigmoid + grid_xy,
        # * anchor_wh, * stride -> img_size, etc.) is done in numpy.
        # NOTE: the first set_shape argument must match the first dynamic_axes
        # key used in torch2onnx.py.
        profile.set_shape(
            'input',  # input tensor name
            (1, 3, 320, 320),  # min shape
            (1, 3, 416, 416),  # opt shape
            (1, 3, 608, 608))  # max shape
        config.add_optimization_profile(profile)
        if trt_type == '_FP16':
            config.set_flag(trt.BuilderFlag.FP16)
        if trt_type == '_INT8':
            config.set_flag(trt.BuilderFlag.INT8)
            # Calibrator needed for INT8 quantization. If warnings like
            # "WARNING: Missing dynamic range for tensor" appear, delete
            # calib_yolov4.bin and run again to regenerate the cache.
            config.int8_calibrator = YOLOEntropyCalibrator(
                '/home/cmv/PycharmProjects/YOLOv4-PyTorch/data/wenyi/test',
                (cfg.h, cfg.w), 'calib_yolov4.bin')
            config.set_calibration_profile(profile)
        print('正在解析ONNX文件 {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                # Return instead of exit(): let the caller decide what to do
                # on failure rather than killing the whole process.
                return
        print('解析完成,正在构建TensorRT Engine {},这大概需要一会时间...'.format(
            engine_file_path))
        engine = builder.build_engine(network, config)
        if engine is None:
            # build_engine() returns None on failure; without this check the
            # serialize() call below would raise AttributeError.
            print('ERROR: Failed to build the TensorRT engine.')
            return
        with open(engine_file_path, "wb") as t:
            t.write(engine.serialize())
        print("TensorRT Engine 构建完成")
# Example #2
# 0
def build_static_engine(onnx_file_path, engine_file_path, trt_type):
    """Build and serialize a TensorRT engine with a static-shape input.

    When converting a static ONNX model the parser may warn:
    "Your ONNX model has been generated with INT64 weights, while TensorRT
    does not natively support INT64. Attempting to cast down to INT32." and
    three "One or more weights outside the range of INT32 was clamped"
    messages. These are caused by any operation on x inside the YOLO layer
    and can be safely ignored.

    Args:
        onnx_file_path: Path to the source ONNX model file.
        engine_file_path: Path where the serialized engine is written.
        trt_type: Precision selector; '_FP16' enables FP16, '_INT8' enables
            INT8 with entropy calibration, anything else builds in FP32.

    Returns:
        None. On parse or build failure the function prints the errors and
        returns early without writing an engine file.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH) as network, trt.OnnxParser(network,
                                                       TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        # The batch size TensorRT optimizes for; at runtime only batches no
        # larger than this value can be used.
        builder.max_batch_size = 1
        # Layer implementations usually need temporary workspace. This caps
        # the workspace available to every layer in the network; if it is too
        # small TensorRT may fail to find an implementation for a layer.
        config.max_workspace_size = 1 << 30
        if trt_type == '_FP16':
            config.set_flag(trt.BuilderFlag.FP16)
        if trt_type == '_INT8':
            config.set_flag(trt.BuilderFlag.INT8)
            # Calibrator needed for INT8 quantization. If warnings like
            # "WARNING: Missing dynamic range for tensor" appear, delete
            # calib_yolov4.bin and run again to regenerate the cache.
            config.int8_calibrator = YOLOEntropyCalibrator(
                '/home/cmv/PycharmProjects/YOLOv4-PyTorch/data/wenyi/test',
                (cfg.h, cfg.w), 'calib_yolov4.bin')
        print('正在解析ONNX文件 {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                # Return instead of exit(): let the caller decide what to do
                # on failure rather than killing the whole process.
                return
        print('解析完成,正在构建TensorRT Engine {},这大概需要一会时间...'.format(
            engine_file_path))
        engine = builder.build_engine(network, config)
        if engine is None:
            # build_engine() returns None on failure; without this check the
            # serialize() call below would raise AttributeError.
            print('ERROR: Failed to build the TensorRT engine.')
            return
        with open(engine_file_path, "wb") as t:
            t.write(engine.serialize())
        print("TensorRT Engine 构建完成")
def build_engine(model_name, category_num, do_int8, dla_core, verbose=False):
    """Build a TensorRT engine from ONNX, supporting both old and new APIs.

    Args:
        model_name: Name of the YOLO model; used to locate the ONNX file,
            derive the input resolution and name the calibration cache.
        category_num: Number of object categories for the yolo_layer plugins.
        do_int8: Build an INT8 engine with entropy calibration when True.
        dla_core: DLA core index to run on, or a negative value for GPU.
        verbose: Enable VERBOSE TensorRT logging when True.

    Returns:
        The built ICudaEngine, or None if loading/parsing the ONNX failed
        (build failures surface as a None engine from the builder as well).

    Raises:
        RuntimeError: If INT8 is requested on a platform without fast INT8
            support, or a DLA core is requested with the pre-7 API.
    """
    net_w, net_h = get_input_wh(model_name)

    print('Loading the ONNX file...')
    onnx_data = load_onnx(model_name)
    if onnx_data is None:
        return None

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    # Compare the parsed major version as an int: the original single-char
    # string compare (trt.__version__[0] < '7') misclassifies TensorRT >= 10
    # ('1' < '7') as the old API.
    trt_major = int(trt.__version__.split('.')[0])
    EXPLICIT_BATCH = [] if trt_major < 7 else \
        [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        if do_int8 and not builder.platform_has_fast_int8:
            raise RuntimeError('INT8 not supported on this platform')
        if not parser.parse(onnx_data):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
        network = set_net_batch(network, MAX_BATCH_SIZE)

        print('Adding yolo_layer plugins...')
        network = add_yolo_plugins(
            network, model_name, category_num, TRT_LOGGER)

        print('Building an engine.  This would take a while...')
        print('(Use "--verbose" or "-v" to enable verbose logging.)')
        if trt_major < 7:  # older API: build_cuda_engine()
            if dla_core >= 0:
                raise RuntimeError('DLA core not supported by old API')
            builder.max_batch_size = MAX_BATCH_SIZE
            builder.max_workspace_size = 1 << 30
            builder.fp16_mode = True  # alternative: builder.platform_has_fast_fp16
            if do_int8:
                from calibrator import YOLOEntropyCalibrator
                builder.int8_mode = True
                builder.int8_calibrator = YOLOEntropyCalibrator(
                    'calib_images', (net_h, net_w), 'calib_%s.bin' % model_name)
            engine = builder.build_cuda_engine(network)
        else:  # new API: build_engine() with builder config
            builder.max_batch_size = MAX_BATCH_SIZE
            config = builder.create_builder_config()
            config.max_workspace_size = 1 << 30
            config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
            config.set_flag(trt.BuilderFlag.FP16)
            profile = builder.create_optimization_profile()
            # min == opt == max: the engine is effectively static-shape but an
            # explicit-batch network still requires an optimization profile.
            profile.set_shape(
                '000_net',                          # input tensor name
                (MAX_BATCH_SIZE, 3, net_h, net_w),  # min shape
                (MAX_BATCH_SIZE, 3, net_h, net_w),  # opt shape
                (MAX_BATCH_SIZE, 3, net_h, net_w))  # max shape
            config.add_optimization_profile(profile)
            if do_int8:
                from calibrator import YOLOEntropyCalibrator
                config.set_flag(trt.BuilderFlag.INT8)
                config.int8_calibrator = YOLOEntropyCalibrator(
                    'calib_images', (net_h, net_w),
                    'calib_%s.bin' % model_name)
                config.set_calibration_profile(profile)
            if dla_core >= 0:
                config.default_device_type = trt.DeviceType.DLA
                config.DLA_core = dla_core
                # STRICT_TYPES plus GPU_FALLBACK: prefer DLA, fall back to GPU
                # for layers DLA cannot run.
                config.set_flag(trt.BuilderFlag.STRICT_TYPES)
                print('Using DLA core %d.' % dla_core)
            engine = builder.build_engine(network, config)

        if engine is not None:
            print('Completed creating engine.')
        return engine