Example No. 1
 def build_engine(max_batch_size, save_engine):
     """Takes an ONNX file and creates a TensorRT engine to run inference with"""
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(1) as network, \
             builder.create_builder_config() as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
         
         # parse onnx model file
         if not os.path.exists(onnx_file_path):
             quit(f'[Error] ONNX file {onnx_file_path} not found')
         print(f'[INFO] Loading ONNX file from path {onnx_file_path}...')
         with open(onnx_file_path, 'rb') as model:
             print('[INFO] Beginning ONNX file parsing')
             parser.parse(model.read())
             assert network.num_layers > 0, '[Error] Failed to parse ONNX model. \
                         Please check if the ONNX model is compatible '
         print('[INFO] Completed parsing of ONNX file')
         print(f'[INFO] Building an engine from file {onnx_file_path}; this may take a while...')        
         
         # build trt engine (legacy pre-TRT8 builder API)
         builder.max_batch_size = max_batch_size
         builder.max_workspace_size = 2 << 30  # 2GB
         builder.fp16_mode = fp16_mode
         if int8_mode:
             builder.int8_mode = int8_mode
             assert calibration_stream, '[Error] a calibration_stream should be provided for int8 mode'
             # build_cuda_engine() ignores the builder config, so the INT8
             # calibrator must be set on the builder, not on config
             builder.int8_calibrator = Calibrator(calibration_stream, calibration_table_path)
             print('[INFO] Int8 mode enabled')
         engine = builder.build_cuda_engine(network)
         if engine is None:
             print('[INFO] Failed to create the engine')
             return None   
         print("[INFO] Completed creating the engine")
         if save_engine:
             with open(engine_file_path, "wb") as f:
                 f.write(engine.serialize())
         return engine
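
Most of the snippets on this page target the TensorRT 6/7 Python API, where max_workspace_size, fp16_mode, int8_mode and build_cuda_engine() live on the builder; TensorRT 8 removed all of these in favor of IBuilderConfig. As a rough sketch of the config-based equivalent, assuming TensorRT 8.4 or newer (the file-path arguments are placeholders):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine_trt8(onnx_file_path, engine_file_path):
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, TRT_LOGGER)
    with open(onnx_file_path, 'rb') as model:
        if not parser.parse(model.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            return None
    config = builder.create_builder_config()
    # replaces builder.max_workspace_size / config.max_workspace_size
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)
    config.set_flag(trt.BuilderFlag.FP16)  # replaces builder.fp16_mode = True
    # build_cuda_engine()/build_engine() are gone; build a serialized plan
    plan = builder.build_serialized_network(network, config)
    if plan is None:
        return None
    with open(engine_file_path, 'wb') as f:
        f.write(plan)
    return plan
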
Example No. 2
def build_static_engine(onnx_file_path, engine_file_path, trt_type):
    """
    转换静态ONNX模型时会出现Your ONNX model has been generated with INT64 weights,
     while TensorRT does not natively support INT64. Attempting to cast down to INT32.
     以及三个 One or more weights outside the range of INT32 was clamped
     这是由于在YOLO_Layer对x进行任何操作都会导致的.可以忽略
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH) as network, trt.OnnxParser(network,
                                                       TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        # The batch size TensorRT will optimize for. At runtime, only batches no larger than this can be used.
        builder.max_batch_size = 1
        # Layer algorithms often need temporary workspace. This caps the workspace available to every layer
        # in the network; if the allocation is too small, TensorRT may fail to find an implementation for a layer.
        config.max_workspace_size = 1 << 30
        if trt_type == '_FP16':
            config.set_flag(trt.BuilderFlag.FP16)
        if trt_type == '_INT8':
            config.set_flag(trt.BuilderFlag.INT8)
            # Calibrator required for INT8 quantization. If warnings like "WARNING: Missing dynamic
            # range for tensor" appear, delete calib_yolov4.bin and run again.
            config.int8_calibrator = YOLOEntropyCalibrator(
                '/home/cmv/PycharmProjects/YOLOv4-PyTorch/data/wenyi/test',
                (cfg.h, cfg.w), 'calib_yolov4.bin')
        print('Parsing ONNX file {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                exit()
        print('Parsing complete; building TensorRT engine {}, this may take a while...'.format(
            engine_file_path))
        engine = builder.build_engine(network, config)
        if engine is None:
            print('ERROR: Failed to build the engine.')
            return
        with open(engine_file_path, "wb") as t:
            t.write(engine.serialize())
        print("TensorRT engine built successfully")
Example No. 3
    def build_engine(max_batch_size, save_engine):
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(1) as network,\
                trt.OnnxParser(network, TRT_LOGGER) as parser:

            # parse onnx model file
            if not os.path.exists(onnx_file_path):
                quit('ONNX file {} not found'.format(onnx_file_path))
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
                assert network.num_layers > 0, 'Failed to parse ONNX model. \
                            Please check if the ONNX model is compatible '

            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))

            # build trt engine
            builder.max_batch_size = max_batch_size
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.fp16_mode = fp16_mode
            if int8_mode:
                builder.int8_mode = int8_mode
                assert calibration_stream, 'Error: a calibration_stream should be provided for int8 mode'
                builder.int8_calibrator = Calibrator(calibration_stream, calibration_table_path)
                print('Int8 mode enabled')
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('Failed to create the engine')
                return None
            print("Completed creating the engine")
            if save_engine:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine
Example No. 4
def get_engine(max_batch_size=1, onnx_file_path="", engine_file_path="", fp16_mode=False, save_engine=False):
    '''
    Build an engine by loading an ONNX file.
    :param onnx_file_path: path to the ONNX file
    :return: engine
    '''
    # logger
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    # If a serialized engine already exists, deserialize it straight into a cudaEngine
    if os.path.exists(engine_file_path):
        print("Reading engine from file: {}".format(engine_file_path))
        with open(engine_file_path, 'rb') as f, \
            trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())  # deserialize

    # Parsing and building must happen inside this block, while the builder,
    # network and parser are still alive
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.fp16_mode = fp16_mode
        builder.max_batch_size = max_batch_size
        builder.max_workspace_size = 1 << 20

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parser.parse(model.read())
            print('Completed parsing of ONNX file')

        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
        engine = builder.build_cuda_engine(network)
        print("Completed creating Engine")

        if save_engine:  # save the engine so it can be deserialized directly next time
            with open(engine_file_path, 'wb') as f:
                f.write(engine.serialize())  # serialize
        return engine
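
Once get_engine returns an engine, inference follows the same pattern in every example: allocate a device buffer per binding, copy the input in, execute, and copy the output back (several examples wrap this in common.allocate_buffers). Below is a minimal single-input, single-output sketch with pycuda, assuming an explicit-batch engine with static shapes:

import numpy as np
import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
import pycuda.driver as cuda
import tensorrt as trt

def infer(engine, input_array):
    with engine.create_execution_context() as context:
        bindings = []
        host_output, device_output = None, None
        for i in range(engine.num_bindings):
            dtype = trt.nptype(engine.get_binding_dtype(i))
            size = trt.volume(engine.get_binding_shape(i))
            device_mem = cuda.mem_alloc(size * np.dtype(dtype).itemsize)
            bindings.append(int(device_mem))
            if engine.binding_is_input(i):
                cuda.memcpy_htod(device_mem, np.ascontiguousarray(input_array, dtype=dtype))
            else:
                host_output = np.empty(size, dtype=dtype)
                device_output = device_mem
        context.execute_v2(bindings)  # synchronous, explicit-batch execution
        cuda.memcpy_dtoh(host_output, device_output)
        return host_output
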
Example No. 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # initialize
    if int(trt.__version__.split('.')[0]) < 7:
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(gs.DynamicGraph(spec['input_pb']), model, spec)
    _ = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=spec['tmp_uff'],
                            text=True,
                            debug_mode=DEBUG_UFF)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = False

        parser.register_input('Input', spec["input_dims"],
                              spec["input_format"])
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)
Example No. 6
    def _init_engine(self):
        if not self.force_rebuild and self.engine_path.exists():
            with trt.Runtime(self._logger) as runtime:
                with open(self.engine_path, "rb") as f:
                    engine_data = f.read()
                self._engine = runtime.deserialize_cuda_engine(engine_data)
                self._context = self._engine.create_execution_context()
            return

        with ExitStack() as stack:
            explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
            builder = stack.enter_context(trt.Builder(self._logger))
            network = stack.enter_context(builder.create_network(explicit_batch))
            parser = stack.enter_context(trt.OnnxParser(network, self._logger))

            builder.max_workspace_size = self.workspace_size << 30
            builder.max_batch_size = self.max_batch_size
            builder.fp16_mode = self.fp16_mode
            builder.strict_type_constraints = True

            with open(self.onnx_path, "rb") as model:
                success = parser.parse(model.read())
                if not success:
                    err = parser.get_error(0)
                    msg = "while parsing node number %i:\n" % err.node()
                    msg += "%s:%i In function %s:\n[%i] %s" % (
                        err.file(),
                        err.line(),
                        err.func(),
                        err.code(),
                        err.desc(),
                    )
                    raise RuntimeError(msg)

            self._engine = builder.build_cuda_engine(network)
            self._context = self._engine.create_execution_context()
            with open(self.engine_path, "wb") as f:
                f.write(self._engine.serialize())
Example No. 7
 def build_engine():
     """Takes an ONNX file and creates a TensorRT engine to run inference with"""
     # create the builder, network, config and parser instances
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
             common.EXPLICIT_BATCH
     ) as network, builder.create_builder_config(
     ) as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
         # set the build configuration
         config.max_workspace_size = 1 << 28  # 256MiB
         builder.max_batch_size = 1
         # Parse model file
         if not os.path.exists(onnx_file_path):
             print(
                 'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                 .format(onnx_file_path))
             exit(0)
         print('Loading ONNX file from path {}...'.format(onnx_file_path))
         # read the ONNX model file and start parsing
         with open(onnx_file_path, 'rb') as model:
             print('Beginning ONNX file parsing')
             if not parser.parse(model.read()):
                 print('ERROR: Failed to parse the ONNX file.')
                 for error in range(parser.num_errors):
                     print(parser.get_error(error))
                 return None
         # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
         # set the input dimensions
         network.get_input(0).shape = [1, 3, 608, 608]
         print('Completed parsing of ONNX file')
         print('Building an engine from file {}; this may take a while...'.
               format(onnx_file_path))
         # build the engine
         engine = builder.build_engine(network, config)
         if engine is None:
             print('ERROR: Failed to build the engine.')
             return None
         print("Completed creating Engine")
         # save the engine to file
         with open(engine_file_path, "wb") as f:
             f.write(engine.serialize())
         return engine
Example No. 8
    def __init__(
        self,
        module: torch.fx.GraphModule,
        input_specs: List[InputTensorSpec],
        explicit_batch_dimension: bool = False,
        explicit_precision: bool = False,
        logger_level=trt.Logger.WARNING,
    ):
        super().__init__(module)

        self.logger = trt.Logger(logger_level)
        self.builder = trt.Builder(self.logger)

        flag = 0
        if explicit_batch_dimension:
            EXPLICIT_BATCH = 1 << (int)(
                trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
            flag |= EXPLICIT_BATCH

        if explicit_precision:
            EXPLICIT_PRECISION = 1 << (int)(
                trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)
            flag |= EXPLICIT_PRECISION
        self.network = self.builder.create_network(flag)

        missing_ops = self.validate_conversion()
        if missing_ops:
            warnings.warn(
                "Interpretation will fail due to missing operations \n" +
                "\n".join(f"{i}" for i in missing_ops))

        self.optimization_profiles: Optional[List] = None
        self.input_specs = input_specs
        self.input_specs_iter = 0
        self.validate_input_specs()
        self._cur_node_name: Optional[str] = None
        self._input_names: List[str] = []
        self._output_names: List[str] = []
Example No. 9
def torch2trt(module,
              inputs,
              input_names=None,
              output_names=None,
              log_level=trt.Logger.ERROR,
              max_batch_size=1,
              fp16_mode=False,
              max_workspace_size=0,
              strict_type_constraints=False):

    # copy inputs to avoid modifications to source data
    inputs = [tensor.clone() for tensor in inputs]

    with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\
            builder.create_network() as network, ConversionContext(network) as ctx:

        if isinstance(inputs, list):
            inputs = tuple(inputs)
        if not isinstance(inputs, tuple):
            inputs = (inputs, )
        ctx.add_inputs(inputs, input_names)

        outputs = module(*inputs)

        if not isinstance(outputs, tuple):
            outputs = (outputs, )
        ctx.mark_outputs(outputs, output_names)

        final_shapes = [tuple(output.shape)[1:] for output in list(outputs)]

        builder.max_workspace_size = max_workspace_size
        builder.fp16_mode = fp16_mode
        builder.max_batch_size = max_batch_size
        builder.strict_type_constraints = strict_type_constraints

        engine = builder.build_cuda_engine(network)

    return TRTModule(engine, ctx.input_names, ctx.output_names, final_shapes)
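
A usage sketch for the converter above; the ResNet-18 model and input size are placeholders:

import torch
import torchvision

model = torchvision.models.resnet18(pretrained=True).eval().cuda()
x = torch.randn(1, 3, 224, 224).cuda()
trt_model = torch2trt(model, [x], fp16_mode=True, max_workspace_size=1 << 28)
with torch.no_grad():
    y_trt = trt_model(x)
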
Example No. 10
def build_engine_onnx(model_file, calibrator=None):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.max_batch_size = 8
        precision = "fp32"
        if calibrator:
            builder.int8_mode = True
            builder.int8_calibrator = calibrator
            precision = "int8"
        else:
            builder.fp16_mode = True
            precision = "fp16"
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        engine = builder.build_cuda_engine(network)
        serialized = engine.serialize()
        with open(
                "/work/models/flowers-152-b{}-{}.engine".format(
                    builder.max_batch_size, precision), "wb") as file:
            file.write(serialized)
        return engine
Example No. 11
 def build_engine():
     """Takes an ONNX file and creates a TensorRT engine to run inference with"""
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
         builder.max_workspace_size = 1 << 30  # 1GB
         builder.max_batch_size = 1
         # builder.fp16_mode = True
         # Parse model file
         if not os.path.exists(onnx_file_path):
             print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(
                 onnx_file_path))
             exit(0)
         print('Loading ONNX file from path {}...'.format(onnx_file_path))
         with open(onnx_file_path, 'rb') as model:
             print('Beginning ONNX file parsing')
             parser.parse(model.read())
         print('Completed parsing of ONNX file')
         print('Building an engine from file {}; this may take a while...'.format(
             onnx_file_path))
         engine = builder.build_cuda_engine(network)
         print("Completed creating Engine")
         with open(engine_file_path, "wb") as f:
             f.write(engine.serialize())
         return engine
Example No. 12
def build_engine(onnx_path):
    """
   This is the function to create the TensorRT engine
   Args:
      onnx_path : Path to onnx_file. 
      shape : Shape of the input of the ONNX file. 
  """

    EX_B = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EX_B) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = (256 << 20)
        builder.fp16_mode = True
        builder.max_batch_size = 1
        with open(onnx_path, 'rb') as model:
            parser.parse(model.read())
        ll = network.get_layer(network.num_layers - 1)
        network.mark_output(ll.get_output(0))
        shape = list(network.get_input(0).shape)
        shape[0] = 1
        network.get_input(0).shape = shape
        engine = builder.build_cuda_engine(network)
        return engine
Example No. 13
    def __init__(self, init_dict):
        a = torch.cuda.FloatTensor()  # PyTorch must first claim some CUDA memory
        builder = trt.Builder(TRT_LOGGER)
        builder.fp16_mode = True
        builder.strict_type_constraints = True
        self.trt_file = init_dict['trt']
        self.use_cuda = init_dict['use_cuda']
        self.inp_dim = 608
        self.output_shapes = [(1, 24, 19, 19), (1, 24, 38, 38),
                              (1, 24, 76, 76)]  #yolo3-608
        self.yolo_anchors = [[(84, 98), (132, 184), (216, 309)],
                             [(28, 30), (30, 117), (48, 55)],
                             [(5, 8), (13, 18), (14, 49)]]
        self.num_classes = 3
        # self.output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] #yolo3-608
        # self.yolo_anchors = [[(116, 90), (156, 198), (373, 326)],
        #                      [(30, 61),  (62, 45),   (59, 119)],
        #                      [(10, 13),  (16, 30),   (33, 23)]]

        self.engine = get_engine(self.trt_file)
        self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(
            self.engine)
        self.context = self.engine.create_execution_context()
Example No. 14
def build_engine(shape_data, shape_indices, shape_updates):
    plugin_creator = get_plugin_creator('ScatterND')
    if plugin_creator is None:
        print('scatterND plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    network = builder.create_network(
        flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

    tensor_data = network.add_input('data', trt.DataType.FLOAT, shape_data)
    tensor_indices = network.add_input('indices', trt.DataType.INT32,
                                       shape_indices)
    tensor_updates = network.add_input('updates', trt.DataType.FLOAT,
                                       shape_updates)

    layer = network.add_plugin_v2(
        [tensor_data, tensor_indices, tensor_updates],
        plugin_creator.create_plugin('ScatterND',
                                     trt.PluginFieldCollection([])))
    network.mark_output(layer.get_output(0))

    return builder.build_engine(network, builder.create_builder_config())
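
get_plugin_creator is not defined in this snippet; conventionally it is a lookup over TensorRT's global plugin registry. Below is a minimal sketch, reusing the module-level logger from the example and assuming the ScatterND plugin library has already been loaded (e.g. via ctypes.CDLL):

import tensorrt as trt

def get_plugin_creator(plugin_name):
    # Illustrative helper: search the registry for a creator by name.
    trt.init_libnvinfer_plugins(logger, '')
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name == plugin_name:
            return creator
    return None
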
Example No. 15
def buildEngine(logger, outDatatype, nTopK, maxTopK):
    builder = trt.Builder(logger)
    network = builder.create_network(1)
    profile = builder.create_optimization_profile()
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 30
    config.flags = int(outDatatype == np.float16)

    inputT0 = network.add_input('inputT0', trt.float32, [-1, -1, -1, -1])
    profile.set_shape(inputT0.name, [1, 1, 1, 1], [36, 10, 5, 30], [72, 20, 10, 70])
    inputT1 = network.add_input('inputT1', trt.int32, [-1])
    profile.set_shape(inputT1.name, [1], [36], [72])
    inputT2 = network.add_input('inputT2', trt.int32, [-1])
    profile.set_shape(inputT2.name, [1], [36], [72])
    inputT3 = network.add_input('inputT3', trt.int32, [-1])
    profile.set_shape(inputT3.name, [1], [2], [4])

    config.add_optimization_profile(profile)

    pluginLayer = network.add_plugin_v2([inputT0, inputT1, inputT2, inputT3], getTopKAveragePlugin(nTopK, maxTopK))

    network.mark_output(pluginLayer.get_output(0))
    return builder.build_engine(network, config)
Example No. 16
def parse_onnx(onnx_file_path, explicit_batch=None, max_batch_size=1):
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    builder = trt.Builder(TRT_LOGGER)

    if explicit_batch is not None:
        EXPLICIT_BATCH = explicit_batch << int(
            trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        network = builder.create_network(EXPLICIT_BATCH)
    else:
        network = builder.create_network()
        builder.max_batch_size = max_batch_size

    parser = trt.OnnxParser(network, TRT_LOGGER)
    logger = logging.getLogger('onnx_parser')

    with open(onnx_file_path, 'rb') as model:
        if not parser.parse(model.read()):
            for error in range(parser.num_errors):
                logger.debug(parser.get_error(error))

    logger.debug(f'NETWORK PARSE: {len(network)}')
    return builder, network, TRT_LOGGER
Example No. 17
def trt_export(onnx_fn: str, trt_fn: str):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(onnx_fn, 'rb') as model:
            print('Beginning ONNX file parsing')
            parser.parse(model.read())
        print('Completed parsing of ONNX file')
        # Allow TRT to use up to 1 GB of memory
        builder.max_workspace_size = 1 << 30
        # allow one image per batch (TODO: increase this?)
        builder.max_batch_size = 1
        # use FP16 mode
        builder.fp16_mode = True
        # mark the output layer (https://github.com/NVIDIA/TensorRT/issues/183)
        # print("Number of layers", network.num_layers)
        # last_layer = network.get_layer(network.num_layers - 1)
        # network.mark_output(last_layer.get_output(0))
        print('Building an engine...')
        engine = builder.build_cuda_engine(network)
        print("Completed creating Engine")
        # serialize the engine to the target file
        with open(trt_fn, 'wb') as f:
            f.write(engine.serialize())
        print("Engine saved!")
Example No. 18
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, trt.OnnxParser(
                network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.fp16_mode = True
        builder.max_batch_size = 1  # always 1 for explicit batch
        config = builder.create_builder_config()
        # need to be set along with fp16_mode if config is specified.
        config.set_flag(trt.BuilderFlag.FP16)
        profile = builder.create_optimization_profile()
        profile.set_shape('input', (1, 1, 4, 4), (2, 1, 4, 4), (4, 1, 4, 4))
        profile.set_shape('grid', (1, 4, 4, 2), (2, 4, 4, 2), (4, 4, 4, 2))
        config.add_optimization_profile(profile)

        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        return builder.build_engine(network, config)
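
Because this engine is built with an optimization profile, its inputs have dynamic dimensions, and the execution context must be given concrete shapes (within the profile's min/max range) before running. A short usage sketch, with the model file name as a placeholder:

engine = build_engine_onnx('grid_sampler.onnx')
context = engine.create_execution_context()
# Choose concrete shapes inside the profile range declared at build time.
context.set_binding_shape(engine.get_binding_index('input'), (2, 1, 4, 4))
context.set_binding_shape(engine.get_binding_index('grid'), (2, 4, 4, 2))
assert context.all_binding_shapes_specified
# Buffers can now be sized from context.get_binding_shape(i) and the
# engine run with context.execute_v2(bindings).
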
Example No. 19
def build_engine_onnx(model_file, verbose=False):
    if verbose:
        TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    else:
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)

    network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network_flags = network_flags | (
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION))

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            flags=network_flags) as network, trt.OnnxParser(
                network, TRT_LOGGER) as parser:
        with open(model_file, "rb") as model:
            if not parser.parse(model.read()):
                print("ERROR: Failed to parse the ONNX file.")
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 30
        config.flags = config.flags | 1 << int(trt.BuilderFlag.INT8)
        return builder.build_engine(network, config)
Example No. 20
    def build_engine(cls, trt_logger, batch_size, calib_dataset=Path.home() / 'VOCdevkit' / 'VOC2007' / 'JPEGImages'):
        import graphsurgeon as gs
        import uff
        from . import calibrator

        # convert TensorFlow graph into UFF
        dynamic_graph = gs.DynamicGraph(str(cls.MODEL_PATH))
        dynamic_graph = cls.add_plugin(dynamic_graph)
        uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(), [cls.OUTPUT_NAME], quiet=True)

        with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
            builder.max_workspace_size = 1 << 30
            builder.max_batch_size = batch_size
            LOGGER.info('Building engine with batch size: %d', batch_size)
            LOGGER.info('This may take a while...')

            if builder.platform_has_fast_fp16:
                builder.fp16_mode = True
            if builder.platform_has_fast_int8:
                builder.int8_mode = True
                builder.int8_calibrator = calibrator.SSDEntropyCalibrator(cls.INPUT_SHAPE,
                                                                          data_dir=calib_dataset,
                                                                          cache_file=Path(__file__).parent /
                                                                          f'{cls.__name__}_calib_cache')

            parser.register_input('Input', cls.INPUT_SHAPE)
            parser.register_output('MarkOutput_0')
            parser.parse_buffer(uff_model, network)
            engine = builder.build_cuda_engine(network)
            if engine is None:
                LOGGER.critical('Failed to build engine')
                return None

            LOGGER.info("Completed creating engine")
            with open(cls.ENGINE_PATH, 'wb') as engine_file:
                engine_file.write(engine.serialize())
            return engine
Example No. 21
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                *common.EXPLICIT_BATCH) as network, trt.OnnxParser(
                    network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 31
            builder.max_batch_size = batch_size
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print(
                    'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                    .format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
            network.get_input(0).shape = [1, 3, 608, 608]
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            config = builder.create_builder_config()
            profile = builder.create_optimization_profile()
            profile.set_shape('input', (1, 3, 128, 128), (1, 3, 512, 512),
                              (1, 3, 1024, 1024))
            config.add_optimization_profile(profile)

            engine = builder.build_engine(network, config)  # build_cuda_engine() would ignore config and the profile
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine
Example No. 22
    def load(self, model_path):
        uff_model = Path(model_path)
        metadata_path = Path('%s/%s.metadata' %
                             (uff_model.parent.as_posix(), uff_model.stem))
        with open(metadata_path.as_posix(), 'r') as metadata, trt.Builder(
                self.logger) as builder, builder.create_network(
                ) as network, trt.UffParser() as parser:

            # Without this max_workspace_size setting, I was getting:
            # Building CUDA Engine
            # [TensorRT] ERROR: Internal error: could not find any implementation for node 2-layer MLP, try increasing the workspace size with IBuilder::setMaxWorkspaceSize()
            # [TensorRT] ERROR: ../builder/tacticOptimizer.cpp (1230) - OutOfMemory Error in computeCosts: 0
            builder.max_workspace_size = 1 << 20  # 1 MiB; common.GiB(1) would allow 1 GiB
            builder.max_batch_size = 1

            metadata = json.loads(metadata.read())
            # Configure inputs and outputs
            print('Configuring I/O')
            input_names = metadata['input_names']
            output_names = metadata['output_names']
            for name in input_names:
                parser.register_input(
                    name,
                    (self.cfg.TARGET_D, self.cfg.TARGET_H, self.cfg.TARGET_W))

            for name in output_names:
                parser.register_output(name)
            # Parse network
            print('Parsing TensorRT Network')
            parser.parse(uff_model.as_posix(), network)
            print('Building CUDA Engine')
            self.engine = builder.build_cuda_engine(network)
            # Allocate buffers
            print('Allocating Buffers')
            self.inputs, self.outputs, self.bindings, self.stream = TensorRTLinear.allocate_buffers(
                self.engine)
            print('Ready')
Example No. 23
    def build_engine(max_batch_size, save_engine):
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(1) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:

            builder.max_workspace_size = 1 << 30  # Your workspace size
            builder.max_batch_size = max_batch_size
            # pdb.set_trace()
            builder.fp16_mode = fp16_mode  # Default: False
            builder.int8_mode = int8_mode  # Default: False
            if int8_mode:
                # To be updated
                raise NotImplementedError

            # Parse model file
            if not os.path.exists(onnx_file_path):
                quit('[error] ONNX file {} not found'.format(onnx_file_path))

            print('[info] Loading ONNX file from path {}...'.format(
                onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('[info] Beginning ONNX file parsing')
                res = parser.parse(model.read())
                print(res)
            print('[info] Completed parsing of ONNX file')
            print('[info] Building an engine from file {}'.format(
                onnx_file_path))
            print('this may take a while...')

            engine = builder.build_cuda_engine(network)
            print("[info] Completed creating Engine")

            if save_engine:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine
Example No. 24
    def build_engine(max_batch_size, save_engine):
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        # create_network() without parameters will make parser.parse() return False
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(flags=1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:

            builder.max_workspace_size = 1 << 30  # Your workspace size
            builder.max_batch_size = max_batch_size
            # pdb.set_trace()
            builder.fp16_mode = fp16_mode  # Default: False
            builder.int8_mode = int8_mode  # Default: False
            if int8_mode:
                # To be updated
                raise NotImplementedError

            # Parse model file
            if not os.path.exists(onnx_file_path):
                quit('ONNX file {} not found'.format(onnx_file_path))

            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            # pdb.set_trace()
            # network.mark_output(network.get_layer(network.num_layers-1).get_output(0)) # Riz
            # network.mark_output(network.get_layer(network.num_layers-1).get_output(1)) # Riz

            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")

            if save_engine:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine
Example No. 25
def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1,
        fp16_mode=False, max_workspace_size=0, strict_type_constraints=False, keep_network=True):

    # copy inputs to avoid modifications to source data
    inputs = [tensor.clone() for tensor in inputs]
    
    logger = trt.Logger(log_level)
    builder = trt.Builder(logger)
    network = builder.create_network()
    
    with ConversionContext(network) as ctx:

        if isinstance(inputs, list):
            inputs = tuple(inputs)
        if not isinstance(inputs, tuple):
            inputs = (inputs, )
        ctx.add_inputs(inputs, input_names)

        outputs = module(*inputs)

        if not isinstance(outputs, tuple) and not isinstance(outputs, list):
            outputs = (outputs, )
        ctx.mark_outputs(outputs, output_names)

        builder.max_workspace_size = max_workspace_size
        builder.fp16_mode = fp16_mode
        builder.max_batch_size = max_batch_size
        builder.strict_type_constraints = strict_type_constraints

        engine = builder.build_cuda_engine(network)
    
        module_trt = TRTModule(engine, ctx.input_names, ctx.output_names)
        
        if keep_network:
            module_trt.network = network
            
    return module_trt
Example No. 26
def build_engine(onnx_file_path, category_num=80, verbose=False):
    """Build a TensorRT engine from an ONNX file."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            *EXPLICIT_BATCH) as network, trt.OnnxParser(network,
                                                        TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        #builder.strict_type_constraints = True

        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if int(trt.__version__.split('.')[0]) >= 7:
            # The actual yolo*.onnx is generated with batch size 64.
            # Reshape input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape

        print('Adding yolo_layer plugins...')
        # model_name = onnx_file_path[:-5]
        # network = add_yolo_plugins(
        #     network, model_name, category_num, TRT_LOGGER)

        print('Building an engine.  This may take a while...')
        print('(Use "--verbose" to enable verbose logging.)')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine.')
        return engine
Example No. 27
def build_int8_engine(model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH) as network, builder.create_builder_config(
            ) as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        config.max_workspace_size = (1 << 30)  # 1GiB
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calib
        if not os.path.exists(model_file):
            print('ONNX file {} not found, please generate it.'.format(
                model_file))
            exit(0)
        print('Loading ONNX file from path {}...'.format(model_file))
        with open(model_file, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
        network.get_input(0).shape = [1, 3, 448, 512]
        print('Completed parsing of ONNX file')
        print(
            'Building an engine from file {}; this may take a while...'.format(
                model_file))
        # Build engine and do int8 calibration.
        engine = builder.build_engine(network, config)
        if engine is None:
            print('Failed to create the engine')
            return None
        print("Completed creating the engine")
        return engine
Example No. 28
 def build_tensorrt(self, net, torch_inputs):
     self.graph_pth = torch2trt.GraphModule(
         net, torch_inputs, param_exclude=self.param_exclude)
     self.output_names = []
     with trt.Builder(
             self.logger) as builder, builder.create_network() as trt_net:
         builder.max_workspace_size = self.workspace
         builder.max_batch_size = self.max_batchsize
         builder.refittable = True
         with torch2trt.trt_network(trt_net):
             inputs = []
             for i, arg in enumerate(torch_inputs):
                 inp = trt_net.add_input(name="input{}".format(i),
                                         shape=arg.shape[1:],
                                         dtype=trt.float32)
                 inputs.append(inp)
             outputs = self.graph_pth(*inputs, verbose=self.verbose)
         self.refit_weight_dict = self.graph_pth.graph.refit_weight_dict
         if not isinstance(outputs, (list, tuple)):
             outputs = [outputs]
         for i, out in enumerate(outputs):
             name = "output{}".format(i)
             out.name = name
             self.output_names.append(name)
             trt_net.mark_output(tensor=out)
         self.builder = builder
         self.engine = builder.build_cuda_engine(trt_net)
         self.ctx = self.engine.create_execution_context()
         self.ctx = torch2trt.TorchInferenceContext(self.ctx)
     # get output shapes
     outputs = self.graph_pth(*torch_inputs)
     if not isinstance(outputs, (list, tuple)):
         outputs = [outputs]
     self.output_shapes = {}
     for n, v in zip(self.output_names, outputs):
         self.output_shapes[n] = v.shape[1:]
Example No. 29
def build_engine(onnx_file_path, width, height,
                 do_int8=False, dla_core=-1, verbose=False):
    """Build a TensorRT engine from ONNX using the older API."""
    onnx_data = load_onnx(onnx_file_path)

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    EXPLICIT_BATCH = [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        if do_int8 and not builder.platform_has_fast_int8:
            raise RuntimeError('INT8 not supported on this platform')
        if not parser.parse(onnx_data):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
        network = set_net_batch(network, BATCH_SIZE)

        builder.max_batch_size = BATCH_SIZE
        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 30
        config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
        config.set_flag(trt.BuilderFlag.FP16)
        profile = builder.create_optimization_profile()
        profile.set_shape(
            'Input',                         # input tensor name
            (BATCH_SIZE, 3, height, width),  # min shape
            (BATCH_SIZE, 3, height, width),  # opt shape
            (BATCH_SIZE, 3, height, width))  # max shape
        config.add_optimization_profile(profile)
        if do_int8:
            raise RuntimeError('INT8 not implemented yet')
        if dla_core >= 0:
            raise RuntimeError('DLA_core not implemented yet')
        engine = builder.build_engine(network, config)

        return engine
Example No. 30
def build_engine(model_file, shapes, max_ws=512*1024*1024, fp16=False):
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    builder.fp16_mode = fp16

    config = builder.create_builder_config()
    config.max_workspace_size = max_ws
    if fp16:
        config.flags |= 1 << int(trt.BuilderFlag.FP16)
    profile = builder.create_optimization_profile()
    for s in shapes:
        profile.set_shape(s['name'], min=s['min'], opt=s['opt'], max=s['max'])
    config.add_optimization_profile(profile)
    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(model_file, 'rb') as model:
            parsed = parser.parse(model.read())
            for i in range(parser.num_errors):
                print("TensorRT ONNX parser error:", parser.get_error(i))
            if not parsed:
                return None
            engine = builder.build_engine(network, config=config)

            return engine
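
The shapes argument is a list of dicts, one per dynamic input, giving the tensor name and its min/opt/max profile shapes. A usage sketch (tensor name and dimensions are illustrative):

shapes = [{'name': 'input',
           'min': (1, 3, 224, 224),
           'opt': (8, 3, 224, 224),
           'max': (16, 3, 224, 224)}]
engine = build_engine('model.onnx', shapes, fp16=True)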