Example No. 1
def get_engine2(engine_file_path=""):
    if os.path.exists(engine_file_path):
        with open(engine_file_path, 'rb') as f, trt.Runtime(logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        print("building engine...")
        with trt.Builder(logger) as builder, builder.create_network(
        ) as network, trt.CaffeParser() as parser:
            builder.max_batch_size = 1
            builder.max_workspace_size = (256 << 20)
            builder.fp16_mode = False
            builder.strict_type_constraints = True

            if not os.path.exists(MODEL_DIR + 'fcn8s.prototxt'):
                print("There is no prototxt at: %s" %
                      (MODEL_DIR + 'fcn8s.prototxt'))
                exit(0)
            parser.parse(deploy=MODEL_DIR + 'fcn8s.prototxt',
                         model=MODEL_DIR + 'fcn8s.caffemodel',
                         network=network,
                         dtype=trt.float32)
            network.mark_output(
                network.get_layer(network.num_layers - 1).get_output(0))
            engine = builder.build_cuda_engine(network)
        return engine
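
The branch above returns the freshly built engine but never writes it back to engine_file_path, so the deserialization fast path at the top of the function is never taken on later runs. A minimal caching sketch, assuming engine_file_path points to a writable location (the wrapper name below is hypothetical):

# Hypothetical caching wrapper around get_engine2 (not part of the original code).
def get_cached_engine(engine_file_path):
    engine = get_engine2(engine_file_path)
    if engine is not None and not os.path.exists(engine_file_path):
        # Serialize the freshly built engine so the next call can deserialize it.
        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
    return engine
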
def build_engine(deploy_file, model_file, verbose=False):
    """Takes an ONNX file and creates a TensorRT engine."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            *EXPLICIT_BATCH) as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        datatype = trt.float32
        #builder.strict_type_constraints = True

        # Parse the Caffe deploy/model files to populate the network.
        if not parser.parse(deploy=deploy_file,
                            model=model_file,
                            network=network,
                            dtype=datatype):
            print('ERROR: Failed to parse the Caffe model.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
        if trt.__version__[0] >= '7':
            # The parsed model may have been exported with a larger batch size;
            # reshape the network input to batch size 1.
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of Caffe file')

        net_out_tmp = [
            network.get_layer(ln).get_output(0)
            for ln in range(network.num_layers)
            if network.get_layer(ln).get_output(0).name in OUTPUT_LAYERS
        ]

        assert len(net_out_tmp) == len(OUTPUT_LAYERS)
        net_out = [None] * len(OUTPUT_LAYERS)
        for nn in net_out_tmp:
            net_out[OUTPUT_LAYERS.index(nn.name)] = nn

        assert None not in net_out
        for nn in net_out:
            network.mark_output(nn)

        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine')
        # with open(engine_file_path, 'wb') as f:
        #     f.write(engine.serialize())
        return engine
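
This function depends on module-level names that are not shown here (EXPLICIT_BATCH and OUTPUT_LAYERS). A sketch of how they might be defined, assuming a detector-style model with three output blobs; the blob names below are placeholders, not taken from the original:

# Assumed module-level constants (illustrative only).
EXPLICIT_BATCH = [1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
OUTPUT_LAYERS = ['conv_out_small', 'conv_out_medium', 'conv_out_large']  # hypothetical blob names
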
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)
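
Several examples in this file reference a ModelData helper and a common module from the TensorRT Python samples, neither of which is shown here. A rough sketch of what an MNIST-style ModelData usually looks like; the concrete values are assumptions:

# Assumed shape of the ModelData helper used throughout these examples (illustrative).
class ModelData(object):
    DEPLOY_PATH = "mnist.prototxt"    # hypothetical path
    MODEL_PATH = "mnist.caffemodel"   # hypothetical path
    INPUT_SHAPE = (1, 28, 28)
    OUTPUT_NAME = "prob"              # name of the output blob in the prototxt
    DTYPE = trt.float32
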
Example No. 4
def build_engine():
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_batch_size = 1
        builder.max_workspace_size = 2**20
        input_layer = network.add_input(name="input_layer", dtype=trt.float32, shape=(1, 13, 3, 3))
        # bn_w = []
        # bn = network.add_scale(input=[input_layer], mode=trt.ScaleMode.CHANNEL, )
        upsample = network.add_plugin_v2(inputs=[input_layer], plugin=get_trt_plugin("UpsamplePlugin"))
        upsample.get_output(0).name = "outputs"
        network.mark_output(upsample.get_output(0))

        return builder.build_cuda_engine(network)
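
get_trt_plugin is not defined in this snippet. One plausible implementation looks up a registered plugin creator by name in the global plugin registry; the field names and values passed to the creator below are assumptions and depend on the actual plugin:

# Hypothetical helper: fetch a plugin instance from the TensorRT plugin registry.
import numpy as np

def get_trt_plugin(plugin_name):
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name == plugin_name:
            # Field values are illustrative; a real plugin defines its own fields.
            scale_field = trt.PluginField("scale", np.array([2], dtype=np.int32),
                                          trt.PluginFieldType.INT32)
            fc = trt.PluginFieldCollection([scale_field])
            return creator.create_plugin(plugin_name, fc)
    return None
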
Example No. 5
def build_engine(trt_deploy_path, trt_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = 1 << 30
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        builder.max_batch_size = batch_size
        model_tensors = parser.parse(deploy=trt_deploy_path, model=trt_model_path,
                                     network=network, dtype=trt_engine_datatype)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        network.mark_output(model_tensors.find('keep_count')) 
        if not silent:
            print("Building TensorRT engine. This may take few minutes.")
       
        return builder.build_cuda_engine(network)
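
A rough sketch of how the engine returned above might be used for inference with an implicit-batch execution context; buffer handling follows the pattern from the TensorRT samples, and the file paths are placeholders:

# Illustrative inference sketch (assumes numpy and pycuda are installed).
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda

engine = build_engine("deploy.prototxt", "model.caffemodel", trt.Logger())  # hypothetical paths
context = engine.create_execution_context()

host_bufs, dev_bufs, bindings = [], [], []
for binding in engine:
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    host_mem = cuda.pagelocked_empty(size, dtype)
    dev_mem = cuda.mem_alloc(host_mem.nbytes)
    host_bufs.append(host_mem)
    dev_bufs.append(dev_mem)
    bindings.append(int(dev_mem))

# Copy the input to the device, run, and copy the outputs back (synchronous for brevity).
cuda.memcpy_htod(dev_bufs[0], host_bufs[0])
context.execute(batch_size=1, bindings=bindings)
for h, d in zip(host_bufs[1:], dev_bufs[1:]):
    cuda.memcpy_dtoh(h, d)
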
Example No. 6
    def __call__(self):
        builder = trt.Builder(TRT_LOGGER)
        network = builder.create_network()
        parser = trt.CaffeParser()

        model_tensors = parser.parse(deploy=self.deploy,
                                     model=self.model,
                                     network=network,
                                     dtype=self.dtype)

        if self.outputs and self.outputs != constants.MARK_ALL:
            for output in self.outputs:
                network.mark_output(model_tensors.find(output))

        return builder, network, parser, self.batch_size
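
The __call__ above belongs to a network-loader object whose constructor is not shown. A hypothetical usage, assuming the class stores deploy, model, dtype, outputs and batch_size on the instance (the class name and paths are placeholders):

# Hypothetical usage of the loader above.
loader = CaffeNetworkLoader(deploy="deploy.prototxt", model="net.caffemodel",
                            dtype=trt.float32, outputs=["prob"], batch_size=8)
builder, network, parser, batch_size = loader()
builder.max_batch_size = batch_size
engine = builder.build_cuda_engine(network)
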
Example No. 7
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)
def build_engine_caffe(model_file, deploy_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        builder.int8_mode = True
        builder.int8_calibrator = calib
        # Parse Caffe model
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        return builder.build_cuda_engine(network)
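
build_int8_engine above expects an INT8 calibrator object (calib). A minimal sketch of what such a calibrator can look like, assuming calibration batches are supplied as a NumPy array in NCHW layout; the class name and cache file are illustrative, not the sample's own MNISTEntropyCalibrator:

# Illustrative INT8 entropy calibrator (assumes numpy and pycuda are installed).
import os
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda

class SimpleEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, data, batch_size=32, cache_file="calibration.cache"):
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.data = np.ascontiguousarray(data, dtype=np.float32)  # shape: (N, C, H, W)
        self.batch_size = batch_size
        self.cache_file = cache_file
        self.index = 0
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * batch_size)

    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names):
        if self.index + self.batch_size > self.data.shape[0]:
            return None  # no more calibration data
        batch = np.ascontiguousarray(self.data[self.index:self.index + self.batch_size])
        cuda.memcpy_htod(self.device_input, batch)
        self.index += self.batch_size
        return [int(self.device_input)]

    def read_calibration_cache(self):
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)
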
def build_engine():
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.fp16_mode = True
        builder.strict_type_constraints = True
        builder.max_batch_size = 16
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        engine = builder.build_cuda_engine(network)
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
Example No. 11
def convert_caffe_model_to_trt(caffe_weights_file, caffe_deploy_file,
                               trt_model_filename, output_tensor_name,
                               output_data_type, max_workspace_size,
                               max_batch_size):
    "Convert a pair of (caffe_weights_file,caffe_deploy_file) into a trt_model_file using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:

        if (output_data_type == 'fp16'):
            if not builder.platform_has_fast_fp16:
                print(
                    'Warning: This platform is not optimized for fast fp16 mode'
                )

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(
                max_batch_size))
        else:
            print('Converting into fp32 (default), max_batch_size={}'.format(
                max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        model_tensors = parser.parse(deploy=caffe_deploy_file,
                                     model=caffe_weights_file,
                                     network=network,
                                     dtype=trt.float32)
        network.mark_output(model_tensors.find(output_tensor_name))

        trt_model_object = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            print(
                'Error: cannot serialize or write TensorRT engine to file {}.'.
                format(trt_model_filename))
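
A hypothetical invocation of the converter above; the paths, tensor name and sizes are placeholders:

# Example call with placeholder arguments.
convert_caffe_model_to_trt(caffe_weights_file="bvlc_googlenet.caffemodel",
                           caffe_deploy_file="deploy.prototxt",
                           trt_model_filename="googlenet_fp16.trt",
                           output_tensor_name="prob",
                           output_data_type="fp16",
                           max_workspace_size=1 << 30,
                           max_batch_size=8)
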
Example No. 12
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, builder.create_builder_config() as config, trt.CaffeParser(
    ) as parser, trt.Runtime(TRT_LOGGER) as runtime:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        config.max_workspace_size = common.GiB(1)
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calib
        # Parse Caffe model
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        plan = builder.build_serialized_network(network, config)
        return runtime.deserialize_cuda_engine(plan)
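
Unlike most of the older examples in this file, this one uses the IBuilderConfig / build_serialized_network path, which is the API TensorRT 8+ requires (builder.max_workspace_size, builder.fp16_mode and build_cuda_engine were deprecated and later removed). For reference, a brief sketch of the FP16 equivalent under the config API, reusing the builder, config, network and runtime objects from the example above:

# Sketch: FP16 build with the builder-config API (replaces builder.fp16_mode = True).
config.set_flag(trt.BuilderFlag.FP16)
plan = builder.build_serialized_network(network, config)
engine = runtime.deserialize_cuda_engine(plan)
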
Example No. 13
def build_int8_engine(deploy_file, model_file, batch_size=32, trt_engine_datatype=trt.DataType.FLOAT):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        elif trt_engine_datatype == trt.DataType.INT8:
            # Now we create a calibrator and give it the location of our calibration data.
            # We also allow it to cache calibration data for faster engine building.
            _, [calib_data] = common.find_sample_data(description="Runs a Caffe MNIST network in Int8 mode", subfolder="mnist", find_files=["t10k-images-idx3-ubyte"])
            calibration_cache = "mnist_calibration.cache"
            builder.int8_mode = True
            builder.int8_calibrator = MNISTEntropyCalibrator(calib_data, cache_file=calibration_cache)
        # Parse Caffe model
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        return builder.build_cuda_engine(network)
Example No. 14
    def parse_caffe(self,
                    caffe_model_file,
                    caffe_deploy_file,
                    output_name="prob1"):
        """Parses caffe model file and prepares for serialization
        :param caffe_model_file: path to caffe model file
        :param caffe_deploy_file: path to caffe deploy file
        :param output_name: output name
        """

        parser = trt.CaffeParser()

        model_tensors = parser.parse(
            deploy=caffe_deploy_file,
            model=caffe_model_file,
            network=self.network,
            dtype=CudaEngineManager.CONSTANTS["dtype"])

        self.network.mark_output(model_tensors.find(output_name))

        self.parser = parser
Example No. 15
def build_engine_caffe(model_file, deploy_file, precision):
    # precision: float, half, int8
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        print(model_tensors)
        print(ModelData.OUTPUT_NAME)
        print(model_tensors.find(ModelData.OUTPUT_NAME))
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))

        if precision == "half":
            # enable fp16 (chenrong06)
            builder.fp16_mode = True
            builder.strict_type_constraints = True
            print("pricision: half")
        elif precision == "int8":
            # enable int8 and set quantize (chenrong06)
            # Incomplete version, please refer to workspace/tensorrt/samples/sampleINT8API/sampleINT8API.cpp
            builder.int8_mode = True
            builder.int8_calibrator = None
            builder.strict_type_constraints = True
            print(network.num_layers)
            for i in range(network.num_layers):
                layer = network[i]
                tensor = layer.get_output(0)
                tensor.set_dynamic_range(-1.0, 1.0)
                tensor = layer.get_input(0)
                tensor.set_dynamic_range(-1.0, 1.0)
            print("pricision: int8")
        else:
            print("pricision: float")

        return builder.build_cuda_engine(network)
Example No. 16
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                common.EXPLICIT_BATCH) as network, trt.CaffeParser() as parser:
            builder.max_batch_size = ModelData.BATCH_SIZE
            builder.max_workspace_size = common.GiB(
                ModelData.MEM_SIZE)  # 1 # 1 << 28 # 256MiB
            if ModelData.DTYPE == trt.float16:
                builder.fp16_mode = True
            elif ModelData.DTYPE == trt.int8:  # ONNX had issues here; the official Caffe sample works
                builder.int8_mode = True

                # Now we create a calibrator and give it the location of our calibration data.
                # We also allow it to cache calibration data for faster engine building.
                calibration_cache = "calibration.cache"
                calib = common.MNISTEntropyCalibrator(
                    ModelData.data_dir,
                    ModelData.INPUT_SHAPE[-2:],
                    cache_file=calibration_cache,
                    batch_size=ModelData.BATCH_SIZE)
                builder.int8_calibrator = calib

            else:
                pass
            # Load the Caffe model and parse it in order to populate the TensorRT network.
            # This function returns an object that we can query to find tensors by name.
            model_tensors = parser.parse(deploy=deploy_file,
                                         model=model_file,
                                         network=network,
                                         dtype=ModelData.DTYPE)
            # For Caffe, we need to manually mark the output of the network.
            # Since we know the name of the output tensor, we can find it in model_tensors.
            network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
            print('Building an engine from file {}; this may take a while...'.
                  format(model_file))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine
Example No. 17
def build_engine(model_file,
                 deploy_file,
                 trt_logger,
                 batch_size=1,
                 precision_mode='FP32'):
    DTYPE = trt.float32
    if precision_mode == 'FP16':
        DTYPE = trt.float16
    with trt.Builder(trt_logger) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        # Workspace size for building an engine.
        builder.max_workspace_size = 1 << 30
        # Max batch size
        builder.max_batch_size = batch_size
        print("Building TensorRT engine. This may take few minutes.")
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=DTYPE)
        # Find the name of the output tensor in model_tensors.
        network.mark_output(model_tensors.find(OUTPUT_NAME))
        return builder.build_cuda_engine(network)
Example No. 18
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    # Create the builder, network, config and parser instances.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, builder.create_builder_config() as config, trt.CaffeParser(
    ) as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        # Set the builder / config parameters.
        builder.max_batch_size = batch_size
        config.max_workspace_size = common.GiB(1)
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calib
        # Parse Caffe model
        # Use the Caffe parser to parse a prototxt file and a binaryproto Caffe model,
        # extracting the network definition and the associated weights.
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        # Mark the network output.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        # Build the engine (INT8 calibration happens during the build).
        return builder.build_engine(network, config)
Example No. 19
# 3.2.2. Importing A Model Using A Parser In Python
'''
To import a model using a parser, you will need to perform the following high-level steps:

    Create the TensorRT builder and network.
    Create the TensorRT parser for the specific format.
    Use the parser to parse the imported model and populate the network.
The builder must be created before the network because it serves as a factory for the network. Different parsers have different mechanisms for marking network outputs. 
'''
# 3.2.3. Importing From Caffe Using Python
import tensorrt as trt
datatype = trt.float32  #Define the data type. In this example, we will use float32.
deploy_file = 'data/mnist/mnist.prototxt'
model_file = 'data/mnist/mnist.caffemodel'
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
    model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=datatype) 
    # The parser returns the model_tensors, which is a table containing the mapping from tensor names to ITensor objects. 
# 3.2.4. Importing From TensorFlow Using Python
'''
Create a frozen graph for the TensorFlow model. The instructions on freezing a TensorFlow model into a stream can be found in Freezing A TensorFlow Graph.
Use the UFF converter to convert a frozen tensorflow model to a UFF file. Typically, this is as simple as:
    convert-to-uff frozen_inference_graph.pb    
'''
model_file = '/data/mnist/mnist.uff'
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    parser.register_input("Placeholder", (1, 28, 28))
    parser.register_output("fc2/Relu")
    parser.parse(model_file, network)
# 3.2.5. Importing From ONNX Using Python
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
    with open(onnx_model_file, 'rb') as model:  # onnx_model_file: path to the .onnx model (placeholder name)
        parser.parse(model.read())
Example No. 20
# Load a Caffe model in TensorRT and create an engine -----------------------------------------
logger = trt.Logger(trt.Logger.VERBOSE)
if os.path.isfile(trtFile):
    with open(trtFile, 'rb') as f:
        engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
    if engine == None:
        print("Failed loading engine!")
        exit()
    print("Succeeded loading engine!")
else:
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.max_workspace_size = 3 << 30
    parser = trt.CaffeParser()
    with open(caffePrototxtFile, 'rb') as f0, open(caffeModelFile, 'rb') as f1:
        net = parser.parse_buffer(f0.read(), f1.read(), network, trt.float32)
        if net is None:
            print("Failed parsing caffe file!")
            exit()
        print("Succeeded parsing caffe file!")

    outTensor = net.find('y')  # locate the network's output tensor
    squeezeLayer = network.add_reduce(outTensor, trt.ReduceOperation.SUM, (1 << 2) + (1 << 3), False)  # remove the extra dimensions that were added manually earlier
    argmaxLayer = network.add_topk(squeezeLayer.get_output(0), trt.TopKOperation.MAX, 1, 1 << 1)  # add the Argmax layer that Caffe does not support

    network.mark_output(argmaxLayer.get_output(1))
    engineString = builder.build_serialized_network(network, config)
    if engineString == None:
        print("Failed building engine!")
        exit()

def _build_engine_caffe(model_info):
    def GiB(x):
        return x * 1 << 30

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_batch_size = model_info.max_batch_size
        builder.max_workspace_size = GiB(model_info.max_workspace_size)
        builder.fp16_mode = model_info.flag_fp16

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=model_info.deploy_file, model=model_info.model_file,
                                     network=network,
                                     dtype=model_info.data_type)
        for ind_out in range(len(model_info.output_name)):
            print('=> Marking output blob "', model_info.output_name[ind_out], '"')
            network.mark_output(model_tensors.find(model_info.output_name[ind_out]))
        print("=> Building TensorRT engine. This may take a few minutes.")
        return builder.build_cuda_engine(network)
Example No. 22
def retrieve_mean(mean_proto):
    with trt.CaffeParser() as parser:
        return parser.parse_binary_proto(mean_proto)
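
parse_binary_proto reads the mean values stored in a Caffe binaryproto file and returns them as an array, which is typically subtracted from the input before inference. A brief usage sketch; the mean file path and input are placeholders:

# Illustrative mean subtraction for preprocessing (assumes numpy is available).
import numpy as np
mean = retrieve_mean("mnist_mean.binaryproto")     # hypothetical path
img = np.random.rand(28 * 28).astype(np.float32)   # stand-in for a real flattened input image
preprocessed = img - mean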