Example #1
    def test_save_and_load_tensor(self):  # type: () -> None
        proto = self._simple_tensor()
        cls = TensorProto
        proto_string = onnx._serialize(proto)

        # Test if input is string
        loaded_proto = onnx.load_tensor_from_string(proto_string)
        self.assertTrue(proto == loaded_proto)

        # Test if input has a read function
        f = io.BytesIO()
        onnx.save_tensor(loaded_proto, f)
        f = io.BytesIO(f.getvalue())
        loaded_proto = onnx.load_tensor(f, cls)
        self.assertTrue(proto == loaded_proto)

        # Test if input is a file name
        try:
            tfile = tempfile.NamedTemporaryFile(delete=False)
            onnx.save_tensor(proto, tfile)
            tfile.close()

            loaded_proto = onnx.load_tensor(tfile.name, cls)
            self.assertTrue(proto == loaded_proto)
        finally:
            os.remove(tfile.name)
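For reference, here is a self-contained round trip through the same public API. The tensor built with onnx.helper.make_tensor is only a stand-in for whatever _simple_tensor() returns, and the file name is illustrative.

import numpy as np
import onnx
from onnx import TensorProto, helper

# Stand-in for _simple_tensor(): a 2x3 float tensor with raw data.
tensor = helper.make_tensor(
    name="test",
    data_type=TensorProto.FLOAT,
    dims=(2, 3),
    vals=np.arange(6, dtype=np.float32).tobytes(),
    raw=True,
)

onnx.save_tensor(tensor, "tensor.pb")    # save to a file path
loaded = onnx.load_tensor("tensor.pb")   # load it back
assert tensor == loaded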
Example #2
    def __init__(
        self,
        model: onnx.ModelProto,
        execution_providers: List[str] = None,
        context: mlrun.MLClientCtx = None,
    ):
        # Set the context:
        self._context = (context if context is not None else
                         mlrun.get_or_create_ctx(self.DEFAULT_CONTEXT_NAME))

        # Store the model:
        self._model = model

        # Set the execution providers (default will prefer CUDA Execution Provider over CPU Execution Provider):
        self._execution_providers = ([
            "CUDAExecutionProvider", "CPUExecutionProvider"
        ] if execution_providers is None else execution_providers)

        # Initialize the ONNX Runtime session:
        self._inference_session = onnxruntime.InferenceSession(
            onnx._serialize(model),
            providers=self._execution_providers,
        )

        # Get the input layer names:
        self._input_layers = [
            input_layer.name
            for input_layer in self._inference_session.get_inputs()
        ]

        # Get the output layer names:
        self._output_layers = [
            output_layer.name
            for output_layer in self._inference_session.get_outputs()
        ]
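The constructor above could be used roughly as follows. The class name ONNXModelWrapper is a placeholder, since the snippet shows only the __init__ body, and the model path is illustrative.

import onnx

# Placeholder class name; only the __init__ body is shown above.
model = onnx.load("model.onnx")
wrapper = ONNXModelWrapper(
    model=model,
    execution_providers=["CPUExecutionProvider"],  # force CPU-only inference
)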
Example #3
    def test_save_and_load_model(self):
        proto = self._simple_model()
        cls = ModelProto
        proto_string = onnx._serialize(proto)

        # Test if input is string
        loaded_proto = onnx.load_model_from_string(proto_string)
        self.assertTrue(proto == loaded_proto)

        # Test if input has a read function
        f = io.BytesIO()
        onnx.save_model(proto_string, f)
        f = io.BytesIO(f.getvalue())
        loaded_proto = onnx.load_model(f, cls)
        self.assertTrue(proto == loaded_proto)

        # Test if input is a file name
        try:
            f = tempfile.NamedTemporaryFile(delete=False)
            onnx.save_model(proto, f)
            f.close()

            loaded_proto = onnx.load_model(f.name, cls)
            self.assertTrue(proto == loaded_proto)
        finally:
            os.remove(f.name)
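Similarly, a self-contained round trip for a model through the public API. The one-node Relu graph is only a stand-in for whatever _simple_model() returns; the file name is illustrative.

import onnx
from onnx import TensorProto, helper

# Stand-in for _simple_model(): a single Relu node.
x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 4])
y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 4])
node = helper.make_node("Relu", inputs=["x"], outputs=["y"])
graph = helper.make_graph([node], "simple-graph", [x], [y])
model = helper.make_model(graph)

onnx.save_model(model, "model.onnx")
loaded = onnx.load_model("model.onnx")
assert model == loaded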
Example #4
    def load(self):
        """
        Use the model handler to get the model and initialize an ONNX Runtime inference session.
        """
        # Load the model:
        if self._model_handler.model is None:
            self._model_handler.load()
        self.model = self._model_handler.model

        # Initialize the ONNX Runtime session:
        self._inference_session = onnxruntime.InferenceSession(
            onnx._serialize(self._model_handler.model),
            providers=self._execution_providers,
        )

        # Get the input layer names:
        self._input_layers = [
            input_layer.name
            for input_layer in self._inference_session.get_inputs()
        ]

        # Get the output layer names:
        self._output_layers = [
            output_layer.name
            for output_layer in self._inference_session.get_outputs()
        ]
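For context, the collected layer names are typically used to feed the session. A minimal sketch follows; the method name predict and the single NumPy input are illustrative assumptions, not part of the original class.

    def predict(self, input_array):
        # Illustrative sketch: feed one array to the first input layer and
        # return all outputs from the ONNX Runtime session.
        return self._inference_session.run(
            self._output_layers,
            {self._input_layers[0]: input_array},
        )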
Example #5
import onnx
from onnx.external_data_helper import convert_model_to_external_data


def save_model(proto, f, format=None, save_as_external_data=False,
               all_tensors_to_one_file=True, location=None,
               size_threshold=1024, convert_attribute=False):
    if isinstance(proto, bytes):
        proto = onnx._deserialize(proto, onnx.ModelProto())

    if save_as_external_data:
        convert_model_to_external_data(proto, all_tensors_to_one_file, location, size_threshold, convert_attribute)

    s = onnx._serialize(proto)
    onnx._save_bytes(s, f)
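A usage sketch for this wrapper, assuming a model whose large initializers should be offloaded to external data; the file names are illustrative.

model = onnx.load("big_model.onnx")

# Move every initializer above the size threshold into one side-car file:
save_model(
    model,
    "big_model_external.onnx",
    save_as_external_data=True,
    all_tensors_to_one_file=True,
    location="big_model_external.data",
)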
Example #6
    def initialize(self):
        """
        Parse the processed model to create the network.
        """
        # Create network.
        self.network = self.builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

        channel_idx = 1

        # Input shape
        input_tensor_dim = [-1] + self.input_volume_dim
        input_tensor_dim.insert(channel_idx, self.num_input_channel)

        # Parse from onnx file.
        parser = trt.OnnxParser(self.network, self.logger)
        model = self.preprocess_onnx(onnx.load(self.model_path))
        success = parser.parse(onnx._serialize(model))
        if not success:
            raise RuntimeError(
                "3D-Unet onnx model parsing failed! Error: {:}".format(
                    parser.get_error(0).desc()))

        # Set input/output tensor dtype and formats
        input_tensor = self.network.get_input(0)
        output_tensor = self.network.get_output(0)
        input_tensor.shape = input_tensor_dim

        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
        elif self.input_dtype == "fp16":
            input_tensor.dtype = trt.float16
        elif self.input_dtype == "fp32":
            input_tensor.dtype = trt.float32

        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "dhwc8":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.DHWC8)
        elif self.input_format == "cdhw32":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CDHW32)

        # Use FP16 output unless calibration is forced; FP32 output is a
        # workaround for calibration not working properly with the identity layer.
        force_calibration = dict_get(self.args,
                                     "force_calibration",
                                     default=False)
        output_tensor.dtype = trt.float32 if force_calibration else trt.float16
        output_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)

        self.initialized = True
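The usual next step is to build a serialized engine from the parsed network. This is a rough sketch assuming the TensorRT 8.x builder API; the method name build_engine, the FP16 flag, and the plan-file path are illustrative and not part of the original class.

    def build_engine(self, engine_path="3d-unet.plan"):
        # Illustrative sketch (TensorRT 8.x); not part of the original snippet.
        config = self.builder.create_builder_config()
        config.set_flag(trt.BuilderFlag.FP16)  # allow FP16 kernels

        serialized_engine = self.builder.build_serialized_network(self.network, config)
        if serialized_engine is None:
            raise RuntimeError("TensorRT engine build failed")

        with open(engine_path, "wb") as engine_file:
            engine_file.write(serialized_engine)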
Example #7
    def initialize(self):
        """
        Parse input ONNX file to a TRT network. Apply layer optimizations and fusion plugins on network.
        """

        # Query system id for architecture
        self.system = get_system()
        self.gpu_arch = self.system.arch

        # Create network.
        self.network = self.builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

        # Parse from onnx file.
        parser = trt.OnnxParser(self.network, self.logger)

        rn50_gs = RN50GraphSurgeon(self.model_path, self.gpu_arch,
                                   self.device_type, self.precision,
                                   self.cache_file, self.need_calibration)
        model = rn50_gs.process_onnx()
        success = parser.parse(onnx._serialize(model))
        if not success:
            raise RuntimeError(
                "ResNet50 onnx model processing failed! Error: {:}".format(
                    parser.get_error(0).desc()))
        # Unmark topk_layer_output_value, leaving only topk_layer_output_index
        assert self.network.num_outputs == 2, "Two outputs expected"
        assert self.network.get_output(0).name == "topk_layer_output_value",\
            "unexpected tensor: {}".format(self.network.get_output(0).name)
        assert self.network.get_output(1).name == "topk_layer_output_index",\
            "unexpected tensor: {}".format(self.network.get_output(1).name)
        logging.info("Unmarking output: {:}".format(
            self.network.get_output(0).name))
        self.network.unmark_output(self.network.get_output(0))

        # Set input dtype and format
        input_tensor = self.network.get_input(0)
        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
            input_tensor.dynamic_range = (-128, 127)
        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "chw4":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

        self.initialized = True