def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version):
    super(Caffe2Backend, cls).run_node(node, inputs, device)

    device_option = get_device_option(Device(device))
    with Workspace(), core.DeviceScope(device_option):  # temporary!
        if isinstance(inputs, dict):
            for key, value in inputs.items():
                workspace.FeedBlob(key, value)
        else:
            assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                node.op_type, len(node.input), len(inputs))
            for key, value in zip(node.input, inputs):
                workspace.FeedBlob(key, value)

        cls._inplace_rewrite([node])
        ops = cls._onnx_node_to_caffe2_op(
            node, opset_version or cls._known_opset_version)
        for op in ops:
            op.device_option.CopyFrom(device_option)
        workspace.RunOperatorsOnce(ops)
        output_values = [workspace.FetchBlob(name) for name in node.output]
        return namedtupledict('Outputs', node.output)(*output_values)
def supports_device(cls, device_str):
    device = Device(device_str)
    if device.type == DeviceType.CPU:
        return True
    elif device.type == DeviceType.CUDA:
        return workspace.has_gpu_support
    return False
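# A minimal usage sketch (hypothetical): `supports_device` is a classmethod on
# the backend, so callers can probe device support before preparing a model.
# `Caffe2Backend` is assumed to be the enclosing class of the method above.
if Caffe2Backend.supports_device('CUDA:0'):
    device = 'CUDA:0'
else:
    device = 'CPU'  # fall back when this build has no GPU support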
def run_node(cls, node, inputs, device='CPU',
             opset_version=_known_opset_version, outputs_info=None):
    super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                       outputs_info=outputs_info)

    device_option = get_device_option(Device(device))
    ws = Workspace()
    with core.DeviceScope(device_option):  # temporary!
        if isinstance(inputs, dict):
            for key, value in inputs.items():
                ws.FeedBlob(key, value)
        else:
            assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                node.op_type, len(node.input), len(inputs))
            for key, value in zip(node.input, inputs):
                ws.FeedBlob(key, value)

        ops = []
        cbackend = C.Caffe2Backend(cls._dummy_name)
        ops_str = cbackend.convert_node(node.SerializeToString(), opset_version)
        for s in ops_str[0] + ops_str[1]:
            op = caffe2_pb2.OperatorDef()
            op.ParseFromString(s)
            op.device_option.CopyFrom(device_option)
            ops.append(op)

        # For testing
        if "ONNX_CAFFE2_DEBUG" in os.environ:
            init_ops, ops2, _ = cls._onnx_node_to_caffe2_op(
                None, None, node, opset_version or cls._known_opset_version)
            ops2 = init_ops + ops2
            for op in ops2:
                op.device_option.CopyFrom(device_option)
            print("\nC++:\n{}\nPython:\n{}".format(ops, ops2))

        ws.RunOperatorsOnce(ops)
        output_values = [ws.FetchBlob(name) for name in node.output]
        return namedtupledict('Outputs', node.output)(*output_values)
def run_node(cls, node, inputs, device='CPU'):
    super(TensorflowBackendBase, cls).run_node(node, inputs, device)
    node_graph = tf.Graph()
    with node_graph.as_default():
        node = OnnxNode(node)
        device_option = get_device_option(Device(device))
        if isinstance(inputs, dict):
            feed_dict_raw = inputs
        else:
            assert len(node.inputs) == len(inputs)
            feed_dict_raw = dict(zip(node.inputs, inputs))
        # TODO: is constant the best way for feeding inputs?
        input_dict = dict([(x[0], tf.constant(x[1]))
                           for x in feed_dict_raw.items()])
        ops = cls._onnx_node_to_tensorflow_op(node, input_dict)
        with tf.Session() as sess:
            with tf.device(device_option):
                sess.run(tf.global_variables_initializer())
                output_vals = sess.run(ops)
    return namedtupledict('Outputs', node.outputs)(*output_vals)
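# A minimal usage sketch (hypothetical): run a single ONNX node through a
# backend's `run_node`. The `Relu` node and random input are made up, and
# `TensorflowBackend` is assumed to be a concrete subclass of
# `TensorflowBackendBase` that supplies `_onnx_node_to_tensorflow_op`.
import numpy as np
from onnx import helper

relu_node = helper.make_node('Relu', inputs=['x'], outputs=['y'])
x = np.random.randn(2, 3).astype(np.float32)
outputs = TensorflowBackend.run_node(relu_node, [x])
print(outputs.y)  # namedtuple field names follow the node's output names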
def __init__(self, model, device, **kwargs):
    """Create a ``BackendRep``.

    Parameters
    ----------
    model : str
        The path of onnx model file.
    device : onnx.Device
        The executing device.

    """
    if not isinstance(device, Device):
        device = Device(device)
    execute_ws = workspace.get_workspace()
    if device.type == DeviceType.CPU:
        device_type, device_index = 'cpu', 0
    elif device.type == DeviceType.CUDA:
        device_type, device_index = 'cuda', device.device_id
    else:
        raise ValueError('Unsupported device type: ' + str(device.type))
    with context.device(device_type, device_index):
        self._context = GraphLib.from_onnx(model)
    self._input_dict = collections.OrderedDict()
    self._output_dict = collections.OrderedDict()
    for input in self._context._def.input:
        impl = execute_ws.get_tensor(input)
        self._input_dict[input] = Tensor(impl=impl)
    for output in self._context._def.output:
        impl = execute_ws.get_tensor(output)
        self._output_dict[output] = Tensor(impl=impl)
    self._output_tuple = namedtupledict('Outputs', self._context._def.output)
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
    device_option = get_device_option(Device(device))

    init_model = ModelProto()
    init_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
    cls._inplace_rewrite(init_model.graph)

    predict_model = ModelProto()
    predict_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
    cls._inplace_rewrite(predict_model.graph)

    init_net = caffe2_pb2.NetDef()
    predict_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    predict_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(predict_model.graph))

    for net, model in ((init_net, init_model), (predict_net, predict_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    return init_net, predict_net
def supports_device(cls, device_str):
    device = Device(device_str)
    if device.type == DeviceType.CPU:
        return True
    elif core.IsGPUDeviceType(device.type):
        return workspace.has_gpu_support or workspace.has_hip_support
    return False
def prepare(cls, predict_model, device='CPU', init_model=None, **kwargs):
    '''
    For Onnx Caffe2Backend, we require that init_graph doesn't initialize the
    actual input of the predict_graph. For example, if "img" is the input blob
    for the predict_net, we require that "img" is not initialized in init_graph
    or in the initializer of the predict_graph. We don't have a check for this,
    since there is no way we can know which blob is the input of the
    predict_graph.
    '''
    super(Caffe2Backend, cls).prepare(predict_model, device, **kwargs)

    if init_model:
        checker.check_model(init_model)

    init_net, predict_net = cls.onnx_graph_to_caffe2_net(predict_model.graph)
    predict_net.device_option.CopyFrom(get_device_option(Device(device)))

    ws = Workspace()
    with ws, core.DeviceScope(predict_net.device_option):
        if init_model:
            _, init_net_from_model = cls.onnx_graph_to_caffe2_net(init_model.graph)
            init_net.op.extend(init_net_from_model.op)
        workspace.RunNetOnce(init_net)
        uninitialized = [x for x in predict_net.external_input
                         if not workspace.HasBlob(x)]

    return Caffe2Rep(predict_net, ws, uninitialized)
class TensorRTBackendRep(BackendRep):
    def __init__(self, model, device, max_batch_size=32,
                 max_workspace_size=None, calib=None, quantization_mode="fp32",
                 serialize_engine=False, **kwargs):
        if not isinstance(device, Device):
            device = Device(device)
        self._set_device(device)
        self._logger = TRT_LOGGER
        self.builder = trt.Builder(self._logger)
        self.network = self.builder.create_network(
            flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        self.parser = trt.OnnxParser(self.network, self._logger)

        if quantization_mode == 'fp16':
            self.builder.fp16_mode = True
        if quantization_mode == 'int8':
            self.builder.int8_mode = True
            assert calib is not None
            self.builder.int8_calibrator = calib

        if not isinstance(model, six.string_types):
            model_str = model.SerializeToString()
        else:
            model_str = model

        if not trt.init_libnvinfer_plugins(TRT_LOGGER, ""):
            msg = "Failed to initialize TensorRT's plugin library."
            raise RuntimeError(msg)

        if not self.parser.parse(model_str):
            error = self.parser.get_error(0)
            msg = "While parsing node number %i:\n" % error.node()
            msg += ("%s:%i In function %s:\n[%i] %s" %
                    (error.file(), error.line(), error.func(), error.code(),
                     error.desc()))
            raise RuntimeError(msg)

        if max_workspace_size is None:
            max_workspace_size = 1 << 28

        self.builder.max_batch_size = max_batch_size
        self.builder.max_workspace_size = max_workspace_size

        for layer in self.network:
            print(layer.name)
        print(self.network[-1].get_output(0).shape)

        trt_engine = self.builder.build_cuda_engine(self.network)
        if trt_engine is None:
            raise RuntimeError("Failed to build TensorRT engine from network")
        if serialize_engine:
            trt_engine = self._serialize_deserialize(trt_engine)
        self.engine = Engine(trt_engine)

        self._output_shapes = {}
        self._output_dtype = {}
        for output in model.graph.output:
            dims = output.type.tensor_type.shape.dim
            output_shape = tuple([dim.dim_value for dim in dims])
            self._output_shapes[output.name] = output_shape
            self._output_dtype[output.name] = output.type.tensor_type.elem_type
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
    device_option = get_device_option(Device(device))

    # Prior to the onnx version update to onnx-1.8.0, errors caused by failures
    # in the onnx shape inference call were being suppressed. Hence a
    # try-except block is added around the infer_shapes call to avoid these
    # failures and preserve the old behavior.
    try:
        onnx_model = onnx.utils.polish_model(onnx_model)
    except RuntimeError:
        warnings.warn(
            "ShapeInferenceWarning: Inferred shape and existing shape differ in rank"
        )

    init_model = cls.optimize_onnx(onnx_model, init=True)
    pred_model = cls.optimize_onnx(onnx_model, predict=True)

    init_net = caffe2_pb2.NetDef()
    pred_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    pred_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    cls._dummy_name.reset(cls._all_names_in_graph(init_model.graph) |
                          cls._all_names_in_graph(pred_model.graph))

    errors = []
    for net, model in ((init_net, init_model), (pred_net, pred_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            try:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
            except Exception as e:
                msg = 'Error while processing node: {}. Exception: {}'.format(node, e)
                errors.append(msg)
                print('ONNX FATAL:', msg, file=sys.stderr)
                continue
            init_net.op.extend(c2ops.init_ops)
            net.op.extend(c2ops.ops)
            net.external_input.extend(c2ops.interface_blobs)
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    if len(errors) > 0:
        raise RuntimeError(
            "ONNX conversion failed, encountered {} errors:\n\n{}".format(
                len(errors), "\n\n".join(errors)))

    return init_net, pred_net
def prepare(cls, model, device='CPU', **kwargs):
    '''
    For Onnx Caffe2Backend, we require that init_graph doesn't initialize the
    actual input of the predict_graph. For example, if "img" is the input blob
    for the predict_net, we require that "img" is not initialized in init_graph
    or in the initializer of the predict_graph. We don't have a check for this,
    since there is no way we can know which blob is the input of the
    predict_graph.
    '''
    super(Caffe2Backend, cls).prepare(model, device, **kwargs)

    opset_version = None
    for imp in model.opset_import:
        if not imp.HasField("domain") or imp.domain == "":
            opset_version = imp.version
            if imp.version > cls._known_opset_version:
                warnings.warn(
                    "This version of onnx-caffe2 targets ONNX operator set "
                    "version {}, but the model we are trying to import uses "
                    "version {}. We will try to import it anyway, but if the "
                    "model uses operators which had BC-breaking changes in the "
                    "intervening versions, import will fail.".format(
                        cls._known_opset_version, imp.version))
        else:
            warnings.warn("Unrecognized operator set {}".format(imp.domain))
    if opset_version is None:
        if model.ir_version >= 0x00000003:
            raise RuntimeError(
                "Model with IR version >= 3 did not specify ONNX operator set "
                "version (onnx-caffe2 requires it)")
        else:
            opset_version = 1

    ws = Workspace()
    device_option = get_device_option(Device(device))

    # Directly load initializer data into blobs in workspace
    cls._direct_initialize_parameters(
        model.graph.initializer,
        ws,
        device_option,
    )

    initialized = {init.name for init in model.graph.initializer}

    cls._direct_initialize_inputs(
        model.graph.input,
        initialized,
        ws,
        device_option,
    )

    uninitialized = [
        value_info.name for value_info in model.graph.input
        if value_info.name not in initialized
    ]

    init_net, predict_net = cls._onnx_model_to_caffe2_net(
        model, device, opset_version, False)
    retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
    return retval
def onnx_graph_to_caffe2_net(cls, graph_def, device="CPU",
                             opset_version=_known_opset_version):
    device_option = get_device_option(Device(device))
    cls._inplace_rewrite(graph_def)
    if graph_def.initializer:
        init_net = cls.onnx_initializer_to_caffe2_init_net(graph_def.initializer)
        initialized = {init.name for init in graph_def.initializer}
    else:
        init_net = caffe2_pb2.NetDef()
        initialized = set()

    dummy_name(cls._all_names_in_graph(graph_def) | initialized)

    predict_net = caffe2_pb2.NetDef()
    predict_net.name = graph_def.name
    for node in graph_def.node:
        predict_net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))
    predict_net.external_input.extend(
        value_info.name for value_info in graph_def.input)
    predict_net.external_output.extend(
        value_info.name for value_info in graph_def.output)

    # Caffe2 predictor requires all input blobs (including the
    # real model inputs) to be initialized in init_net
    for value_info in graph_def.input:
        if value_info.name in initialized:
            continue
        op_def = caffe2_pb2.OperatorDef()
        op_def.output.extend([value_info.name])
        op_def.type = 'GivenTensorFill'
        shape = list(d.dim_value for d in value_info.type.tensor_type.shape.dim)
        # TODO: Putting this in the init net will make it run faster, but it
        # causes some tests to fail...
        # shape = (1,)
        shape_arg = op_def.arg.add()
        shape_arg.name = 'shape'
        shape_arg.ints.extend(shape)
        values_arg = op_def.arg.add()
        values_arg.name = 'values'
        values_arg.floats.extend(np.ones(shape).flatten().tolist())
        init_net.op.extend([op_def])

    # Set the device option for the init_net and predict_net.
    init_net.device_option.CopyFrom(device_option)
    predict_net.device_option.CopyFrom(device_option)

    return init_net, predict_net
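# A minimal usage sketch (hypothetical): consuming the (init_net, predict_net)
# pair returned above through the Caffe2 workspace API. The model path and the
# 'img' input blob name are made up for illustration.
import numpy as np
import onnx
from caffe2.python import workspace

model = onnx.load('model.onnx')
init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(model.graph)
workspace.RunNetOnce(init_net)  # fills parameters and placeholder inputs
workspace.FeedBlob('img', np.random.randn(1, 3, 224, 224).astype(np.float32))
workspace.RunNetOnce(predict_net)
result = workspace.FetchBlob(predict_net.external_output[0])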
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
    device_option = get_device_option(Device(device))

    init_model = ModelProto()
    init_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), init=True))

    pred_model = ModelProto()
    pred_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))

    init_net = caffe2_pb2.NetDef()
    pred_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    pred_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(pred_model.graph))

    success = True
    for net, model in ((init_net, init_model), (pred_net, pred_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            try:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
            except Exception as e:
                success = False
                print('ONNX FATAL:', e)
                continue
            (init_net if include_initializers else net).op.extend(c2ops.init_ops)
            net.op.extend(c2ops.ops)
            net.external_input.extend(c2ops.interface_blobs)
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    if not success:
        raise RuntimeError('ONNX conversion failed')

    return init_net, pred_net
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
    device_option = get_device_option(Device(device))

    onnx_model = onnx.utils.polish_model(onnx_model)
    init_model = cls.optimize_onnx(onnx_model, init=True)
    pred_model = cls.optimize_onnx(onnx_model, predict=True)

    init_net = caffe2_pb2.NetDef()
    pred_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    pred_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(cls._create_tensor_filling_op(tp)
                           for tp in onnx_model.graph.initializer)

    cls._dummy_name.reset(cls._all_names_in_graph(init_model.graph) |
                          cls._all_names_in_graph(pred_model.graph))

    errors = []
    for net, model in ((init_net, init_model), (pred_net, pred_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            try:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
            except Exception as e:
                msg = 'Error while processing node: {}. Exception: {}'.format(node, e)
                errors.append(msg)
                print('ONNX FATAL:', msg, file=sys.stderr)
                continue
            init_net.op.extend(c2ops.init_ops)
            net.op.extend(c2ops.ops)
            net.external_input.extend(c2ops.interface_blobs)
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    if len(errors) > 0:
        raise RuntimeError(
            "ONNX conversion failed, encountered {} errors:\n\n{}".format(
                len(errors), "\n\n".join(errors)))

    return init_net, pred_net
def run_node(cls, node, inputs, device='CPU',
             opset_version=_known_opset_version, outputs_info=None):
    super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                       outputs_info=outputs_info,
                                       opset_version=opset_version)
    value_infos = []
    device_option = get_device_option(Device(device))
    ws = Workspace()
    with core.DeviceScope(device_option):  # temporary!
        if isinstance(inputs, dict):
            for key, value in inputs.items():
                ws.FeedBlob(key, value)
                value_infos.append(onnx.helper.make_tensor_value_info(
                    name=key,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                    shape=value.shape).SerializeToString())
        else:
            assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                node.op_type, len(node.input), len(inputs))
            for key, value in zip(node.input, inputs):
                ws.FeedBlob(key, value)
                value_infos.append(onnx.helper.make_tensor_value_info(
                    name=key,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                    shape=value.shape).SerializeToString())

        ops = []
        cbackend = C.Caffe2Backend(cls._dummy_name)
        ops_str = cbackend.convert_node(node.SerializeToString(), value_infos,
                                        opset_version)
        for s in ops_str[0] + ops_str[1]:
            op = caffe2_pb2.OperatorDef()
            op.ParseFromString(s)
            op.device_option.CopyFrom(device_option)
            ops.append(op)

        ws.RunOperatorsOnce(ops)
        output_values = [ws.FetchBlob(name) for name in node.output]
        return namedtupledict('Outputs', node.output)(*output_values)
def supports_device(cls, device_str): """Query if the given device is supported. Parameters ---------- device_str : str The device descriptor. Returns ------- bool **True** if device is supported otherwise **False**. """ device = Device(device_str) return device.type == DeviceType.CUDA
def supports_device(cls, device_str): """Query if the given device is supported. Parameters ---------- device_str : str The device descriptor. Returns ------- bool ``True`` if device is supported otherwise ``False``. """ device = Device(device_str) if device.type in (DeviceType.CPU, DeviceType.CUDA): return True return False
def run_node(cls, node, inputs, device='CPU'):
    super(Caffe2Backend, cls).run_node(node, inputs, device)

    device_option = get_device_option(Device(device))
    with Workspace(), core.DeviceScope(device_option):  # temporary!
        if isinstance(inputs, dict):
            for key, value in inputs.items():
                workspace.FeedBlob(key, value)
        else:
            assert len(node.input) == len(inputs)
            for key, value in zip(node.input, inputs):
                workspace.FeedBlob(key, value)

        cls._inplace_rewrite([node])
        ops = cls._onnx_node_to_caffe2_op(node)
        for op in ops:
            workspace.RunOperatorOnce(op)
        output_values = [workspace.FetchBlob(name) for name in node.output]
        return namedtupledict('Outputs', node.output)(*output_values)
def __init__(self, model, device, max_batch_size=32,
             max_workspace_size=None, serialize_engine=True, **kwargs):
    if not isinstance(device, Device):
        device = Device(device)
    self._set_device(device)
    self._logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.WARNING)
    self.builder = trt.infer.create_infer_builder(self._logger)
    self.network = self.builder.create_network()
    self.parser = parser.create_parser(self.network, self._logger)

    if not isinstance(model, six.string_types):
        model_str = model.SerializeToString()
    else:
        model_str = model

    if not self.parser.parse(model_str):
        error = self.parser.get_error(0)
        msg = "While parsing node number %i:\n" % error.node()
        msg += ("%s:%i In function %s:\n[%i] %s" %
                (error.file(), error.line(), error.func(), error.code(),
                 error.desc()))
        raise RuntimeError(msg)

    if max_workspace_size is None:
        max_workspace_size = 1 << 28

    self.builder.set_max_batch_size(max_batch_size)
    self.builder.set_max_workspace_size(max_workspace_size)

    trt_engine = self.builder.build_cuda_engine(self.network)
    if trt_engine is None:
        raise RuntimeError("Failed to build TensorRT engine from network")
    if serialize_engine:
        trt_engine = self._serialize_deserialize(trt_engine)
    self.engine = Engine(trt_engine)

    self._output_shapes = {}
    for output in model.graph.output:
        dims = output.type.tensor_type.shape.dim
        output_shape = tuple([dim.dim_value for dim in dims])
        self._output_shapes[output.name] = output_shape
def __init__(self, model, device, **kwargs):
    """Create a ``BackendRep``.

    Parameters
    ----------
    model : str
        The path of onnx model file.
    device : onnx.Device
        The executing device.

    """
    if not isinstance(device, Device):
        device = Device(device)
    graph_str = workspace.get_workspace().PrepareONNXModel(model)
    graph_def = dragon_pb2.GraphDef()
    graph_def.ParseFromString(graph_str)
    if device.type == DeviceType.CPU:
        device_type, device_index = 'cpu', 0
    elif device.type == DeviceType.CUDA:
        device_type, device_index = 'cuda', device.device_id
    else:
        raise ValueError('Unsupported device type: ' + str(device.type))
    with context.device(device_type, device_index):
        self._function = function_lib.Function(name='ONNXGraph') \
                                     .import_from(graph_def)
    self._input_dict = collections.OrderedDict([
        (impl.name, EagerTensor(
            impl=impl,
            device=device_spec.DeviceSpec(device_type, device_index)))
        for impl in self._function.inputs])
    self._output_dict = collections.OrderedDict([
        (impl.name, EagerTensor(
            impl=impl,
            device=device_spec.DeviceSpec(device_type, device_index)))
        for impl in self._function.outputs])
def prepare(cls, model, device='CPU', **kwargs):
    '''
    For Onnx Caffe2Backend, we require that init_graph doesn't initialize the
    actual input of the predict_graph. For example, if "img" is the input blob
    for the predict_net, we require that "img" is not initialized in init_graph
    or in the initializer of the predict_graph. We don't have a check for this,
    since there is no way we can know which blob is the input of the
    predict_graph.
    '''
    super(Caffe2Backend, cls).prepare(model, device, **kwargs)

    init_net, predict_net = cls.onnx_graph_to_caffe2_net(model.graph)
    predict_net.device_option.CopyFrom(get_device_option(Device(device)))

    initialized = {init.name for init in model.graph.initializer}
    uninitialized = [x for x in predict_net.external_input
                     if x not in initialized]

    ws = Workspace()
    with ws, core.DeviceScope(predict_net.device_option):
        workspace.RunNetOnce(init_net)

    return Caffe2Rep(predict_net, ws, uninitialized)
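# A minimal usage sketch (hypothetical): the standard onnx backend flow built
# on the `prepare` above. The model file and input shape are made up.
import numpy as np
import onnx

model = onnx.load('model.onnx')
rep = Caffe2Backend.prepare(model, device='CPU')
outputs = rep.run([np.random.randn(1, 3, 224, 224).astype(np.float32)])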
def run_node(cls, node, inputs, device='CPU', outputs_info=None, **kwargs):
    """Run an ONNX node.

    :param node: ONNX NodeProto object.
    :param inputs: Inputs.
    :param device: Device to run on.
    :param outputs_info: None.
    :param kwargs: Other args.
    :return: Outputs.
    """
    super(TensorflowBackend, cls).run_node(node, inputs, device)
    node_graph = tf.Graph()
    with node_graph.as_default():
        node = OnnxNode(node)
        device_option = get_device_option(Device(device))
        if isinstance(inputs, dict):
            feed_dict_raw = inputs
        else:
            assert len(node.inputs) == len(inputs)
            feed_dict_raw = dict(zip(node.inputs, inputs))
        # TODO: is constant the best way for feeding inputs?
        input_dict = dict([(x[0], tf.constant(x[1]))
                           for x in feed_dict_raw.items()])
        ops = cls._onnx_node_to_tensorflow_op(node, input_dict)
        with tf.compat.v1.Session() as sess:
            with tf.device(device_option):
                sess.run(tf.compat.v1.global_variables_initializer())
                output_vals = sess.run(ops)
    return namedtupledict('Outputs', node.outputs)(*output_vals)
def supports_device(cls, device):
    # type: (Text) -> bool
    d = Device(device)
    return d.type == DeviceType.CPU
def supports_device(cls, device):
    d = Device(device)
    return d.type == DeviceType.CPU
def prepare(cls, model, device='CPU', raw_values_dict=None, **kwargs):
    '''
    For Onnx Caffe2Backend, we require that init_graph doesn't initialize the
    actual input of the predict_graph. For example, if "img" is the input blob
    for the predict_net, we require that "img" is not initialized in init_graph
    or in the initializer of the predict_graph. We don't have a check for this,
    since there is no way we can know which blob is the input of the
    predict_graph.
    '''
    if not kwargs.pop('no_check_UNSAFE', False):
        super(Caffe2Backend, cls).prepare(model, device, **kwargs)
    opset_version = None
    for imp in model.opset_import:
        if not imp.HasField("domain") or imp.domain == "":
            opset_version = imp.version
            if imp.version > cls._known_opset_version:
                warnings.warn(
                    "This version of onnx-caffe2 targets ONNX operator set "
                    "version {}, but the model we are trying to import uses "
                    "version {}. We will try to import it anyway, but if the "
                    "model uses operators which had BC-breaking changes in the "
                    "intervening versions, import will fail.".format(
                        cls._known_opset_version, imp.version))
        else:
            warnings.warn("Unrecognized operator set {}".format(imp.domain))
    if opset_version is None:
        if model.ir_version >= 0x00000003:
            raise RuntimeError(
                "Model with IR version >= 3 did not specify ONNX operator set "
                "version (onnx-caffe2 requires it)")
        else:
            opset_version = 1

    model = onnx.shape_inference.infer_shapes(model)

    # Check whether we have RNN related ops
    pred_model = cls.optimize_onnx(model, predict=True)
    rnn_nodes = []
    for node in pred_model.graph.node:
        if node.op_type in {'LSTM', 'GRU', 'RNN'}:
            rnn_nodes.append(node)

    # Build the C++ backend
    # TODO: build a predictor that supports GPU
    # And for RNN nets, we need to avoid adding init_net
    use_cpp_backend = device == 'CPU' and not rnn_nodes
    # use python backend for now
    use_cpp_backend = False
    if use_cpp_backend:
        c2_rnn_ops = []
        if rnn_nodes:
            init_model = cls.optimize_onnx(model, init=True)
            for node in rnn_nodes:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
                init_ops = [x.SerializeToString() for x in c2ops.init_ops]
                ops = [x.SerializeToString() for x in c2ops.ops]
                external_inputs = c2ops.interface_blobs
                c2_rnn_ops.append(C.Caffe2Ops(init_ops, ops, external_inputs))
            del init_model

        cbackend = C.Caffe2Backend(cls._dummy_name)
        if raw_values_dict:
            cls._external_value_resolution_pass(model, raw_values_dict)
        rep = cbackend.prepare(model.SerializeToString(), device, c2_rnn_ops)
        # For testing
        # Dump the net descriptions to file for comparison with the Python ones
        if "ONNX_CAFFE2_DEBUG" in os.environ:
            pred_net_str = rep.pred_net()
            pn = caffe2_pb2.NetDef()
            pn.ParseFromString(pred_net_str)
            init_net_str = rep.init_net()
            inn = caffe2_pb2.NetDef()
            inn.ParseFromString(init_net_str)
            with open("cpp.txt", "w") as f:
                f.write("pred_net: \n{}".format(pn))
        rep_wrapper = Caffe2CppRep(rep)
        return rep_wrapper
    else:
        ws = Workspace()
        device_option = get_device_option(Device(device))

        init_net, predict_net = cls._onnx_model_to_caffe2_net(
            model, device, opset_version, False)

        if raw_values_dict:
            cls._external_value_resolution_pass(model, raw_values_dict)

        # Directly load initializer data into blobs in workspace
        cls._direct_initialize_parameters(
            model.graph.initializer,
            ws,
            device_option,
        )

        initialized = {init.name for init in model.graph.initializer}

        cls._direct_initialize_inputs(
            model.graph.input,
            initialized,
            ws,
            device_option,
        )

        uninitialized = [
            value_info.name for value_info in model.graph.input
            if value_info.name not in initialized
        ]

        if "ONNX_CAFFE2_DEBUG" in os.environ:
            with open("python.txt", "w") as f:
                f.write("pred_net: \n{}".format(predict_net))
        retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
        return retval
def prepare(cls, model, device='CPU', raw_values_dict=None, **kwargs):
    '''
    For Onnx Caffe2Backend, we require that init_graph doesn't initialize the
    actual input of the predict_graph. For example, if "img" is the input blob
    for the predict_net, we require that "img" is not initialized in init_graph
    or in the initializer of the predict_graph. We don't have a check for this,
    since there is no way we can know which blob is the input of the
    predict_graph.
    '''
    if not kwargs.pop('no_check_UNSAFE', False):
        super(Caffe2Backend, cls).prepare(model, device, **kwargs)
    opset_version = None
    for imp in model.opset_import:
        if not imp.HasField("domain") or imp.domain == "":
            opset_version = imp.version
            if imp.version > cls._known_opset_version:
                warnings.warn(
                    "This version of onnx-caffe2 targets ONNX operator set "
                    "version {}, but the model we are trying to import uses "
                    "version {}. We will try to import it anyway, but if the "
                    "model uses operators which had BC-breaking changes in the "
                    "intervening versions, import will fail.".format(
                        cls._known_opset_version, imp.version))
        else:
            warnings.warn("Unrecognized operator set {}".format(imp.domain))
    if opset_version is None:
        if model.ir_version >= 0x00000003:
            raise RuntimeError(
                "Model with IR version >= 3 did not specify ONNX operator set "
                "version (onnx-caffe2 requires it)")
        else:
            opset_version = 1

    # Prior to the onnx version update to onnx-1.8.0, errors caused by failures
    # in the onnx shape inference call were being suppressed. Hence a
    # try-except block is added around the infer_shapes call to avoid these
    # failures and preserve the old behavior.
    try:
        model = onnx.shape_inference.infer_shapes(model)
    except RuntimeError:
        warnings.warn(
            "ShapeInferenceWarning: Inferred shape and existing shape differ in rank"
        )

    ws = Workspace()
    device_option = get_device_option(Device(device))

    init_net, predict_net = cls._onnx_model_to_caffe2_net(
        model, device, opset_version, False)

    if raw_values_dict:
        cls._external_value_resolution_pass(model, raw_values_dict)

    # Directly load initializer data into blobs in workspace
    cls._direct_initialize_parameters(
        model.graph.initializer,
        ws,
        device_option,
    )

    initialized = {init.name for init in model.graph.initializer}

    cls._direct_initialize_inputs(
        model.graph.input,
        initialized,
        ws,
        device_option,
    )

    uninitialized = [
        value_info.name for value_info in model.graph.input
        if value_info.name not in initialized
    ]

    retval = Caffe2Rep(init_net, predict_net, ws, uninitialized)
    return retval
def __init__(self, model, device, max_batch_size=32, max_workspace_size=None,
             serialize_engine=False, verbose=False, **kwargs):
    if not isinstance(device, Device):
        device = Device(device)
    self._set_device(device)
    self._logger = TRT_LOGGER
    self.builder = trt.Builder(self._logger)
    self.network = self.builder.create_network(
        flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    self.parser = trt.OnnxParser(self.network, self._logger)
    self.shape_tensor_inputs = []
    self.serialize_engine = serialize_engine
    self.verbose = verbose

    if self.verbose:
        print(f'\nRunning {model.graph.name}...')

    if not isinstance(model, six.string_types):
        model_str = model.SerializeToString()
    else:
        model_str = model

    if not trt.init_libnvinfer_plugins(TRT_LOGGER, ""):
        msg = "Failed to initialize TensorRT's plugin library."
        raise RuntimeError(msg)

    if not self.parser.parse(model_str):
        error = self.parser.get_error(0)
        msg = "While parsing node number %i:\n" % error.node()
        msg += ("%s:%i In function %s:\n[%i] %s" %
                (error.file(), error.line(), error.func(), error.code(),
                 error.desc()))
        raise RuntimeError(msg)

    if max_workspace_size is None:
        max_workspace_size = 1 << 28

    self.builder.max_batch_size = max_batch_size
    self.builder.max_workspace_size = max_workspace_size

    num_inputs = self.network.num_inputs
    for idx in range(num_inputs):
        inp_tensor = self.network.get_input(idx)
        if inp_tensor.is_shape_tensor:
            self.shape_tensor_inputs.append((inp_tensor.name, idx))
            if self.verbose:
                print(f"\nInput '{inp_tensor.name}' at index {idx} is a shape tensor")

    if self.verbose:
        for layer in self.network:
            print(layer)
        print(f'Output shape: {self.network[-1].get_output(0).shape}')

    if len(self.shape_tensor_inputs) == 0:
        self._build_engine()
    else:
        if self.verbose:
            print("Deferring engine build to run stage")

    self._output_shapes = {}
    self._output_dtype = {}
    for output in model.graph.output:
        dims = output.type.tensor_type.shape.dim
        output_shape = tuple([dim.dim_value for dim in dims])
        self._output_shapes[output.name] = output_shape
        self._output_dtype[output.name] = output.type.tensor_type.elem_type
def supports_device(cls, device_str):
    device = Device(device_str)
    return device.type == DeviceType.CUDA
def __init__(self, model, device,
             path_to_trt='./pretrained/firenet_float32_batch1.engine',
             max_workspace_size=None, serialize_engine=False, verbose=False,
             **kwargs):
    if not isinstance(device, Device):
        device = Device(device)
    self._set_device(device)
    self._logger = TRT_LOGGER
    self.builder = trt.Builder(self._logger)
    self.config = self.builder.create_builder_config()
    self.network = self.builder.create_network(
        flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    self.parser = trt.OnnxParser(self.network, self._logger)
    self.shape_tensor_inputs = []
    self.serialize_engine = serialize_engine
    self.verbose = verbose
    self.dynamic = False
    self.path_to_trt = path_to_trt

    if self.verbose:
        print(f'\nRunning {model.graph.name}...')
        TRT_LOGGER.min_severity = trt.Logger.VERBOSE

    if not isinstance(model, six.string_types):
        model_str = model.SerializeToString()
    else:
        model_str = model

    if not trt.init_libnvinfer_plugins(TRT_LOGGER, ""):
        msg = "Failed to initialize TensorRT's plugin library."
        raise RuntimeError(msg)

    if not self.parser.parse(model_str):
        error = self.parser.get_error(0)
        msg = "While parsing node number %i:\n" % error.node()
        msg += ("%s:%i In function %s:\n[%i] %s" %
                (error.file(), error.line(), error.func(), error.code(),
                 error.desc()))
        raise RuntimeError(msg)

    if max_workspace_size is None:
        max_workspace_size = 1 << 28

    self.config.max_workspace_size = max_workspace_size

    num_inputs = self.network.num_inputs
    for idx in range(num_inputs):
        inp_tensor = self.network.get_input(idx)
        if inp_tensor.is_shape_tensor or -1 in inp_tensor.shape:
            self.dynamic = True
            break

    if self.verbose:
        for layer in self.network:
            print(layer)
        print(f'Output shape: {self.network[-1].get_output(0).shape}')

    if self.dynamic:
        if self.verbose:
            print("Found dynamic inputs! Deferring engine build to run stage")
    else:
        # self._build_engine()
        self._load_engine()

    self._output_shapes = {}
    self._output_dtype = {}
    for output in model.graph.output:
        dims = output.type.tensor_type.shape.dim
        output_shape = tuple([dim.dim_value for dim in dims])
        self._output_shapes[output.name] = output_shape
        self._output_dtype[output.name] = output.type.tensor_type.elem_type
def __init__(
    self,
    model,
    device,
    max_batch_size=32,
    max_workspace_size=None,
    optimization_profiles=None,
    serialize_engine=False,
):
    """Create a ``BackendRep``.

    Parameters
    ----------
    model : onnx.ModelProto
        The onnx model.
    device : onnx.Device
        The executing device.
    max_batch_size : int, optional, default=32
        The max batch size.
    max_workspace_size : int, optional
        The max workspace size in bytes.
    optimization_profiles : List[Dict], optional
        The optimization profiles.
    serialize_engine : bool, optional, default=False
        Whether to serialize engine into a file.

    """
    if not isinstance(device, Device):
        device = Device(device)
    self._set_device(device)
    self._logger = TRT_LOGGER
    self._builder = trt.Builder(self._logger)
    self._builder_config = self._builder.create_builder_config()
    self._network = self._builder.create_network(
        flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    self._parser = trt.OnnxParser(self._network, self._logger)
    if not isinstance(model, six.string_types):
        model_str = model.SerializeToString()
    else:
        model_str = model
    if not trt.init_libnvinfer_plugins(TRT_LOGGER, ''):
        msg = "Failed to initialize TensorRT's plugin library."
        raise RuntimeError(msg)
    if not self._parser.parse(model_str):
        error = self._parser.get_error(0)
        msg = "While parsing node #%i:\n" % error.node()
        msg += ("%s:%i In function %s:\n[%i] %s" %
                (error.file(), error.line(), error.func(), error.code(),
                 error.desc()))
        raise RuntimeError(msg)
    if max_workspace_size is None:
        max_workspace_size = 1 << 28
    # Setup the builder.
    self._builder.max_batch_size = max_batch_size
    self._builder.max_workspace_size = max_workspace_size
    self._add_optimization_profiles(optimization_profiles)
    # Build and wrap the cuda engine.
    if optimization_profiles is None:
        cuda_engine = self._builder.build_cuda_engine(self._network)
    else:
        cuda_engine = self._builder.build_engine(self._network,
                                                 self._builder_config)
    if cuda_engine is None:
        raise RuntimeError("Failed to build TensorRT engine from network.")
    if serialize_engine:
        cuda_engine = self._serialize_deserialize(cuda_engine)
    self._engine = engine.Engine(cuda_engine, device.device_id)
    self._output_shapes = {}
    self._output_dtypes = {}
    for output in model.graph.output:
        dims = output.type.tensor_type.shape.dim
        output_shape = tuple([dim.dim_value for dim in dims])
        self._output_shapes[output.name] = output_shape
        self._output_dtypes[output.name] = output.type.tensor_type.elem_type
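# A minimal usage sketch (hypothetical): constructing a TensorRT-backed rep
# like the one above and running it through the standard BackendRep interface.
# The class name `TensorRTBackendRep`, model path, and input shape are
# assumptions; a CUDA device must be available.
import numpy as np
import onnx

model = onnx.load('model.onnx')
rep = TensorRTBackendRep(model, 'CUDA:0', max_batch_size=1)
outputs = rep.run([np.random.randn(1, 3, 224, 224).astype(np.float32)])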