def add_node(self, graph):
    print("Start adding node (op = %s)" % (self.plugin_name))
    batch_size = graph.inputs[0].shape[0]
    input_h = graph.inputs[0].shape[2]
    input_w = graph.inputs[0].shape[3]
    print("height %d, width %d" % (input_h, input_w))
    print("target plugin: %s" % (self.plugin_name))

    tensors = graph.tensors()
    boxes_tensor = tensors['boxes']
    confs_tensor = tensors['confs']

    num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
    nms_boxes = gs.Variable(name="nms_boxes").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK, 4])
    nms_scores = gs.Variable(name="nms_scores").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK])
    nms_classes = gs.Variable(name="nms_classes").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK])
    outputs = [num_detections, nms_boxes, nms_scores, nms_classes]

    nms_node = gs.Node(
        op=self.plugin_name,
        attrs=self.attrs,
        inputs=[boxes_tensor, confs_tensor],
        outputs=outputs)
    graph.nodes.append(nms_node)
    graph.outputs = outputs

    print("Graph node surgery complete")
    return graph.cleanup().toposort()
def create_and_add_plugin_node(graph, topK, keepTopK):
    batch_size = graph.inputs[0].shape[0]
    input_h = graph.inputs[0].shape[2]
    input_w = graph.inputs[0].shape[3]

    tensors = graph.tensors()
    boxes_tensor = tensors["boxes"]
    confs_tensor = tensors["confs"]

    num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
    nmsed_boxes = gs.Variable(name="nmsed_boxes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK, 4])
    nmsed_scores = gs.Variable(name="nmsed_scores").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    nmsed_classes = gs.Variable(name="nmsed_classes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    new_outputs = [num_detections, nmsed_boxes, nmsed_scores, nmsed_classes]

    nms_node = gs.Node(
        op="BatchedNMS_TRT",
        attrs=create_attrs(input_h, input_w, topK, keepTopK),
        inputs=[boxes_tensor, confs_tensor],
        outputs=new_outputs)
    graph.nodes.append(nms_node)
    graph.outputs = new_outputs

    return graph.cleanup().toposort()
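The create_attrs helper called above is not shown in this section. A plausible sketch, modeled on the BatchedNMS_TRT attribute sets that appear in the other snippets here; the class count and thresholds are placeholder assumptions, not values from the original source:

def create_attrs(input_h, input_w, topK, keepTopK):
    # Mirrors the BatchedNMS_TRT plugin fields used elsewhere in this section.
    return {
        "shareLocation": 1,
        "backgroundLabelId": -1,
        "numClasses": 80,       # assumption: COCO-style class count
        "topK": topK,
        "keepTopK": keepTopK,
        "scoreThreshold": 0.4,  # placeholder
        "iouThreshold": 0.6,    # placeholder
        "isNormalized": 1,
        "clipBoxes": 1,
        "plugin_version": "1",
    }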
def make_multi_input_output():
    DTYPE = np.float32
    SHAPE = (1,)

    X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE)
    Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE)

    graph = gs.Graph(inputs=[X0, Y0])

    X1 = graph.identity(X0)
    Y1 = graph.identity(Y0)

    Z0 = graph.add(X1, Y1)

    Z1 = graph.identity(Z0)
    Z1.dtype = DTYPE
    Z1.shape = SHAPE

    Z2 = graph.identity(Z0)
    Z2.dtype = DTYPE
    Z2.shape = SHAPE

    graph.outputs = [Z1, Z2]

    # save() is a helper defined elsewhere that exports the graph to ONNX and writes it to disk.
    save(graph, "reducable.onnx")
def modify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
    """Re-implement the Resize (upsample) node."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Find the LeakyRelu node (alternatively, match by name, e.g. "LeakyRelu_2").
    first_add = [node for node in graph.nodes if node.op == "LeakyRelu"][0]
    # Required: clear() removes the links to the old outputs, which leaves the
    # LeakyRelu with no outputs, so a replacement node must be created below.
    first_add.outputs.clear()
    # graph.nodes.remove(first_add)  # would remove the whole node instead

    second_add = [node for node in graph.nodes if node.op == "MaxPool"][0]
    # second_add = [node for node in graph.nodes if node.name == "MaxPool_32"][0]
    # Required: clear() removes the links to the old inputs; a new input is assigned below.
    second_add.inputs.clear()

    # Re-create the LeakyRelu layer.
    attrs = {"alpha": 0.1}
    lrelu = gs.Variable("new_lrelu", np.float32)
    node = gs.Node(op="LeakyRelu", inputs=first_add.inputs, outputs=[lrelu], attrs=attrs)
    graph.nodes.append(node)

    # Re-create the Resize layer (implements the upsample).
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    layer_name = "new_resize"  # must not clash with the original Resize node's name
    scales = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
    scale_name = layer_name + ".scale"
    roi_name = layer_name + ".roi"
    scale = gs.Constant(scale_name, scales)
    roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
    inputs = [lrelu, roi, scale]
    resize = gs.Variable(layer_name, dtype=np.float32)
    node = gs.Node(op="Resize", inputs=inputs, outputs=[resize], attrs=attrs)
    graph.nodes.append(node)

    # Re-wire the next layer's input to the new Resize output.
    second_add.inputs = [resize]

    # Remove unused nodes/tensors, and topologically sort the graph.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % (onnxFile))

    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16" % onnxFile)
def onnx_mul(nodes, layer_name, input_node1, input_node2, output_shape):
    inputs = [input_node1, input_node2]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Mul", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def onnx_upsample(nodes, layer_name, input_node, output_shape=None, resize_scale_factors=2):
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    # layer_name must not clash with the original Resize node's name
    scales = np.array([1.0, 1.0, resize_scale_factors, resize_scale_factors]).astype(np.float32)
    scale_name = layer_name + ".scale"
    roi_name = layer_name + ".roi"
    scale = gs.Constant(scale_name, scales)
    roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
    inputs = [input_node, roi, scale]
    output_node = gs.Variable(layer_name, dtype=np.float32, shape=output_shape)
    node = gs.Node(op="Resize", inputs=inputs, outputs=[output_node], attrs=attrs)
    nodes.append(node)
    return output_node
def onnx_slice(nodes, layer_name, input_node, output_shape, start=(0, 0, 0, 0), shape=(2, 2, 3, 3), stride=(1, 1, 1, 1)):
    """ONNX Slice; note that `shape` holds the slice end indices (ONNX `ends`).

    The equivalent of:
        x = torch.randn([8, 8])
        x[:, 2:4]
    is:
        onnx_slice(nodes, "slice", x, (8, 2), start=(0, 2), shape=(8, 4), stride=(1, 1))
    """
    inputs = [input_node]
    inputs.extend([
        gs.Constant(layer_name + '_constant_start', np.asarray(start, np.int32)),
        gs.Constant(layer_name + '_constant_shape', np.asarray(shape, np.int32)),
        gs.Constant(layer_name + '_constant_axis', np.arange(0, len(start)).astype(np.int32)),
        gs.Constant(layer_name + '_constant_stride', np.asarray(stride, np.int32)),
    ])
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Slice", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def add_model(cls, input_file="model.onnx", output_file="add.onnx"):
    """Add a node: insert a LeakyRelu node in front of the Sigmoid."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Find the last Sigmoid node.
    first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
    # Alternative ways to select its inputs, e.g. keep only the one named "fc",
    # or drop the input named "b":
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name != "b"]

    # 2. Insert a LeakyRelu that consumes the Sigmoid's current inputs.
    lrelu = gs.Variable('new_lrelu', dtype=np.float32)
    graph.nodes.append(
        gs.Node(op="LeakyRelu", inputs=first_add.inputs, outputs=[lrelu], attrs={"alpha": 0.02}))

    # The Sigmoid's input now becomes the LeakyRelu output.
    first_add.inputs.clear()
    first_add.inputs = [lrelu]

    # 5. Remove unused nodes/tensors, and topologically sort the graph.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def get_tensor(name):
    if name not in tensor_map:
        G_LOGGER.verbose(
            "Tensor: {:} does not exist in the model. Creating a new tensor".format(name))
        tensor_map[name] = gs.Variable(name)
    return tensor_map[name]
def onnx_exp(nodes, layer_name, input_node, output_shape):
    inputs = [input_node]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Exp", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def onnx_reshape(nodes, layer_name, input_node, output_shape, value):
    inputs = [input_node]
    inputs.append(gs.Constant(layer_name + '_constant', np.asarray(value, np.int64)))
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Reshape", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def run(self, args):
    _, graph = super().import_graph(args)
    TENSOR_MAP = graph.tensors()

    def get_tensor(name):
        if name not in TENSOR_MAP:
            G_LOGGER.critical("Tensor: {:} does not exist in the model.".format(name))
        return TENSOR_MAP[name]

    # We populate outputs first because we may need to update output nodes from the
    # input tensors if output == input.
    output_tensors = []
    for name in args.outputs:
        if name in args.inputs:
            tensor = gs.Variable(name="{:}_polygraphy_surgeon_insert_output".format(name))
            # Bind outputs to outputs of original inputs.
            # This construct is required to preserve ordering of the input tensors in the output nodes.
            for out in get_tensor(name).outputs:
                for index, inp in enumerate(out.inputs):
                    if inp.name == name:
                        out.inputs[index] = tensor
            G_LOGGER.verbose("Generating new tensor for output: {:}".format(tensor))
        else:
            tensor = get_tensor(name)
        tensor.inputs.clear()
        output_tensors.append(tensor)
        if not tensor.outputs:
            for index, out in enumerate(graph.outputs):
                if out.name == name:
                    graph.outputs[index] = tensor

    input_tensors = []
    for name in args.inputs:
        tensor = get_tensor(name)
        tensor.outputs.clear()
        input_tensors.append(tensor)

    new_node = gs.Node(op=args.op, name=args.name, inputs=input_tensors, outputs=output_tensors)
    G_LOGGER.verbose("Generated new node: {:}".format(new_node))
    graph.nodes.append(new_node)

    # Since new graph outputs may be added, and we don't know the types, we skip type checks in ONNX-GraphSurgeon.
    super().export_graph(graph, args, do_type_check=False)
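This run method backs the `polygraphy surgeon insert` subtool. A hedged invocation sketch in the os.system style used elsewhere in this section; the model path, tensor names, and op are illustrative, and the exact flags should be confirmed against `polygraphy surgeon insert -h`:

os.system("polygraphy surgeon insert model.onnx --inputs x --outputs y --op CustomOp -o inserted.onnx")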
def onnx_concat(nodes, layer_name, input_node=[], output_shape=(), axis=1):
    attrs = {"axis": axis}
    inputs = input_node
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Concat", inputs=inputs, outputs=[output_node], attrs=attrs)
    nodes.append(node)
    return output_node
def create_and_add_plugin_node(graph, args):
    batch_size = graph.inputs[0].shape[0]

    tensors = graph.tensors()
    boxes_tensor = tensors["boxes"]
    confs_tensor = tensors["confs"]
    keepTopK = int(args.keepTopK)

    num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
    nmsed_boxes = gs.Variable(name="nmsed_boxes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK, 4])
    nmsed_scores = gs.Variable(name="nmsed_scores").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    nmsed_classes = gs.Variable(name="nmsed_classes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    new_outputs = [num_detections, nmsed_boxes, nmsed_scores, nmsed_classes]

    nms_node = gs.Node(
        op="BatchedNMSDynamic_TRT",
        attrs={
            "shareLocation": 1,
            "backgroundLabelId": -1,
            "numClasses": int(args.nbCls),
            "topK": int(args.topK),
            "keepTopK": keepTopK,
            "scoreThreshold": float(args.score),
            "iouThreshold": float(args.iou),
            "isNormalized": 1,
            "clipBoxes": 1,
            "plugin_version": "1",
        },
        inputs=[boxes_tensor, confs_tensor],
        outputs=new_outputs,
    )
    graph.nodes.append(nms_node)
    graph.outputs = new_outputs

    return graph.cleanup().toposort()
def append_nms(graph, num_classes, scoreThreshold, iouThreshold, keepTopK):
    out_tensors = graph.outputs
    bs = out_tensors[0].shape[0]

    nms_attrs = {
        'shareLocation': True,
        'backgroundLabelId': -1,
        'numClasses': num_classes,
        'topK': 1024,
        'keepTopK': keepTopK,
        'scoreThreshold': scoreThreshold,
        'iouThreshold': iouThreshold,
        'isNormalized': True,
        'clipBoxes': True
    }

    nms_num_detections = gs.Variable(name="nms_num_detections", dtype=np.int32, shape=(bs, 1))
    nms_boxes = gs.Variable(name="nms_boxes", dtype=np.float32, shape=(bs, keepTopK, 4))
    nms_scores = gs.Variable(name="nms_scores", dtype=np.float32, shape=(bs, keepTopK))
    nms_classes = gs.Variable(name="nms_classes", dtype=np.float32, shape=(bs, keepTopK))

    nms = gs.Node(
        op="BatchedNMSDynamic_TRT",
        attrs=nms_attrs,
        inputs=out_tensors,
        outputs=[nms_num_detections, nms_boxes, nms_scores, nms_classes])
    graph.nodes.append(nms)
    graph.outputs = [nms_num_detections, nms_boxes, nms_scores, nms_classes]
    return graph
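For context, a usage sketch for append_nms; the file names, class count, and thresholds here are illustrative assumptions, not from the original source:

graph = gs.import_onnx(onnx.load("detector.onnx"))  # hypothetical input model
graph = append_nms(graph, num_classes=80, scoreThreshold=0.25, iouThreshold=0.5, keepTopK=100)
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "detector_nms.onnx")  # hypothetical output path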
def test_with_nested_graph(self):
    cond = gs.Variable("cond", dtype=np.bool_, shape=(1,))
    X = gs.Variable("X", dtype=np.float32, shape=(1,))
    Y = gs.Constant("Y", values=np.ones((1,), dtype=np.float32))

    graph = Graph(inputs=[X, cond])

    then_graph = Graph(name="Then")
    then_graph.outputs = [then_graph.add(Y, Y)]

    else_graph = Graph(name="Else")
    else_graph.outputs = [else_graph.add(X, else_graph.add(Y, Y))]

    graph.outputs = [graph.if_op(cond, then_graph, else_graph)]

    graph.fold_constants()
    graph.cleanup()

    assert len(then_graph.nodes) == 0
    assert np.all(then_graph.outputs[0].values == (Y.values * 2))

    assert len(else_graph.nodes) == 1
    assert isinstance(else_graph.nodes[0].inputs[1], Constant)
    assert np.all(else_graph.nodes[0].inputs[1].values == (Y.values * 2))
def onnx_transpose(nodes, layer_name, input_node, output_shape, perm=[0, 1, 2, 3]):
    attrs = {"perm": perm}
    inputs = [input_node]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Transpose", inputs=inputs, outputs=[output_node], attrs=attrs)
    nodes.append(node)
    return output_node
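A composition sketch (not from the original source) showing how the onnx_* helpers above chain together: each helper appends its node to a shared list and returns the output tensor, so a graph can be assembled functionally. The input shape, intermediate shapes, function name, and output file name are illustrative assumptions:

def build_demo_graph(output_file="helpers_demo.onnx"):
    nodes = []
    x = gs.Variable("x", np.float32, (1, 3, 8, 8))
    # Upsample 2x, then compute exp(up) and up * exp(up), then concat on channels.
    up = onnx_upsample(nodes, "up", x, output_shape=(1, 3, 16, 16), resize_scale_factors=2)
    e = onnx_exp(nodes, "exp", up, (1, 3, 16, 16))
    y = onnx_mul(nodes, "mul", up, e, (1, 3, 16, 16))
    out = onnx_concat(nodes, "concat", [e, y], (1, 6, 16, 16), axis=1)

    graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[out])
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), output_file)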
def test_const_inp_but_non_foldable_nested_graph(self):
    cond = gs.Constant("cond", values=np.array(True))
    X = gs.Variable("X", dtype=np.float32, shape=(1,))

    graph = Graph(inputs=[X])

    then_graph = Graph(name="Then")
    then_graph.outputs = [then_graph.add(X, X)]

    else_graph = Graph(name="Else")
    else_graph.outputs = [else_graph.add(X, else_graph.add(X, X))]

    # Even though if_op looks foldable because it has all constant inputs,
    # it's not, since its subgraphs depend on variables in the outer scope.
    graph.outputs = [graph.if_op(cond, then_graph, else_graph)]

    # This should not raise because the `If` node should be excluded from
    # constant folding.
    graph.fold_constants(error_ok=False).cleanup()

    assert graph.nodes[0].op == "If"
    assert len(then_graph.nodes) == 1
    assert len(else_graph.nodes) == 2
import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Computes outputs = input + ((a + b) + d)

shape = (1, 3)

# Inputs
input = gs.Variable("input", shape=shape, dtype=np.float32)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
c = gs.Variable("c")
d = gs.Constant("d", values=np.ones(shape=shape, dtype=np.float32))
e = gs.Variable("e")

# Outputs
output = gs.Variable("output", shape=shape, dtype=np.float32)

nodes = [
    # c = (a + b)
    gs.Node("Add", inputs=[a, b], outputs=[c]),
    # e = (c + d)
    gs.Node("Add", inputs=[c, d], outputs=[e]),
    # output = input + e
    gs.Node("Add", inputs=[input, e], outputs=[output]),
]

graph = gs.Graph(nodes=nodes, inputs=[input], outputs=[output])
onnx.save(gs.export_onnx(graph), "model.onnx")
from collections import OrderedDict

import numpy as np
import onnx
import onnx_graphsurgeon as gs
import os
import tensorrt as trt

nLoop = 10
nC = 32

onnxFile0 = "model-0.onnx"
onnxFile1 = "model-1.onnx"

tensor0 = gs.Variable(name="tensor-0", dtype=np.float32, shape=['B', 1, 16, 16])

constant32x1 = gs.Constant(
    "constant32x1",
    np.ascontiguousarray(np.random.rand(nC, 1, 3, 3).reshape(nC, 1, 3, 3).astype(np.float32) * 2 - 1))
constant32x32 = gs.Constant(
    "constant32x32",
    np.ascontiguousarray(np.random.rand(nC, nC, 3, 3).reshape(nC, nC, 3, 3).astype(np.float32) * 2 - 1))
constant32 = gs.Constant(
    "constant32",
    np.ascontiguousarray(np.random.rand(nC).astype(np.float32) * 2 - 1))
def update_nms(self, threshold=None, detections=None):
    """
    Updates the graph to replace the NMS op with a BatchedNMS_TRT TensorRT plugin node.
    :param threshold: Override the score threshold attribute. If set to None, use the value in the graph.
    :param detections: Override the max detections attribute. If set to None, use the value in the graph.
    """

    def find_head_concat(name_scope):
        # This will find the concatenation node at the end of either Class Net or Box Net. These concatenation nodes
        # bring together prediction data for each of 5 scales.
        # The concatenated Class Net node will have shape [batch_size, num_anchors, num_classes],
        # and the concatenated Box Net node has the shape [batch_size, num_anchors, 4].
        # These concatenation nodes can be found by searching for all Concat's and checking if the node two
        # steps above in the graph has a name that begins with either "box_net/..." or "class_net/...".
        for node in [node for node in self.graph.nodes if node.op == "Transpose" and name_scope in node.name]:
            concat = self.graph.find_descendant_by_op(node, "Concat")
            assert concat and len(concat.inputs) == 5
            log.info("Found {} node '{}' as the tip of {}".format(concat.op, concat.name, name_scope))
            return concat

    def extract_anchors_tensor(split):
        # This will find the anchors that have been hardcoded somewhere within the ONNX graph.
        # The function will return a gs.Constant that can be directly used as an input to the NMS plugin.
        # The anchor tensor shape will be [1, num_anchors, 4]. Note that '1' is kept as first dim, regardless of
        # batch size, as it's not necessary to replicate the anchors for all images in the batch.
        # The anchors are available (one per coordinate) hardcoded as constants within certain box decoder nodes.
        # Each of these four constants has shape [1, num_anchors], so some numpy operations are used to expand the
        # dims and concatenate them as needed.
        # These constants can be found by starting from the Box Net's split operation, and for each coordinate,
        # walking down in the graph until either an Add or Mul node is found. The second input of this node will
        # be the anchor data required.
        def get_anchor_np(output_idx, op):
            node = self.graph.find_descendant_by_op(split.o(0, output_idx), op)
            assert node
            val = np.squeeze(node.inputs[1].values)
            return np.expand_dims(val.flatten(), axis=(0, 2))

        anchors_y = get_anchor_np(0, "Add")
        anchors_x = get_anchor_np(1, "Add")
        anchors_h = get_anchor_np(2, "Mul")
        anchors_w = get_anchor_np(3, "Mul")
        anchors = np.concatenate([anchors_y, anchors_x, anchors_h, anchors_w], axis=2)
        return gs.Constant(name="nms/anchors:0", values=anchors)

    self.infer()

    head_names = []
    if self.api == "AutoML":
        head_names = ["class_net/", "box_net/"]
    if self.api == "TFOD":
        head_names = ["/WeightSharedConvolutionalClassHead/", "/WeightSharedConvolutionalBoxHead/"]

    # There are five nodes at the bottom of the graph that provide important connection points:

    # 1. Find the concat node at the end of the class net (multi-scale class predictor)
    class_net = find_head_concat(head_names[0])
    class_net_tensor = class_net.outputs[0]

    # 2. Find the concat node at the end of the box net (multi-scale localization predictor)
    box_net = find_head_concat(head_names[1])
    box_net_tensor = box_net.outputs[0]

    # 3. Find the split node that separates the box net coordinates and feeds them into the box decoder.
    box_net_split = self.graph.find_descendant_by_op(box_net, "Split")
    assert box_net_split and len(box_net_split.outputs) == 4

    # 4. Find the concat node at the end of the box decoder.
    box_decoder = self.graph.find_descendant_by_op(box_net_split, "Concat")
    assert box_decoder and len(box_decoder.inputs) == 4
    box_decoder_tensor = box_decoder.outputs[0]

    # 5. Find the NMS node.
    nms_node = self.graph.find_node_by_op("NonMaxSuppression")

    # Extract NMS Configuration
    num_detections = int(nms_node.inputs[2].values) if detections is None else detections
    iou_threshold = float(nms_node.inputs[3].values)
    score_threshold = float(nms_node.inputs[4].values) if threshold is None else threshold
    num_classes = class_net.i().inputs[1].values[-1]
    normalized = True if self.api == "TFOD" else False

    # NMS Inputs and Attributes
    # NMS expects these shapes for its input tensors:
    # box_net: [batch_size, number_boxes, 4]
    # class_net: [batch_size, number_boxes, number_classes]
    # anchors: [1, number_boxes, 4] (if used)
    nms_op = None
    nms_attrs = None
    nms_inputs = None

    if not self.legacy_plugins:
        # EfficientNMS TensorRT Plugin
        # Fusing the decoder will always be faster, so this is the default NMS method supported. In this case,
        # three inputs are given to the NMS TensorRT node:
        # - The box predictions (from the Box Net node found above)
        # - The class predictions (from the Class Net node found above)
        # - The default anchor coordinates (from the extracted anchor constants)
        # As the original tensors from EfficientDet will be used, the NMS code type is set to 1 (Center+Size),
        # because this is the internal box coding format used by the network.
        anchors_tensor = extract_anchors_tensor(box_net_split)
        nms_inputs = [box_net_tensor, class_net_tensor, anchors_tensor]
        nms_op = "EfficientNMS_TRT"
        nms_attrs = {
            'plugin_version': "1",
            'background_class': -1,
            'max_output_boxes': num_detections,
            'score_threshold': max(0.01, score_threshold),  # keep the threshold at or above 0.01 for better efficiency
            'iou_threshold': iou_threshold,
            'score_activation': True,
            'box_coding': 1,
        }
        nms_output_classes_dtype = np.int32
    else:
        # BatchedNMS TensorRT Plugin
        # Alternatively, the ONNX box decoder can be used. This will be slower, as more element-wise and non-fused
        # operations will need to be performed by TensorRT. However, it's easier to implement, so it is shown here
        # for reference. In this case, only two inputs are given to the NMS TensorRT node:
        # - The box predictions (already decoded through the ONNX Box Decoder node)
        # - The class predictions (from the Class Net node found above, which also need to pass through a sigmoid)
        # This time, the box predictions will have the coordinate coding from the ONNX box decoder, which matches
        # what the BatchedNMS plugin uses.

        if self.api == "AutoML":
            # The default boxes tensor has shape [batch_size, number_boxes, 4]. This will insert a "1" dimension
            # in the second axis, to become [batch_size, number_boxes, 1, 4], the shape that BatchedNMS expects.
            box_decoder_tensor = self.graph.unsqueeze("nms/box_net_reshape", box_decoder_tensor, axes=[2])[0]
        if self.api == "TFOD":
            # The default boxes tensor has shape [4, number_boxes]. This will transpose and insert a "1" dimension
            # in the 0 and 2 axes, to become [1, number_boxes, 1, 4], the shape that BatchedNMS expects.
            box_decoder_tensor = self.graph.transpose("nms/box_decoder_transpose", box_decoder_tensor, perm=[1, 0])
            box_decoder_tensor = self.graph.unsqueeze("nms/box_decoder_reshape", box_decoder_tensor, axes=[0, 2])[0]

        # BatchedNMS also expects the classes tensor to be already activated; in the case of EfficientDet, this is
        # done through a Sigmoid op.
        class_net_tensor = self.graph.sigmoid("nms/class_net_sigmoid", class_net_tensor)[0]

        nms_inputs = [box_decoder_tensor, class_net_tensor]
        nms_op = "BatchedNMS_TRT"
        nms_attrs = {
            'plugin_version': "1",
            'shareLocation': True,
            'backgroundLabelId': -1,
            'numClasses': num_classes,
            'topK': 1024,
            'keepTopK': num_detections,
            'scoreThreshold': score_threshold,
            'iouThreshold': iou_threshold,
            'isNormalized': normalized,
            'clipBoxes': False,
            # 'scoreBits': 10,  # Some versions of the plugin may need this parameter. If so, uncomment this line.
        }
        nms_output_classes_dtype = np.float32

    # NMS Outputs
    nms_output_num_detections = gs.Variable(name="num_detections", dtype=np.int32, shape=[self.batch_size, 1])
    nms_output_boxes = gs.Variable(
        name="detection_boxes", dtype=np.float32, shape=[self.batch_size, num_detections, 4])
    nms_output_scores = gs.Variable(
        name="detection_scores", dtype=np.float32, shape=[self.batch_size, num_detections])
    nms_output_classes = gs.Variable(
        name="detection_classes", dtype=nms_output_classes_dtype, shape=[self.batch_size, num_detections])
    nms_outputs = [nms_output_num_detections, nms_output_boxes, nms_output_scores, nms_output_classes]

    # Create the NMS Plugin node with the selected inputs. The outputs of the node will also become the final
    # outputs of the graph.
    self.graph.plugin(
        op=nms_op,
        name="nms/non_maximum_suppression",
        inputs=nms_inputs,
        outputs=nms_outputs,
        attrs=nms_attrs)
    log.info("Created NMS plugin '{}' with attributes: {}".format(nms_op, nms_attrs))

    self.graph.outputs = nms_outputs

    self.infer()
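The update_nms method above relies on helpers such as graph.plugin, graph.sigmoid, and graph.find_descendant_by_op, which are custom extensions registered on gs.Graph in the same @gs.Graph.register() style shown further below, not part of this section. A minimal sketch of what the plugin helper could look like (an assumption, not the original implementation):

@gs.Graph.register()
def plugin(self, op, name, inputs, outputs, attrs):
    # layer() forwards op/name/attrs to the gs.Node constructor, so this simply
    # creates the plugin node and wires it into the graph.
    return self.layer(op=op, name=name, inputs=inputs, outputs=outputs, attrs=attrs)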
from collections import OrderedDict

import numpy as np
import onnx
import onnx_graphsurgeon as gs

tensor0 = gs.Variable(name="tensor0", dtype=np.float32, shape=['B', 3, 64, 64])  # define tensors (variables)
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=['B', 1, 64, 64])
tensor2 = gs.Variable(name="tensor2", dtype=np.float32, shape=None)  # shape and dtype may be left unknown
tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=None)

constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 3, 3, 3], dtype=np.float32))  # define tensors (constants)
constant1 = gs.Constant(name="constant1", values=np.ones(shape=[1], dtype=np.float32))

node0 = gs.Node(name="myConv", op="Conv", inputs=[tensor0, constant0], outputs=[tensor1])  # define a node, using tensors as inputs and outputs
import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Computes Y = x0 + (a * x1 + b)

shape = (1, 3, 224, 224)

# Inputs
x0 = gs.Variable(name="x0", dtype=np.float32, shape=shape)
x1 = gs.Variable(name="x1", dtype=np.float32, shape=shape)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
mul_out = gs.Variable(name="mul_out")
add_out = gs.Variable(name="add_out")

# Outputs
Y = gs.Variable(name="Y", dtype=np.float32, shape=shape)

nodes = [
    # mul_out = a * x1
    gs.Node(op="Mul", inputs=[a, x1], outputs=[mul_out]),
    # add_out = mul_out + b
    gs.Node(op="Add", inputs=[mul_out, b], outputs=[add_out]),
    # Y = x0 + add_out
    gs.Node(op="Add", inputs=[x0, add_out], outputs=[Y]),
]

graph = gs.Graph(nodes=nodes, inputs=[x0, x1], outputs=[Y])
onnx.save(gs.export_onnx(graph), "model.onnx")
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

X = gs.Variable(name="X", dtype=np.float32, shape=(1, 3, 224, 224))
# Since W is a Constant, it will automatically be exported as an initializer
W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))
Y = gs.Variable(name="Y", dtype=np.float32, shape=(1, 5, 222, 222))

node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])

# Note that initializers do not necessarily have to be graph inputs
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
onnx.save(gs.export_onnx(graph), "test_conv.onnx")
nSlice = 0
graph.outputs = []
for node in graph.nodes:
    if node.op == 'Slice' and node.name == 'Slice_74':
        table512x256 = node.inputs[0].values[0]
        for i in range(1, 24, 2):
            factor256x256 = node.o(i).inputs[1].values
            transposeNode = node.o(i).o().o()
            newTable = np.matmul(table512x256, factor256x256).transpose().reshape(1, 4, 64, 512)
            constantData = gs.Constant("wiliConstant-" + str(nSlice), np.ascontiguousarray(newTable))
            sliceV = gs.Variable(transposeNode.outputs[0].name, np.dtype(np.float32), [1, 4, 64, 't4'])
            # wiliConstant0/1/3 are int64 constants defined earlier in the script.
            sliceN = gs.Node(
                "Slice",
                "wiliSliceN-" + str(nSlice),
                inputs=[
                    constantData,     # data
                    wiliConstant0,    # start = 0
                    graph.inputs[0],  # end
                    wiliConstant3,    # axes = 3
                    wiliConstant1,    # step = 1
                ],
                outputs=[sliceV])
            graph.nodes.append(sliceN)
            graph.outputs.append(sliceV)
            nSlice += 1
            transposeNode.outputs = []
@gs.Graph.register()
def max(self, *args):
    return self.layer(op="Max", inputs=args, outputs=["max_out"])[0]


@gs.Graph.register()
def min(self, *args):
    # Registered analogously to max; needed for the graph.min() call below.
    return self.layer(op="Min", inputs=args, outputs=["min_out"])[0]


@gs.Graph.register()
def identity(self, inp):
    return self.layer(op="Identity", inputs=[inp], outputs=["identity_out"])[0]


# Generate the graph
graph = gs.Graph()

graph.inputs = [gs.Variable("input", shape=(4, 4), dtype=np.float32)]

# Clip values to [0, 6]
MIN_VAL = np.array(0, np.float32)
MAX_VAL = np.array(6, np.float32)

# Add identity nodes to make the graph structure a bit more interesting
inp = graph.identity(graph.inputs[0])
max_out = graph.max(graph.min(inp, MAX_VAL), MIN_VAL)
graph.outputs = [
    graph.identity(max_out),
]

# Graph outputs must include dtype information
graph.outputs[0].to_variable(dtype=np.float32, shape=(4, 4))
# Note that the same function can be defined in different ways for different opsets.
# It will only be called if the Graph's opset matches one of the opsets for which the function is registered.
# Hence, for the opset 11 graph used in this example, the following function will never be used.
@gs.Graph.register(opsets=[1])
def relu(self, a):
    raise NotImplementedError("This function has not been implemented!")


##########################################################################################################

# The functions registered above greatly simplify the process of building the graph itself.
graph = gs.Graph(opset=11)

# Generates a graph which computes:
# output = ReLU((A * X^T) + B) (.) C + D
X = gs.Variable(name="X", shape=(64, 64), dtype=np.float32)
graph.inputs = [X]

# axt = (A * X^T)
# Note that we can use NumPy arrays directly (e.g. Tensor A),
# instead of Constants. These will automatically be converted to Constants.
A = np.ones(shape=(64, 64), dtype=np.float32)
axt = graph.gemm(A, X, trans_b=True)

# dense = ReLU(axt + B)
B = np.ones((64, 64), dtype=np.float32) * 0.5
dense = graph.relu(*graph.add(*axt, B))

# output = dense (.) C + D
# If a Tensor instance is provided (e.g. Tensor C), it will not be modified at all.
# If you prefer to set the exact names of tensors in the graph, you should
# construct the tensors manually instead of relying on automatically generated names.
def replace_combinedNMS(graph, top_k=1284, keep_top_k=100, num_classes=3, plugin_version="1"):
    """
    In principle, the values of top_k, keep_top_k, and num_classes should be
    inferable from the graph, but due to limitations of the ONNX parser we are
    currently unable to retrieve them.
    """
    for node in graph.nodes:
        if node.name == "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/CombinedNonMaxSuppression":
            clip_boxes = node.attrs["clip_boxes"]

    tensor_map = graph.tensors()
    model_input_tensor = tensor_map["image_tensor:0"]
    input_boxes = tensor_map["Postprocessor/ExpandDims_1:0"]
    input_scores = tensor_map["Postprocessor/Slice:0"]
    output_boxes = tensor_map["detection_boxes:0"]
    output_scores = tensor_map["detection_scores:0"]
    output_boxes.name = "detection_boxes"
    output_scores.name = "detection_scores"

    batch_size = model_input_tensor.shape[0]
    iou_threshold = tensor_map[
        "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/iou_threshold:0"].values.item()
    score_threshold = tensor_map[
        "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/score_threshold:0"].values.item()

    output_classes = gs.Variable(name="detection_classes_nms:0", dtype=np.float32, shape=(batch_size, keep_top_k))
    output_num_detections = gs.Variable(name="num_detections_nms:0", dtype=np.int32, shape=(batch_size, 1))

    attributes_ordered_dict = {
        "shareLocation": True,
        "backgroundLabelId": -1,
        "numClasses": num_classes,
        "topK": top_k,
        "keepTopK": keep_top_k,
        "scoreThreshold": score_threshold,
        "iouThreshold": iou_threshold,
        "isNormalized": True,
        "clipBoxes": clip_boxes,
        "plugin_version": plugin_version
    }

    for node in graph.nodes:
        if node.name == "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/CombinedNonMaxSuppression":
            node.op = "BatchedNMS_TRT"
            node.attrs = attributes_ordered_dict
            node.inputs = [input_boxes, input_scores]
            node.outputs = [output_num_detections, output_boxes, output_scores, output_classes]

    for node in graph.nodes:
        if node.name == "add":
            for i, input_tensor in enumerate(node.inputs):
                if input_tensor.name == "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/CombinedNonMaxSuppression:2":
                    input_id = i
            node.inputs[input_id] = output_classes
        if node.name == "Postprocessor/Cast_4":
            for i, input_tensor in enumerate(node.inputs):
                if input_tensor.name == "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/CombinedNonMaxSuppression:3":
                    input_id = i
            node.inputs[input_id] = output_num_detections

    graph.nodes[-2].outputs[0].name = "detection_classes"
    graph.nodes[-1].outputs[0].name = "num_detections"
    tensor_map['detection_classes:0'].name = "detection_classes"
    tensor_map["num_detections:0"].name = "num_detections"
    graph.outputs[0].name = "detection_boxes"
    graph.outputs[1].name = "detection_classes"
    graph.outputs[2].name = "detection_scores"
    graph.outputs[3].name = "num_detections"
    return graph
import numpy as np
import onnx
import onnx_graphsurgeon as gs

tensor0 = gs.Variable(name="tensor0", dtype=np.float32, shape=['B', 3, 64, 64])  # three genuinely useful tensors
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=['B', 3, 64, 64])
tensor2 = gs.Variable(name="tensor2", dtype=np.float32, shape=['B', 3, 64, 64])
tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=['B', 3, 64, 64])  # a dummy input tensor
tensor4 = gs.Variable(name="tensor4", dtype=np.float32, shape=['B', 1, 64, 64])  # a dummy output tensor
tensor5 = gs.Variable(name="tensor5", dtype=np.float32, shape=['B', 1, 64, 64])  # two unused tensors
tensor6 = gs.Variable(name="tensor6", dtype=np.float32, shape=['B', 1, 64, 64])
tensor7 = gs.Variable(name="tensor7", dtype=np.float32, shape=None)  # intermediate result tensors
tensor8 = gs.Variable(name="tensor8", dtype=np.float32, shape=None)

constant0 = gs.Constant(name="w", values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))