Example #1
def onnx_upsample(nodes,
                  layer_name,
                  input_node,
                  output_shape=None,
                  resize_scale_factors=2):
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    # NOTE: layer_name must not clash with the original Resize node's name
    scales = np.array([1.0, 1.0, resize_scale_factors,
                       resize_scale_factors]).astype(np.float32)
    scale_name = layer_name + ".scale"
    roi_name = layer_name + ".roi"
    scale = gs.Constant(scale_name, scales)
    roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
    inputs = [input_node, roi, scale]
    output_node = gs.Variable(layer_name, dtype=np.float32, shape=output_shape)
    node = gs.Node(op="Resize",
                   inputs=inputs,
                   outputs=[output_node],
                   attrs=attrs)

    nodes.append(node)

    return output_node
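A minimal usage sketch for the helper above (assumes the usual imports; all names here are illustrative):

import numpy as np
import onnx
import onnx_graphsurgeon as gs

nodes = []
x = gs.Variable("x", dtype=np.float32, shape=(1, 3, 16, 16))
# default scale factor 2 on H and W -> (1, 3, 32, 32)
y = onnx_upsample(nodes, "upsample_1", x, output_shape=(1, 3, 32, 32))
graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[y], opset=13)  # Resize with roi/scales needs opset >= 11
onnx.save(gs.export_onnx(graph), "upsample.onnx")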
Example #2
 def add_fc(self):
     """
     add FC layer
     """
     logging.info("Adding FC layer")
     # fetch some attrs from old fc1000; note MatMul doesn't have bias
     old_fc_op = [_n for _n in self.graph.nodes if _n.name == "fc1000"][0]
     old_fc_kernel = old_fc_op.inputs[1]
     fc_kernel_weights = old_fc_kernel.values[:, 1:]
     # instantiate fc weight
     # NOTE: expects KM weight, if transpose is not set (default not set)
     fc_weight = gs.Constant("fc_replaced_weight", values=fc_kernel_weights)
     # find input to fc to be added
     squeeze_replaced_op = [
         _n for _n in self.graph.nodes if _n.name == "squeeze_replaced"
     ][0]
     squeeze_replaced_out = squeeze_replaced_op.outputs[0]
     # reshape input
     reshape_shape = np.array([-1, fc_kernel_weights.shape[0]],
                              dtype=np.int64)
     fc_reshape_shape = gs.Constant("fc_reshape_shape",
                                    values=reshape_shape)
     # add FC: Reshape=>MatMul
     fc_reshape_out = self.graph.Reshape("fc_reshape_input",
                                         squeeze_replaced_out,
                                         fc_reshape_shape)
     fc_out = self.graph.MatMul("fc_replaced", fc_reshape_out, fc_weight)
Example #3
def onnx_slice(nodes,
               layer_name,
               input_node,
               output_shape,
               start=(0, 0, 0, 0),
               shape=(2, 2, 3, 3),
               stride=(1, 1, 1, 1)):
    """
    x = torch.randn([8,8])
    x[:,2:4]

    onnx_slice(nodes,"slice",x,(0,2),(8,4),(1,1))
    """
    inputs = [input_node]

    inputs.extend([
        gs.Constant(layer_name + '_constant_start',
                    np.asarray(start, np.int32)),
        gs.Constant(layer_name + '_constant_shape',
                    np.asarray(shape, np.int32)),
        gs.Constant(layer_name + '_constant_axis',
                    np.arange(0, len(start)).astype(np.int32)),
        gs.Constant(layer_name + '_constant_stride',
                    np.asarray(stride, np.int32)),
    ])
    name = layer_name
    output_node = gs.Variable(name, np.float32, output_shape)

    node = gs.Node(op="Slice", inputs=inputs, outputs=[output_node])

    nodes.append(node)

    return output_node
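The docstring's example, assembled end to end (a sketch; assumes the same numpy/onnx/onnx_graphsurgeon imports as above):

nodes = []
x = gs.Variable("x", np.float32, (8, 8))
# x[:, 2:4] -> starts=(0, 2), ends=(8, 4), steps=(1, 1); output shape is (8, 2)
y = onnx_slice(nodes, "slice_1", x, (8, 2), start=(0, 2), shape=(8, 4), stride=(1, 1))
graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[y], opset=13)
onnx.save(gs.export_onnx(graph), "slice.onnx")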
Example #4
def fold_upsample_inputs(upsample, graph, opset=11):
    """
    Inplace transformation of the graph. The upsample subgraph is collapsed
    to single upsample node with input and scale factor (constant tensor).
    Args:
        upsample: upsample node in the original graph.
        graph: graph object.
        opset: ONNX opset version of the graph (9, or 11+ where upsample is exported as Resize).
    """

    if opset == 9:
        # Gather the scale factor from mul op in the upsample input subgraph
        scale_factor = upsample.i(1).i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(1).attrs['value'].values

        # Create the new scales tensor
        scales = np.array([1.0, 1.0, scale_factor, scale_factor], dtype=np.float32)
        scale_tensor = gs.Constant(name=upsample.inputs[-1].name, values=scales)

        # Change the last input to the node to the new constant scales tensor.
        upsample.inputs[-1] = scale_tensor
    else:
        # In opset 11, upsample layer is exported as Resize. We will transform this Resize layer into an Upsample layer
        # and collapse the input
        sizes_tensor_name = upsample.inputs[3].name

        # Create the new scales tensor
        scale_factor = upsample.i(3).i(1).i().i().i().i().i(0).i(1).attrs['value'].values
        scales = np.array([1.0, 1.0, scale_factor, scale_factor], dtype=np.float32)
        scale_tensor = gs.Constant(name=sizes_tensor_name, values=scales)

        # Rename the Resize op to upsample and add the data and scales as inputs to the upsample layer.
        input_tensor = upsample.inputs[0]
        upsample.inputs = [input_tensor, scale_tensor]
        upsample.op = 'Upsample'
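A sketch of how such a transform is typically driven (file names are illustrative; the .i() chains above are model-specific, so this only works on the graph they were written for):

graph = gs.import_onnx(onnx.load("model.onnx"))
for node in [n for n in graph.nodes if n.op in ("Upsample", "Resize")]:
    fold_upsample_inputs(node, graph, opset=11)
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model_folded.onnx")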
Example #5
    def modify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
        """Re-implement the graph's Resize node (upsample).
        """
        graph = gs.import_onnx(onnx.load(input_file))

        first_add = [node for node in graph.nodes
                     if node.op == "LeakyRelu"][0]  # find the LeakyRelu node
        # first_add = [node for node in graph.nodes if node.name == "LeakyRelu_2"][0]  # find it by name instead
        # first_add.inputs = [inp for inp in first_add.inputs]  # its inputs
        # first_add.outputs = [inp for inp in first_add.outputs]  # its outputs
        # Required: clear() severs the output links, leaving LeakyRelu without an
        # output, so a new output must be created below.
        first_add.outputs.clear()
        # graph.nodes.remove(first_add)  # would remove the whole node

        second_add = [node for node in graph.nodes if node.op == "MaxPool"][0]
        # second_add = [node for node in graph.nodes if node.name == "MaxPool_32"][0]
        # Required: clear() severs the input links; a new input is assigned below.
        second_add.inputs.clear()

        # Re-create the LeakyRelu layer
        attrs = {"alpha": 0.1}
        lrelu = gs.Variable("new_lrelu", np.float32)
        node = gs.Node(op="LeakyRelu",
                       inputs=first_add.inputs,
                       outputs=[lrelu],
                       attrs=attrs)
        graph.nodes.append(node)

        # Re-create the Resize layer (implements upsample)
        attrs = {
            "coordinate_transformation_mode": 'asymmetric',
            "mode": 'nearest',
            "nearest_mode": 'floor',
        }
        layer_name = "new_resize"  # 不要和原来 的resize节点名重复
        scales = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
        scale_name = layer_name + ".scale"
        roi_name = layer_name + ".roi"
        scale = gs.Constant(scale_name, scales)
        roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
        # inputs =first_add.outputs
        inputs = [lrelu]
        inputs.append(roi)
        inputs.append(scale)
        resize = gs.Variable(layer_name, dtype=np.float32)
        node = gs.Node(op="Resize",
                       inputs=inputs,
                       outputs=[resize],
                       attrs=attrs)
        graph.nodes.append(node)

        # Re-wire the next layer's input
        second_add.inputs = [resize]

        # Remove unused nodes/tensors, and topologically sort the graph
        graph.cleanup().toposort()

        onnx.save(gs.export_onnx(graph), output_file)
Example #6
def modify(input: str, output: str, downsample_ratio: float = 0.25) -> None:
    print(f'\nonnx load: {input}')
    graph = gs.import_onnx(onnx.load(input))

    _print_graph(graph)

    # update node Resize_3: scales
    resize_3 = [n for n in graph.nodes if n.name == 'Resize_3'][0]
    print()
    print(resize_3)

    scales = gs.Constant(
        '388',
        np.asarray([1, 1, downsample_ratio, downsample_ratio],
                   dtype=np.float32))

    resize_3.inputs = [
        i if i.name != '388' else scales for i in resize_3.inputs
    ]
    print()
    print(resize_3)

    # remove input downsample_ratio
    graph.inputs = [i for i in graph.inputs if i.name != 'downsample_ratio']

    # remove node Concat_2
    concat_2 = [n for n in graph.nodes if n.name == 'Concat_2'][0]
    concat_2.outputs.clear()

    # remove unused nodes/tensors
    graph.cleanup()

    onnx.save(gs.export_onnx(graph), output)
Example #7
        def extract_anchors_tensor(split):
            # This will find the anchors that have been hardcoded somewhere within the ONNX graph.
            # The function will return a gs.Constant that can be directly used as an input to the NMS plugin.
            # The anchor tensor shape will be [1, num_anchors, 4]. Note that '1' is kept as first dim, regardless of
            # batch size, as it's not necessary to replicate the anchors for all images in the batch.

            # The anchors are available (one per coordinate) hardcoded as constants within certain box decoder nodes.
            # Each of these four constants have shape [1, num_anchors], so some numpy operations are used to expand the
            # dims and concatenate them as needed.

            # These constants can be found by starting from the Box Net's split operation, and for each coordinate,
            # walking down the graph until either an Add or Mul node is found. The second input of these nodes will
            # be the required anchor data.
            def get_anchor_np(output_idx, op):
                node = self.graph.find_descendant_by_op(
                    split.o(0, output_idx), op)
                assert node
                val = np.squeeze(node.inputs[1].values)
                return np.expand_dims(val.flatten(), axis=(0, 2))

            anchors_y = get_anchor_np(0, "Add")
            anchors_x = get_anchor_np(1, "Add")
            anchors_h = get_anchor_np(2, "Mul")
            anchors_w = get_anchor_np(3, "Mul")
            anchors = np.concatenate(
                [anchors_y, anchors_x, anchors_h, anchors_w], axis=2)
            return gs.Constant(name="nms/anchors:0", values=anchors)
Example #8
def onnx_reshape(nodes, layer_name, input_node, output_shape, value):
    inputs = [input_node]
    inputs.append(
        gs.Constant(layer_name + '_constant', np.asarray(value, np.int64)))
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Reshape", inputs=inputs, outputs=[output_node])

    nodes.append(node)

    return output_node
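A usage sketch (assumes np/onnx/gs imports as in the examples above):

nodes = []
x = gs.Variable("x", np.float32, (1, 3, 4, 4))
# flatten everything after the batch dimension: (1, 3, 4, 4) -> (1, 48)
y = onnx_reshape(nodes, "reshape_1", x, (1, 48), value=(1, -1))
graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[y], opset=13)
onnx.save(gs.export_onnx(graph), "reshape.onnx")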
Example #9
    def change_node(self):
        print("change_node")
        for i in range(1, len(self.graph.nodes[self.idx_node].inputs) - 1):
            del self.graph.nodes[self.idx_node].inputs[i]
        for removed_node in self.graph.nodes[self.idx_node - self.rdd_nodes:self.rdd_nodes]:
            removed_node.outputs.clear()
        pads_folded_tensor = gs.Constant(name=self.graph.nodes[self.idx_node].name,
                                         values=np.array(self.trt_pad_values))
        self.graph.nodes[self.idx_node].inputs[1] = pads_folded_tensor
        # self.graph.nodes[self.idx_node].attrs = self.attrs


        return self.graph
Example #10
def fold_pad_inputs(node, graph):
    # Gather the amount of padding in each dimension from pytorch graph.
    pad_values_pyt = node.i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(0).attrs['value'].values

    # Assumption: a 4d input tensor
    onnx_pad_values = [0] * 4 * 2  # 4d tensor, 2 sides of padding per dimension
    j = 3
    for i in range(0, len(pad_values_pyt), 2):
        onnx_pad_values[j] = pad_values_pyt[i]
        onnx_pad_values[j + 4] = pad_values_pyt[i + 1]
        j -= 1

    # Change the existing pad tensor to the new onnx_pad values tensor
    pads_folded_tensor = gs.Constant(name=node.inputs[1].name, values=np.array(onnx_pad_values))
    node.inputs[1] = pads_folded_tensor
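The loop converts PyTorch's pad order, which lists (begin, end) pairs starting from the last dimension, into ONNX Pad's layout of all begins followed by all ends in dimension order. A quick standalone check of the loop above:

pad_values_pyt = [1, 2, 3, 4]  # PyTorch order: (W_begin, W_end, H_begin, H_end)
onnx_pad_values = [0] * 8
j = 3
for i in range(0, len(pad_values_pyt), 2):
    onnx_pad_values[j] = pad_values_pyt[i]
    onnx_pad_values[j + 4] = pad_values_pyt[i + 1]
    j -= 1
print(onnx_pad_values)  # [0, 0, 3, 1, 0, 0, 4, 2]: NCHW begins, then NCHW ends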
Example #11
from collections import OrderedDict
import os

import numpy as np
import onnx
import onnx_graphsurgeon as gs

nLoop = 10  # assumption: loop count, defined at module level in the original script

def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])

    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx"%(nM,nK,nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % (onnxFile))

    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16"%onnxFile)
Example #12
def make_constant_linear():
    DTYPE = np.float32
    SHAPE = (4, 4)

    graph = gs.Graph()

    X0 = graph.constant(gs.Constant("const", values=np.ones(SHAPE, dtype=DTYPE)))
    # Explicitly clear shape to trigger the failure condition in reduce
    X0.shape = None

    X1 = graph.identity(X0)
    X2 = graph.identity(X1)
    X2.dtype = DTYPE
    X2.shape = SHAPE

    graph.outputs = [X2]

    save(graph, "reducable_with_const.onnx")
Example #13
def fold_pad_inputs(node, graph):
    # Gather the amount of padding in each dimension from the pytorch graph.
    pad_values_pyt = node.i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(0).attrs['value'].values

    # Assumption: a 4d input tensor, padded on 2 sides in each dimension
    onnx_pad_values = [0] * 4 * 2
    j = 3
    # Walk the pytorch pad values in (begin, end) pairs, stride 2
    for i in range(0, len(pad_values_pyt), 2):
        # Assign the pair into the corresponding onnx_pad_values slots
        onnx_pad_values[j] = pad_values_pyt[i]
        onnx_pad_values[j + 4] = pad_values_pyt[i + 1]
        # Move to the next-outer dimension
        j -= 1

    # Replace the existing pad tensor with the new onnx_pad_values tensor
    pads_folded_tensor = gs.Constant(name=node.inputs[1].name, values=np.array(onnx_pad_values))
    node.inputs[1] = pads_folded_tensor
Example #14
 def add_conv(self):
     """
     add Conv layer
     """
     logging.info("Adding Conv layer, instead of FC")
     # fetch some attrs from old fc1000; note MatMul doesn't have bias
     old_fc_op = [_n for _n in self.graph.nodes if _n.name == "fc1000"][0]
     old_fc_kernel = old_fc_op.inputs[1]
     # instantiate fc weight and attrs
     # NOTE: ONNX uses MCkHkW format
     fc_kernel_weights = old_fc_kernel.values.transpose()[1:, :].reshape(
         1000, 2048, 1, 1)
     fc_weight = gs.Constant("fc_replaced_weight", values=fc_kernel_weights)
     attrs = {"kernel_shape": [1, 1]}
     # find input to fc to be added
     squeeze_replaced_op = [
         _n for _n in self.graph.nodes if _n.name == "squeeze_replaced"
     ][0]
     squeeze_replaced_out = squeeze_replaced_op.outputs[0]
     # add FC: Conv
     fc_out = self.graph.Conv("fc_replaced", squeeze_replaced_out,
                              fc_weight, attrs)
Example #15
    def test_const_inp_but_non_foldable_nested_graph(self):
        cond = gs.Constant("cond", values=np.array(True))
        X = gs.Variable("X", dtype=np.float32, shape=(1, ))

        graph = Graph(inputs=[X])

        then_graph = Graph(name="Then")
        then_graph.outputs = [then_graph.add(X, X)]

        else_graph = Graph(name="Else")
        else_graph.outputs = [else_graph.add(X, else_graph.add(X, X))]

        # Even though if_op looks foldable because it has all constant inputs,
        # it's not, since its subgraphs depend on variables in the outer scope.
        graph.outputs = [graph.if_op(cond, then_graph, else_graph)]

        # This should not raise because the `If` node should be excluded from
        # constant folding.
        graph.fold_constants(error_ok=False).cleanup()

        assert graph.nodes[0].op == "If"
        assert len(then_graph.nodes) == 1
        assert len(else_graph.nodes) == 2
Example #16
    def test_with_nested_graph(self):
        cond = gs.Variable("cond", dtype=np.bool, shape=(1, ))

        X = gs.Variable("X", dtype=np.float32, shape=(1, ))
        Y = gs.Constant("Y", values=np.ones((1, ), dtype=np.float32))
        graph = Graph(inputs=[X, cond])

        then_graph = Graph(name="Then")
        then_graph.outputs = [then_graph.add(Y, Y)]

        else_graph = Graph(name="Else")
        else_graph.outputs = [else_graph.add(X, else_graph.add(Y, Y))]

        graph.outputs = [graph.if_op(cond, then_graph, else_graph)]

        graph.fold_constants()
        graph.cleanup()

        assert len(then_graph.nodes) == 0
        assert np.all(then_graph.outputs[0].values == (Y.values * 2))

        assert len(else_graph.nodes) == 1
        assert isinstance(else_graph.nodes[0].inputs[1], Constant)
        assert np.all(else_graph.nodes[0].inputs[1].values == (Y.values * 2))
Example #17
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

X = gs.Variable(name="X", dtype=np.float32, shape=(1, 3, 224, 224))
# Since W is a Constant, it will automatically be exported as an initializer
W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))

Y = gs.Variable(name="Y", dtype=np.float32, shape=(1, 5, 222, 222))

node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])

# Note that initializers do not necessarily have to be graph inputs
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
onnx.save(gs.export_onnx(graph), "test_conv.onnx")
Example #18
        inputTensor.name = 'inputTensor'
        graph.inputs = [inputTensor]
        
        node.inputs[0] = constantData        

        for i in range(1, 24, 2):
            graph.outputs.append(node.o(i).o().o().outputs[0])  # Transpose
        continue

graph.cleanup()
onnx.save(gs.export_onnx(graph), onnxFile0)
'''

graph = gs.import_onnx(onnx.load(onnxFile0))

wiliConstant0 = gs.Constant(
    "wiliConstant0", np.ascontiguousarray(np.array([0], dtype=np.int64)))
wiliConstant1 = gs.Constant(
    "wiliConstant1", np.ascontiguousarray(np.array([1], dtype=np.int64)))
wiliConstant3 = gs.Constant(
    "wiliConstant3", np.ascontiguousarray(np.array([3], dtype=np.int64)))

nSlice = 0
graph.outputs = []
for node in graph.nodes:
    if node.op == 'Slice' and node.name == 'Slice_74':
        table512x256 = node.inputs[0].values[0]
        for i in range(1, 24, 2):
            factor256x256 = node.o(i).inputs[1].values
            tansposeNode = node.o(i).o().o()

            newTable = np.matmul(table512x256,
Example #19
    verbose=True,
    operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH,
    do_constant_folding=False)

import onnx_graphsurgeon as gs
import onnx
import numpy as np

graph = gs.import_onnx(onnx.load(src_onnx))

for node in graph.nodes:
    if node.op == 'Resize':
        # actually not used in this sample
        node_concat = node.i(2, 0)
        node_concat.i(0, 0).attrs['value'] = gs.Constant(
            '',
            np.concatenate((node_concat.i(0, 0).attrs['value'].values,
                            node_concat.i(1, 0).attrs['value'].values)))
        node.inputs[2] = node_concat.inputs[0]
        node_concat.outputs.clear()

    if node.op == 'Clip':
        node_cast0 = node.i(1, 0)
        node_cast1 = node.i(2, 0)
        #change data type to fp32
        node_cast0.i(0, 0).attrs['value'] = gs.Constant(
            '', np.asarray([-1.0], dtype=np.float32))
        node_cast1.i(0, 0).attrs['value'] = gs.Constant(
            '', np.asarray([1.0], dtype=np.float32))
        #skip cast
        node.inputs = [
            node.inputs[0], node_cast0.inputs[0], node_cast1.inputs[0]
Example #20
##########################################################################################################
# The functions registered above greatly simplify the process of building the graph itself.

graph = gs.Graph(opset=11)

# Generates a graph which computes:
# output = ReLU((A * X^T) + B) (.) C + D
X = gs.Variable(name="X", shape=(64, 64), dtype=np.float32)
graph.inputs = [X]

# axt = (A * X^T)
# Note that we can use NumPy arrays directly (e.g. Tensor A),
# instead of Constants. These will automatically be converted to Constants.
A = np.ones(shape=(64, 64), dtype=np.float32)
axt = graph.gemm(A, X, trans_b=True)

# dense = ReLU(axt + B)
B = np.ones((64, 64), dtype=np.float32) * 0.5
dense = graph.relu(*graph.add(*axt, B))

# output = dense (.) C + D
# If a Tensor instance is provided (e.g. Tensor C), it will not be modified at all.
# If you prefer to set the exact names of tensors in the graph, you should
# construct tensors manually instead of passing strings or NumPy arrays.
C = gs.Constant(name="C", values=np.ones(shape=(64, 64), dtype=np.float32))
D = np.ones(shape=(64, 64), dtype=np.float32)
graph.outputs = graph.add(*graph.mul(*dense, C), D)

onnx.save(gs.export_onnx(graph), "model.onnx")
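The graph.gemm / graph.relu / graph.add / graph.mul helpers used above come from onnx-graphsurgeon's Graph.register() mechanism. A sketch of what such a registration looks like (Graph.register and Graph.layer are the library's real API; these helper bodies are illustrative):

@gs.Graph.register()
def add(self, a, b):
    # Graph.layer creates the node, appends it to this graph, and returns the
    # node's output tensors as a list -- which is why the calls above unpack
    # results with `*`.
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out"])

@gs.Graph.register()
def relu(self, a):
    return self.layer(op="Relu", inputs=[a], outputs=["relu_out"])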
Example #21
    def fuse_br2b_br2c(self):
        """
        Match and replace br2b+br2c with the fused plugin.
        This fusion is for Conv-Conv-Add-ReLU
        """
        logging.info("Fusing ops in br2b_br2c path")
        op_names_lists_to_be_fused = [
            [
                "res2b_branch2b", "res2b_branch2b_relu", "res2b_branch2c",
                "res2b", "res2b_relu"
            ],
            [
                "res2c_branch2b", "res2c_branch2b_relu", "res2c_branch2c",
                "res2c", "res2c_relu"
            ],
        ]
        for _idx, op_names_list in enumerate(op_names_lists_to_be_fused):
            # setup plugin info
            plugin_name = "RES2_BR2B_BR2C_{}".format(_idx + 1)

            # prep fusion: constants and attributes
            op_dict = {
                _n.name: _n
                for _n in self.graph.nodes if _n.name in op_names_list
            }
            op_list = [op_dict[_n] for _n in op_names_list]
            assert len(op_names_list) == len(
                op_list), "Need to capture all op objects in op_names_list"

            scale64 = gs.Constant("scale64",
                                  values=np.ones((64), dtype=np.float32))
            scale256 = gs.Constant("scale256",
                                   values=np.ones((256), dtype=np.float32))
            from_shortcut = op_list[3].i(0, 0).outputs[0]\
                if op_list[3].i(1, 0).name == op_names_list[2] else\
                op_list[3].i(1, 0).outputs[0]

            # build array with dynamic ranges required for the fusion plugin
            # NOTE: order matters
            dyn_list = [
                self.dyn_range_map[from_shortcut.name],
                self.dyn_range_map[op_list[0].inputs[0].name],
                self.dyn_range_map[op_list[1].outputs[0].name],
                self.dyn_range_map[op_list[2].outputs[0].name],
                self.dyn_range_map[op_list[4].outputs[0].name],
            ]

            dynamic_ranges = np.array(dyn_list, dtype=np.float32)
            dyn_const = gs.Constant("{}_dynamic_ranges".format(plugin_name),
                                    values=dynamic_ranges)

            # this becomes attributes to ONNX node that fusion plugin uses
            # NOTE: order does not matter
            plugin_field_dict = {
                "c_br2b_w": op_list[0].inputs[1],
                "s_br2b_s": scale64,
                "s_br2b_b": op_list[0].inputs[2],
                "c_br2c_w": op_list[2].inputs[1],
                "s_br2c_s": scale256,
                "s_br2c_b": op_list[2].inputs[2],
                "dynamic_ranges": dyn_const,
            }

            attrs = {
                "plugin_version": "2",
                "plugin_namespace": "",
            }
            attrs.update(plugin_field_dict)

            # get plugin input/output
            plugin_inp = [from_shortcut, op_list[0].inputs[0]]
            plugin_out = [op_list[-1].outputs[0]]

            # replace ops with plugin
            self.graph.RES2PLUGIN("RnRes2Br2bBr2c_TRT", plugin_name,
                                  plugin_inp, plugin_out, attrs)

            # graph cleanup
            self.cleanup_graph()

            # done
            logging.info("Plugin {} successful".format(plugin_name))
Example #22
    def preprocess_onnx(self, model):
        """
        Manipulate original ONNX file with graphSurgeon: insert InstanceNormalization
        3D and PixelShuffle plugin, and export the new ONNX graph.
        """
        graph = gs.import_onnx(model)
        if self.use_instnorm3d_plugin:
            for node in graph.nodes:
                # Replace InstanceNormalization with INSTNORM3D_TRT plugin node
                if node.op == "InstanceNormalization":
                    node.op = "INSTNORM3D_TRT"
                    node.attrs["scales"] = node.inputs[1]
                    node.attrs["bias"] = node.inputs[2]
                    node.attrs["plugin_version"] = "1"
                    node.attrs["plugin_namespace"] = ""
                    node.attrs["relu"] = 0
                    node.attrs["alpha"] = 0.0
                    scales = node.attrs["scales"].values
                    biases = node.attrs["bias"].values
                    assert len(scales) == len(
                        biases
                    ), "Scales and biases do not have the same length!"
                    del node.inputs[2]
                    del node.inputs[1]

            # Set leaky-relu node attributes to INSTNORM3D plugin and remove leaky-relu nodes.
            nodes = [
                node for node in graph.nodes if node.op == "INSTNORM3D_TRT"
            ]
            for node in nodes:
                leaky_relu_node = node.o()
                attrs = leaky_relu_node.attrs
                node.attrs["relu"] = 1
                node.attrs["alpha"] = attrs["alpha"]
                node.outputs = leaky_relu_node.outputs
                leaky_relu_node.outputs.clear()

        if self.use_conv3d1x1x1k4_plugin:
            nodes = [
                node for node in graph.nodes if node.op == "INSTNORM3D_TRT"
            ]
            last_layer_node = nodes[-1].o()
            last_layer_node.op = "CONV3D1X1X1K4_TRT"
            weights = last_layer_node.inputs[1]
            weights_shape = weights.values.shape
            weights_c = weights_shape[1]
            weights_k = weights_shape[0]
            assert weights_shape == (
                4, 32, 1, 1, 1
            ), "The plugin only supports 1x1x1 convolution with c == 32 and k == 4"
            last_layer_node.attrs["inputChannels"] = weights_c
            last_layer_node.attrs["weights"] = weights
            last_layer_node.attrs["plugin_version"] = "1"
            last_layer_node.attrs["plugin_namespace"] = ""
            del last_layer_node.inputs[1]
            # add the identity layer, since the last layer is quantized
            identity_out = gs.Variable("output", dtype=np.float32)
            identity = gs.Node(op="Identity",
                               inputs=last_layer_node.outputs,
                               outputs=[identity_out])
            graph.nodes.append(identity)
            graph.outputs.append(identity_out)
            last_layer_node.outputs[0].name = "conv3d1x1x1k4_out"

        # Convert Deconv to Conv + PixelShuffle
        if self.use_conv_for_deconv:
            added_nodes = []
            input_d = graph.inputs[0].shape[2]
            input_h = graph.inputs[0].shape[3]
            input_w = graph.inputs[0].shape[4]

            # We start the conversion from the lowest dimension
            current_d = input_d // 32
            current_h = input_h // 32
            current_w = input_w // 32

            for (node_idx, node) in enumerate(graph.nodes):
                if node.op == "ConvTranspose":
                    name = node.name
                    node.op = "Conv"
                    assert node.attrs["kernel_shape"] == [
                        2, 2, 2
                    ], "The conversion only makes sense for 2x2x2 deconv"
                    node.attrs["kernel_shape"] = [1, 1, 1]
                    assert node.attrs["strides"] == [
                        2, 2, 2
                    ], "The conversion only makes sense for stride=2x2x2 deconv"
                    node.attrs["strides"] = [1, 1, 1]

                    # Transpose weights from cktrs to (ktrs)c111 or (trsk)c111
                    assert len(
                        node.inputs
                    ) == 2, "Bias not handled in deconv->conv conversion"
                    weights = node.inputs[1]
                    weights_shape = weights.values.shape
                    weights_c = weights_shape[0]
                    weights_k = weights_shape[1]
                    assert weights_shape[2:] == (
                        2, 2,
                        2), "The conversion only makes sense for 2x2x2 deconv"
                    weights_transpose_axes = (
                        1, 2, 3, 4,
                        0) if self.pixel_shuffle_cdwh else (2, 3, 4, 1, 0)
                    weights.values = weights.values.transpose(
                        weights_transpose_axes).reshape(
                            weights_k * 8, weights_c, 1, 1, 1)

                    deconv_output = node.outputs[0]
                    concat_node = graph.nodes[node_idx + 1]
                    assert concat_node.op == "Concat", "Cannot find the right Concat node"
                    if self.enable_pixelshuffle3d_plugin:
                        # Insert PixelShuffle
                        pixel_shuffle_output = gs.Variable(
                            name + "_pixelshuffle_plugin_out")
                        pixel_shuffle_node = gs.Node(
                            "PIXELSHUFFLE3D_TRT",
                            name + "_pixelshuffle_plugin", {}, [deconv_output],
                            [pixel_shuffle_output])
                        pixel_shuffle_node.op = "PIXELSHUFFLE3D_TRT"
                        pixel_shuffle_node.attrs["R"] = 2
                        pixel_shuffle_node.attrs["S"] = 2
                        pixel_shuffle_node.attrs["T"] = 2
                        pixel_shuffle_node.attrs["plugin_version"] = "1"
                        pixel_shuffle_node.attrs["plugin_namespace"] = ""
                        assert concat_node.inputs[
                            0] is deconv_output, "Wrong concat order"
                        if self.enable_pixelshuffle3d_plugin_concat_fuse:
                            pixel_shuffle_node.outputs = concat_node.outputs
                            pixel_shuffle_node.inputs.append(
                                concat_node.inputs[1])
                            concat_node.outputs.clear()
                        else:
                            concat_node.inputs[0] = pixel_shuffle_output
                        added_nodes.extend([pixel_shuffle_node])
                    else:
                        reshape1_shape = [0, weights_k, 2, 2, 2, current_d, current_h, current_w] if self.pixel_shuffle_cdwh else\
                                         [0, 2, 2, 2, weights_k, current_d, current_h, current_w]
                        shuffle_axes = [0, 1, 5, 2, 6, 3, 7, 4
                                        ] if self.pixel_shuffle_cdwh else [
                                            0, 4, 5, 1, 6, 2, 7, 3
                                        ]
                        current_d *= 2
                        current_h *= 2
                        current_w *= 2
                        reshape2_shape = [
                            0, weights_k, current_d, current_h, current_w
                        ]
                        reshape1_shape_const = gs.Constant(
                            name + "_pixelshuffle_reshape1_shape",
                            np.array(reshape1_shape, dtype=np.int32))
                        reshape2_shape_const = gs.Constant(
                            name + "_pixelshuffle_reshape2_shape",
                            np.array(reshape2_shape, dtype=np.int32))
                        reshape1_output = gs.Variable(
                            name + "_pixelshuffle_reshape1_out")
                        shuffle_output = gs.Variable(
                            name + "_pixelshuffle_shuffle_out")
                        reshape2_output = gs.Variable(
                            name + "_pixelshuffle_reshape2_out")
                        reshape1_node = gs.Node(
                            "Reshape", name + "_pixelshuffle_reshape1", {},
                            [deconv_output, reshape1_shape_const],
                            [reshape1_output])
                        shuffle_node = gs.Node(
                            "Transpose", name + "_pixelshuffle_transpose",
                            {"perm": shuffle_axes}, [reshape1_output],
                            [shuffle_output])
                        reshape2_node = gs.Node(
                            "Reshape", name + "_pixelshuffle_reshape2", {},
                            [shuffle_output, reshape2_shape_const],
                            [reshape2_output])
                        assert concat_node.inputs[
                            0] is deconv_output, "Wrong concat order"
                        concat_node.inputs[0] = reshape2_output
                        added_nodes.extend(
                            [reshape1_node, shuffle_node, reshape2_node])
            graph.nodes.extend(added_nodes)

        # Remove the four unnecessary outputs.
        graph.outputs = [
            output for output in graph.outputs if output.name == "output"
        ]

        # Remove dead nodes.
        graph.cleanup().toposort()

        # Add names to the layers after the graph is topologically sorted.
        uniq_num = 0
        for node in graph.nodes:
            if not node.name or node.name.isdigit():
                node.name = 'gs_{}_{}'.format(str(node.op), uniq_num)
                node.attrs['name'] = node.name
                uniq_num += 1
            for out_idx, out_tensor in enumerate(node.outputs):
                postfix = "_" + out_idx if len(node.outputs) > 1 else ""
                if not out_tensor.name or out_tensor.name.isdigit():
                    out_tensor.name = node.name + "__output" + postfix

        return gs.export_onnx(graph)
Example #23
import numpy as np
import onnx_graphsurgeon as gs
import os
import tensorrt as trt

nLoop = 10
nC = 32
onnxFile0 = "model-0.onnx"
onnxFile1 = "model-1.onnx"

tensor0 = gs.Variable(name="tensor-0",
                      dtype=np.float32,
                      shape=['B', 1, 16, 16])

constant32x1 = gs.Constant(
    "constant32x1",
    np.ascontiguousarray(
        np.random.rand(nC, 1, 3, 3).reshape(nC, 1, 3, 3).astype(np.float32) *
        2 - 1))
constant32x32 = gs.Constant(
    "constant32x32",
    np.ascontiguousarray(
        np.random.rand(nC, nC, 3, 3).reshape(nC, nC, 3, 3).astype(np.float32) *
        2 - 1))
constant32 = gs.Constant(
    "constant32",
    np.ascontiguousarray(
        np.random.rand(1, nC, 1, 1).reshape(1, nC, 1, 1).astype(np.float32) *
        2 - 1))
constant32t = gs.Constant(
    "constant32t",
    np.ascontiguousarray(
Example #24
tensor0 = gs.Variable(name="tensor0", dtype=np.float32,
                      shape=['B', 3, 64, 64])  # the three genuinely useful tensors
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=['B', 3, 64, 64])
tensor2 = gs.Variable(name="tensor2", dtype=np.float32, shape=['B', 3, 64, 64])
tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=['B', 3, 64,
                                                               64])  # a dummy input tensor
tensor4 = gs.Variable(name="tensor4", dtype=np.float32, shape=['B', 1, 64,
                                                               64])  # a dummy output tensor
tensor5 = gs.Variable(name="tensor5", dtype=np.float32, shape=['B', 1, 64,
                                                               64])  # two unused tensors
tensor6 = gs.Variable(name="tensor6", dtype=np.float32, shape=['B', 1, 64, 64])
tensor7 = gs.Variable(name="tensor7", dtype=np.float32, shape=None)  # intermediate result tensors
tensor8 = gs.Variable(name="tensor8", dtype=np.float32, shape=None)

constant0 = gs.Constant(name="w",
                        values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))

node0 = gs.Node(name="myAdd0",
                op="Add",
                inputs=[constant0, constant0],
                outputs=[tensor7])
node1 = gs.Node(name="myAdd1",
                op="Add",
                inputs=[tensor7, constant0],
                outputs=[tensor8])
node2 = gs.Node(name="myAdd2",
                op="Add",
                inputs=[tensor0, tensor8],
                outputs=[tensor1])  # an effective node
node3 = gs.Node(name="myAdd3",
                op="Add",
Example #25

from collections import OrderedDict
import numpy as np
import onnx
import onnx_graphsurgeon as gs

tensor0 = gs.Variable(name="tensor0", dtype=np.float32, shape=['B', 3, 64,
                                                               64])  # define tensors (variables)
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=['B', 1, 64, 64])
tensor2 = gs.Variable(name="tensor2", dtype=np.float32,
                      shape=None)  # shape and dtype may be left unspecified
tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=None)

constant0 = gs.Constant(name="constant0",
                        values=np.ones(shape=[1, 3, 3, 3],
                                       dtype=np.float32))  # define tensors (constants)
constant1 = gs.Constant(name="constant1",
                        values=np.ones(shape=[1], dtype=np.float32))

node0 = gs.Node(name="myConv",
                op="Conv",
                inputs=[tensor0, constant0],
                outputs=[tensor1])  # define a node, with tensors as inputs and outputs
node0.attrs = OrderedDict([
    ['dilations', [1, 1]],
    ['kernel_shape', [3, 3]],
    ['pads', [1, 1, 1, 1]],
    ['strides', [1, 1]],
])  # node attributes
Example #26
        node_concat = node.i(2, 0)
        
        values = []
        for i in range(len(node_concat.inputs)):
            c = node_concat.i(i, 0)
            # print(c)
            while c.op != 'Constant':
                c = c.i(0, 0)
            values.append(c.attrs['value'].values)
    
        # Unreliable approach (it blindly assumes that parent 0 is a Constant):
        #node_concat.i(0, 0).attrs['value'] = gs.Constant('', np.concatenate(values))
        #node.inputs[2] = node_concat.inputs[0]

        # A more reliable approach:
        node_constant = gs.Node(op="Constant", name=node_concat.name, attrs={'value':gs.Constant('', np.concatenate(values))})
        node_constant.outputs = node_concat.outputs[:]
        graph.nodes.append(node_constant)
        
        node_concat.outputs.clear()

    if node.op == 'Unsqueeze' and node.i(0, 0).op == 'Constant' and node.i(0, 0).attrs['value'].dtype == np.float64:
        node.i(0, 0).attrs['value'] = gs.Constant('', np.asarray([node.i(0, 0).attrs['value'].values], dtype=np.float32))
        
    if node.op == 'Clip':
        node_cast0 = node.i(1, 0)
        node_cast1 = node.i(2, 0)
        #change data type to fp32
        node_cast0.i(0, 0).attrs['value'] = gs.Constant('', np.asarray([-1.0], dtype=np.float32))
        node_cast1.i(0, 0).attrs['value'] = gs.Constant('', np.asarray([1.0], dtype=np.float32))
        #skip cast
Example #27
    def fuse_res2_mega(self):
        """
        Search and replace all the res2 layers with the res2 megakernel plugin.
        This fusion is for mega fusion of entire res2a_*
        """
        logging.info("Fusing ops in res2_mega")
        op_names_list = [
            "res2a_branch1",
            "res2a_branch2a",
            "res2a_branch2a_relu",
            "res2a_branch2b",
            "res2a_branch2b_relu",
            "res2a_branch2c",
            "res2a",
            "res2a_relu",
            "res2b_branch2a",
            "res2b_branch2a_relu",
            "res2b_branch2b",
            "res2b_branch2b_relu",
            "res2b_branch2c",
            "res2b",
            "res2b_relu",
            "res2c_branch2a",
            "res2c_branch2a_relu",
            "res2c_branch2b",
            "res2c_branch2b_relu",
            "res2c_branch2c",
            "res2c",
            "res2c_relu",
        ]

        # setup plugin info
        plugin_name = "RES2_FULL_FUSION"

        # prep fusion: constants and attributes
        op_dict = {
            _n.name: _n
            for _n in self.graph.nodes if _n.name in op_names_list
        }
        op_list = [op_dict[_n] for _n in op_names_list]
        assert len(op_names_list) == len(
            op_list), "Need to capture all op objects in op_names_list"

        plugin_inp = [op_list[0].inputs[0]]
        plugin_out = [op_list[-1].outputs[0]]

        scale64 = gs.Constant("scale64",
                              values=np.ones((64), dtype=np.float32))
        scale256 = gs.Constant("scale256",
                               values=np.ones((256), dtype=np.float32))
        rescale = gs.Constant("rescale",
                              values=np.ones((256), dtype=np.float32))

        # build array with dynamic ranges required for the fusion plugin
        # NOTE: order matters
        dyn_list = [
            self.dyn_range_map[plugin_inp[0].name],
            self.dyn_range_map[op_list[0].outputs[0].name],
            self.dyn_range_map[op_list[2].outputs[0].name],
            self.dyn_range_map[op_list[4].outputs[0].name],
            self.dyn_range_map[op_list[5].outputs[0].name],
            self.dyn_range_map[op_list[7].outputs[0].name],
            self.dyn_range_map[op_list[9].outputs[0].name],
            self.dyn_range_map[op_list[11].outputs[0].name],
            self.dyn_range_map[op_list[12].outputs[0].name],
            self.dyn_range_map[op_list[14].outputs[0].name],
            self.dyn_range_map[op_list[16].outputs[0].name],
            self.dyn_range_map[op_list[18].outputs[0].name],
            self.dyn_range_map[op_list[19].outputs[0].name],
            self.dyn_range_map[op_list[21].outputs[0].name],
        ]

        dynamic_ranges = np.array(dyn_list, dtype=np.float32)
        dyn_const = gs.Constant("{}_dynamic_ranges".format(plugin_name),
                                values=dynamic_ranges)

        # this becomes attributes to ONNX node that fusion plugin uses
        # NOTE: order does not matter
        plugin_field_dict = {
            "c_res2a_br1_w": op_list[0].inputs[1],
            "s_res2a_br1_s": scale256,
            "s_res2a_br1_b": op_list[0].inputs[2],
            "c_res2a_br2a_w": op_list[1].inputs[1],
            "s_res2a_br2a_s": scale64,
            "s_res2a_br2a_b": op_list[1].inputs[2],
            "c_res2a_br2b_w": op_list[3].inputs[1],
            "s_res2a_br2b_s": scale64,
            "s_res2a_br2b_b": op_list[3].inputs[2],
            "c_res2a_br2c_w": op_list[5].inputs[1],
            "s_res2a_br2c_s": scale256,
            "s_res2a_br2c_b": op_list[5].inputs[2],
            "c_res2b_br2a_w": op_list[8].inputs[1],
            "s_res2b_br2a_s": scale64,
            "s_res2b_br2a_b": op_list[8].inputs[2],
            "c_res2b_br2b_w": op_list[10].inputs[1],
            "s_res2b_br2b_s": scale64,
            "s_res2b_br2b_b": op_list[10].inputs[2],
            "c_res2b_br2c_w": op_list[12].inputs[1],
            "s_res2b_br2c_s": scale256,
            "s_res2b_br2c_b": op_list[12].inputs[2],
            "c_res2c_br2a_w": op_list[15].inputs[1],
            "s_res2c_br2a_s": scale64,
            "s_res2c_br2a_b": op_list[15].inputs[2],
            "c_res2c_br2b_w": op_list[17].inputs[1],
            "s_res2c_br2b_s": scale64,
            "s_res2c_br2b_b": op_list[17].inputs[2],
            "c_res2c_br2c_w": op_list[19].inputs[1],
            "s_res2c_br2c_s": scale256,
            "s_res2c_br2c_b": op_list[19].inputs[2],
            "r_res2a_br2c_r": rescale,
            "r_res2b_br2c_r": rescale,
            "r_res2c_br2c_r": rescale,
            "dynamic_ranges": dyn_const,
        }

        attrs = {
            "plugin_version": "1",
            "plugin_namespace": "",
        }
        attrs.update(plugin_field_dict)

        # replace ops with plugin
        self.graph.RES2PLUGIN("RnRes2FullFusion_TRT", plugin_name, plugin_inp,
                              plugin_out, attrs)

        # graph cleanup
        self.cleanup_graph()

        # done
        logging.info("Plugin {} successful".format(plugin_name))
Example #28
node0 = gs.Node(name="myIdentity0",
                op="Identity",
                inputs=[tensor0],
                outputs=[tensor1])
node1 = gs.Node(name="myIdentity1",
                op="Identity",
                inputs=[tensor1],
                outputs=[tensor2])

graph = gs.Graph(nodes=[node0, node1], inputs=[tensor0], outputs=[tensor2])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-02-01.onnx")

for node in graph.nodes:
    if node.op == 'Identity' and node.name == 'myIdentity0':  # walk the graph to find the insertion point
        constant0 = gs.Constant(name="constant0",
                                values=np.ones(shape=[1, 1, 1, 1],
                                               dtype=np.float32))  # build the new node and tensor
        tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=None)
        newNode = gs.Node(name="myAdd",
                          op="Add",
                          inputs=[node.outputs[0], constant0],
                          outputs=[tensor3])

        graph.nodes.append(newNode)  # remember to add the new node to the graph
        index = node.o().inputs.index(node.outputs[0])  # carefully locate the matching input tensor of the next node
        node.o().inputs[index] = tensor3  # replace it with the new tensor

graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-02-02.onnx")
Example #29
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Computes outputs = input + ((a + b) + d)

shape = (1, 3)
# Inputs
input = gs.Variable("input", shape=shape, dtype=np.float32)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
c = gs.Variable("c")
d = gs.Constant("d", values=np.ones(shape=shape, dtype=np.float32))
e = gs.Variable("e")

# Outputs
output = gs.Variable("output", shape=shape, dtype=np.float32)

nodes = [
    # c = (a + b)
    gs.Node("Add", inputs=[a, b], outputs=[c]),
    # e = (c + d)
    gs.Node("Add", inputs=[c, d], outputs=[e]),
    # output = input + e
    gs.Node("Add", inputs=[input, e], outputs=[output]),
Example #30
    def fuse_br1_br2c(self):
        """
        Match and replace br1+br2c with the fused plugin.
        This fusion is for Conv(shortcut)-Add-ReLU
        """
        logging.info("Fusing ops in br1_br2c path")
        op_names_list = [
            "res2a_branch1", "res2a_branch2c", "res2a", "res2a_relu"
        ]

        # setup plugin info
        plugin_name = "RES2_BR1_BR2C_1"

        # prep fusion: constants and attributes
        op_dict = {
            _n.name: _n
            for _n in self.graph.nodes if _n.name in op_names_list
        }
        op_list = [op_dict[_n] for _n in op_names_list]
        assert len(op_names_list) == len(
            op_list), "Need to capture all op objects in op_names_list"

        scale = gs.Constant("scale", values=np.ones((256), dtype=np.float32))

        # build array with dynamic ranges required for the fusion plugin
        # NOTE: order matters
        dyn_list = [
            self.dyn_range_map[op_list[0].inputs[0].name],
            self.dyn_range_map[op_list[0].outputs[0].name],
            self.dyn_range_map[op_list[1].inputs[0].name],
            self.dyn_range_map[op_list[1].outputs[0].name],
            self.dyn_range_map[op_list[3].outputs[0].name],
        ]
        dynamic_ranges = np.array(dyn_list, dtype=np.float32)
        dyn_const = gs.Constant("{}_dynamic_ranges".format(plugin_name),
                                values=dynamic_ranges)

        # this becomes attributes to ONNX node that fusion plugin uses
        # NOTE: order does not matter
        plugin_field_dict = {
            "c_br1_w": op_list[0].inputs[1],
            "s_br1_s": scale,
            "s_br1_b": op_list[0].inputs[2],
            "c_br2c_w": op_list[1].inputs[1],
            "s_br2c_s": scale,
            "s_br2c_b": op_list[1].inputs[2],
            "dynamic_ranges": dyn_const,
        }

        attrs = {
            "plugin_version": "2",
            "plugin_namespace": "",
        }
        attrs.update(plugin_field_dict)

        # get plugin input/output
        plugin_inp = [op_list[0].inputs[0], op_list[1].inputs[0]]
        plugin_out = [op_list[-1].outputs[0]]

        # replace ops with plugin
        self.graph.RES2PLUGIN("RnRes2Br1Br2c_TRT", plugin_name, plugin_inp,
                              plugin_out, attrs)

        # graph cleanup
        self.cleanup_graph()

        # done
        logging.info("Plugin {} successful".format(plugin_name))