Example #1
 def compute(self, input_shape, kernel_shape, stride, padding, dilation,
             layer_name):
     # Standard ceil-mode output-size formula per spatial dim:
     #   out = ceil((in + 2 * pad - kernel) / stride + 1)
     # Note: dilation and layer_name are accepted but unused here.
     hw = []
     for i in range(2):
         out = math.ceil((input_shape[i] + 2 * padding[i] -
                          kernel_shape[i]) / stride[i] + 1)
         hw.append(out)
     return trt.DimsHW(hw)
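A quick sanity check of the ceil-mode formula used above, with made-up values: a 224-pixel input, padding 1, a 3-wide kernel, and stride 2 give ceil(112.5) = 113.

import math

# hypothetical values, chosen only to exercise the formula in compute()
input_size, padding, kernel, stride = 224, 1, 3, 2
out = math.ceil((input_size + 2 * padding - kernel) / stride + 1)
assert out == 113  # the fractional 112.5 is rounded up in ceil mode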
Example #2
    def add_pooling_func(self,
                         name,
                         input_tensor_name,
                         pooling_type,
                         padding_type,
                         kernel_size,
                         strides,
                         blend_factor=None):
        """
        Similar to
        https://github.com/onnx/onnx-tensorrt/blob/6.0-full-dims/onnx2trt_utils.cpp#L1002

        :param name:
        :param input_tensor_name:
        :param pooling_type:
        :param padding_type:
        :param kernel_size:
        :param strides:
        :param blend_factor: optional
        :return:
        """

        input_tensor = self.get_layer_output(input_tensor_name)

        # check for valid padding in pooling layers
        assert padding_type in [
            "VALID", "SAME"
        ], "Pooling only supports valid or same padding"

        # 2D window size
        window_size = trt.DimsHW(kernel_size[-2:])

        # create layer
        pooling_layer = self._network.add_pooling(input=input_tensor,
                                                  type=pooling_type,
                                                  window_size=window_size)
        pooling_layer.stride = trt.DimsHW(strides[-2:])
        pooling_layer.padding_mode = (trt.PaddingMode.EXPLICIT_ROUND_DOWN
                                      if padding_type == "VALID"
                                      else trt.PaddingMode.SAME_UPPER)
        # "if blend_factor:" would silently skip a legitimate value of 0.0
        if blend_factor is not None:
            pooling_layer.blend_factor = blend_factor

        self._remember_op_and_output(pooling_layer, name)
        return pooling_layer
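A minimal usage sketch, assuming an instance of the surrounding builder class (here net_builder) that already registered a tensor under the name "conv1"; every name and shape below is illustrative:

# hypothetical call; "conv1" must already be registered via _remember_op_and_output
pool = net_builder.add_pooling_func(name="pool1",
                                    input_tensor_name="conv1",
                                    pooling_type=trt.PoolingType.MAX,
                                    padding_type="SAME",
                                    kernel_size=[1, 1, 3, 3],  # last two dims form the HW window
                                    strides=[1, 1, 2, 2])      # last two dims are used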
Example #3
    def add_padding(self, name, input_tensor_name, padding_name):
        """
        Similar to
        https://github.com/onnx/onnx-tensorrt/blob/6.0-full-dims/builtin_op_importers.cpp#L1321

        :param name:
        :param input_tensor_name:
        :param padding_name:
        :return:
        """

        input_tensor = self.get_layer_output(input_tensor_name)
        pad = self.get_layer_weights(padding_name)

        # in a channels-first layout the last two rows hold the height/width paddings
        pre_padding = trt.DimsHW(pad[-2:, 0])
        post_padding = trt.DimsHW(pad[-2:, 1])

        # create layer
        pad_layer = self._network.add_padding(input=input_tensor,
                                              pre_padding=pre_padding,
                                              post_padding=post_padding)
        return self._remember_op_and_output(pad_layer, name)
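The slicing above assumes the padding weights form an (ndims, 2) array of (pre, post) pairs, one row per dimension of a channels-first tensor; a small numpy illustration with made-up values:

import numpy as np

# hypothetical NCHW padding spec: one (pre, post) pair per dimension
pad = np.array([[0, 0],   # N
                [0, 0],   # C
                [1, 2],   # H
                [3, 4]])  # W
print(pad[-2:, 0])  # [1 3] -> pre-padding  (top, left)
print(pad[-2:, 1])  # [2 4] -> post-padding (bottom, right)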
Example #4
def add_se_layer(network, weight_map, input, c, w, lname):
    # Squeeze-and-excitation block; assumes a square w x w feature map.
    h = w
    # squeeze: one global average pool over the whole spatial extent
    l1 = network.add_pooling(input=input,
                             type=trt.PoolingType.AVERAGE,
                             window_size=trt.DimsHW(w, h))
    assert l1
    l1.stride_nd = (w, h)

    # excitation: two fully connected layers with a reduction factor of 4
    l2 = network.add_fully_connected(input=l1.get_output(0),
                                     num_outputs=BS * c // 4,
                                     kernel=weight_map[lname + "fc.0.weight"],
                                     bias=weight_map[lname + "fc.0.bias"])
    relu1 = network.add_activation(l2.get_output(0),
                                   type=trt.ActivationType.RELU)
    l4 = network.add_fully_connected(input=relu1.get_output(0),
                                     num_outputs=BS * c,
                                     kernel=weight_map[lname + "fc.2.weight"],
                                     bias=weight_map[lname + "fc.2.bias"])

    # gate: h-swish on the excitation output
    se = add_h_swish(network, l4.get_output(0))

    return se
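add_h_swish is not shown in these examples. A plausible sketch under the usual definition h_swish(x) = x * hard_sigmoid(x), mapped onto TensorRT's HARD_SIGMOID activation with alpha = 1/6 and beta = 0.5; treat it as an assumption, not the original helper:

def add_h_swish(network, input_tensor):
    # assumed implementation: h_swish(x) = x * hard_sigmoid(x)
    hs = network.add_activation(input_tensor,
                                type=trt.ActivationType.HARD_SIGMOID)
    hs.alpha = 1.0 / 6.0
    hs.beta = 0.5
    return network.add_elementwise(input_tensor, hs.get_output(0),
                                   trt.ElementWiseOperation.PROD)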
Example #5
def add_transition(network, input, weight_map, outch, lname):
    # DenseNet transition: BN -> ReLU -> 1x1 conv -> 2x2 average pool
    bn1 = add_batch_norm_2d(network, weight_map, input, lname + ".norm")

    relu1 = network.add_activation(bn1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu1

    conv1 = network.add_convolution(input=relu1.get_output(0),
                                    num_output_maps=outch,
                                    kernel_shape=(1, 1),
                                    kernel=weight_map[lname + ".conv.weight"],
                                    bias=trt.Weights())
    assert conv1
    conv1.stride = (1, 1)

    pool1 = network.add_pooling(input=conv1.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(2, 2))
    assert pool1
    pool1.stride_nd = (2, 2)
    pool1.padding_nd = (0, 0)

    return pool1
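add_batch_norm_2d appears throughout these examples but is never defined. A minimal sketch, assuming PyTorch-style BN statistics stored as float32 numpy arrays under lname + ".weight", ".bias", ".running_mean", and ".running_var": batch norm folds into TensorRT's per-channel scale layer with scale = gamma / sqrt(var + eps) and shift = beta - mean * scale.

import numpy as np

def add_batch_norm_2d(network, weight_map, input_tensor, lname, eps=1e-5):
    # assumed implementation: fold BN into a per-channel scale layer
    gamma = weight_map[lname + ".weight"]
    beta = weight_map[lname + ".bias"]
    mean = weight_map[lname + ".running_mean"]
    var = weight_map[lname + ".running_var"]
    scale = (gamma / np.sqrt(var + eps)).astype(np.float32)
    shift = (beta - mean * scale).astype(np.float32)
    power = np.ones_like(scale)
    return network.add_scale(input_tensor, trt.ScaleMode.CHANNEL,
                             trt.Weights(shift), trt.Weights(scale),
                             trt.Weights(power))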
Example #6
def create_engine(maxBatchSize, builder, dt, weights):
    weight_map = load_weights(weights)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt,
                             (NUM_SEGMENTS, 3, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(7, 7),
                                    kernel=weight_map["conv1.weight"],
                                    bias=trt.Weights())
    assert conv1
    conv1.stride = (2, 2)
    conv1.padding = (3, 3)

    bn1 = add_batch_norm_2d(network, weight_map, conv1.get_output(0), "bn1",
                            EPS)
    assert bn1

    relu1 = network.add_activation(bn1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                window_size=trt.DimsHW(3, 3),
                                type=trt.PoolingType.MAX)
    assert pool1
    pool1.stride = (2, 2)
    pool1.padding = (1, 1)

    cur_height = INPUT_H // 4
    cur_width = INPUT_W // 4
    x = bottleneck(network, weight_map, pool1.get_output(0), 64, 64, 1,
                   "layer1.0.", (NUM_SEGMENTS, 64, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1,
                   "layer1.1.", (NUM_SEGMENTS, 256, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1,
                   "layer1.2.", (NUM_SEGMENTS, 256, cur_height, cur_width))

    x = bottleneck(network, weight_map, x.get_output(0), 256, 128, 2,
                   "layer2.0.", (NUM_SEGMENTS, 256, cur_height, cur_width))
    cur_height = INPUT_H // 8
    cur_width = INPUT_W // 8
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1,
                   "layer2.1.", (NUM_SEGMENTS, 512, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1,
                   "layer2.2.", (NUM_SEGMENTS, 512, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1,
                   "layer2.3.", (NUM_SEGMENTS, 512, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 512, 256, 2,
                   "layer3.0.", (NUM_SEGMENTS, 512, cur_height, cur_width))
    cur_height = INPUT_H // 16
    cur_width = INPUT_W // 16
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.1.", (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.2.", (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.3.", (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.4.", (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.5.", (NUM_SEGMENTS, 1024, cur_height, cur_width))

    x = bottleneck(network, weight_map, x.get_output(0), 1024, 512, 2,
                   "layer4.0.", (NUM_SEGMENTS, 1024, cur_height, cur_width))
    cur_height = INPUT_H // 32
    cur_width = INPUT_W // 32
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1,
                   "layer4.1.", (NUM_SEGMENTS, 2048, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1,
                   "layer4.2.", (NUM_SEGMENTS, 2048, cur_height, cur_width))

    pool2 = network.add_pooling(x.get_output(0),
                                window_size=trt.DimsHW(cur_height, cur_width),
                                type=trt.PoolingType.AVERAGE)
    assert pool2
    pool2.stride = (1, 1)

    fc1 = network.add_fully_connected(input=pool2.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map['fc.weight'],
                                      bias=weight_map['fc.bias'])
    assert fc1

    reshape = network.add_shuffle(fc1.get_output(0))
    assert reshape
    reshape.reshape_dims = (NUM_SEGMENTS, OUTPUT_SIZE)

    reduce = network.add_reduce(reshape.get_output(0),
                                op=trt.ReduceOperation.AVG,
                                axes=1,
                                keep_dims=False)
    assert reduce

    softmax = network.add_softmax(reduce.get_output(0))
    assert softmax
    softmax.axes = 1

    softmax.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(softmax.get_output(0))

    # Build engine
    builder.max_batch_size = maxBatchSize
    builder.max_workspace_size = 1 << 20
    engine = builder.build_cuda_engine(network)

    del network
    del weight_map

    return engine
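A hedged driver sketch for the implicit-batch API that build_cuda_engine above implies (TensorRT 6/7 era); the weight and engine paths are illustrative:

# hypothetical build-and-serialize flow
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
engine = create_engine(maxBatchSize=1, builder=builder, dt=trt.float32,
                       weights="tsn_r50.wts")
with open("tsn_r50.engine", "wb") as f:
    f.write(engine.serialize())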
Example #7
def cola_output(prefix, config, init_dict, network, input_tensor):
    """
    Create the CoLA output
    """
    print(input_tensor.shape)
    idims = input_tensor.shape
    assert len(idims) == 5
    B, S, hidden_size, _, _ = idims

    # add shuffle layer for reshaping and permutation
    shuffle = network.add_shuffle(input_tensor)
    shuffle.first_transpose = (0, 2, 1, 3, 4)
    shuffle.reshape_dims = (B, hidden_size, S, 1)
    input_tensor = shuffle.get_output(0)
    print(input_tensor.shape)

    # add convolution layers
    conv_outputs = []
    bag = []  # hold references so conv_w and conv_b are not garbage-collected
    for i in range(3):
        # add conv
        kernel_size = trt.DimsHW(3 + i, 1)
        conv_w = init_dict['conv{}_kernel'.format(i)]
        conv_b = init_dict['conv{}_biases'.format(i)]
        print(input_tensor.shape, kernel_size, conv_w.size)
        conv = network.add_convolution(input=input_tensor,
                                       num_output_maps=100,
                                       kernel_shape=kernel_size,
                                       kernel=conv_w,
                                       bias=conv_b)
        conv.stride = (1, 1)
        conv.padding_mode = trt.PaddingMode.SAME_LOWER
        set_layer_name(conv, prefix, "conv{}".format(i))
        bag += [conv_w, conv_b]
        print("conv output shape: ", conv.get_output(0).shape)
        # add relu
        relu = network.add_activation(input=conv.get_output(0),
                                      type=trt.ActivationType.RELU)
        set_layer_name(relu, prefix, "relu{}".format(i))
        # add pooling
        pooling = network.add_pooling(input=relu.get_output(0),
                                      type=trt.PoolingType.MAX,
                                      window_size=(8, 1))
        pooling.stride = (1, 1)
        set_layer_name(pooling, prefix, "pooling{}".format(i))
        print("Pooling output shape", pooling.get_output(0).shape)
        # an add_reduce-based flatten was tried here and left disabled:
        # flatten = network.add_reduce(
        #     input=pooling.get_output(0),
        #     op=trt.tensorrt.ReduceOperation.SUM,
        #     axes=1,  # first non-batch dimension
        #     keep_dims=False)
        # set_layer_name(flatten, prefix, "flatten{}".format(i))

        # for concat
        conv_outputs.append(pooling.get_output(0))
    concat = network.add_concatenation(inputs=conv_outputs)
    print("Concat output shape:", concat.get_output(0).shape)
    set_layer_name(concat, prefix, "concat")
    # fc layer
    dense = network.add_fully_connected(concat.get_output(0), 22,
                                        init_dict['fc0_weights'].numpy(),
                                        init_dict['fc0_biases'].numpy())
    set_layer_name(dense, prefix, "dense")
    print("fc layer output shape: ", dense.get_output(0).shape)
    # softmax layer
    softmax = network.add_softmax(input=dense.get_output(0))
    print("softmax layer output shape: ", softmax.get_output(0).shape)
    return softmax
Example #8
def createLenetEngine(maxBatchSize, builder, config, dt):
    # NOTE: despite the name, this builds a ResNet-50-style network.
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(7, 7),
                                    kernel=weight_map["conv1.weight"],
                                    bias=trt.Weights())
    assert conv1
    conv1.stride = (2, 2)
    conv1.padding = (3, 3)

    bn1 = addBatchNorm2d(network, weight_map, conv1.get_output(0), "bn1", EPS)
    assert bn1

    relu1 = network.add_activation(bn1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                window_size=trt.DimsHW(3, 3),
                                type=trt.PoolingType.MAX)
    assert pool1
    pool1.stride = (2, 2)
    pool1.padding = (1, 1)

    x = bottleneck(network, weight_map, pool1.get_output(0), 64, 64, 1,
                   "layer1.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1,
                   "layer1.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1,
                   "layer1.2.")

    x = bottleneck(network, weight_map, x.get_output(0), 256, 128, 2,
                   "layer2.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1,
                   "layer2.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1,
                   "layer2.2.")
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1,
                   "layer2.3.")

    x = bottleneck(network, weight_map, x.get_output(0), 512, 256, 2,
                   "layer3.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.2.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.3.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.4.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1,
                   "layer3.5.")

    x = bottleneck(network, weight_map, x.get_output(0), 1024, 512, 2,
                   "layer4.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1,
                   "layer4.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1,
                   "layer4.2.")

    pool2 = network.add_pooling(x.get_output(0),
                                window_size=trt.DimsHW(7, 7),
                                type=trt.PoolingType.AVERAGE)
    assert pool2
    pool2.stride = (1, 1)

    fc1 = network.add_fully_connected(input=pool2.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map['fc.weight'],
                                      bias=weight_map['fc.bias'])
    assert fc1

    fc1.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc1.get_output(0))

    # Build engine
    builder.max_batch_size = maxBatchSize
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
Example #9
def create_engine_large(max_batch_size, builder, config, dt):
    # NOTE: loads WEIGHT_PATH_SMALL despite the function's "_large" suffix.
    weight_map = load_weights(WEIGHT_PATH_SMALL)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    ew1 = conv_bn_h_swish(network, weight_map, data, 16, 3, 2, 1,
                          "features.0.")
    ir1 = inverted_res(network, weight_map, ew1.get_output(0), "features.1.",
                       16, 16, 1, 16, 3, 0, 0, 112)
    ir2 = inverted_res(network, weight_map, ir1.get_output(0), "features.2.",
                       16, 24, 2, 64, 3, 0, 0, 56)
    ir3 = inverted_res(network, weight_map, ir2.get_output(0), "features.3.",
                       24, 24, 1, 72, 3, 0, 0, 56)
    ir4 = inverted_res(network, weight_map, ir3.get_output(0), "features.4.",
                       24, 40, 2, 72, 5, 1, 0, 28)
    ir5 = inverted_res(network, weight_map, ir4.get_output(0), "features.5.",
                       40, 40, 1, 120, 5, 1, 0, 28)
    ir6 = inverted_res(network, weight_map, ir5.get_output(0), "features.6.",
                       40, 40, 1, 120, 5, 1, 0, 28)
    ir7 = inverted_res(network, weight_map, ir6.get_output(0), "features.7.",
                       40, 80, 2, 240, 3, 0, 1, 14)
    ir8 = inverted_res(network, weight_map, ir7.get_output(0), "features.8.",
                       80, 80, 1, 200, 3, 0, 1, 14)
    ir9 = inverted_res(network, weight_map, ir8.get_output(0), "features.9.",
                       80, 80, 1, 184, 3, 0, 1, 14)
    ir10 = inverted_res(network, weight_map, ir9.get_output(0), "features.10.",
                        80, 80, 1, 184, 3, 0, 1, 14)
    ir11 = inverted_res(network, weight_map, ir10.get_output(0),
                        "features.11.", 80, 112, 1, 480, 3, 1, 1, 14)
    ir12 = inverted_res(network, weight_map, ir11.get_output(0),
                        "features.12.", 112, 112, 1, 672, 3, 1, 1, 14)
    ir13 = inverted_res(network, weight_map, ir12.get_output(0),
                        "features.13.", 112, 160, 1, 672, 5, 1, 1, 14)
    ir14 = inverted_res(network, weight_map, ir13.get_output(0),
                        "features.14.", 160, 160, 2, 672, 5, 1, 1, 7)
    ir15 = inverted_res(network, weight_map, ir14.get_output(0),
                        "features.15.", 160, 160, 1, 960, 5, 1, 1, 7)
    ew2 = conv_bn_h_swish(network, weight_map, ir15.get_output(0), 960, 1, 1,
                          1, "conv.0.")

    pool1 = network.add_pooling(input=ew2.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(7, 7))
    assert pool1
    pool1.stride_nd = (7, 7)
    sw1 = add_h_swish(network, pool1.get_output(0))

    fc1 = network.add_fully_connected(input=sw1.get_output(0),
                                      num_outputs=1280,
                                      kernel=weight_map["classifier.0.weight"],
                                      bias=weight_map["classifier.0.bias"])
    assert fc1
    sw2 = add_h_swish(network, fc1.get_output(0))

    fc2 = network.add_fully_connected(input=sw2.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map["classifier.3.weight"],
                                      bias=weight_map["classifier.3.bias"])

    fc2.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc2.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
Example #10
def create_engine(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(11, 11),
                                    kernel=weight_map["features.0.weight"],
                                    bias=weight_map["features.0.bias"])
    assert conv1
    conv1.stride = (4, 4)
    conv1.padding = (2, 2)

    relu1 = network.add_activation(conv1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool1
    pool1.stride_nd = (2, 2)

    conv2 = network.add_convolution(input=pool1.get_output(0),
                                    num_output_maps=192,
                                    kernel_shape=(5, 5),
                                    kernel=weight_map["features.3.weight"],
                                    bias=weight_map["features.3.bias"])
    assert conv2
    conv2.padding = (2, 2)

    relu2 = network.add_activation(conv2.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu2

    pool2 = network.add_pooling(input=relu2.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool2
    pool2.stride_nd = (2, 2)

    conv3 = network.add_convolution(input=pool2.get_output(0),
                                    num_output_maps=384,
                                    kernel_shape=(3, 3),
                                    kernel=weight_map["features.6.weight"],
                                    bias=weight_map["features.6.bias"])
    assert conv3
    conv3.padding = (1, 1)

    relu3 = network.add_activation(conv3.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu3

    conv4 = network.add_convolution(input=relu3.get_output(0),
                                    num_output_maps=256,
                                    kernel_shape=(3, 3),
                                    kernel=weight_map["features.8.weight"],
                                    bias=weight_map["features.8.bias"])
    assert conv4
    conv4.padding = (1, 1)

    relu4 = network.add_activation(conv4.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu4

    conv5 = network.add_convolution(input=relu4.get_output(0),
                                    num_output_maps=256,
                                    kernel_shape=(3, 3),
                                    kernel=weight_map["features.10.weight"],
                                    bias=weight_map["features.10.bias"])
    assert conv5
    conv5.padding = (1, 1)

    relu5 = network.add_activation(conv5.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu5

    pool3 = network.add_pooling(input=relu5.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool3
    pool3.stride_nd = (2, 2)

    fc1 = network.add_fully_connected(input=pool3.get_output(0),
                                      num_outputs=4096,
                                      kernel=weight_map["classifier.1.weight"],
                                      bias=weight_map["classifier.1.bias"])
    assert fc1

    relu6 = network.add_activation(fc1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu6

    fc2 = network.add_fully_connected(input=relu6.get_output(0),
                                      num_outputs=4096,
                                      kernel=weight_map["classifier.4.weight"],
                                      bias=weight_map["classifier.4.bias"])
    assert fc2

    relu7 = network.add_activation(fc2.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu7

    fc3 = network.add_fully_connected(input=relu7.get_output(0),
                                      num_outputs=1000,
                                      kernel=weight_map["classifier.6.weight"],
                                      bias=weight_map["classifier.6.bias"])
    assert fc3

    fc3.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc3.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
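load_weights is not defined in any of these examples. A sketch assuming the plain-text .wts layout popularized by the tensorrtx project: the first line is the tensor count, then one "name count hex hex ..." line per tensor, with each value a big-endian float32 hex word:

import struct
import numpy as np

def load_weights(path):
    # assumed .wts format; returns {name: float32 numpy array}
    weight_map = {}
    with open(path) as f:
        count = int(f.readline())
        for _ in range(count):
            parts = f.readline().split()
            name, size = parts[0], int(parts[1])
            values = [struct.unpack(">f", bytes.fromhex(h.zfill(8)))[0]
                      for h in parts[2:2 + size]]
            weight_map[name] = np.array(values, dtype=np.float32)
    return weight_map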
Example #11
def create_engine(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    ew1 = conv_bn_relu(network, weight_map, data, 32, 3, 2, 1, "features.0.")
    ir1 = inverted_res(network, weight_map, ew1.get_output(0), "features.1.",
                       32, 16, 1, 1)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.2.",
                       16, 24, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.3.",
                       24, 24, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.4.",
                       24, 32, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.5.",
                       32, 32, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.6.",
                       32, 32, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.7.",
                       32, 64, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.8.",
                       64, 64, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.9.",
                       64, 64, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.10.",
                       64, 64, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.11.",
                       64, 96, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.12.",
                       96, 96, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.13.",
                       96, 96, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.14.",
                       96, 160, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.15.",
                       160, 160, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.16.",
                       160, 160, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.17.",
                       160, 320, 1, 6)
    ew2 = conv_bn_relu(network, weight_map, ir1.get_output(0), 1280, 1, 1, 1,
                       "features.18.")

    pool1 = network.add_pooling(input=ew2.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(7, 7))
    assert pool1

    fc1 = network.add_fully_connected(input=pool1.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map["classifier.1.weight"],
                                      bias=weight_map["classifier.1.bias"])
    assert fc1

    fc1.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc1.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 32
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
Example #12
def createLenetEngine(maxBatchSize, builder, config, dt):
    weight_map = load_weights(weight_path)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (1, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=6,
                                    kernel_shape=(5, 5),
                                    kernel=weight_map["conv1.weight"],
                                    bias=weight_map["conv1.bias"])
    assert conv1
    conv1.stride = (1, 1)

    relu1 = network.add_activation(conv1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                window_size=trt.DimsHW(2, 2),
                                type=trt.PoolingType.AVERAGE)
    assert pool1
    pool1.stride = (2, 2)

    conv2 = network.add_convolution(pool1.get_output(0), 16, trt.DimsHW(5, 5),
                                    weight_map["conv2.weight"],
                                    weight_map["conv2.bias"])
    assert conv2
    conv2.stride = (1, 1)

    relu2 = network.add_activation(conv2.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu2

    pool2 = network.add_pooling(input=relu2.get_output(0),
                                window_size=trt.DimsHW(2, 2),
                                type=trt.PoolingType.AVERAGE)
    assert pool2
    pool2.stride = (2, 2)

    fc1 = network.add_fully_connected(input=pool2.get_output(0),
                                      num_outputs=120,
                                      kernel=weight_map['fc1.weight'],
                                      bias=weight_map['fc1.bias'])
    assert fc1

    relu3 = network.add_activation(fc1.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu3

    fc2 = network.add_fully_connected(input=relu3.get_output(0),
                                      num_outputs=84,
                                      kernel=weight_map['fc2.weight'],
                                      bias=weight_map['fc2.bias'])
    assert fc2

    relu4 = network.add_activation(fc2.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu4

    fc3 = network.add_fully_connected(input=relu4.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map['fc3.weight'],
                                      bias=weight_map['fc3.bias'])
    assert fc3

    prob = network.add_softmax(fc3.get_output(0))
    assert prob

    prob.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(prob.get_output(0))

    # Build engine
    builder.max_batch_size = maxBatchSize
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
Example #13
    def populate_duration_predictor(self, name, network, weights, seq_tensor,
                                    seq_mask_tensor, batch_size, max_seq_len,
                                    d_model):
        duration_predictor_filter_size = self.model.duration_predictor_filter_size
        duration_predictor_kernel_size = self.model.duration_predictor_kernel_size

        # Pytorch: input *= input_mask.to(input.dtype)
        # can be skipped.

        # Pytorch: out = self.conv1d_1(input.transpose(1,2)).transpose(1,2)
        trans1 = network.add_shuffle(
            input=seq_tensor)  # (b, t, d_model) to (b, d_model, t, 1)
        trans1.first_transpose = trt.Permutation([0, 2, 1])
        trans1.reshape_dims = Dims((batch_size, d_model, max_seq_len, 1))
        trans1.name = "{}.trans1".format(name)
        out = trans1.get_output(0)  # (b, d_model, t, 1)

        conv1_w = weights["{}.conv1d_1.weight".format(
            name
        )]  # (1, d_model, duration_predictor_filter_size, duration_predictor_kernel_size, 1)
        conv1_b = weights["{}.conv1d_1.bias".format(
            name)]  # (duration_predictor_filter_size, )
        conv1 = network.add_convolution(
            input=out,
            num_output_maps=duration_predictor_filter_size,
            kernel_shape=trt.DimsHW(duration_predictor_kernel_size, 1),
            kernel=Weights(conv1_w),
            bias=Weights(conv1_b))
        conv1.padding = trt.DimsHW(1, 0)
        conv1.name = "{}.conv1".format(name)
        out = conv1.get_output(0)  # (b, duration_predictor_filter_size, t, 1)

        trans2 = network.add_shuffle(
            input=out
        )  # (b, duration_predictor_filter_size, t, 1) to (b, t, duration_predictor_filter_size)
        trans2.first_transpose = trt.Permutation([0, 2, 1, 3])
        trans2.reshape_dims = Dims(
            (batch_size, max_seq_len, duration_predictor_filter_size))
        trans2.name = "{}.trans2".format(name)
        out = trans2.get_output(0)  # (b, t, duration_predictor_filter_size)

        # Pytorch: out = self.relu_1(out)
        relu = network.add_activation(input=out, type=trt.ActivationType.RELU)
        relu.name = "{}.relu1".format(name)
        out_relu = relu.get_output(0)  # (b, t, duration_predictor_filter_size)

        # Pytorch: out = self.layer_norm_1(out)
        out = self.populate_layernorm(name="{}.layer_norm_1".format(name),
                                      network=network,
                                      weights=weights,
                                      seq_tensor=out_relu,
                                      d_layer=duration_predictor_filter_size,
                                      batch_size=batch_size,
                                      max_seq_len=max_seq_len)

        # Pytorch: out = self.conv1d_2(out.transpose(1,2)).transpose(1,2)
        trans3 = network.add_shuffle(
            input=out
        )  # (b, t, duration_predictor_filter_size) to (b, duration_predictor_filter_size, t, 1)
        trans3.first_transpose = trt.Permutation([0, 2, 1])
        trans3.reshape_dims = Dims(
            (batch_size, duration_predictor_filter_size, max_seq_len, 1))
        trans3.name = "{}.trans3".format(name)
        out = trans3.get_output(0)  # (b, duration_predictor_filter_size, t, 1)

        conv2_w = weights["{}.conv1d_2.weight".format(
            name
        )]  # (1, duration_predictor_filter_size, duration_predictor_filter_size, duration_predictor_kernel_size, 1)
        conv2_b = weights["{}.conv1d_2.bias".format(
            name)]  # (duration_predictor_filter_size, )
        conv2 = network.add_convolution(
            input=out,
            num_output_maps=duration_predictor_filter_size,
            kernel_shape=trt.DimsHW(duration_predictor_kernel_size, 1),
            kernel=Weights(conv2_w),
            bias=Weights(conv2_b))
        conv2.padding = trt.DimsHW(1, 0)
        conv2.name = "{}.conv2".format(name)
        out = conv2.get_output(0)

        trans4 = network.add_shuffle(
            input=out
        )  # (b, duration_predictor_filter_size, t, 1) to (b, t, duration_predictor_filter_size)
        trans4.first_transpose = trt.Permutation([0, 2, 1, 3])
        trans4.reshape_dims = Dims(
            (batch_size, max_seq_len, duration_predictor_filter_size))
        trans4.name = "{}.trans4".format(name)
        out = trans4.get_output(0)  # (b, t, duration_predictor_filter_size)

        # Pytorch: out = self.relu_2(out)
        relu = network.add_activation(input=out, type=trt.ActivationType.RELU)
        relu.name = "{}.relu2".format(name)
        out_relu = relu.get_output(0)  # (b, t, duration_predictor_filter_size)

        # Pytorch: out = self.layer_norm_2(out)
        out = self.populate_layernorm(
            name="{}.layer_norm_2".format(name),
            network=network,
            weights=weights,
            seq_tensor=out_relu,
            d_layer=duration_predictor_filter_size,
            batch_size=batch_size,
            max_seq_len=max_seq_len,
        )  # (b, t, duration_predictor_filter_size)

        # Pytorch: out = self.linear_layer(out)
        w = weights["{}.linear_layer.weight".format(
            name)]  # (1, duration_predictor_filter_size)
        out_w = network.add_constant(
            shape=(1, 1, duration_predictor_filter_size),
            weights=trt.Weights(w)).get_output(
                0)  # (1, 1, duration_predictor_filter_size)
        linear_w = network.add_matrix_multiply(
            out, MatrixOperation.NONE, out_w, MatrixOperation.TRANSPOSE
        )  # (b, t, duration_predictor_filter_size) * (1->b, duration_predictor_filter_size, 1) => (b, t, 1)
        linear_w.name = "{}.linear.w".format(name)
        out = linear_w.get_output(0)  # (b, t, 1)

        b = weights["{}.linear_layer.bias".format(name)]  # (1,)
        out_b = network.add_constant(
            shape=(1, 1, 1), weights=trt.Weights(b)).get_output(0)  # (1, 1, 1)
        linear_b = network.add_elementwise(input1=out,
                                           input2=out_b,
                                           op=trt.ElementWiseOperation.SUM)
        linear_b.name = "{}.linear.b".format(name)
        out = linear_b.get_output(0)  # (b, t, 1)

        # Pytorch: out *= input_mask.to(out.dtype)
        zeros = network.add_constant(weights=Weights(
            np.zeros(shape=(batch_size, max_seq_len, 1), dtype=np.float32)),
                                     shape=(batch_size, max_seq_len, 1))
        out_zeros = zeros.get_output(0)  # (b, t, 1)
        dur = network.add_select(condition=seq_mask_tensor,
                                 then_input=out,
                                 else_input=out_zeros)
        dur.name = "{}.mask".format(name)
        out_dur = dur.get_output(0)

        # Pytorch: duration = torch.clamp_min(torch.exp(duration) - 1, 0)
        exp = network.add_unary(input=out_dur, op=trt.UnaryOperation.EXP)
        exp.name = "{}.exp".format(name)
        out_exp = exp.get_output(0)
        ones = network.add_constant(weights=Weights(
            np.ones(shape=(batch_size, max_seq_len, 1), dtype=np.float32)),
                                    shape=(batch_size, max_seq_len, 1))
        out_ones = ones.get_output(0)  # (b, t, 1)
        sub = network.add_elementwise(input1=out_exp,
                                      input2=out_ones,
                                      op=trt.ElementWiseOperation.SUB)
        sub.name = "{}.sub_one".format(name)
        out_sub = sub.get_output(0)
        dur = network.add_elementwise(input1=out_sub,
                                      input2=out_zeros,
                                      op=trt.ElementWiseOperation.MAX)
        dur.name = "{}.max".format(name)
        out_dur = dur.get_output(0)

        # Pytorch: repeats = torch.round(repeats).long()
        half_ones = network.add_constant(weights=Weights(
            np.full((batch_size, max_seq_len, 1), 0.5, dtype=np.float32)),
                                         shape=(batch_size, max_seq_len, 1))
        out_half_ones = half_ones.get_output(0)  # (b, t, 1)
        add = network.add_elementwise(input1=out_dur,
                                      input2=out_half_ones,
                                      op=trt.ElementWiseOperation.SUM)
        add.name = "{}.round_add".format(name)
        out_add = add.get_output(0)  # (b, t, 1)
        dur = network.add_elementwise(input1=out_add,
                                      input2=out_ones,
                                      op=trt.ElementWiseOperation.FLOOR_DIV)
        dur.name = "{}.round_floor_div".format(name)
        out_dur = dur.get_output(0)  # (b, t, 1)

        dur = network.add_shuffle(input=out_dur)  # (b, t, 1) to (b, t)
        dur.reshape_dims = Dims(shape=(batch_size, max_seq_len))
        out_dur = dur.get_output(0)  # (b, t)

        return out_dur
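The final block emulates torch.round without a ROUND unary op: add 0.5, then FLOOR_DIV by a tensor of ones. A quick numpy check of the identity; note floor(x + 0.5) rounds halves up, whereas torch.round rounds half to even, a difference these duration values tolerate:

import numpy as np

x = np.array([0.2, 0.5, 1.49, 2.7])
rounded = np.floor((x + 0.5) / 1.0)  # mirrors the SUM + FLOOR_DIV pair above
assert np.array_equal(rounded, np.array([0.0, 1.0, 1.0, 3.0]))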
Example #14
    def populate_pos_wise(self, name, network, weights, seq_tensor, batch_size,
                          max_seq_len, d_model, conv_filter_size,
                          conv_kernel_size, conv_padding):
        # Pytorch: output = x.transpose(1, 2)
        trans1 = network.add_shuffle(
            input=seq_tensor)  # (b, t, d_model) to (b, d_model, t, 1)
        trans1.first_transpose = trt.Permutation([0, 2, 1])
        trans1.reshape_dims = Dims((batch_size, d_model, max_seq_len, 1))
        trans1.name = "{}.trans1".format(name)
        out = trans1.get_output(0)  # (b, d_model, t, 1)

        # Pytorch: output = self.w_1(output)
        conv1_w = weights["{}.w_1.weight".format(
            name)]  # (1, conv_filter_size, d_model, conv_kernel_size, 1)
        conv1_b = weights["{}.w_1.bias".format(name)]  # (cov_filter_size,)
        conv1 = network.add_convolution(input=out,
                                        num_output_maps=conv_filter_size,
                                        kernel_shape=trt.DimsHW(
                                            conv_kernel_size, 1),
                                        kernel=Weights(conv1_w),
                                        bias=Weights(conv1_b))
        conv1.padding = trt.DimsHW(1, 0)
        conv1.name = "{}.conv1".format(name)
        out = conv1.get_output(0)  # (b, conv_filter_size, t, 1)

        if self.validate_accuracy:
            self.add_activation_as_output(network, out,
                                          "act.{}.conv1".format(name))

        # Pytorch: output = F.relu(output)
        relu = network.add_activation(input=out, type=trt.ActivationType.RELU)
        relu.name = "{}.relu".format(name)
        out = relu.get_output(0)  # (b, conv_filter_size, t, 1)

        # Pytorch: output = self.w_2(output)
        conv2_w = weights["{}.w_2.weight".format(
            name)]  # (1, d_model, conv_filter_size, conv_kernel_size, 1)
        conv2_b = weights["{}.w_2.bias".format(name)]  # (d_model, )
        conv2 = network.add_convolution(input=out,
                                        num_output_maps=d_model,
                                        kernel_shape=trt.DimsHW(
                                            conv_kernel_size, 1),
                                        kernel=Weights(conv2_w),
                                        bias=Weights(conv2_b))
        conv2.padding = trt.DimsHW(1, 0)
        conv2.name = "{}.conv2".format(name)
        out = conv2.get_output(0)  # (b, d_model, t, 1)

        if self.validate_accuracy:
            self.add_activation_as_output(network, out,
                                          "act.{}.conv2".format(name))

        # Pytorch: output = output.transpose(1, 2)
        trans2 = network.add_shuffle(
            input=out)  # (b, d_model, t, 1) to (b, t, d_model)
        trans2.first_transpose = trt.Permutation([0, 2, 1, 3])
        trans2.reshape_dims = Dims((batch_size, max_seq_len, d_model))
        trans2.name = "{}.trans2".format(name)
        out = trans2.get_output(0)  # (b, t, d_model)

        # Pytorch: output += residual
        residual = network.add_elementwise(input1=seq_tensor,
                                           input2=out,
                                           op=trt.ElementWiseOperation.SUM)
        residual.name = "{}.residual".format(name)
        out = residual.get_output(0)  # (b, t, d_model)

        if self.validate_accuracy:
            self.add_activation_as_output(network, out,
                                          "act.{}.residual".format(name))

        # Pytorch: output = self.layer_norm(output)
        out = self.populate_layernorm(
            name="{}.layer_norm".format(name),
            network=network,
            weights=weights,
            seq_tensor=out,
            batch_size=self.batch_size,
            max_seq_len=max_seq_len,
            d_layer=d_model,
        )  # (b, t, d_model)

        if self.validate_accuracy:
            self.add_activation_as_output(network, out,
                                          "act.{}.ln".format(name))

        return out
Example #15
    def add_conv2d(self, name, input_tensor_name, weights_name, data_format,
                   padding_type, strides):
        """
        Similar to
        https://github.com/onnx/onnx-tensorrt/blob/6.0-full-dims/builtin_op_importers.cpp#L332

        :param name:
        :param input_tensor_name:
        :param weights_name:
        :param data_format:
        :param padding_type:
        :param strides:
        :return:
        """

        input_tensor = self.get_layer_output(input_tensor_name)
        weights = self.get_layer_weights(weights_name)

        # Check that the number of spatial dimensions and the kernel shape match up.
        nb_spatial_dims = len(input_tensor.shape) - 2
        assert nb_spatial_dims == len(
            weights.shape
        ) - 2, "input tensor and weights do not have the same rank"

        # Check that the convolution uses a channels-first (NCHW) data format
        assert 'NCHW' in data_format, "conv2d is in " + data_format + ", not in NCHW"

        # check for a supported padding type in the convolution
        assert padding_type in [
            "VALID", "SAME"
        ], "Conv2d only supports valid or same padding not " + padding_type

        # Create empty bias arrays
        bias = trt.Weights(type=TRTNetworkBuilder._to_dtype(weights.dtype))
        #if len(input_names) == 3:
        #    bias = self.get_layer_weights(bias_name)

        # Weights are stored in RSCK, where K is the number of output feature maps,
        # C the number of input channels, and R and S the filter height and width.
        num_output_maps = weights.shape[-1]
        kernel_shape = trt.DimsHW(weights.shape[:2])

        # TensorRT cannot construct a Weights object from a non-contiguous
        # array, so transpose RSCK -> KCRS and make the result contiguous.
        weights = weights.transpose([3, 2, 0, 1])
        weights = np.ascontiguousarray(weights, dtype=weights.dtype)
        weights = trt.Weights(a=weights)

        # create layer
        conv2d_layer = self._network.add_convolution(
            input=input_tensor,
            num_output_maps=num_output_maps,
            kernel_shape=kernel_shape,
            kernel=weights,
            bias=bias)
        conv2d_layer.padding_mode = (trt.PaddingMode.EXPLICIT_ROUND_DOWN
                                     if padding_type == "VALID"
                                     else trt.PaddingMode.SAME_UPPER)
        #conv2d_layer.pre_padding = trt.DimsHW([1, 1])
        #conv2d_layer.post_padding = trt.DimsHW([1, 1])
        conv2d_layer.stride = trt.DimsHW(strides[-2:])

        self._remember_op_and_output(conv2d_layer, name)
        return conv2d_layer
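The transpose in add_conv2d reorders TensorFlow-style RSCK weights (filter height, filter width, input channels, output channels) into TensorRT's KCRS layout; a shape-only numpy illustration:

import numpy as np

w_rsck = np.zeros((3, 3, 16, 32), dtype=np.float32)  # R, S, C, K
w_kcrs = np.ascontiguousarray(w_rsck.transpose([3, 2, 0, 1]))  # K, C, R, S
assert w_kcrs.shape == (32, 16, 3, 3)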
Example #16
def create_engine(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    conv0 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(7, 7),
                                    kernel=weight_map["features.conv0.weight"],
                                    bias=trt.Weights())
    assert conv0
    conv0.stride = (2, 2)
    conv0.padding = (3, 3)

    bn0 = add_batch_norm_2d(network, weight_map, conv0.get_output(0),
                            "features.norm0")

    relu0 = network.add_activation(bn0.get_output(0),
                                   type=trt.ActivationType.RELU)
    assert relu0

    pool0 = network.add_pooling(input=relu0.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool0
    pool0.stride_nd = (2, 2)
    pool0.padding_nd = (1, 1)

    dense1 = add_dense_block(network, pool0.get_output(0), weight_map, 6,
                             "features.denseblock1")
    transition1 = add_transition(network, dense1.get_output(0), weight_map,
                                 128, "features.transition1")

    dense2 = add_dense_block(network, transition1.get_output(0), weight_map,
                             12, "features.denseblock2")
    transition2 = add_transition(network, dense2.get_output(0), weight_map,
                                 256, "features.transition2")

    dense3 = add_dense_block(network, transition2.get_output(0), weight_map,
                             24, "features.denseblock3")
    transition3 = add_transition(network, dense3.get_output(0), weight_map,
                                 512, "features.transition3")

    dense4 = add_dense_block(network, transition3.get_output(0), weight_map,
                             16, "features.denseblock4")

    bn5 = add_batch_norm_2d(network, weight_map, dense4.get_output(0),
                            "features.norm5")
    relu5 = network.add_activation(bn5.get_output(0),
                                   type=trt.ActivationType.RELU)

    pool5 = network.add_pooling(relu5.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(7, 7))

    fc1 = network.add_fully_connected(input=pool5.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map["classifier.weight"],
                                      bias=weight_map["classifier.bias"])
    assert fc1

    fc1.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc1.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine