import math

import numpy as np
import tensorrt as trt


def compute(self, input_shape, kernel_shape, stride, padding, dilation, layer_name):
    """Compute the 2D output height/width of a conv/pooling layer (round-up mode)."""
    # Note: dilation and layer_name are accepted but not used by this formula.
    hw = []
    for i in range(2):
        out = math.ceil((input_shape[i] + 2 * padding[i] - kernel_shape[i]) / stride[i] + 1)
        hw.append(out)
        print(out)
    return trt.DimsHW(hw)
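# Worked check of the round-up formula above (illustrative numbers, not from the
# original code): a 224x224 input with a 7x7 kernel, stride 2 and padding 3 gives
# (224 + 2*3 - 7) / 2 + 1 = 112.5, which ceil rounds to 113; the more common
# floor convention would give 112.
assert math.ceil((224 + 2 * 3 - 7) / 2 + 1) == 113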
def add_pooling_func(self, name, input_tensor_name, pooling_type, padding_type,
                     kernel_size, strides, blend_factor=None):
    """
    Similar to
    https://github.com/onnx/onnx-tensorrt/blob/6.0-full-dims/onnx2trt_utils.cpp#L1002

    :param name:
    :param input_tensor_name:
    :param pooling_type:
    :param padding_type:
    :param kernel_size:
    :param strides:
    :param blend_factor: optional
    :return:
    """
    input_tensor = self.get_layer_output(input_tensor_name)

    # Check for valid padding in pooling layers.
    assert padding_type in ["VALID", "SAME"], \
        "Pooling only supports valid or same padding"

    # 2D window size.
    window_size = trt.DimsHW(kernel_size[-2:])

    # Create the layer.
    pooling_layer = self._network.add_pooling(input=input_tensor,
                                              type=pooling_type,
                                              window_size=window_size)
    pooling_layer.stride = trt.DimsHW(strides[-2:])
    pooling_layer.padding_mode = (trt.PaddingMode.EXPLICIT_ROUND_DOWN
                                  if padding_type == "VALID"
                                  else trt.PaddingMode.SAME_UPPER)
    if blend_factor is not None:  # a plain `if blend_factor:` would skip a legitimate 0.0
        pooling_layer.blend_factor = blend_factor
    self._remember_op_and_output(pooling_layer, name)
    return pooling_layer
def add_padding(self, name, input_tensor_name, padding_name):
    """
    Similar to
    https://github.com/onnx/onnx-tensorrt/blob/6.0-full-dims/builtin_op_importers.cpp#L1321

    :param name:
    :param input_tensor_name:
    :param padding_name:
    :return:
    """
    input_tensor = self.get_layer_output(input_tensor_name)
    pad = self.get_layer_weights(padding_name)

    # Just use the last two padding dims in a channels-first setup to get the
    # height/width paddings.
    pre_padding = trt.DimsHW(pad[-2:, 0])
    post_padding = trt.DimsHW(pad[-2:, 1])

    # Create the layer.
    pad_layer = self._network.add_padding(input=input_tensor,
                                          pre_padding=pre_padding,
                                          post_padding=post_padding)
    return self._remember_op_and_output(pad_layer, name)
def add_se_layer(network, weight_map, input, c, w, lname):
    """Squeeze-and-excitation block: global average pool, two FC layers, h-swish gate."""
    h = w
    l1 = network.add_pooling(input=input,
                             type=trt.PoolingType.AVERAGE,
                             window_size=trt.DimsHW(w, h))
    assert l1
    l1.stride_nd = (w, h)
    l2 = network.add_fully_connected(input=l1.get_output(0),
                                     num_outputs=BS * c // 4,
                                     kernel=weight_map[lname + "fc.0.weight"],
                                     bias=weight_map[lname + "fc.0.bias"])
    relu1 = network.add_activation(l2.get_output(0), type=trt.ActivationType.RELU)
    l4 = network.add_fully_connected(input=relu1.get_output(0),
                                     num_outputs=BS * c,
                                     kernel=weight_map[lname + "fc.2.weight"],
                                     bias=weight_map[lname + "fc.2.bias"])
    se = add_h_swish(network, l4.get_output(0))
    return se
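# add_h_swish is called above but not defined in this section. A minimal sketch
# of such a helper (an assumption, not the original implementation):
# h-swish(x) = x * relu6(x + 3) / 6, which TensorRT can express as a
# HARD_SIGMOID activation (alpha=1/6, beta=0.5) multiplied elementwise with x.
def add_h_swish(network, input_tensor):
    hard_sigmoid = network.add_activation(input_tensor,
                                          type=trt.ActivationType.HARD_SIGMOID)
    hard_sigmoid.alpha = 1.0 / 6.0
    hard_sigmoid.beta = 0.5
    prod = network.add_elementwise(input_tensor, hard_sigmoid.get_output(0),
                                   trt.ElementWiseOperation.PROD)
    return prod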
def add_transition(network, input, weight_map, outch, lname):
    bn1 = add_batch_norm_2d(network, weight_map, input, lname + ".norm")
    relu1 = network.add_activation(bn1.get_output(0), type=trt.ActivationType.RELU)
    assert relu1

    conv1 = network.add_convolution(input=relu1.get_output(0),
                                    num_output_maps=outch,
                                    kernel_shape=(1, 1),
                                    kernel=weight_map[lname + ".conv.weight"],
                                    bias=trt.Weights())
    assert conv1
    conv1.stride = (1, 1)

    pool1 = network.add_pooling(input=conv1.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(2, 2))
    assert pool1
    pool1.stride_nd = (2, 2)
    pool1.padding_nd = (0, 0)
    return pool1
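# add_batch_norm_2d is called above and below but not shown in this section. A
# minimal sketch of the usual helper (an assumption about its body; weight keys
# follow PyTorch BatchNorm2d naming): inference-time batch norm folds into a
# per-channel scale, y = gamma * (x - mean) / sqrt(var + eps) + beta.
def add_batch_norm_2d(network, weight_map, input, layer_name, eps=1e-5):
    gamma = weight_map[layer_name + ".weight"]
    beta = weight_map[layer_name + ".bias"]
    mean = weight_map[layer_name + ".running_mean"]
    var = weight_map[layer_name + ".running_var"]
    scale = gamma / np.sqrt(var + eps)  # per-channel multiplier
    shift = beta - mean * scale         # per-channel offset
    return network.add_scale(input=input, mode=trt.ScaleMode.CHANNEL,
                             shift=shift, scale=scale)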
def create_engine(maxBatchSize, builder, dt, weights):
    weight_map = load_weights(weights)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (NUM_SEGMENTS, 3, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(7, 7),
                                    kernel=weight_map["conv1.weight"],
                                    bias=trt.Weights())
    assert conv1
    conv1.stride = (2, 2)
    conv1.padding = (3, 3)

    bn1 = add_batch_norm_2d(network, weight_map, conv1.get_output(0), "bn1", EPS)
    assert bn1

    relu1 = network.add_activation(bn1.get_output(0), type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                window_size=trt.DimsHW(3, 3),
                                type=trt.PoolingType.MAX)
    assert pool1
    pool1.stride = (2, 2)
    pool1.padding = (1, 1)

    cur_height = INPUT_H // 4
    cur_width = INPUT_W // 4
    x = bottleneck(network, weight_map, pool1.get_output(0), 64, 64, 1, "layer1.0.",
                   (NUM_SEGMENTS, 64, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1, "layer1.1.",
                   (NUM_SEGMENTS, 256, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1, "layer1.2.",
                   (NUM_SEGMENTS, 256, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 256, 128, 2, "layer2.0.",
                   (NUM_SEGMENTS, 256, cur_height, cur_width))

    cur_height = INPUT_H // 8
    cur_width = INPUT_W // 8
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1, "layer2.1.",
                   (NUM_SEGMENTS, 512, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1, "layer2.2.",
                   (NUM_SEGMENTS, 512, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1, "layer2.3.",
                   (NUM_SEGMENTS, 512, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 512, 256, 2, "layer3.0.",
                   (NUM_SEGMENTS, 512, cur_height, cur_width))

    cur_height = INPUT_H // 16
    cur_width = INPUT_W // 16
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.1.",
                   (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.2.",
                   (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.3.",
                   (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.4.",
                   (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.5.",
                   (NUM_SEGMENTS, 1024, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 512, 2, "layer4.0.",
                   (NUM_SEGMENTS, 1024, cur_height, cur_width))

    cur_height = INPUT_H // 32
    cur_width = INPUT_W // 32
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1, "layer4.1.",
                   (NUM_SEGMENTS, 2048, cur_height, cur_width))
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1, "layer4.2.",
                   (NUM_SEGMENTS, 2048, cur_height, cur_width))

    pool2 = network.add_pooling(x.get_output(0),
                                window_size=trt.DimsHW(cur_height, cur_width),
                                type=trt.PoolingType.AVERAGE)
    assert pool2
    pool2.stride = (1, 1)

    fc1 = network.add_fully_connected(input=pool2.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map['fc.weight'],
                                      bias=weight_map['fc.bias'])
    assert fc1

    reshape = network.add_shuffle(fc1.get_output(0))
    assert reshape
    reshape.reshape_dims = (NUM_SEGMENTS, OUTPUT_SIZE)

    reduce = network.add_reduce(reshape.get_output(0),
                                op=trt.ReduceOperation.AVG,
                                axes=1,
                                keep_dims=False)
    assert reduce

    softmax = network.add_softmax(reduce.get_output(0))
    assert softmax
    softmax.axes = 1

    softmax.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(softmax.get_output(0))

    # Build engine
    builder.max_batch_size = maxBatchSize
    builder.max_workspace_size = 1 << 20
    engine = builder.build_cuda_engine(network)

    del network
    del weight_map

    return engine
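# A minimal driver sketch (not part of the original code; the file names are
# hypothetical) showing how create_engine above would typically be invoked and
# the resulting plan serialized to disk.
def build_and_save(weights_path="tsn_r50.wts", plan_path="tsn_r50.engine",
                   max_batch_size=1):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    engine = create_engine(max_batch_size, builder, trt.float32, weights_path)
    assert engine
    with open(plan_path, "wb") as f:
        f.write(engine.serialize())
    del engine
    del builder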
def cola_output(prefix, config, init_dict, network, input_tensor):
    """
    Create the CoLA output
    """
    print(input_tensor.shape)
    idims = input_tensor.shape
    assert len(idims) == 5
    B, S, hidden_size, _, _ = idims

    # Add a shuffle layer for reshaping and permutation.
    shuffle = network.add_shuffle(input_tensor)
    shuffle.first_transpose = (0, 2, 1, 3, 4)
    shuffle.reshape_dims = (B, hidden_size, S, 1)
    input_tensor = shuffle.get_output(0)
    print(input_tensor.shape)

    # Add convolution layers.
    conv_outputs = []
    bag = []  # to make sure conv_w and conv_b won't be released by python
    for i in range(3):
        # Add conv.
        kernel_size = trt.DimsHW(3 + i, 1)
        conv_w = init_dict['conv{}_kernel'.format(i)]
        conv_b = init_dict['conv{}_biases'.format(i)]
        print(input_tensor.shape, kernel_size, conv_w.size)
        conv = network.add_convolution(input=input_tensor,
                                       num_output_maps=100,
                                       kernel_shape=kernel_size,
                                       kernel=conv_w,
                                       bias=conv_b)
        conv.stride = (1, 1)
        conv.padding_mode = trt.PaddingMode.SAME_LOWER
        set_layer_name(conv, prefix, "conv{}".format(i))
        bag += [conv_w, conv_b]
        print("conv output shape: ", conv.get_output(0).shape)

        # Add relu.
        relu = network.add_activation(input=conv.get_output(0),
                                      type=trt.ActivationType.RELU)
        set_layer_name(relu, prefix, "relu{}".format(i))

        # Add pooling.
        pooling = network.add_pooling(input=relu.get_output(0),
                                      type=trt.PoolingType.MAX,
                                      window_size=(8, 1))
        pooling.stride = (1, 1)
        set_layer_name(pooling, prefix, "pooling{}".format(i))
        print("Pooling output shape", pooling.get_output(0).shape)

        # Add flatten.
        # flatten = network.add_reduce(
        #     input=pooling.get_output(0),
        #     op=trt.tensorrt.ReduceOperation.SUM,
        #     axes=1,  # first non-batch dimension
        #     keep_dims=False
        # )
        # set_layer_name(flatten, prefix, "flatten{}".format(i))

        # For concat.
        conv_outputs.append(pooling.get_output(0))

    concat = network.add_concatenation(inputs=conv_outputs)
    print("Concat output shape:", concat.get_output(0).shape)
    set_layer_name(concat, prefix, "concat")

    # FC layer.
    dense = network.add_fully_connected(concat.get_output(0), 22,
                                        init_dict['fc0_weights'].numpy(),
                                        init_dict['fc0_biases'].numpy())
    set_layer_name(dense, prefix, "dense")
    print("fc layer output shape: ", dense.get_output(0).shape)

    # Softmax layer.
    softmax = network.add_softmax(input=dense.get_output(0))
    print("softmax layer output shape: ", softmax.get_output(0).shape)
    return softmax
def createLenetEngine(maxBatchSize, builder, config, dt):
    # NOTE: despite the name, this builds a ResNet-50 classifier (3-4-6-3 bottlenecks).
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(7, 7),
                                    kernel=weight_map["conv1.weight"],
                                    bias=trt.Weights())
    assert conv1
    conv1.stride = (2, 2)
    conv1.padding = (3, 3)

    bn1 = addBatchNorm2d(network, weight_map, conv1.get_output(0), "bn1", EPS)
    assert bn1

    relu1 = network.add_activation(bn1.get_output(0), type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                window_size=trt.DimsHW(3, 3),
                                type=trt.PoolingType.MAX)
    assert pool1
    pool1.stride = (2, 2)
    pool1.padding = (1, 1)

    x = bottleneck(network, weight_map, pool1.get_output(0), 64, 64, 1, "layer1.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1, "layer1.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 256, 64, 1, "layer1.2.")

    x = bottleneck(network, weight_map, x.get_output(0), 256, 128, 2, "layer2.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1, "layer2.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1, "layer2.2.")
    x = bottleneck(network, weight_map, x.get_output(0), 512, 128, 1, "layer2.3.")

    x = bottleneck(network, weight_map, x.get_output(0), 512, 256, 2, "layer3.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.2.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.3.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.4.")
    x = bottleneck(network, weight_map, x.get_output(0), 1024, 256, 1, "layer3.5.")

    x = bottleneck(network, weight_map, x.get_output(0), 1024, 512, 2, "layer4.0.")
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1, "layer4.1.")
    x = bottleneck(network, weight_map, x.get_output(0), 2048, 512, 1, "layer4.2.")

    pool2 = network.add_pooling(x.get_output(0),
                                window_size=trt.DimsHW(7, 7),
                                type=trt.PoolingType.AVERAGE)
    assert pool2
    pool2.stride = (1, 1)

    fc1 = network.add_fully_connected(input=pool2.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map['fc.weight'],
                                      bias=weight_map['fc.bias'])
    assert fc1

    fc1.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc1.get_output(0))

    # Build engine
    builder.max_batch_size = maxBatchSize
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
def create_engine_large(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH_SMALL)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    ew1 = conv_bn_h_swish(network, weight_map, data, 16, 3, 2, 1, "features.0.")
    ir1 = inverted_res(network, weight_map, ew1.get_output(0), "features.1.", 16, 16, 1, 16, 3, 0, 0, 112)
    ir2 = inverted_res(network, weight_map, ir1.get_output(0), "features.2.", 16, 24, 2, 64, 3, 0, 0, 56)
    ir3 = inverted_res(network, weight_map, ir2.get_output(0), "features.3.", 24, 24, 1, 72, 3, 0, 0, 56)
    ir4 = inverted_res(network, weight_map, ir3.get_output(0), "features.4.", 24, 40, 2, 72, 5, 1, 0, 28)
    ir5 = inverted_res(network, weight_map, ir4.get_output(0), "features.5.", 40, 40, 1, 120, 5, 1, 0, 28)
    ir6 = inverted_res(network, weight_map, ir5.get_output(0), "features.6.", 40, 40, 1, 120, 5, 1, 0, 28)
    ir7 = inverted_res(network, weight_map, ir6.get_output(0), "features.7.", 40, 80, 2, 240, 3, 0, 1, 14)
    ir8 = inverted_res(network, weight_map, ir7.get_output(0), "features.8.", 80, 80, 1, 200, 3, 0, 1, 14)
    ir9 = inverted_res(network, weight_map, ir8.get_output(0), "features.9.", 80, 80, 1, 184, 3, 0, 1, 14)
    ir10 = inverted_res(network, weight_map, ir9.get_output(0), "features.10.", 80, 80, 1, 184, 3, 0, 1, 14)
    ir11 = inverted_res(network, weight_map, ir10.get_output(0), "features.11.", 80, 112, 1, 480, 3, 1, 1, 14)
    ir12 = inverted_res(network, weight_map, ir11.get_output(0), "features.12.", 112, 112, 1, 672, 3, 1, 1, 14)
    ir13 = inverted_res(network, weight_map, ir12.get_output(0), "features.13.", 112, 160, 1, 672, 5, 1, 1, 14)
    ir14 = inverted_res(network, weight_map, ir13.get_output(0), "features.14.", 160, 160, 2, 672, 5, 1, 1, 7)
    ir15 = inverted_res(network, weight_map, ir14.get_output(0), "features.15.", 160, 160, 1, 960, 5, 1, 1, 7)
    ew2 = conv_bn_h_swish(network, weight_map, ir15.get_output(0), 960, 1, 1, 1, "conv.0.")

    pool1 = network.add_pooling(input=ew2.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(7, 7))
    assert pool1
    pool1.stride_nd = (7, 7)

    sw1 = add_h_swish(network, pool1.get_output(0))
    fc1 = network.add_fully_connected(input=sw1.get_output(0),
                                      num_outputs=1280,
                                      kernel=weight_map["classifier.0.weight"],
                                      bias=weight_map["classifier.0.bias"])
    assert fc1

    sw2 = add_h_swish(network, fc1.get_output(0))
    fc2 = network.add_fully_connected(input=sw2.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map["classifier.3.weight"],
                                      bias=weight_map["classifier.3.bias"])

    fc2.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc2.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
def create_engine(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(11, 11),
                                    kernel=weight_map["features.0.weight"],
                                    bias=weight_map["features.0.bias"])
    assert conv1
    conv1.stride = (4, 4)
    conv1.padding = (2, 2)

    relu1 = network.add_activation(conv1.get_output(0), type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool1
    pool1.stride_nd = (2, 2)

    conv2 = network.add_convolution(input=pool1.get_output(0),
                                    num_output_maps=192,
                                    kernel_shape=(5, 5),
                                    kernel=weight_map["features.3.weight"],
                                    bias=weight_map["features.3.bias"])
    assert conv2
    conv2.padding = (2, 2)

    relu2 = network.add_activation(conv2.get_output(0), type=trt.ActivationType.RELU)
    assert relu2

    pool2 = network.add_pooling(input=relu2.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool2
    pool2.stride_nd = (2, 2)

    conv3 = network.add_convolution(input=pool2.get_output(0),
                                    num_output_maps=384,
                                    kernel_shape=(3, 3),
                                    kernel=weight_map["features.6.weight"],
                                    bias=weight_map["features.6.bias"])
    assert conv3
    conv3.padding = (1, 1)

    relu3 = network.add_activation(conv3.get_output(0), type=trt.ActivationType.RELU)
    assert relu3

    conv4 = network.add_convolution(input=relu3.get_output(0),
                                    num_output_maps=256,
                                    kernel_shape=(3, 3),
                                    kernel=weight_map["features.8.weight"],
                                    bias=weight_map["features.8.bias"])
    assert conv4
    conv4.padding = (1, 1)

    relu4 = network.add_activation(conv4.get_output(0), type=trt.ActivationType.RELU)
    assert relu4

    conv5 = network.add_convolution(input=relu4.get_output(0),
                                    num_output_maps=256,
                                    kernel_shape=(3, 3),
                                    kernel=weight_map["features.10.weight"],
                                    bias=weight_map["features.10.bias"])
    assert conv5
    conv5.padding = (1, 1)

    relu5 = network.add_activation(conv5.get_output(0), type=trt.ActivationType.RELU)
    assert relu5

    pool3 = network.add_pooling(input=relu5.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool3
    pool3.stride_nd = (2, 2)

    fc1 = network.add_fully_connected(input=pool3.get_output(0),
                                      num_outputs=4096,
                                      kernel=weight_map["classifier.1.weight"],
                                      bias=weight_map["classifier.1.bias"])
    assert fc1

    relu6 = network.add_activation(fc1.get_output(0), type=trt.ActivationType.RELU)
    assert relu6

    fc2 = network.add_fully_connected(input=relu6.get_output(0),
                                      num_outputs=4096,
                                      kernel=weight_map["classifier.4.weight"],
                                      bias=weight_map["classifier.4.bias"])
    assert fc2

    relu7 = network.add_activation(fc2.get_output(0), type=trt.ActivationType.RELU)
    assert relu7

    fc3 = network.add_fully_connected(input=relu7.get_output(0),
                                      num_outputs=1000,
                                      kernel=weight_map["classifier.6.weight"],
                                      bias=weight_map["classifier.6.bias"])
    assert fc3

    fc3.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc3.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
def create_engine(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    ew1 = conv_bn_relu(network, weight_map, data, 32, 3, 2, 1, "features.0.")
    ir1 = inverted_res(network, weight_map, ew1.get_output(0), "features.1.", 32, 16, 1, 1)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.2.", 16, 24, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.3.", 24, 24, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.4.", 24, 32, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.5.", 32, 32, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.6.", 32, 32, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.7.", 32, 64, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.8.", 64, 64, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.9.", 64, 64, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.10.", 64, 64, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.11.", 64, 96, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.12.", 96, 96, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.13.", 96, 96, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.14.", 96, 160, 2, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.15.", 160, 160, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.16.", 160, 160, 1, 6)
    ir1 = inverted_res(network, weight_map, ir1.get_output(0), "features.17.", 160, 320, 1, 6)
    ew2 = conv_bn_relu(network, weight_map, ir1.get_output(0), 1280, 1, 1, 1, "features.18.")

    pool1 = network.add_pooling(input=ew2.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(7, 7))
    assert pool1

    fc1 = network.add_fully_connected(input=pool1.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map["classifier.1.weight"],
                                      bias=weight_map["classifier.1.bias"])
    assert fc1

    fc1.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc1.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 32
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
def createLenetEngine(maxBatchSize, builder, config, dt):
    weight_map = load_weights(weight_path)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (1, INPUT_H, INPUT_W))
    assert data

    conv1 = network.add_convolution(input=data,
                                    num_output_maps=6,
                                    kernel_shape=(5, 5),
                                    kernel=weight_map["conv1.weight"],
                                    bias=weight_map["conv1.bias"])
    assert conv1
    conv1.stride = (1, 1)

    relu1 = network.add_activation(conv1.get_output(0), type=trt.ActivationType.RELU)
    assert relu1

    pool1 = network.add_pooling(input=relu1.get_output(0),
                                window_size=trt.DimsHW(2, 2),
                                type=trt.PoolingType.AVERAGE)
    assert pool1
    pool1.stride = (2, 2)

    conv2 = network.add_convolution(pool1.get_output(0), 16, trt.DimsHW(5, 5),
                                    weight_map["conv2.weight"],
                                    weight_map["conv2.bias"])
    assert conv2
    conv2.stride = (1, 1)

    relu2 = network.add_activation(conv2.get_output(0), type=trt.ActivationType.RELU)
    assert relu2

    pool2 = network.add_pooling(input=relu2.get_output(0),
                                window_size=trt.DimsHW(2, 2),
                                type=trt.PoolingType.AVERAGE)
    assert pool2
    pool2.stride = (2, 2)

    fc1 = network.add_fully_connected(input=pool2.get_output(0),
                                      num_outputs=120,
                                      kernel=weight_map['fc1.weight'],
                                      bias=weight_map['fc1.bias'])
    assert fc1

    relu3 = network.add_activation(fc1.get_output(0), type=trt.ActivationType.RELU)
    assert relu3

    fc2 = network.add_fully_connected(input=relu3.get_output(0),
                                      num_outputs=84,
                                      kernel=weight_map['fc2.weight'],
                                      bias=weight_map['fc2.bias'])
    assert fc2

    relu4 = network.add_activation(fc2.get_output(0), type=trt.ActivationType.RELU)
    assert relu4

    fc3 = network.add_fully_connected(input=relu4.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map['fc3.weight'],
                                      bias=weight_map['fc3.bias'])
    assert fc3

    prob = network.add_softmax(fc3.get_output(0))
    assert prob

    prob.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(prob.get_output(0))

    # Build engine
    builder.max_batch_size = maxBatchSize
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine
def populate_duration_predictor(self, name, network, weights, seq_tensor,
                                seq_mask_tensor, batch_size, max_seq_len, d_model):
    duration_predictor_filter_size = self.model.duration_predictor_filter_size
    duration_predictor_kernel_size = self.model.duration_predictor_kernel_size

    # Pytorch: input *= input_mask.to(input.dtype)  # can be skipped.

    # Pytorch: out = self.conv1d_1(input.transpose(1,2)).transpose(1,2)
    trans1 = network.add_shuffle(input=seq_tensor)  # (b, t, d_model) to (b, d_model, t, 1)
    trans1.first_transpose = trt.Permutation([0, 2, 1])
    trans1.reshape_dims = Dims((batch_size, d_model, max_seq_len, 1))
    trans1.name = "{}.trans1".format(name)
    out = trans1.get_output(0)  # (b, d_model, t, 1)

    conv1_w = weights["{}.conv1d_1.weight".format(
        name
    )]  # (1, d_model, duration_predictor_filter_size, duration_predictor_kernel_size, 1)
    conv1_b = weights["{}.conv1d_1.bias".format(
        name)]  # (duration_predictor_filter_size, )
    conv1 = network.add_convolution(
        input=out,
        num_output_maps=duration_predictor_filter_size,
        kernel_shape=trt.DimsHW(duration_predictor_kernel_size, 1),
        kernel=Weights(conv1_w),
        bias=Weights(conv1_b))
    conv1.padding = trt.DimsHW(1, 0)
    conv1.name = "{}.conv1".format(name)
    out = conv1.get_output(0)  # (b, duration_predictor_filter_size, t, 1)

    trans2 = network.add_shuffle(
        input=out
    )  # (b, duration_predictor_filter_size, t, 1) to (b, t, duration_predictor_filter_size)
    trans2.first_transpose = trt.Permutation([0, 2, 1, 3])
    trans2.reshape_dims = Dims(
        (batch_size, max_seq_len, duration_predictor_filter_size))
    trans2.name = "{}.trans2".format(name)
    out = trans2.get_output(0)  # (b, t, duration_predictor_filter_size)

    # Pytorch: out = self.relu_1(out)
    relu = network.add_activation(input=out, type=trt.ActivationType.RELU)
    relu.name = "{}.relu1".format(name)
    out_relu = relu.get_output(0)  # (b, t, duration_predictor_filter_size)

    # Pytorch: out = self.layer_norm_1(out)
    out = self.populate_layernorm(name="{}.layer_norm_1".format(name),
                                  network=network,
                                  weights=weights,
                                  seq_tensor=out_relu,
                                  d_layer=duration_predictor_filter_size,
                                  batch_size=batch_size,
                                  max_seq_len=max_seq_len)

    # Pytorch: out = self.conv1d_2(out.transpose(1,2)).transpose(1,2)
    trans3 = network.add_shuffle(
        input=out
    )  # (b, t, duration_predictor_filter_size) to (b, duration_predictor_filter_size, t, 1)
    trans3.first_transpose = trt.Permutation([0, 2, 1])
    trans3.reshape_dims = Dims(
        (batch_size, duration_predictor_filter_size, max_seq_len, 1))
    trans3.name = "{}.trans3".format(name)
    out = trans3.get_output(0)  # (b, duration_predictor_filter_size, t, 1)

    conv2_w = weights["{}.conv1d_2.weight".format(
        name
    )]  # (1, duration_predictor_filter_size, duration_predictor_filter_size, duration_predictor_kernel_size, 1)
    conv2_b = weights["{}.conv1d_2.bias".format(
        name)]  # (duration_predictor_filter_size, )
    conv2 = network.add_convolution(
        input=out,
        num_output_maps=duration_predictor_filter_size,
        kernel_shape=trt.DimsHW(duration_predictor_kernel_size, 1),
        kernel=Weights(conv2_w),
        bias=Weights(conv2_b))
    conv2.padding = trt.DimsHW(1, 0)
    conv2.name = "{}.conv2".format(name)
    out = conv2.get_output(0)

    trans4 = network.add_shuffle(
        input=out
    )  # (b, duration_predictor_filter_size, t, 1) to (b, t, duration_predictor_filter_size)
    trans4.first_transpose = trt.Permutation([0, 2, 1, 3])
    trans4.reshape_dims = Dims(
        (batch_size, max_seq_len, duration_predictor_filter_size))
    trans4.name = "{}.trans4".format(name)
    out = trans4.get_output(0)  # (b, t, duration_predictor_filter_size)

    # Pytorch: out = self.relu_2(out)
    relu = network.add_activation(input=out, type=trt.ActivationType.RELU)
    relu.name = "{}.relu2".format(name)
    out_relu = relu.get_output(0)  # (b, t, duration_predictor_filter_size)

    # Pytorch: out = self.layer_norm_2(out)
    out = self.populate_layernorm(
        name="{}.layer_norm_2".format(name),
        network=network,
        weights=weights,
        seq_tensor=out_relu,
        d_layer=duration_predictor_filter_size,
        batch_size=batch_size,
        max_seq_len=max_seq_len,
    )  # (b, t, duration_predictor_filter_size)

    # Pytorch: out = self.linear_layer(out)
    w = weights["{}.linear_layer.weight".format(
        name)]  # (1, duration_predictor_filter_size)
    out_w = network.add_constant(
        shape=(1, 1, duration_predictor_filter_size),
        weights=trt.Weights(w)).get_output(0)  # (1, 1, duration_predictor_filter_size)
    linear_w = network.add_matrix_multiply(
        out, MatrixOperation.NONE, out_w, MatrixOperation.TRANSPOSE
    )  # (b, t, duration_predictor_filter_size) * (1->b, duration_predictor_filter_size, 1) => (b, t, 1)
    linear_w.name = "{}.linear.w".format(name)
    out = linear_w.get_output(0)  # (b, t, 1)

    b = weights["{}.linear_layer.bias".format(name)]  # (1,)
    out_b = network.add_constant(shape=(1, 1, 1),
                                 weights=trt.Weights(b)).get_output(0)  # (1, 1, 1)
    linear_b = network.add_elementwise(input1=out,
                                       input2=out_b,
                                       op=trt.ElementWiseOperation.SUM)
    linear_b.name = "{}.linear.b".format(name)
    out = linear_b.get_output(0)  # (b, t, 1)

    # Pytorch: out *= input_mask.to(out.dtype)
    zeros = network.add_constant(
        weights=Weights(np.zeros(shape=(batch_size, max_seq_len, 1), dtype=np.float32)),
        shape=(batch_size, max_seq_len, 1))
    out_zeros = zeros.get_output(0)  # (b, t, 1)
    dur = network.add_select(condition=seq_mask_tensor,
                             then_input=out,
                             else_input=out_zeros)
    dur.name = "{}.mask".format(name)
    out_dur = dur.get_output(0)

    # Pytorch: duration = torch.clamp_min(torch.exp(duration) - 1, 0)
    exp = network.add_unary(input=out_dur, op=trt.UnaryOperation.EXP)
    exp.name = "{}.exp".format(name)
    out_exp = exp.get_output(0)

    ones = network.add_constant(
        weights=Weights(np.ones(shape=(batch_size, max_seq_len, 1), dtype=np.float32)),
        shape=(batch_size, max_seq_len, 1))
    out_ones = ones.get_output(0)  # (b, t, 1)
    sub = network.add_elementwise(input1=out_exp,
                                  input2=out_ones,
                                  op=trt.ElementWiseOperation.SUB)
    sub.name = "{}.sub_one".format(name)
    out_sub = sub.get_output(0)

    dur = network.add_elementwise(input1=out_sub,
                                  input2=out_zeros,
                                  op=trt.ElementWiseOperation.MAX)
    dur.name = "{}.max".format(name)
    out_dur = dur.get_output(0)

    # Pytorch: repeats = torch.round(repeats).long()
    half_ones = network.add_constant(
        weights=Weights(np.full((batch_size, max_seq_len, 1), 0.5, dtype=np.float32)),
        shape=(batch_size, max_seq_len, 1))
    out_half_ones = half_ones.get_output(0)  # (b, t, 1)
    add = network.add_elementwise(input1=out_dur,
                                  input2=out_half_ones,
                                  op=trt.ElementWiseOperation.SUM)
    add.name = "{}.round_add".format(name)
    out_add = add.get_output(0)  # (b, t, 1)
    dur = network.add_elementwise(input1=out_add,
                                  input2=out_ones,
                                  op=trt.ElementWiseOperation.FLOOR_DIV)
    dur.name = "{}.round_floor_div".format(name)
    out_dur = dur.get_output(0)  # (b, t, 1)

    dur = network.add_shuffle(input=out_dur)  # (b, t, 1) to (b, t)
    dur.reshape_dims = Dims(shape=(batch_size, max_seq_len))
    out_dur = dur.get_output(0)  # (b, t)

    return out_dur
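# Quick numpy check (illustrative, not from the original file) of the rounding
# trick above: adding 0.5 and FLOOR_DIV by 1 implements round-half-up, which
# matches torch.round for these non-negative durations except at exact .5 ties,
# where torch rounds half to even.
x = np.array([0.0, 0.4, 1.49, 1.5, 2.7], dtype=np.float32)
print(np.floor_divide(x + 0.5, 1.0))  # [0. 0. 1. 2. 3.]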
def populate_pos_wise(self, name, network, weights, seq_tensor, batch_size,
                      max_seq_len, d_model, conv_filter_size, conv_kernel_size,
                      conv_padding):
    # Pytorch: output = x.transpose(1, 2)
    trans1 = network.add_shuffle(input=seq_tensor)  # (b, t, d_model) to (b, d_model, t, 1)
    trans1.first_transpose = trt.Permutation([0, 2, 1])
    trans1.reshape_dims = Dims((batch_size, d_model, max_seq_len, 1))
    trans1.name = "{}.trans1".format(name)
    out = trans1.get_output(0)  # (b, d_model, t, 1)

    # Pytorch: output = self.w_1(output)
    conv1_w = weights["{}.w_1.weight".format(
        name)]  # (1, conv_filter_size, d_model, conv_kernel_size, 1)
    conv1_b = weights["{}.w_1.bias".format(name)]  # (conv_filter_size,)
    conv1 = network.add_convolution(input=out,
                                    num_output_maps=conv_filter_size,
                                    kernel_shape=trt.DimsHW(conv_kernel_size, 1),
                                    kernel=Weights(conv1_w),
                                    bias=Weights(conv1_b))
    conv1.padding = trt.DimsHW(1, 0)
    conv1.name = "{}.conv1".format(name)
    out = conv1.get_output(0)  # (b, conv_filter_size, t, 1)

    if self.validate_accuracy:
        self.add_activation_as_output(network, out, "act.{}.conv1".format(name))

    # Pytorch: output = F.relu(output)
    relu = network.add_activation(input=out, type=trt.ActivationType.RELU)
    relu.name = "{}.relu".format(name)
    out = relu.get_output(0)  # (b, conv_filter_size, t, 1)

    # Pytorch: output = self.w_2(output)
    conv2_w = weights["{}.w_2.weight".format(
        name)]  # (1, d_model, conv_filter_size, conv_kernel_size, 1)
    conv2_b = weights["{}.w_2.bias".format(name)]  # (d_model, )
    conv2 = network.add_convolution(input=out,
                                    num_output_maps=d_model,
                                    kernel_shape=trt.DimsHW(conv_kernel_size, 1),
                                    kernel=Weights(conv2_w),
                                    bias=Weights(conv2_b))
    conv2.padding = trt.DimsHW(1, 0)
    conv2.name = "{}.conv2".format(name)
    out = conv2.get_output(0)  # (b, d_model, t, 1)

    if self.validate_accuracy:
        self.add_activation_as_output(network, out, "act.{}.conv2".format(name))

    # Pytorch: output = output.transpose(1, 2)
    trans2 = network.add_shuffle(input=out)  # (b, d_model, t, 1) to (b, t, d_model)
    trans2.first_transpose = trt.Permutation([0, 2, 1, 3])
    trans2.reshape_dims = Dims((batch_size, max_seq_len, d_model))
    trans2.name = "{}.trans2".format(name)
    out = trans2.get_output(0)  # (b, t, d_model)

    # Pytorch: output += residual
    residual = network.add_elementwise(input1=seq_tensor,
                                       input2=out,
                                       op=trt.ElementWiseOperation.SUM)
    residual.name = "{}.residual".format(name)
    out = residual.get_output(0)  # (b, t, d_model)

    if self.validate_accuracy:
        self.add_activation_as_output(network, out, "act.{}.residual".format(name))

    # Pytorch: output = self.layer_norm(output)
    out = self.populate_layernorm(
        name="{}.layer_norm".format(name),
        network=network,
        weights=weights,
        seq_tensor=out,
        batch_size=self.batch_size,
        max_seq_len=max_seq_len,
        d_layer=d_model,
    )  # (b, t, d_model)

    if self.validate_accuracy:
        self.add_activation_as_output(network, out, "act.{}.ln".format(name))

    return out
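# populate_layernorm is called above but not defined in this section. A possible
# sketch (an assumption, written for an explicit-batch network; the real method
# may differ): LayerNorm over the last axis of a (b, t, d) tensor built from
# reduce/elementwise primitives, with PyTorch-style .weight/.bias keys.
def populate_layernorm(self, name, network, weights, seq_tensor, batch_size,
                       max_seq_len, d_layer, eps=1e-5):
    axes = 1 << 2  # bitmask selecting the last axis of the 3-D (b, t, d) tensor
    mean = network.add_reduce(seq_tensor, trt.ReduceOperation.AVG, axes,
                              keep_dims=True)
    diff = network.add_elementwise(seq_tensor, mean.get_output(0),
                                   trt.ElementWiseOperation.SUB)
    sq = network.add_elementwise(diff.get_output(0), diff.get_output(0),
                                 trt.ElementWiseOperation.PROD)
    var = network.add_reduce(sq.get_output(0), trt.ReduceOperation.AVG, axes,
                             keep_dims=True)
    eps_c = network.add_constant(
        (1, 1, 1), trt.Weights(np.full((1, 1, 1), eps, dtype=np.float32)))
    var_eps = network.add_elementwise(var.get_output(0), eps_c.get_output(0),
                                      trt.ElementWiseOperation.SUM)
    std = network.add_unary(var_eps.get_output(0), trt.UnaryOperation.SQRT)
    norm = network.add_elementwise(diff.get_output(0), std.get_output(0),
                                   trt.ElementWiseOperation.DIV)
    gamma = network.add_constant(
        (1, 1, d_layer), trt.Weights(weights["{}.weight".format(name)]))
    scaled = network.add_elementwise(norm.get_output(0), gamma.get_output(0),
                                     trt.ElementWiseOperation.PROD)
    beta = network.add_constant(
        (1, 1, d_layer), trt.Weights(weights["{}.bias".format(name)]))
    shifted = network.add_elementwise(scaled.get_output(0), beta.get_output(0),
                                      trt.ElementWiseOperation.SUM)
    return shifted.get_output(0)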
def add_conv2d(self, name, input_tensor_name, weights_name, data_format,
               padding_type, strides):
    """
    Similar to
    https://github.com/onnx/onnx-tensorrt/blob/6.0-full-dims/builtin_op_importers.cpp#L332

    :param name:
    :param input_tensor_name:
    :param weights_name:
    :param data_format:
    :param padding_type:
    :param strides:
    :return:
    """
    input_tensor = self.get_layer_output(input_tensor_name)
    weights = self.get_layer_weights(weights_name)

    # Check that the number of spatial dimensions and the kernel shape match up.
    nb_spatial_dims = len(input_tensor.shape) - 2
    assert nb_spatial_dims == len(weights.shape) - 2, \
        "input tensor and weights do not have the same rank"

    # Check that the data of the weights is in NCHW.
    assert 'NCHW' in data_format, "conv2d is in " + data_format + ", not in NCHW"

    # Check for valid padding in convolution layers.
    assert padding_type in ["VALID", "SAME"], \
        "Conv2d only supports valid or same padding not " + padding_type

    # Create an empty bias array.
    bias = trt.Weights(type=TRTNetworkBuilder._to_dtype(weights.dtype))
    # if len(input_names) == 3:
    #     bias = self.get_layer_weights(bias_name)

    # Weights are stored in RSCK where K is the number of output feature maps,
    # C the number of input channels, and R and S are the height and width of
    # the filter.
    num_output_maps = weights.shape[-1]
    kernel_shape = trt.DimsHW(weights.shape[:2])

    # Cannot construct a Weights object from a non-contiguous array, so make
    # the transposed KCRS view contiguous with numpy.ascontiguousarray.
    weights = weights.transpose([3, 2, 0, 1])
    weights = np.ascontiguousarray(weights, dtype=weights.dtype)
    weights = trt.Weights(a=weights)

    # Create the layer.
    conv2d_layer = self._network.add_convolution(
        input=input_tensor,
        num_output_maps=num_output_maps,
        kernel_shape=kernel_shape,
        kernel=weights,
        bias=bias)
    conv2d_layer.padding_mode = (trt.PaddingMode.EXPLICIT_ROUND_DOWN
                                 if padding_type == "VALID"
                                 else trt.PaddingMode.SAME_UPPER)
    # conv2d_layer.pre_padding = trt.DimsHW([1, 1])
    # conv2d_layer.post_padding = trt.DimsHW([1, 1])
    conv2d_layer.stride = trt.DimsHW(strides[-2:])
    self._remember_op_and_output(conv2d_layer, name)
    return conv2d_layer
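# Tiny numpy illustration (not from the original code) of the RSCK -> KCRS
# relayout performed in add_conv2d above: TensorFlow stores conv kernels as
# (R, S, C, K) while TensorRT expects (K, C, R, S), and the transposed view
# must be made contiguous before wrapping it in trt.Weights.
rsck = np.arange(2 * 2 * 3 * 4, dtype=np.float32).reshape(2, 2, 3, 4)  # R, S, C, K
kcrs = np.ascontiguousarray(rsck.transpose(3, 2, 0, 1))                # K, C, R, S
assert kcrs.shape == (4, 3, 2, 2)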
def create_engine(max_batch_size, builder, config, dt):
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    conv0 = network.add_convolution(input=data,
                                    num_output_maps=64,
                                    kernel_shape=(7, 7),
                                    kernel=weight_map["features.conv0.weight"],
                                    bias=trt.Weights())
    assert conv0
    conv0.stride = (2, 2)
    conv0.padding = (3, 3)

    bn0 = add_batch_norm_2d(network, weight_map, conv0.get_output(0), "features.norm0")
    relu0 = network.add_activation(bn0.get_output(0), type=trt.ActivationType.RELU)
    assert relu0

    pool0 = network.add_pooling(input=relu0.get_output(0),
                                type=trt.PoolingType.MAX,
                                window_size=trt.DimsHW(3, 3))
    assert pool0
    pool0.stride_nd = (2, 2)
    pool0.padding_nd = (1, 1)

    dense1 = add_dense_block(network, pool0.get_output(0), weight_map, 6, "features.denseblock1")
    transition1 = add_transition(network, dense1.get_output(0), weight_map, 128, "features.transition1")
    dense2 = add_dense_block(network, transition1.get_output(0), weight_map, 12, "features.denseblock2")
    transition2 = add_transition(network, dense2.get_output(0), weight_map, 256, "features.transition2")
    dense3 = add_dense_block(network, transition2.get_output(0), weight_map, 24, "features.denseblock3")
    transition3 = add_transition(network, dense3.get_output(0), weight_map, 512, "features.transition3")
    dense4 = add_dense_block(network, transition3.get_output(0), weight_map, 16, "features.denseblock4")

    bn5 = add_batch_norm_2d(network, weight_map, dense4.get_output(0), "features.norm5")
    relu5 = network.add_activation(bn5.get_output(0), type=trt.ActivationType.RELU)
    pool5 = network.add_pooling(relu5.get_output(0),
                                type=trt.PoolingType.AVERAGE,
                                window_size=trt.DimsHW(7, 7))

    fc1 = network.add_fully_connected(input=pool5.get_output(0),
                                      num_outputs=OUTPUT_SIZE,
                                      kernel=weight_map["classifier.weight"],
                                      bias=weight_map["classifier.bias"])
    assert fc1

    fc1.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc1.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine