def skipln(prefix, config, init_dict, network, input_tensor, skip):
    """ Add the skip layer """
    hidden_size = config.hidden_size
    dtype = config.get_trt_dtype()

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)

    if config.use_int8 and config.interleaved:
        pfc = trt.PluginFieldCollection([pf_beta, pf_gamma])
        skipln_plug = skln_plg_creator3.create_plugin("skipln", pfc)
    else:
        pfc = trt.PluginFieldCollection([pf_ld, pf_beta, pf_gamma, pf_type])
        skipln_plug = skln_plg_creator2.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
    """ Add the attention layer """
    assert len(input_tensor.shape) == 5
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    # FC_attention
    if config.use_int8:
        mult_all = network.add_convolution(input_tensor, 3 * hidden_size, (1, 1), Wall, Ball)
    else:
        mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball)

    if config.use_qat:
        dr_qkv = max(
            init_dict[prefix + 'self_qv_a_input_quantizer_amax'],
            init_dict[prefix + 'self_qv_b_input_quantizer_amax'],
            init_dict[prefix + 'self_av_b_input_quantizer_amax'],
        )
        set_output_range(mult_all, dr_qkv)
    set_output_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    # QKV2CTX
    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Multi-head attention doesn't use INT8 inputs and output by default unless it is specified.
    if config.use_int8 and config.use_int8_multihead and not config.is_calib_mode:
        dtype = trt.int8

    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)
    pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32)
    if config.use_qat:
        dr_probs = init_dict[prefix + 'self_av_a_input_quantizer_amax']
        dq_probs = dr_probs / 127.0
        pf_dq_probs = trt.PluginField("dq_probs", np.array([dq_probs], np.float32), trt.PluginFieldType.FLOAT32)
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type, pf_dq_probs])
    else:
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv_in = [mult_all.get_output(0)]
    if has_mask:
        qkv_in.append(imask)
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)

    if config.use_qat:
        dr_ctx = init_dict[prefix + 'output_dense_input_amax']
        set_output_range(qkv2ctx, dr_ctx)
    set_output_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx
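# Hypothetical sketch (not from the original source) showing how skipln() and
# attention_layer_opt() above could compose into one BERT encoder sublayer.
# The weight-key names ("attention_output_dense_kernel"/"_bias") and the
# helper name transformer_attention_block are assumptions modeled on the
# surrounding code, not confirmed by it.
def transformer_attention_block(prefix, config, init_dict, network, input_tensor, imask):
    # Multi-head self-attention via the QKV-to-context plugin.
    context = attention_layer_opt(prefix + "attention_self_", config, init_dict,
                                  network, input_tensor, imask)
    # Attention output projection (assumed weight keys).
    attention_out = network.add_fully_connected(
        context.get_output(0), config.hidden_size,
        init_dict[prefix + "attention_output_dense_kernel"],
        init_dict[prefix + "attention_output_dense_bias"])
    # Residual add + layernorm through the skip-layernorm plugin.
    return skipln(prefix + "attention_output_layernorm_", config, init_dict,
                  network, attention_out.get_output(0), input_tensor)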
def get_trt_plugin(plugin_name):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if (plugin_creator.name == "Normalize_TRT") and (plugin_name == "Normalize_TRT"):
            nbWeights = trt.PluginField("nbWeights", np.array([1], dtype=np.int32), trt.PluginFieldType.INT32)
            eps = trt.PluginField("eps", np.array([0.00001], dtype=np.float32), trt.PluginFieldType.FLOAT32)
            weights = trt.PluginField('weights', np.array([1] * 16, dtype=np.float32), trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection([weights, eps, nbWeights])
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
            break
        elif (plugin_creator.name == "CustomGeluPluginDynamic") and (plugin_name == "CustomGeluPluginDynamic"):
            type_id = trt.PluginField("type_id", np.array([0], np.int32), trt.PluginFieldType.INT32)
            bias = trt.PluginField("bias", np.array([[[1]]], np.float32), trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection([type_id, bias])
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
            break
    return plugin
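# Hypothetical usage sketch for get_trt_plugin() above; the logger setup, the
# PLUGIN_CREATORS initialization and the input shape are assumptions (the GELU
# plugin must already be registered, e.g. by loading the BERT plugin library).
import numpy as np
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
PLUGIN_CREATORS = trt.get_plugin_registry().plugin_creator_list

builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
x = network.add_input('x', trt.float32, (1, 16))
gelu = network.add_plugin_v2([x], get_trt_plugin('CustomGeluPluginDynamic'))
network.mark_output(gelu.get_output(0))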
def emb_layernorm(builder, network, config, weights_dict, builder_config, max_sequence_length, max_batch_size):
    input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1,))
    segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1,))
    cu_seqlens = network.add_input(name="cu_seqlens", dtype=trt.int32, shape=(-1,))
    max_seqlen = network.add_input(name="max_seqlen", dtype=trt.int32, shape=(-1,))

    # Specify profiles
    profile = builder.create_optimization_profile()
    min_shape = (1,)
    shape = (max_sequence_length * max_batch_size,)
    profile.set_shape("input_ids", min=min_shape, opt=shape, max=shape)
    profile.set_shape("segment_ids", min=min_shape, opt=shape, max=shape)
    profile.set_shape("cu_seqlens", min=min_shape, opt=(max_batch_size + 1,), max=(max_batch_size + 1,))
    profile.set_shape("max_seqlen", min=min_shape, opt=(max_sequence_length,), max=(max_sequence_length,))
    builder_config.add_optimization_profile(profile)

    wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"].numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"].numpy(), trt.PluginFieldType.FLOAT32)
    wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"].numpy(), trt.PluginFieldType.FLOAT32)
    wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"].numpy(), trt.PluginFieldType.FLOAT32)
    wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"].numpy(), trt.PluginFieldType.FLOAT32)
    output_fp16 = trt.PluginField("output_fp16", np.array([1 if config.use_fp16 or config.use_int8 else 0]).astype(np.int32), trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16])
    fn = emln_plg_creator2.create_plugin("embeddings", pfc)

    inputs = [input_ids, segment_ids, cu_seqlens, max_seqlen]
    emb_layer = network.add_plugin_v2(inputs, fn)

    if config.use_int8 and config.use_qat:
        dr_input = weights_dict['l0_attention_self_query_input_amax']
        set_output_range(emb_layer, dr_input)
    set_output_name(emb_layer, "embeddings_", "output")
    return emb_layer, cu_seqlens, max_seqlen
def dcn_pack(network, weights, feat, offset, name_prefix, dim2):
    out = network.add_convolution(offset.get_output(0), 72 * 3, (3, 3),
                                  weights[name_prefix + '.conv_offset.weight'],
                                  weights[name_prefix + '.conv_offset.bias'])
    out.stride = (1, 1)
    out.padding = (1, 1)

    offset = network.add_slice(out.get_output(0), (0, 0, 0, 0, 0), (1, 1, 72 * 2) + dim2, (1, 1, 1, 1, 1))
    mask = network.add_slice(out.get_output(0), (0, 0, 72 * 2, 0, 0), (1, 1, 72) + dim2, (1, 1, 1, 1, 1))

    weight = network.add_constant(weights[name_prefix + '.weight'].shape, weights[name_prefix + '.weight'])
    bias = network.add_constant(weights[name_prefix + '.bias'].shape, weights[name_prefix + '.bias'])

    plugin_creator = get_plugin_creator('DeformConvPlugin')
    if plugin_creator is None:
        print('Plugin DeformConvPlugin not found. Exiting')
        exit()

    print('plugin input', feat.get_output(0).shape)
    print('plugin weight', weight.get_output(0).shape)
    print('plugin bias', bias.get_output(0).shape)
    print('plugin offset', offset.get_output(0).shape)
    print('plugin mask', mask.get_output(0).shape)

    return network.add_plugin_v2(
        [feat.get_output(0), weight.get_output(0), bias.get_output(0),
         offset.get_output(0), mask.get_output(0)],
        plugin_creator.create_plugin('DeformConvPlugin', tensorrt.PluginFieldCollection()))
def add_nms(self, input_tensors):
    shareLocation = trt.PluginField("shareLocation", np.array([1], dtype=np.int32), trt.PluginFieldType.INT32)
    backgroundLabelId = trt.PluginField("backgroundLabelId", np.array([-1], dtype=np.int32), trt.PluginFieldType.INT32)
    numClasses = trt.PluginField("numClasses", np.array([3], dtype=np.int32), trt.PluginFieldType.INT32)
    topK = trt.PluginField("topK", np.array([300], dtype=np.int32), trt.PluginFieldType.INT32)
    keepTopK = trt.PluginField("keepTopK", np.array([100], dtype=np.int32), trt.PluginFieldType.INT32)
    scoreThreshold = trt.PluginField("scoreThreshold", np.array([0.65], dtype=np.float32), trt.PluginFieldType.FLOAT32)
    iouThreshold = trt.PluginField("iouThreshold", np.array([0.5], dtype=np.float32), trt.PluginFieldType.FLOAT32)
    isNormalized = trt.PluginField("isNormalized", np.array([1], dtype=np.int32), trt.PluginFieldType.INT32)
    clipBoxes = trt.PluginField("clipBoxes", np.array([1], dtype=np.int32), trt.PluginFieldType.INT32)

    field_collection = trt.PluginFieldCollection([
        shareLocation, backgroundLabelId, numClasses, topK, keepTopK,
        scoreThreshold, iouThreshold, isNormalized, clipBoxes
    ])
    nms = nmsCreator.create_plugin(name='BatchedNMS_TRT', field_collection=field_collection)
    return self.network.add_plugin_v2(
        inputs=[input_tensors.get_output(x) for x in range(2)],
        plugin=nms)
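# Sketch of how nmsCreator, referenced by add_nms() above, could be resolved
# from the global plugin registry (an assumption; the original defines it
# elsewhere). BatchedNMS_TRT ships with the standard TensorRT plugin library.
import tensorrt as trt

trt.init_libnvinfer_plugins(trt.Logger(trt.Logger.WARNING), '')
nmsCreator = next(c for c in trt.get_plugin_registry().plugin_creator_list
                  if c.name == 'BatchedNMS_TRT')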
def create_deformable_pool_plugin(layer_name, out_size, spatial_scale, sampling_ratio, gamma):
    creator = trt.get_plugin_registry().get_plugin_creator('DeformablePoolPluginDynamic', '1', '')

    if not isinstance(out_size, Iterable):
        out_size = [out_size, out_size]

    pfc = trt.PluginFieldCollection()
    pf_out_size = trt.PluginField('out_size', np.array(out_size, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)
    pf_spatial_scale = trt.PluginField('spatial_scale', np.array([spatial_scale], dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_spatial_scale)
    pf_sampling_ratio = trt.PluginField('sampling_ratio', np.array([sampling_ratio], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_sampling_ratio)
    pf_gamma = trt.PluginField('gamma', np.array([gamma], dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_gamma)

    return creator.create_plugin(layer_name, pfc)
def build_engine(shape_indices):
    plugin_creator = get_plugin_creator('OnehotPlugin')
    if plugin_creator is None:
        print('OnehotPlugin plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    network = builder.create_network(flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    tensor_indices = network.add_input('indices', trt.DataType.INT32, shape_indices)

    depth = 10
    layer = network.add_plugin_v2([tensor_indices], plugin_creator.create_plugin(
        'OnehotPlugin', trt.PluginFieldCollection([
            trt.PluginField('depth', np.array([depth], dtype=np.int32), trt.PluginFieldType.INT32)
        ])))

    network.mark_output(layer.get_output(0))
    return builder.build_engine(network, builder.create_builder_config())
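# Hypothetical driver for build_engine() above; the indices shape, the global
# logger expected by the function, and the output path are assumptions.
if __name__ == '__main__':
    engine = build_engine((8,))
    with open('onehot.engine', 'wb') as f:
        f.write(engine.serialize())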
def create_roipool_plugin(layer_name, out_size, featmap_strides, roi_scale_factor, finest_scale):
    creator = trt.get_plugin_registry().get_plugin_creator('RoiPoolPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    pf_out_size = trt.PluginField('out_size', np.array([out_size], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)
    pf_featmap_strides = trt.PluginField('featmap_strides', np.array(featmap_strides).astype(np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_featmap_strides)
    pf_roi_scale_factor = trt.PluginField('roi_scale_factor', np.array([roi_scale_factor], dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_roi_scale_factor)
    pf_finest_scale = trt.PluginField('finest_scale', np.array([finest_scale], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_finest_scale)

    return creator.create_plugin(layer_name, pfc)
def getTopKAveragePlugin(nTopK, maxTopK):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'TopKAveragePlugin':
            p0 = trt.PluginField("nTopK", np.array([nTopK], dtype=np.int32), trt.PluginFieldType.INT32)
            p1 = trt.PluginField("maxTopK", np.array([maxTopK], dtype=np.int32), trt.PluginFieldType.INT32)
            return c.create_plugin('TopKAveragePlugin', trt.PluginFieldCollection([p0, p1]))
    return None
def create_layernorm_plugin(layer_name, normalized_shape, W, B, eps=1e-5, type_id=trt.DataType.FLOAT):
    creator = trt.get_plugin_registry().get_plugin_creator('LayerNormPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    pf_normalized_shape = trt.PluginField("normalized_shape", np.array(normalized_shape, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_normalized_shape)
    pf_eps = trt.PluginField("eps", np.array([eps], dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_eps)
    pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32)
    pfc.append(pf_W)
    pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32)
    pfc.append(pf_B)
    # type_id is a trt.DataType enum; cast it to int before building the array.
    pf_type_id = trt.PluginField("type_id", np.array([int(type_id)], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_type_id)

    return creator.create_plugin(layer_name, pfc)
def convert_grid_sample(ctx):
    # Note: despite the name, this converter wraps a gather-elements plugin
    # (it consumes dim/index arguments, not a sampling grid).
    # parse args
    input = ctx.method_args[0]
    dim = ctx.method_args[1]
    index = ctx.method_args[2]
    output = ctx.method_return
    dim = convert_dim(dim, input.dim())

    # get tensorrt inputs
    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
    index_trt = add_missing_trt_tensors(ctx.network, [index])[0]

    # add tensorrt layer
    creator = trt.get_plugin_registry().get_plugin_creator('GatherElementsPlugins', '1')
    assert creator is not None, 'GatherElementsPlugins version 1 is not registered'

    fc = [trt.PluginField(name='dim', data=np.array([dim], dtype=np.int32), type=trt.PluginFieldType.INT32)]
    fc = trt.PluginFieldCollection(fc)
    plugin = creator.create_plugin('GatherElementsPlugins', fc)
    layer = ctx.network.add_plugin_v2([input_trt, index_trt], plugin)

    # get tensorrt output
    output._trt = layer.get_output(0)
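# Registration sketch: the converter above follows torch2trt's calling
# convention (ctx.method_args / ctx.method_return / add_missing_trt_tensors),
# so it would normally be bound to a torch op with torch2trt's decorator. The
# target 'torch.gather' is an assumption inferred from the dim/index
# arguments; the original may register it differently.
from torch2trt import tensorrt_converter

@tensorrt_converter('torch.gather')
def convert_gather(ctx):
    convert_grid_sample(ctx)  # delegate to the converter defined above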
def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
    """ Add the attention layer """
    assert len(input_tensor.shape) == 5
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball)
    set_layer_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32)
    pf_S = trt.PluginField("S", np.array([S], np.int32), trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_S, pf_has_mask])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv_in = [mult_all.get_output(0), imask]
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)
    set_layer_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx
def create_plugins(self):
    # create "adding positional encoding" plugin
    self.plugins['AddPosEncPlugin'] = self.get_plugin_creator('AddPosEncPlugin').create_plugin(
        'AddPosEncPlugin', trt.PluginFieldCollection())

    # create "repeat" plugin
    self.plugins['RepeatPlugin'] = self.get_plugin_creator('RepeatPlugin').create_plugin(
        'RepeatPlugin', trt.PluginFieldCollection([
            trt.PluginField('maxOutputLength',
                            np.array([self.trt_max_output_seq_len], dtype=np.int32),
                            trt.PluginFieldType.INT32)
        ]))
def build_engine(shape, shape2):
    plugin_creator = get_plugin_creator('RepeatPlugin')
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    builder.max_batch_size = 1024
    builder.max_workspace_size = 1 << 20
    builder.fp16_mode = use_fp16

    network = builder.create_network()
    tensor = network.add_input('input1', trt.DataType.FLOAT, shape)
    tensor2 = network.add_input('input2', trt.DataType.FLOAT, shape2)
    tensor = network.add_plugin_v2([tensor, tensor2], plugin_creator.create_plugin(
        'RepeatPlugin', trt.PluginFieldCollection([
            trt.PluginField('maxOutputLength',
                            np.array([MAX_OUTPUT_LENGTH], dtype=np.int32),
                            trt.PluginFieldType.INT32)
        ]))).get_output(0)

    network.mark_output(tensor)
    return builder.build_cuda_engine(network)
def getSortPlugin():
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'SortPlugin':
            p0 = trt.PluginField("descending", np.array([0], dtype=np.int32), trt.PluginFieldType.INT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0]))
    return None
def create_carafefeaturereassemble_plugin(layer_name, scale_factor, up_kernel, up_group, type_id=trt.DataType.FLOAT):
    creator = trt.get_plugin_registry().get_plugin_creator('CarafeFeatureReassemblePluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    pf_scale_factor = trt.PluginField("scale_factor", np.array([scale_factor], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_scale_factor)
    pf_up_kernel = trt.PluginField("up_kernel", np.array([up_kernel], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_up_kernel)
    pf_up_group = trt.PluginField("up_group", np.array([up_group], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_up_group)
    # type_id is a trt.DataType enum; cast it to int before building the array.
    pf_type_id = trt.PluginField("type_id", np.array([int(type_id)], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_type_id)

    return creator.create_plugin(layer_name, pfc)
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """ Add the skip layer """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Skip layernorm doesn't use INT8 inputs and output by default unless it is specified.
    if config.use_int8 and config.use_int8_skipln and not config.is_calib_mode:
        dtype = trt.int8

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]
    # Explicit None check: truthiness of a weight tensor is ambiguous.
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(), trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
def skipln(prefix, init_dict, network, input_tensor, skip):
    """ Add the skip layer """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    # type_id is declared INT32, so back it with an int32 array.
    pf_type_id = trt.PluginField("type_id", np.array([0], dtype=np.int32), trt.PluginFieldType.INT32)
    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)

    pfc = trt.PluginFieldCollection([pf_type_id, pf_ld, pf_beta, pf_gamma])
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
def get_dlrm_interactions_plugin(self, plugin_name, tableOffsets, interactionsOutputInterleaved):
    """Create a plugin layer for the DLRM Interactions plugin and return it.

    The DLRM Interactions plugin takes two inputs, the bottom-MLP output and the
    categorical input, and looks up the corresponding embeddings. Since DLRM
    embeddings can be larger than GPU memory, the plugin keeps the most
    frequently used embeddings on the GPU and the rest on the host, and manages
    the lookup with good performance.
    """
    plugin = None
    for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
        if plugin_creator.name == plugin_name:
            embeddingSize_field = trt.PluginField("embeddingSize", np.array([self.embedding_size], dtype=np.int32), trt.PluginFieldType.INT32)
            embeddingRows_field = trt.PluginField("embeddingRows", np.array([self.embedding_rows_total], dtype=np.int32), trt.PluginFieldType.INT32)
            reducedPrecisionIO_field = trt.PluginField(
                "reducedPrecisionIO",
                np.array([0 if self.need_calibration else (1 if self.precision == "fp16" else 2)], dtype=np.int32),
                trt.PluginFieldType.INT32)
            embeddingWeightsOnGpuPart_field = trt.PluginField("embeddingWeightsOnGpuPart", np.array([self.embedding_weights_on_gpu_part], dtype=np.float32), trt.PluginFieldType.FLOAT32)
            interactionsOutputInterleaved_field = trt.PluginField("interactionsOutputInterleaved", np.array([1 if interactionsOutputInterleaved else 0], dtype=np.int32), trt.PluginFieldType.INT32)
            tableOffsets_field = trt.PluginField("tableOffsets", tableOffsets, trt.PluginFieldType.INT32)
            embeddingWeightsFilepath_field = trt.PluginField(
                "embeddingWeightsFilepath",
                np.array(list(self.embedding_weights_binary_filepath.encode()), dtype=np.int8),
                trt.PluginFieldType.CHAR)
            if self.use_row_frequencies:
                rowFrequenciesFilepath_field = trt.PluginField(
                    "rowFrequenciesFilepath",
                    np.array(list(self.row_frequencies_binary_filepath.encode()), dtype=np.int8),
                    trt.PluginFieldType.CHAR)
            else:
                rowFrequenciesFilepath_field = trt.PluginField(
                    "rowFrequenciesFilepath",
                    np.array(list("".encode()), dtype=np.int8),
                    trt.PluginFieldType.CHAR)
            output_padding_field = trt.PluginField("outputPaddingGranularity", np.array([self.output_padding], dtype=np.int32), trt.PluginFieldType.INT32)

            field_collection = trt.PluginFieldCollection([
                embeddingSize_field, embeddingRows_field, reducedPrecisionIO_field,
                embeddingWeightsOnGpuPart_field, interactionsOutputInterleaved_field,
                output_padding_field, tableOffsets_field,
                embeddingWeightsFilepath_field, rowFrequenciesFilepath_field])
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
    return plugin
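# Hypothetical call site for get_dlrm_interactions_plugin() above, assumed to
# live inside the same builder class. The plugin name, the zeroed table
# offsets, and the two input tensors are placeholders, not values from the
# original source.
def add_interactions(self, bottom_mlp_out, categorical_in):
    table_offsets = np.zeros(self.num_tables, dtype=np.int32)  # assumed offsets
    plugin = self.get_dlrm_interactions_plugin(
        "DLRM_BOTTOM_MLP_AND_INTERACTIONS_TRT",  # assumed registered name
        table_offsets, interactionsOutputInterleaved=False)
    return self.network.add_plugin_v2([bottom_mlp_out, categorical_in], plugin)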
def build_engine(builder, input_shape):
    plugin_creator = get_plugin_creator('AddPlugin')
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 20
    builder.max_batch_size = 8

    network = builder.create_network()
    tensor = network.add_input('data', tensorrt.DataType.FLOAT, input_shape)
    layer = network.add_plugin_v2([tensor], plugin_creator.create_plugin(
        'AddPlugin', tensorrt.PluginFieldCollection([
            tensorrt.PluginField('valueToAdd',
                                 np.array([100.0], dtype=np.float32),
                                 tensorrt.PluginFieldType.FLOAT32)
        ])))
    tensor = layer.get_output(0)

    network.mark_output(tensor)
    return builder.build_engine(network, config)
def create_meshgrid_plugin(layer_name, num_inputs, slice_dims=(2, 3), starts=(0., 0.), strides=(1., 1.)):
    creator = trt.get_plugin_registry().get_plugin_creator('MeshGridPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    pf_num_inputs = trt.PluginField("num_inputs", np.array([int(num_inputs)], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_num_inputs)
    pf_slice_dims = trt.PluginField("slice_dims", np.array(slice_dims, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_slice_dims)
    pf_starts = trt.PluginField("starts", np.array(starts, dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_starts)
    pf_strides = trt.PluginField("strides", np.array(strides, dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_strides)

    return creator.create_plugin(layer_name, pfc)
def build_engine(shape):
    plugin_creator = get_plugin_creator('AddPlugin')
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    builder.max_batch_size = 1024
    builder.max_workspace_size = 1 << 20
    builder.fp16_mode = use_fp16

    network = builder.create_network()
    tensor = network.add_input('data', trt.DataType.FLOAT, shape)
    for _ in range(10):
        tensor = network.add_plugin_v2([tensor], plugin_creator.create_plugin(
            'AddPlugin', trt.PluginFieldCollection([
                trt.PluginField('valueToAdd',
                                np.array([10.0], dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
            ]))).get_output(0)

    network.mark_output(tensor)
    return builder.build_cuda_engine(network)
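# Hypothetical smoke test for build_engine() above, matching the legacy
# implicit-batch API the function itself uses; pycuda handles the device
# buffers. The input shape and tolerance are assumptions.
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda

engine = build_engine((3, 4))
context = engine.create_execution_context()
inp = np.random.rand(1, 3, 4).astype(np.float32)
out = np.empty_like(inp)
d_in, d_out = cuda.mem_alloc(inp.nbytes), cuda.mem_alloc(out.nbytes)
cuda.memcpy_htod(d_in, inp)
context.execute(1, [int(d_in), int(d_out)])  # implicit batch size 1
cuda.memcpy_dtoh(out, d_out)
# Ten stacked AddPlugin layers, each adding 10.0, should add 100.0 overall.
np.testing.assert_allclose(out, inp + 100.0, rtol=1e-3)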
def optimize(self, network, point):
    fields = trt.PluginFieldCollection()
    saved = []  # Field values must stay alive until the plugin is created.
    inputs = [network.get_layer(point[0]).get_input(0)]
    append_fields(network, point[0], fields, saved, self.scale_map)
    append_fields(network, point[0] + 2, fields, saved, self.scale_map)
    append_fields(network, point[0] + 4, fields, saved, self.scale_map)

    plugin = self.creator.create_plugin(self.name, fields)
    if plugin is None:
        raise Exception("Plugin creation failed")

    plugin_layer = network.add_plugin_v2(inputs, plugin)
    plugin_layer.name = self.name + "_%d" % self.count
    self.count += 1

    origin_output = network.get_layer(point[1]).get_output(0)
    plugin_output = plugin_layer.get_output(0)
    assert origin_output.name in self.scale_map, "%s not found!" % origin_output.name
    dynamic_range = self.scale_map[origin_output.name] * 127.0
    plugin_output.set_dynamic_range(-dynamic_range, dynamic_range)

    # Rewire every consumer of the original output to read from the plugin output.
    for j in range(network.num_layers):
        layer = network.get_layer(j)
        if layer.name == plugin_layer.name:
            continue
        for k in range(layer.num_inputs):
            if layer.get_input(k) == origin_output:
                layer.set_input(k, plugin_output)
def add_yoloHead(self, input_tensors):
    # Use integer division so the grid sizes stay int for the int32 fields below.
    mh = ModelData.INPUT_SHAPE[1] // 32
    mw = ModelData.INPUT_SHAPE[2] // 32
    anchors = np.array(
        [[10, 13, 16, 30, 33, 23],
         [30, 61, 62, 45, 59, 119],
         [116, 90, 156, 198, 373, 326]], dtype=np.float32)

    num_cls = trt.PluginField("num_cls", np.array([3], dtype=np.int32), trt.PluginFieldType.INT32)
    max_det = trt.PluginField("max_det", np.array([3024], dtype=np.int32), trt.PluginFieldType.INT32)
    heights = trt.PluginField("heights", np.array([mh * 4, mh * 2, mh], dtype=np.int32), trt.PluginFieldType.INT32)
    widths = trt.PluginField("widths", np.array([mw * 4, mw * 2, mw], dtype=np.int32), trt.PluginFieldType.INT32)
    strides = trt.PluginField("strides", np.array([8, 16, 32], dtype=np.int32), trt.PluginFieldType.INT32)
    pf_anchors = trt.PluginField("anchors", anchors, trt.PluginFieldType.FLOAT32)

    field_collection = trt.PluginFieldCollection(
        [num_cls, max_det, heights, widths, strides, pf_anchors])
    yoloHead = YoloCreator.create_plugin(name='Yolo_TRT', field_collection=field_collection)
    return self.network.add_plugin_v2(
        inputs=[x.get_output(0) for x in input_tensors],
        plugin=yoloHead)
def create_torchembedding_plugin(layer_name, weight):
    creator = trt.get_plugin_registry().get_plugin_creator('TorchEmbeddingPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    num_embeddings = weight.shape[0]
    embedding_dim = weight.shape[1]
    pf_num_embeddings = trt.PluginField("num_embeddings", np.array([num_embeddings], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_num_embeddings)
    pf_embedding_dim = trt.PluginField("embedding_dim", np.array([embedding_dim], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_embedding_dim)
    pf_weight = trt.PluginField("weight", np.array(weight, dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_weight)

    return creator.create_plugin(layer_name, pfc)
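# Hypothetical usage of create_torchembedding_plugin() above; the vocabulary
# size, embedding width and input shape are assumptions, and the custom plugin
# library is assumed to be loaded already.
import numpy as np
import tensorrt as trt

weight = np.random.randn(1000, 64).astype(np.float32)  # assumed embedding table
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
ids = network.add_input('ids', trt.int32, (4, 16))
emb = network.add_plugin_v2([ids], create_torchembedding_plugin('embedding', weight))
network.mark_output(emb.get_output(0))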
def getLayerNormPlugin():
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'LayerNorm':
            p0 = trt.PluginField('epsilon', np.float32(epsilon), trt.PluginFieldType.FLOAT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0]))
    return None
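# Hypothetical usage of getLayerNormPlugin() above; epsilon is assumed to be a
# module-level constant read by the function, the input shape is an
# assumption, and the custom LayerNorm plugin library is assumed to be loaded.
import tensorrt as trt

epsilon = 1e-5
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
x = network.add_input('x', trt.float32, (2, 8, 256))
ln = network.add_plugin_v2([x], getLayerNormPlugin())
network.mark_output(ln.get_output(0))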
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """ Add the skip layer """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "bias"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "weight"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([1 if config.use_fp16 else 0], np.int32), trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]
    # Explicit None check: truthiness of a weight tensor is ambiguous.
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(), trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
def add_plugin(cls, network):
    """ Adapted from https://github.com/jkjung-avt/tensorrt_demos """
    def get_plugin_creator(plugin_name):
        plugin_creators = trt.get_plugin_registry().plugin_creator_list
        for plugin_creator in plugin_creators:
            if plugin_creator.name == plugin_name:
                return plugin_creator
        return None

    plugin_creator = get_plugin_creator("YoloLayer_TRT")
    if not plugin_creator:
        raise RuntimeError("Failed to get YoloLayer_TRT plugin creator")

    old_tensors = [network.get_output(i) for i in range(network.num_outputs)]
    plugin_layer = network.add_plugin_v2(
        old_tensors,
        plugin_creator.create_plugin("YoloLayer_TRT", trt.PluginFieldCollection([])),
    )

    # Replace the old network outputs with the single plugin output.
    network.mark_output(plugin_layer.get_output(0))
    for old_tensor in old_tensors:
        network.unmark_output(old_tensor)
    return network
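# Hypothetical driver for add_plugin() above, assumed to run after a parser
# has populated the network outputs; Model and the surrounding builder object
# are placeholder names, not values from the original source.
network = Model.add_plugin(network)
engine = builder.build_engine(network, builder.create_builder_config())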
def create_torchunfold_plugin(layer_name, kernel_size, dilation, padding, stride):
    creator = trt.get_plugin_registry().get_plugin_creator('TorchUnfoldPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    pf_kernel_size = trt.PluginField('kernel_size', np.array(kernel_size, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_kernel_size)
    if isinstance(dilation, int):
        dilation = (dilation, dilation)
    pf_dilation = trt.PluginField('dilation', np.array(dilation, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_dilation)
    if isinstance(padding, int):
        padding = (padding, padding)
    pf_padding = trt.PluginField('padding', np.array(padding, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_padding)
    if isinstance(stride, int):
        stride = (stride, stride)
    pf_stride = trt.PluginField('stride', np.array(stride, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_stride)

    return creator.create_plugin(layer_name, pfc)