示例#1
0
def skipln(prefix, config, init_dict, network, input_tensor, skip):
    """
    Add the skip + layernorm plugin layer to the network.

    Chooses the interleaved-INT8 plugin creator when the config requests it
    (that variant only needs beta/gamma); otherwise uses the regular creator,
    which additionally takes the leading dimension and type id fields.
    Returns the created plugin layer.
    """
    hidden_size = config.hidden_size
    dtype = config.get_trt_dtype()

    beta_w = init_dict[prefix + "beta"]
    gamma_w = init_dict[prefix + "gamma"]
    pf_beta = trt.PluginField("beta", beta_w.numpy(),
                              trt.PluginFieldType.FLOAT32)
    pf_gamma = trt.PluginField("gamma", gamma_w.numpy(),
                               trt.PluginFieldType.FLOAT32)

    if config.use_int8 and config.interleaved:
        # Interleaved INT8 variant: only beta/gamma fields.
        pfc = trt.PluginFieldCollection([pf_beta, pf_gamma])
        skipln_plug = skln_plg_creator3.create_plugin("skipln", pfc)
    else:
        pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                                trt.PluginFieldType.INT32)
        pf_type = trt.PluginField("type_id",
                                  np.array([int(dtype)], np.int32),
                                  trt.PluginFieldType.INT32)
        pfc = trt.PluginFieldCollection([pf_ld, pf_beta, pf_gamma, pf_type])
        skipln_plug = skln_plg_creator2.create_plugin("skipln", pfc)

    return network.add_plugin_v2([input_tensor, skip], skipln_plug)
示例#2
0
def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
    """
    Add the attention layer.

    Builds a fused QKV projection (a 1x1 convolution in INT8 mode, a
    fully-connected layer otherwise) followed by the QKV-to-context plugin.

    prefix: weight-name prefix for this layer in init_dict.
    config: build configuration (precision flags, attention head count, ...).
    init_dict: mapping from weight names to weights / amax scalars.
    network: TensorRT network being built.
    input_tensor: 5-D input tensor; dims unpacked as (B, S, hidden, _, _)
        -- exact layout of the trailing singleton dims assumed, TODO confirm.
    imask: optional attention-mask tensor; may be None.
    Returns the qkv2ctx plugin layer producing the context output.
    """
    assert(len(input_tensor.shape) == 5)
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    # Fused Q, K and V weights/biases (hence the 3 * hidden_size output).
    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    # FC_attention
    if config.use_int8:
        # INT8 path uses a 1x1 convolution as the fused QKV projection.
        mult_all = network.add_convolution(input_tensor, 3 * hidden_size, (1, 1), Wall, Ball)
    else:
        mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball)

    if config.use_qat:
        # QAT: dynamic range is the max of the Q/K/V input quantizer amax values.
        dr_qkv = max(
            init_dict[prefix + 'self_qv_a_input_quantizer_amax'],
            init_dict[prefix + 'self_qv_b_input_quantizer_amax'],
            init_dict[prefix + 'self_av_b_input_quantizer_amax'],
        )
        set_output_range(mult_all, dr_qkv)
    set_output_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    # QKV2CTX
    # Select the plugin I/O precision: fp32 default, fp16 when enabled,
    # int8 only when explicitly requested and not during calibration.
    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Multi-head attention doesn't use INT8 inputs and output by default unless it is specified.
    if config.use_int8 and config.use_int8_multihead and not config.is_calib_mode:
        dtype = trt.int8

    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)
    pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32)
    if config.use_qat:
        # QAT: pass the dequantization scale for the attention probabilities.
        dr_probs = init_dict[prefix + 'self_av_a_input_quantizer_amax']
        dq_probs = dr_probs / 127.0
        pf_dq_probs =  trt.PluginField("dq_probs", np.array([dq_probs], np.float32), trt.PluginFieldType.FLOAT32)
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type, pf_dq_probs])
    else:
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    # The mask tensor is only appended when present.
    qkv_in = [mult_all.get_output(0)]
    if has_mask:
        qkv_in.append(imask)
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)

    if config.use_qat:
        dr_ctx = init_dict[prefix + 'output_dense_input_amax']
        set_output_range(qkv2ctx, dr_ctx)
    set_output_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx
def get_trt_plugin(plugin_name):
    """Return an instance of the requested plugin built from PLUGIN_CREATORS.

    Supports "Normalize_TRT" and "CustomGeluPluginDynamic", each created with
    fixed default field values; returns None when no matching creator is
    registered under that name.
    """
    for creator in PLUGIN_CREATORS:
        if creator.name == plugin_name == "Normalize_TRT":
            fields = trt.PluginFieldCollection([
                trt.PluginField('weights',
                                np.array([1] * 16, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32),
                trt.PluginField("eps",
                                np.array([0.00001], dtype=np.float32),
                                trt.PluginFieldType.FLOAT32),
                trt.PluginField("nbWeights",
                                np.array([1], dtype=np.int32),
                                trt.PluginFieldType.INT32),
            ])
            return creator.create_plugin(name=plugin_name,
                                         field_collection=fields)
        if creator.name == plugin_name == "CustomGeluPluginDynamic":
            fields = trt.PluginFieldCollection([
                trt.PluginField("type_id", np.array([0], np.int32),
                                trt.PluginFieldType.INT32),
                trt.PluginField("bias", np.array([[[1]]], np.float32),
                                trt.PluginFieldType.FLOAT32),
            ])
            return creator.create_plugin(name=plugin_name,
                                         field_collection=fields)
    return None
def emb_layernorm(builder, network, config, weights_dict, builder_config, max_sequence_length, max_batch_size):
    """Add the embeddings + layernorm plugin for the variable-seqlen model.

    Declares four dynamic 1-D INT32 network inputs (token ids, segment ids,
    cumulative sequence lengths, max-seqlen marker), registers one
    optimization profile sized by max_sequence_length * max_batch_size, and
    feeds the embedding/layernorm weights into the embeddings plugin.

    Returns (emb_layer, cu_seqlens, max_seqlen).
    """
    input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1,))
    segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1,))
    cu_seqlens = network.add_input(name="cu_seqlens", dtype=trt.int32, shape=(-1,))
    max_seqlen = network.add_input(name="max_seqlen", dtype=trt.int32, shape=(-1,))

    # Specify profiles: opt and max shapes coincide; only the min is (1,).
    profile = builder.create_optimization_profile()
    min_shape = (1,)
    shape = (max_sequence_length*max_batch_size,)
    profile.set_shape("input_ids", min=min_shape, opt=shape, max=shape)
    profile.set_shape("segment_ids", min=min_shape, opt=shape, max=shape)
    # cu_seqlens holds one prefix-sum entry per batch item plus a leading 0.
    profile.set_shape("cu_seqlens", min=min_shape, opt=(max_batch_size+1,), max=(max_batch_size+1,))
    profile.set_shape("max_seqlen", min=min_shape, opt=(max_sequence_length,), max=(max_sequence_length,))
    builder_config.add_optimization_profile(profile)

    wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"].numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"].numpy(), trt.PluginFieldType.FLOAT32)
    wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"].numpy(), trt.PluginFieldType.FLOAT32)
    wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"].numpy(), trt.PluginFieldType.FLOAT32)
    wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"].numpy(), trt.PluginFieldType.FLOAT32)

    # Plugin emits fp16 output whenever fp16 or int8 mode is enabled.
    output_fp16 = trt.PluginField("output_fp16", np.array([1 if config.use_fp16 or config.use_int8 else 0]).astype(np.int32), trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16])
    fn = emln_plg_creator2.create_plugin("embeddings", pfc)

    inputs = [input_ids, segment_ids, cu_seqlens, max_seqlen]
    emb_layer = network.add_plugin_v2(inputs, fn)

    if config.use_int8 and config.use_qat:
        # QAT: seed the embedding output range from the first attention layer's input amax.
        dr_input = weights_dict['l0_attention_self_query_input_amax'] 
        set_output_range(emb_layer, dr_input)
    set_output_name(emb_layer, "embeddings_", "output")
    return emb_layer, cu_seqlens, max_seqlen
示例#5
0
def dcn_pack(network, weights, feat, offset, name_prefix, dim2):
    """Add a modulated deformable-convolution (DCN) block to the network.

    A 3x3 convolution over `offset`'s output predicts 72 * 3 channels: the
    first 72 * 2 are the (x, y) sampling offsets and the last 72 the
    modulation mask. These, together with the DCN weight/bias constants,
    feed the DeformConvPlugin.

    network: TensorRT network being built.
    weights: dict mapping weight names to weight buffers.
    feat: layer whose output is the feature map to deform.
    offset: layer whose output drives the offset/mask prediction.
    name_prefix: weight-name prefix for this block in `weights`.
    dim2: trailing spatial dims (tuple) used to size the slice shapes.
    Returns the DeformConvPlugin layer. Exits the process if the plugin
    creator is not registered.
    """
    conv = network.add_convolution(offset.get_output(0), 72 * 3, (3, 3),
                                   weights[name_prefix + '.conv_offset.weight'],
                                   weights[name_prefix + '.conv_offset.bias'])
    conv.stride = (1, 1)
    conv.padding = (1, 1)
    # Split the predicted channels: [0, 144) offsets, [144, 216) mask.
    offset = network.add_slice(conv.get_output(0), (0, 0, 0, 0, 0),
                               (1, 1, 72 * 2) + dim2, (1, 1, 1, 1, 1))
    mask = network.add_slice(conv.get_output(0), (0, 0, 72 * 2, 0, 0),
                             (1, 1, 72) + dim2, (1, 1, 1, 1, 1))

    weight = network.add_constant(weights[name_prefix + '.weight'].shape,
                                  weights[name_prefix + '.weight'])
    bias = network.add_constant(weights[name_prefix + '.bias'].shape,
                                weights[name_prefix + '.bias'])
    plugin_creator = get_plugin_creator('DeformConvPlugin')
    # Fix: compare to None by identity, not `==` (PEP 8).
    if plugin_creator is None:
        print('Plugin DeformConvPlugin not found. Exiting')
        exit()
    print('plugin input', feat.get_output(0).shape)
    print('plugin weight', weight.get_output(0).shape)
    print('plugin bias', bias.get_output(0).shape)
    print('plugin offset', offset.get_output(0).shape)
    print('plugin mask', mask.get_output(0).shape)
    return network.add_plugin_v2([
        feat.get_output(0),
        weight.get_output(0),
        bias.get_output(0),
        offset.get_output(0),
        mask.get_output(0)
    ], plugin_creator.create_plugin('DeformConvPlugin',
                                    tensorrt.PluginFieldCollection()))
示例#6
0
    def add_nms(self, input_tensors):
        """Append a BatchedNMS_TRT plugin layer fed by the first two outputs
        of *input_tensors* and return the resulting layer.

        All NMS parameters are fixed: 3 classes, top-300 candidates,
        keep-100, score threshold 0.65, IoU threshold 0.5, normalized and
        clipped boxes, shared box locations, no background class.
        """
        def _int_field(name, value):
            return trt.PluginField(name, np.array([value], dtype=np.int32),
                                   trt.PluginFieldType.INT32)

        def _float_field(name, value):
            return trt.PluginField(name, np.array([value], dtype=np.float32),
                                   trt.PluginFieldType.FLOAT32)

        field_collection = trt.PluginFieldCollection([
            _int_field("shareLocation", 1),
            _int_field("backgroundLabelId", -1),
            _int_field("numClasses", 3),
            _int_field("topK", 300),
            _int_field("keepTopK", 100),
            _float_field("scoreThreshold", 0.65),
            _float_field("iouThreshold", 0.5),
            _int_field("isNormalized", 1),
            _int_field("clipBoxes", 1),
        ])
        nms = nmsCreator.create_plugin(name='BatchedNMS_TRT',
                                       field_collection=field_collection)

        return self.network.add_plugin_v2(
            inputs=[input_tensors.get_output(i) for i in range(2)], plugin=nms)
def create_deformable_pool_plugin(layer_name, out_size, spatial_scale,
                                  sampling_ratio, gamma):
    """Build a DeformablePoolPluginDynamic plugin instance.

    layer_name: name given to the created plugin.
    out_size: scalar or iterable pooling output size; a scalar is expanded
        to (out_size, out_size).
    spatial_scale: float scale between input coords and feature map.
    sampling_ratio: int sampling ratio per bin.
    gamma: float modulation factor.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'DeformablePoolPluginDynamic', '1', '')

    # Accept a scalar out_size and expand it to a square shape.
    if not isinstance(out_size, Iterable):
        out_size = [out_size, out_size]

    pfc = trt.PluginFieldCollection()
    for field in (
            trt.PluginField('out_size',
                            np.array(out_size, dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField('spatial_scale',
                            np.array([spatial_scale], dtype=np.float32),
                            trt.PluginFieldType.FLOAT32),
            trt.PluginField('sampling_ratio',
                            np.array([sampling_ratio], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField('gamma',
                            np.array([gamma], dtype=np.float32),
                            trt.PluginFieldType.FLOAT32),
    ):
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)
示例#8
0
def build_engine(shape_indices):
    """Build a TensorRT engine containing a single OnehotPlugin layer.

    shape_indices: shape of the INT32 'indices' network input.
    Returns the built engine; exits the process if the plugin creator is
    not registered.
    """
    plugin_creator = get_plugin_creator('OnehotPlugin')
    # Fix: compare to None by identity, not `==` (PEP 8).
    if plugin_creator is None:
        print('OnehotPlugin plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    network = builder.create_network(
        flags=1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

    tensor_indices = network.add_input('indices', trt.DataType.INT32,
                                       shape_indices)

    # Depth of the one-hot encoding.
    depth = 10
    depth_field = trt.PluginField('depth',
                                  np.array([depth], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    plugin = plugin_creator.create_plugin(
        'OnehotPlugin', trt.PluginFieldCollection([depth_field]))
    layer = network.add_plugin_v2([tensor_indices], plugin)
    network.mark_output(layer.get_output(0))

    return builder.build_engine(network, builder.create_builder_config())
示例#9
0
def create_roipool_plugin(layer_name, out_size, featmap_strides,
                          roi_scale_factor, finest_scale):
    """Build a RoiPoolPluginDynamic plugin instance.

    layer_name: name given to the created plugin.
    out_size: int pooled output size.
    featmap_strides: per-level feature-map strides (converted to float32).
    roi_scale_factor: float rescaling factor applied to ROIs.
    finest_scale: int threshold used for feature-level assignment.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiPoolPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    for field in (
            trt.PluginField('out_size',
                            np.array([out_size], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField('featmap_strides',
                            np.array(featmap_strides).astype(np.float32),
                            trt.PluginFieldType.FLOAT32),
            trt.PluginField('roi_scale_factor',
                            np.array([roi_scale_factor], dtype=np.float32),
                            trt.PluginFieldType.FLOAT32),
            trt.PluginField('finest_scale',
                            np.array([finest_scale], dtype=np.int32),
                            trt.PluginFieldType.INT32),
    ):
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)
示例#10
0
def getTopKAveragePlugin(nTopK, maxTopK):
    """Return a TopKAveragePlugin configured with nTopK/maxTopK, or None if
    the creator is not registered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'TopKAveragePlugin':
            continue
        fields = [
            trt.PluginField("nTopK", np.array([nTopK], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("maxTopK", np.array([maxTopK], dtype=np.int32),
                            trt.PluginFieldType.INT32),
        ]
        return creator.create_plugin('TopKAveragePlugin',
                                     trt.PluginFieldCollection(fields))
    return None
示例#11
0
def create_layernorm_plugin(layer_name,
                            normalized_shape,
                            W,
                            B,
                            eps=1e-5,
                            type_id=trt.DataType.FLOAT):
    """Build a LayerNormPluginDynamic plugin instance.

    layer_name: name given to the created plugin.
    normalized_shape: trailing dims to normalize over (int32 array field).
    W, B: scale and shift weight buffers, passed through as FLOAT32 fields.
    eps: numerical-stability epsilon.
    type_id: TensorRT data-type selector for the plugin I/O.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'LayerNormPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    pfc.append(trt.PluginField("normalized_shape",
                               np.array(normalized_shape, dtype=np.int32),
                               trt.PluginFieldType.INT32))
    pfc.append(trt.PluginField("eps", np.array([eps], dtype=np.float32),
                               trt.PluginFieldType.FLOAT32))
    pfc.append(trt.PluginField("W", W, trt.PluginFieldType.FLOAT32))
    pfc.append(trt.PluginField("B", B, trt.PluginFieldType.FLOAT32))
    pfc.append(trt.PluginField("type_id",
                               np.array([type_id], dtype=np.int32),
                               trt.PluginFieldType.INT32))

    return creator.create_plugin(layer_name, pfc)
示例#12
0
def convert_grid_sample(ctx):
    """Converter that lowers the intercepted call to the GatherElementsPlugins
    TensorRT plugin.

    NOTE(review): despite the name, this converter reads (input, dim, index)
    from the call and builds a GatherElements plugin — it looks like a
    gather-elements converter rather than grid_sample; confirm against the
    site where this converter is registered.
    """
    # Parse the intercepted call's arguments and return value.
    input = ctx.method_args[0]
    dim = ctx.method_args[1]
    index = ctx.method_args[2]
    output = ctx.method_return

    # Normalize a possibly-negative dim to a positive axis index.
    dim = convert_dim(dim, input.dim())

    # Materialize the torch tensors as TensorRT tensors.
    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
    index_trt = add_missing_trt_tensors(ctx.network, [index])[0]

    # Look up the plugin creator and build the plugin with the single
    # 'dim' field.
    creator = trt.get_plugin_registry().get_plugin_creator(
        'GatherElementsPlugins', '1')
    assert creator is not None, 'Has no GatherElementsPlugins version 1'
    fc = []
    fc.append(
        trt.PluginField(name='dim',
                        data=np.array([dim], dtype=np.int32),
                        type=trt.PluginFieldType.INT32))
    fc = trt.PluginFieldCollection(fc)

    plugin = creator.create_plugin('GatherElementsPlugins', fc)
    layer = ctx.network.add_plugin_v2([input_trt, index_trt], plugin)

    # Bind the plugin output back to the torch-side return value.
    output._trt = layer.get_output(0)
示例#13
0
def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
    """
    Build the multi-head self-attention block: a fused QKV fully-connected
    layer followed by the QKV-to-context plugin. Returns the plugin layer.
    """
    assert len(input_tensor.shape) == 5
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    # Fused Q/K/V weights and biases (3 * hidden_size outputs).
    qkv_weight = init_dict[prefix + WQKV]
    qkv_bias = init_dict[prefix + BQKV]

    mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size,
                                           qkv_weight, qkv_bias)
    set_layer_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    def _int_field(name, value):
        return trt.PluginField(name, np.array([value], np.int32),
                               trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection([
        _int_field("hidden_size", hidden_size),
        _int_field("num_heads", num_heads),
        _int_field("S", S),
        _int_field("has_mask", has_mask),
    ])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv2ctx = network.add_plugin_v2([mult_all.get_output(0), imask],
                                    qkv2ctx_plug)
    set_layer_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx
    def create_plugins(self):
        """Instantiate and cache the TensorRT plugins this model needs."""
        # Positional-encoding plugin takes no configuration fields.
        pos_enc_creator = self.get_plugin_creator('AddPosEncPlugin')
        self.plugins['AddPosEncPlugin'] = pos_enc_creator.create_plugin(
            'AddPosEncPlugin', trt.PluginFieldCollection())

        # Repeat plugin is parameterized by the maximum output sequence length.
        max_len_field = trt.PluginField(
            'maxOutputLength',
            np.array([self.trt_max_output_seq_len], dtype=np.int32),
            trt.PluginFieldType.INT32)
        repeat_creator = self.get_plugin_creator('RepeatPlugin')
        self.plugins['RepeatPlugin'] = repeat_creator.create_plugin(
            'RepeatPlugin', trt.PluginFieldCollection([max_len_field]))
示例#15
0
def build_engine(shape, shape2):
    """Build an engine with one RepeatPlugin layer over two FLOAT inputs.

    shape, shape2: shapes of the 'input1' and 'input2' network inputs.
    Returns the built CUDA engine; exits the process if the plugin creator
    is not registered.
    """
    plugin_creator = get_plugin_creator('RepeatPlugin')
    # Fix: compare to None by identity, not `==` (PEP 8).
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    builder.max_batch_size = 1024
    builder.max_workspace_size = 1 << 20
    builder.fp16_mode = use_fp16
    network = builder.create_network()

    tensor = network.add_input('input1', trt.DataType.FLOAT, shape)
    tensor2 = network.add_input('input2', trt.DataType.FLOAT, shape2)
    max_len_field = trt.PluginField(
        'maxOutputLength', np.array([MAX_OUTPUT_LENGTH], dtype=np.int32),
        trt.PluginFieldType.INT32)
    plugin = plugin_creator.create_plugin(
        'RepeatPlugin', trt.PluginFieldCollection([max_len_field]))
    tensor = network.add_plugin_v2([tensor, tensor2], plugin).get_output(0)

    network.mark_output(tensor)

    return builder.build_cuda_engine(network)
示例#16
0
def getSortPlugin():
    """Return a SortPlugin instance (ascending order), or None if the
    creator is not registered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'SortPlugin':
            continue
        descending = trt.PluginField("descending",
                                     np.array([0], dtype=np.int32),
                                     trt.PluginFieldType.INT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([descending]))
    return None
def create_carafefeaturereassemble_plugin(layer_name,
                                          scale_factor,
                                          up_kernel,
                                          up_group,
                                          type_id=trt.DataType.FLOAT):
    """Build a CarafeFeatureReassemblePluginDynamic plugin instance.

    layer_name: name given to the created plugin.
    scale_factor: int upsampling factor.
    up_kernel: int reassembly kernel size.
    up_group: int number of groups.
    type_id: TensorRT data-type selector for the plugin I/O.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'CarafeFeatureReassemblePluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()
    for field in (
            trt.PluginField("scale_factor",
                            np.array([scale_factor], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("up_kernel",
                            np.array([up_kernel], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("up_group",
                            np.array([up_group], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("type_id",
                            np.array([type_id], dtype=np.int32),
                            trt.PluginFieldType.INT32),
    ):
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)
示例#18
0
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """
    Add the skip + layernorm plugin layer.

    prefix: weight-name prefix for this layer in init_dict.
    config: build configuration (precision flags).
    init_dict: mapping of weight names to weights.
    network: TensorRT network being built.
    input_tensor: 5-D input tensor; hidden size is read from dim 2.
    skip: residual tensor added by the plugin.
    bias: optional bias weights appended as an extra plugin field.
    Returns the plugin layer.
    """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    # Select the plugin I/O precision: fp32 default, fp16 when enabled,
    # int8 only when explicitly requested and not during calibration.
    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Skip layernorm doesn't use INT8 inputs and output by default unless it is specified.
    if config.use_int8 and config.use_int8_skipln and not config.is_calib_mode:
        dtype = trt.int8

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]

    # Fix: test presence explicitly. `if bias:` evaluates the truthiness of
    # the weights object, which is ambiguous (and raises for multi-element
    # torch tensors); the default of None shows a presence check was meant.
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(), trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
示例#19
0
def skipln(prefix, init_dict, network, input_tensor, skip):
    """
    Add the skip + layernorm plugin layer (fixed fp32 type_id variant).

    prefix: weight-name prefix for this layer in init_dict.
    init_dict: mapping of weight names to weights.
    network: TensorRT network being built.
    input_tensor: 5-D input tensor; hidden size is read from dim 2.
    skip: residual tensor added by the plugin.
    Returns the plugin layer.
    """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    # Fix: the field is declared INT32 but the array was previously built
    # with dtype=np.float32, so the plugin would read float bit patterns
    # as integers; build it with the matching int32 dtype.
    pf_type_id = trt.PluginField("type_id", np.array([0], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)

    pfc = trt.PluginFieldCollection([pf_type_id, pf_ld, pf_beta, pf_gamma])
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
示例#20
0
    def get_dlrm_interactions_plugin(self, plugin_name, tableOffsets, interactionsOutputInterleaved):
        """Create a plugin layer for the DLRM Interactions plugin and return it.

        DLRM Interactions plugin takes two inputs: from bottom MLP and categorical input and looks up their embeddings.
        Since DLRM embeddings can be larger than GPU memory, the plugin keeps the most frequently used embeddings on GPU
        and rest on host and manages the lookup with good performance.

        plugin_name: registered name of the plugin creator to look up.
        tableOffsets: per-table offsets array, passed straight through as an INT32 field.
        interactionsOutputInterleaved: truthy flag, emitted as a 0/1 INT32 field.
        Returns the created plugin, or None if no creator matches plugin_name.
        """

        plugin = None
        # NOTE(review): no break after a match -- if several creators share
        # the name, the last match wins; presumably names are unique.
        for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
            if plugin_creator.name == plugin_name:
                embeddingSize_field = trt.PluginField("embeddingSize", np.array([self.embedding_size], dtype=np.int32), trt.PluginFieldType.INT32)
                embeddingRows_field = trt.PluginField("embeddingRows", np.array([self.embedding_rows_total], dtype=np.int32), trt.PluginFieldType.INT32)
                # 0 while calibrating, 1 for fp16, 2 otherwise -- the meaning of
                # each code is defined by the plugin implementation.
                reducedPrecisionIO_field = trt.PluginField("reducedPrecisionIO", np.array(
                    [0 if self.need_calibration else (1 if self.precision == "fp16" else 2)], dtype=np.int32), trt.PluginFieldType.INT32)
                embeddingWeightsOnGpuPart_field = trt.PluginField("embeddingWeightsOnGpuPart", np.array([self.embedding_weights_on_gpu_part], dtype=np.float32), trt.PluginFieldType.FLOAT32)
                interactionsOutputInterleaved_field = trt.PluginField("interactionsOutputInterleaved", np.array([1 if interactionsOutputInterleaved else 0], dtype=np.int32), trt.PluginFieldType.INT32)
                tableOffsets_field = trt.PluginField("tableOffsets", tableOffsets, trt.PluginFieldType.INT32)
                # File paths are encoded to bytes and passed as CHAR fields.
                embeddingWeightsFilepath_field = trt.PluginField("embeddingWeightsFilepath", np.array(list(self.embedding_weights_binary_filepath.encode()), dtype=np.int8), trt.PluginFieldType.CHAR)
                if self.use_row_frequencies:
                    rowFrequenciesFilepath_field = trt.PluginField("rowFrequenciesFilepath", np.array(list(self.row_frequencies_binary_filepath.encode()), dtype=np.int8), trt.PluginFieldType.CHAR)
                else:
                    # An empty path signals that row-frequency data is unused.
                    rowFrequenciesFilepath_field = trt.PluginField("rowFrequenciesFilepath", np.array(list("".encode()), dtype=np.int8), trt.PluginFieldType.CHAR)

                output_padding_field = trt.PluginField("outputPaddingGranularity", np.array([self.output_padding], dtype=np.int32), trt.PluginFieldType.INT32)

                field_collection = trt.PluginFieldCollection([embeddingSize_field, embeddingRows_field, reducedPrecisionIO_field, embeddingWeightsOnGpuPart_field,
                                                              interactionsOutputInterleaved_field, output_padding_field, tableOffsets_field, embeddingWeightsFilepath_field, rowFrequenciesFilepath_field])
                plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
        return plugin
示例#21
0
def build_engine(builder, input_shape):
    """Build an engine with a single AddPlugin layer adding 100.0.

    builder: a trt Builder instance.
    input_shape: shape of the FLOAT 'data' network input.
    Returns the built engine; exits the process if the plugin creator is
    not registered.
    """
    plugin_creator = get_plugin_creator('AddPlugin')
    # Fix: compare to None by identity, not `==` (PEP 8).
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 20

    builder.max_batch_size = 8
    network = builder.create_network()
    tensor = network.add_input('data', tensorrt.DataType.FLOAT, input_shape)

    value_field = tensorrt.PluginField('valueToAdd',
                                       np.array([100.0], dtype=np.float32),
                                       tensorrt.PluginFieldType.FLOAT32)
    plugin = plugin_creator.create_plugin(
        'AddPlugin', tensorrt.PluginFieldCollection([value_field]))
    layer = network.add_plugin_v2([tensor], plugin)
    tensor = layer.get_output(0)
    network.mark_output(tensor)

    return builder.build_engine(network, config)
示例#22
0
def create_meshgrid_plugin(layer_name,
                           num_inputs,
                           slice_dims=(2, 3),
                           starts=(0., 0.),
                           strides=(1., 1.)):
    """Build a MeshGridPluginDynamic plugin instance.

    layer_name: name given to the created plugin.
    num_inputs: number of input tensors the plugin consumes.
    slice_dims: dims to slice along (int32 array field).
    starts: per-dim start values (float32 array field).
    strides: per-dim stride values (float32 array field).

    Defaults are tuples rather than lists to avoid the shared
    mutable-default-argument pitfall; callers may still pass lists.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'MeshGridPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_num_inputs = trt.PluginField(
        "num_inputs", np.array([int(num_inputs)], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_num_inputs)

    pf_slice_dims = trt.PluginField("slice_dims",
                                    np.array(slice_dims, dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    pfc.append(pf_slice_dims)

    pf_starts = trt.PluginField("starts", np.array(starts, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_starts)

    pf_strides = trt.PluginField("strides", np.array(strides,
                                                     dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
    pfc.append(pf_strides)

    return creator.create_plugin(layer_name, pfc)
def build_engine(shape):
    """Build a CUDA engine that chains ten 'AddPlugin' layers on one input.

    Each plugin adds 10.0 to its input, so the network output is the
    input plus 100.0 overall.

    Args:
        shape: Shape of the single FLOAT input tensor named ``'data'``.

    Returns:
        The built CUDA engine (or None if the build fails).
    """
    plugin_creator = get_plugin_creator('AddPlugin')
    # Fix: compare against None with `is`, not `==` (PEP 8 identity test).
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    builder.max_batch_size = 1024
    builder.max_workspace_size = 1 << 20
    # `use_fp16` is a module-level flag — presumably set by the caller/script.
    builder.fp16_mode = use_fp16
    network = builder.create_network()

    tensor = network.add_input('data', trt.DataType.FLOAT, shape)
    for _ in range(10):
        tensor = network.add_plugin_v2([tensor],
                                       plugin_creator.create_plugin(
                                           'AddPlugin',
                                           trt.PluginFieldCollection([
                                               trt.PluginField(
                                                   'valueToAdd',
                                                   np.array([10.0],
                                                            dtype=np.float32),
                                                   trt.PluginFieldType.FLOAT32)
                                           ]))).get_output(0)

    network.mark_output(tensor)
    return builder.build_cuda_engine(network)
示例#24
0
    def optimize(self, network, point):
        """Replace a span of layers with a single fused plugin layer.

        Builds plugin fields from three layers at offsets 0, +2 and +4
        from ``point[0]``, creates the plugin, splices it into the
        network, and redirects every consumer of the original output
        tensor (the output of layer ``point[1]``) to the plugin's output.

        Args:
            network: The TensorRT network being rewritten.
            point: Pair of layer indices — ``point[0]`` is the first
                layer of the fused span, ``point[1]`` the layer whose
                output the plugin replaces.

        Raises:
            Exception: If plugin creation fails.
        """
        fields = trt.PluginFieldCollection()
        saved = []  #values must be alive when creating the plugin.
        inputs = [network.get_layer(point[0]).get_input(0)]
        # NOTE(review): the +2/+4 offsets assume a fixed layer layout
        # between point[0] and point[1] — confirm against the caller.
        append_fields(network, point[0], fields, saved, self.scale_map)
        append_fields(network, point[0] + 2, fields, saved, self.scale_map)
        append_fields(network, point[0] + 4, fields, saved, self.scale_map)

        plugin = self.creator.create_plugin(self.name, fields)
        if plugin is None:
            raise Exception("Plugin creation failed")

        plugin_layer = network.add_plugin_v2(inputs, plugin)
        # Suffix a counter so repeated fusions get unique layer names.
        plugin_layer.name = self.name + "_%d" % self.count
        self.count += 1
        origin_output = network.get_layer(point[1]).get_output(0)
        plugin_output = plugin_layer.get_output(0)
        assert (origin_output.name
                in self.scale_map), "%s not found!" % origin_output.name
        # Scale stored in scale_map is amax/127, so multiply back to the
        # symmetric int8 dynamic range expected by TensorRT.
        dynamic_range = self.scale_map[origin_output.name] * 127.0
        plugin_output.set_dynamic_range(-dynamic_range, dynamic_range)
        # Rewire every layer that consumed the original tensor to read
        # from the plugin output instead (skipping the plugin itself).
        for j in range(network.num_layers):
            layer = network.get_layer(j)
            if layer.name == plugin_layer.name:
                continue
            for k in range(layer.num_inputs):
                if layer.get_input(k) == origin_output:
                    layer.set_input(k, plugin_output)
示例#25
0
    def add_yoloHead(self, input_tensors):
        """Append the YOLO detection-head plugin to the network.

        Args:
            input_tensors: Layers whose output 0 feeds the YOLO head
                (one per detection scale).

        Returns:
            The plugin layer added to ``self.network``.
        """
        # Grid size at the coarsest stride (32). Integer division keeps the
        # grid dimensions exact ints rather than floats silently truncated
        # by the int32 cast below — assumes INPUT_SHAPE is a multiple of 32,
        # as is conventional for YOLO models (TODO confirm).
        mh = ModelData.INPUT_SHAPE[1] // 32
        mw = ModelData.INPUT_SHAPE[2] // 32
        # Three anchor pairs per scale, ordered small→large stride.
        anchors = np.array(
            [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119],
             [116, 90, 156, 198, 373, 326]],
            dtype=np.float32)
        num_cls = trt.PluginField("num_cls", np.array([3], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
        max_det = trt.PluginField("max_det", np.array([3024], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
        heights = trt.PluginField(
            "heights", np.array([mh * 4, mh * 2, mh], dtype=np.int32),
            trt.PluginFieldType.INT32)
        widths = trt.PluginField(
            "widths", np.array([mw * 4, mw * 2, mw], dtype=np.int32),
            trt.PluginFieldType.INT32)
        strides = trt.PluginField("strides",
                                  np.array([8, 16, 32], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
        # Renamed from `anchors` to avoid shadowing the array defined above.
        anchors_field = trt.PluginField("anchors", anchors,
                                        trt.PluginFieldType.FLOAT32)
        field_collection = trt.PluginFieldCollection(
            [num_cls, max_det, heights, widths, strides, anchors_field])
        yoloHead = YoloCreator.create_plugin(name='Yolo_TRT',
                                             field_collection=field_collection)

        return self.network.add_plugin_v2(
            inputs=[x.get_output(0) for x in input_tensors], plugin=yoloHead)
def create_torchembedding_plugin(layer_name, weight):
    """Create a ``TorchEmbeddingPluginDynamic`` plugin from a weight matrix.

    Args:
        layer_name (str): Name given to the created plugin.
        weight: 2-D embedding table of shape
            ``(num_embeddings, embedding_dim)``.

    Returns:
        The plugin object produced by the registry's creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchEmbeddingPluginDynamic', '1',
                                          '')

    rows = weight.shape[0]
    cols = weight.shape[1]

    field_collection = trt.PluginFieldCollection()
    for field in (
            trt.PluginField("num_embeddings",
                            np.array([rows], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("embedding_dim",
                            np.array([cols], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("weight", np.array(weight, dtype=np.float32),
                            trt.PluginFieldType.FLOAT32),
    ):
        field_collection.append(field)

    return creator.create_plugin(layer_name, field_collection)
def getLayerNormPlugin():
    """Find the 'LayerNorm' plugin creator and build a plugin with it.

    Returns:
        The created plugin, or None when no 'LayerNorm' creator is
        registered.
    """
    for candidate in trt.get_plugin_registry().plugin_creator_list:
        if candidate.name != 'LayerNorm':
            continue
        # NOTE(review): `epsilon` is a module-level value defined
        # elsewhere in this file — confirm it is set before calling.
        eps_field = trt.PluginField('epsilon', np.float32(epsilon),
                                    trt.PluginFieldType.FLOAT32)
        return candidate.create_plugin(
            candidate.name, trt.PluginFieldCollection([eps_field]))
    return None
示例#28
0
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """
    Add the skip (residual + LayerNorm) plugin layer.

    Args:
        prefix: Weight-name prefix used to look up "bias"/"weight" in
            ``init_dict``.
        config: Build config; ``config.use_fp16`` selects the plugin dtype.
        init_dict: Mapping of weight names to tensors exposing ``.numpy()``.
        network: TensorRT network to add the layer to.
        input_tensor: Main-path input tensor (rank-5, hidden size at dim 2).
        skip: Residual tensor to add to the main path.
        bias: Optional extra bias tensor appended as a plugin field.

    Returns:
        The plugin layer added to the network.
    """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "bias"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "weight"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField(
        "type_id", np.array([1 if config.use_fp16 else 0], np.int32),
        trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]

    # Fix: test for None explicitly — `if bias:` evaluates tensor
    # truthiness, which is ambiguous (and raises for multi-element
    # torch tensors).
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(),
                                  trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
示例#29
0
    def add_plugin(cls, network):
        """
        Route all current network outputs through the YoloLayer_TRT plugin.

        The plugin consumes every currently-marked output tensor; its
        single output becomes the network's only marked output and the
        previous outputs are unmarked.

        Adapted from https://github.com/jkjung-avt/tensorrt_demos

        Args:
            network: TensorRT network whose outputs are rewired.

        Returns:
            The same network, modified in place.

        Raises:
            RuntimeError: If the YoloLayer_TRT plugin creator is not
                registered.
        """
        def get_plugin_creator(plugin_name):
            # Linear scan of the registry; returns None when absent.
            plugin_creators = trt.get_plugin_registry().plugin_creator_list
            for plugin_creator in plugin_creators:
                if plugin_creator.name == plugin_name:
                    return plugin_creator
            return None

        plugin_creator = get_plugin_creator("YoloLayer_TRT")
        if not plugin_creator:
            raise RuntimeError("Failed to get YoloLayer_TRT plugin creator")

        old_tensors = [
            network.get_output(i) for i in range(network.num_outputs)
        ]
        # The plugin takes no fields, hence the empty collection.
        plugin = network.add_plugin_v2(
            old_tensors,
            plugin_creator.create_plugin("YoloLayer_TRT",
                                         trt.PluginFieldCollection([])),
        )
        # (Dead `new_tensors = []` initializer removed — it was
        # immediately overwritten.)
        new_tensors = plugin.get_output(0)

        network.mark_output(new_tensors)

        for old_tensor in old_tensors:
            network.unmark_output(old_tensor)
        return network
示例#30
0
def create_torchunfold_plugin(layer_name, kernel_size, dilation, padding,
                              stride):
    """Create a ``TorchUnfoldPluginDynamic`` plugin instance.

    Each geometry argument may be a single int (applied to both spatial
    dimensions, as in ``torch.nn.Unfold``) or a 2-sequence.

    Args:
        layer_name (str): Name given to the created plugin.
        kernel_size: Sliding-window size, int or (h, w).
        dilation: Element spacing within the window, int or (h, w).
        padding: Implicit zero padding, int or (h, w).
        stride: Window stride, int or (h, w).

    Returns:
        The plugin object produced by the registry's creator.
    """

    def _as_pair(value):
        # Broadcast a scalar to both spatial dims; pass sequences through.
        return (value, value) if isinstance(value, int) else value

    creator = trt.get_plugin_registry().get_plugin_creator(
        'TorchUnfoldPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    # One INT32 field per geometry parameter, in the order the plugin
    # expects (replaces four copy-pasted blocks).
    for field_name, value in (('kernel_size', kernel_size),
                              ('dilation', dilation), ('padding', padding),
                              ('stride', stride)):
        pfc.append(
            trt.PluginField(field_name,
                            np.array(_as_pair(value), dtype=np.int32),
                            trt.PluginFieldType.INT32))

    return creator.create_plugin(layer_name, pfc)