Example #1
    def __init__(self,
                 dim,
                 in_dim,
                 head_cnt=1,
                 kernel_ratio=0.5,
                 dp1=0.1,
                 dp2=0.1):
        super().__init__()
        self.emb = in_dim * head_cnt  # head_cnt is 1 here, so this multiplier is not strictly needed
        self.kqv = nn.Linear(dim, 3 * self.emb)
        self.dp = nn.Dropout(dp1)
        self.proj = nn.Linear(self.emb, self.emb)
        self.head_cnt = head_cnt
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(self.emb)
        self.epsilon = 1e-8  # for numerically stable division

        self.mlp = nn.Sequential(
            nn.Linear(self.emb, 1 * self.emb),
            nn.GELU(),
            nn.Linear(1 * self.emb, self.emb),
            nn.Dropout(dp2),
        )

        self.m = int(self.emb * kernel_ratio)
        self.w = paddle.randn((self.m, self.emb))

        self.w = add_parameter(self, orthogonal_(self.w) * math.sqrt(self.m))
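The snippet relies on two helpers that are not shown, orthogonal_ and add_parameter. A minimal sketch of plausible implementations (my assumptions, not the originals from the source repo):

import paddle

def orthogonal_(w):
    # QR of a square Gaussian matrix yields an orthogonal matrix;
    # the first m rows give m orthonormal projection directions.
    m, emb = w.shape
    q, _ = paddle.linalg.qr(paddle.randn((emb, emb)))
    return q[:m]

def add_parameter(layer, value):
    # Register a plain tensor as a trainable parameter on the layer.
    return layer.create_parameter(
        shape=value.shape,
        default_initializer=paddle.nn.initializer.Assign(value))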
Example #2
 def __init__(self, in_size, out_size):
     super(SimpleModel, self).__init__()
     self.linear = nn.Linear(in_size, out_size)
     self.dropout_1 = nn.Dropout(0.1)
     self.relu = nn.ReLU()
     self.dropout_2 = nn.Dropout(0.5)
     self.gelu = nn.GELU()
Example #3
 def __init__(self,
              inplanes=256,
              planes=256,
              kernel_size=9,
              dilation=1,
              dropout_rate=0.1):
     super(ResnetBasicBlock, self).__init__()
     self.conv1 = nn.Conv1D(in_channels=inplanes, out_channels=planes, kernel_size=kernel_size, dilation=dilation,
                            padding="same", data_format="NLC", weight_attr=nn.initializer.KaimingNormal())
     self.bn1 = nn.BatchNorm1D(planes, data_format="NLC")
     self.gelu1 = nn.GELU()
     self.dropout1 = nn.Dropout(p=dropout_rate)
     self.conv2 = nn.Conv1D(in_channels=planes, out_channels=planes, kernel_size=kernel_size, dilation=dilation,
                            padding="same", data_format="NLC", weight_attr=nn.initializer.KaimingNormal())
     self.bn2 = nn.BatchNorm1D(planes, data_format="NLC")
     self.gelu2 = nn.GELU()
     self.dropout2 = nn.Dropout(p=dropout_rate)
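The block's forward pass is not shown; one plausible residual wiring (a sketch, relying on padding="same" keeping input and output shapes equal):

def forward(self, x):
    h = self.dropout1(self.gelu1(self.bn1(self.conv1(x))))
    h = self.dropout2(self.gelu2(self.bn2(self.conv2(h))))
    return x + h  # residual connection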
Example #4
 def __init__(self, config):
     super(LayoutXLMIntermediate, self).__init__()
     self.dense = nn.Linear(config["hidden_size"],
                            config["intermediate_size"])
     if config["hidden_act"] == "gelu":
         self.intermediate_act_fn = nn.GELU()
     else:
         raise ValueError("hidden_act is set as: {}, please check it.".format(
             config["hidden_act"]))
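For reference, nn.GELU() with default arguments computes the exact Gaussian error linear unit x * Φ(x); a quick check:

import paddle
from paddle import nn

gelu = nn.GELU()
x = paddle.to_tensor([-1.0, 0.0, 1.0])
print(gelu(x))  # approximately [-0.1587, 0.0000, 0.8413]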
Example #5
    def __init__(self, n_head, hidden_size, attn_dropout, act_dropout):
        super(Plato2EncoderLayer, self).__init__()

        self.self_attn = nn.MultiHeadAttention(hidden_size, n_head,
                                               attn_dropout)
        self.pre_norm_layer = nn.LayerNorm(hidden_size)
        self.post_norm_layer = nn.LayerNorm(hidden_size)
        self.fc1 = nn.Linear(hidden_size, hidden_size * 4)
        self.fc2 = nn.Linear(hidden_size * 4, hidden_size)

        self.dropout_layer = nn.Dropout(act_dropout)
        self.gelu_layer = nn.GELU()
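The layer's forward() is omitted; one plausible pre-norm wiring of these sublayers (a sketch only, not necessarily the original Plato2 code):

def forward(self, x, attn_mask=None):
    # pre-norm self-attention with a residual connection
    x = x + self.self_attn(self.pre_norm_layer(x), attn_mask=attn_mask)
    # pre-norm position-wise feed-forward with GELU and a residual connection
    h = self.fc2(self.dropout_layer(self.gelu_layer(self.fc1(self.post_norm_layer(x)))))
    return x + h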
Example #6
    def __init__(self, distilbert):
        super(DistilBertForMaskedLM, self).__init__()
        self.distilbert = distilbert
        self.vocab_transform = nn.Linear(self.distilbert.config["hidden_size"],
                                         self.distilbert.config["hidden_size"])
        self.activation = nn.GELU()
        self.vocab_layer_norm = nn.LayerNorm(
            self.distilbert.config["hidden_size"])
        self.vocab_projector = nn.Linear(self.distilbert.config["hidden_size"],
                                         self.distilbert.config["vocab_size"])

        self.apply(self.init_weights)
Example #7
    def __init__(self, embedding_size, vocab_size, hidden_size):
        super(ErnieCtmMLMHead, self).__init__()
        self.layer_norm = nn.LayerNorm(embedding_size)

        self.bias = self.create_parameter(
            [vocab_size],
            is_bias=True,
            default_initializer=nn.initializer.Constant(value=0.0))
        self.dense = nn.Linear(hidden_size, embedding_size)
        self.decoder = nn.Linear(embedding_size, vocab_size)
        self.activation = nn.GELU(approximate=True)
        # Link bias
        self.decoder.bias = self.bias
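The last line ties the decoder's bias to the head's own parameter, so one Parameter is updated through both references; a quick hedged check (assuming the class above is importable):

head = ErnieCtmMLMHead(embedding_size=128, vocab_size=1000, hidden_size=768)
assert head.decoder.bias is head.bias  # one Parameter, two references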
Example #8
    def __init__(self,
                 vocab_size,
                 embedding_size=128,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 initializer_range=0.02,
                 pad_token_id=0,
                 use_content_summary=True,
                 content_summary_index=1,
                 cls_num=2):
        super(ErnieCtmModel, self).__init__()

        self.pad_token_id = pad_token_id
        self.content_summary_index = content_summary_index
        self.initializer_range = initializer_range
        self.embeddings = ErnieCtmEmbeddings(
            vocab_size,
            embedding_size,
            hidden_dropout_prob=hidden_dropout_prob,
            max_position_embeddings=max_position_embeddings,
            type_vocab_size=type_vocab_size,
            padding_idx=pad_token_id,
            cls_num=cls_num)
        self.embedding_hidden_mapping_in = nn.Linear(embedding_size,
                                                     hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation="gelu",
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0)
        encoder_layer.activation = nn.GELU(approximate=True)

        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
        self.pooler = ErnieCtmPooler(hidden_size)

        self.use_content_summary = use_content_summary
        self.content_summary_index = content_summary_index
        if use_content_summary:
            self.feature_fuse = nn.Linear(hidden_size * 2, intermediate_size)
            self.feature_output = nn.Linear(intermediate_size, hidden_size)

        self.apply(self.init_weights)
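Note the override of encoder_layer.activation: the tanh-approximate GELU replaces the exact one that activation="gelu" installs. The two are numerically very close; an indicative comparison:

import paddle

x = paddle.to_tensor([3.0])
print(paddle.nn.GELU()(x))                  # exact, ≈ 2.9959
print(paddle.nn.GELU(approximate=True)(x))  # tanh approximation, ≈ 2.9964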
Example #9
    def __init__(self,
                 nsp_reader,
                 num_layers,
                 n_head,
                 hidden_size,
                 vocab_size=8001,
                 type_size=2,
                 latent_type_size=20,
                 max_position_seq_len=256,
                 act_dropout=0.1,
                 attn_dropout=0.1,
                 max_dec_len=64,
                 min_dec_len=1,
                 topk=10):
        super(Plato2InferModel, self).__init__()

        self.nsp_reader = nsp_reader
        self.num_layers = num_layers
        self.latent_type_size = latent_type_size
        self.max_dec_len = max_dec_len
        self.min_dec_len = min_dec_len
        self.topk = topk
        self.unk_id = 0
        self.bos_id = 1
        self.eos_id = 2
        self.mask_id = 8000
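        # Bias every non-<eos> logit by -1e9 so that decoding stays on
        # <eos> once it has been emitted.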
        self.after_eos = paddle.ones([vocab_size]) * -1e9
        self.after_eos[self.eos_id] = 0
        self.is_cn = False
        self.batch_size = 1

        self.latent_weight = paddle.create_parameter(
            [hidden_size, latent_type_size], 'float32')

        self.plato2_encoder = Plato2Encoder(
            vocab_size, type_size, max_position_seq_len, num_layers, n_head,
            hidden_size, attn_dropout, act_dropout)

        self.logits_fc_layer = nn.Linear(hidden_size, hidden_size)
        self.logits_layer_norm = nn.LayerNorm(hidden_size)
        self.logits_bias = paddle.create_parameter(
            [vocab_size], 'float32', is_bias=True)

        self.nsp_predictor = NSP(vocab_size, type_size, max_position_seq_len,
                                 num_layers, n_head, hidden_size, attn_dropout,
                                 act_dropout)

        self.gelu_layer = nn.GELU()
        self.softmax = nn.Softmax()
Example #10
    def __init__(self,
                 vocab_size,
                 emb_dim=128,
                 hidden_size=256,
                 kernel_size=9,
                 n_layers=35,
                 padding_idx=0,
                 dropout_rate=0.1,
                 epsilon=1e-6):
        super(ResnetEncoderModel, self).__init__()

        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.token_embedding = nn.Embedding(vocab_size,
                                            emb_dim,
                                            padding_idx=padding_idx)
        max_pos_len = 3000
        self.pos_embedding = nn.Embedding(max_pos_len,
                                          emb_dim,
                                          padding_idx=padding_idx)

        self.layer_norm = nn.BatchNorm1D(emb_dim, data_format="NLC")  # batch norm, despite the attribute name
        self.dropout = nn.Dropout(dropout_rate)

        self.padded_conv = nn.Sequential(
            nn.Conv1D(in_channels=emb_dim, out_channels=hidden_size, kernel_size=kernel_size, padding="same",
                      data_format="NLC", weight_attr=nn.initializer.KaimingNormal()),
            nn.BatchNorm1D(hidden_size, data_format="NLC"),
            nn.GELU(),
            nn.Dropout(p=dropout_rate)
        )
        self.residual_block_1 = ResnetBasicBlock(inplanes=hidden_size,
                                                 planes=hidden_size,
                                                 kernel_size=kernel_size,
                                                 dropout_rate=dropout_rate)
        self.residual_block_n = nn.Sequential()
        for i in range(1, n_layers):
            self.residual_block_n.add_sublayer(
                "residual_block_%d" % i,
                ResnetBasicBlock(inplanes=hidden_size, planes=hidden_size,
                                 kernel_size=kernel_size, dilation=2,
                                 dropout_rate=dropout_rate))

        self.apply(self.init_weights)
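A rough, back-of-envelope receptive-field estimate for this stack (my own arithmetic, not from the source): each stride-1 Conv1D adds (kernel_size - 1) * dilation positions.

rf = 1 + 8                   # padded_conv: kernel 9, dilation 1
rf += 2 * 8 * 1              # residual_block_1: two convs, dilation 1
rf += (35 - 1) * 2 * 8 * 2   # 34 dilated blocks: two convs each, dilation 2
print(rf)                    # 1113 positions with the defaults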
Example #11
    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop=0.,
                 proj_drop=0.,
                 sr_ratio=1,
                 linear=False):
        super().__init__()
        assert dim % num_heads == 0

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
        self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.linear = linear
        self.sr_ratio = sr_ratio
        if not linear:
            if sr_ratio > 1:
                self.sr = nn.Conv2D(dim,
                                    dim,
                                    kernel_size=sr_ratio,
                                    stride=sr_ratio)
                self.norm = nn.LayerNorm(dim)
        else:
            self.pool = nn.AdaptiveAvgPool2D(7)
            self.sr = nn.Conv2D(dim, dim, kernel_size=1, stride=1)
            self.norm = nn.LayerNorm(dim)
            self.act = nn.GELU()
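When sr_ratio > 1, the strided convolution shrinks the key/value token grid while queries keep full resolution; a quick shape check with hypothetical sizes:

import paddle

sr = paddle.nn.Conv2D(64, 64, kernel_size=2, stride=2)  # as if sr_ratio = 2
x = paddle.randn([1, 64, 8, 8])
print(sr(x).shape)  # [1, 64, 4, 4]: 16 key/value tokens instead of 64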
Example #12
    def func_test_layer_str(self):
        module = nn.ELU(0.2)
        self.assertEqual(str(module), 'ELU(alpha=0.2)')

        module = nn.CELU(0.2)
        self.assertEqual(str(module), 'CELU(alpha=0.2)')

        module = nn.GELU(True)
        self.assertEqual(str(module), 'GELU(approximate=True)')

        module = nn.Hardshrink()
        self.assertEqual(str(module), 'Hardshrink(threshold=0.5)')

        module = nn.Hardswish(name="Hardswish")
        self.assertEqual(str(module), 'Hardswish(name=Hardswish)')

        module = nn.Tanh(name="Tanh")
        self.assertEqual(str(module), 'Tanh(name=Tanh)')

        module = nn.Hardtanh(name="Hardtanh")
        self.assertEqual(str(module),
                         'Hardtanh(min=-1.0, max=1.0, name=Hardtanh)')

        module = nn.PReLU(1, 0.25, name="PReLU", data_format="NCHW")
        self.assertEqual(
            str(module),
            'PReLU(num_parameters=1, data_format=NCHW, init=0.25, dtype=float32, name=PReLU)'
        )

        module = nn.ReLU()
        self.assertEqual(str(module), 'ReLU()')

        module = nn.ReLU6()
        self.assertEqual(str(module), 'ReLU6()')

        module = nn.SELU()
        self.assertEqual(
            str(module),
            'SELU(scale=1.0507009873554805, alpha=1.6732632423543772)')

        module = nn.LeakyReLU()
        self.assertEqual(str(module), 'LeakyReLU(negative_slope=0.01)')

        module = nn.Sigmoid()
        self.assertEqual(str(module), 'Sigmoid()')

        module = nn.Hardsigmoid()
        self.assertEqual(str(module), 'Hardsigmoid()')

        module = nn.Softplus()
        self.assertEqual(str(module), 'Softplus(beta=1, threshold=20)')

        module = nn.Softshrink()
        self.assertEqual(str(module), 'Softshrink(threshold=0.5)')

        module = nn.Softsign()
        self.assertEqual(str(module), 'Softsign()')

        module = nn.Swish()
        self.assertEqual(str(module), 'Swish()')

        module = nn.Tanhshrink()
        self.assertEqual(str(module), 'Tanhshrink()')

        module = nn.ThresholdedReLU()
        self.assertEqual(str(module), 'ThresholdedReLU(threshold=1.0)')

        module = nn.LogSigmoid()
        self.assertEqual(str(module), 'LogSigmoid()')

        module = nn.Softmax()
        self.assertEqual(str(module), 'Softmax(axis=-1)')

        module = nn.LogSoftmax()
        self.assertEqual(str(module), 'LogSoftmax(axis=-1)')

        module = nn.Maxout(groups=2)
        self.assertEqual(str(module), 'Maxout(groups=2, axis=1)')

        module = nn.Linear(2, 4, name='linear')
        self.assertEqual(
            str(module),
            'Linear(in_features=2, out_features=4, dtype=float32, name=linear)'
        )

        module = nn.Upsample(size=[12, 12])
        self.assertEqual(
            str(module),
            'Upsample(size=[12, 12], mode=nearest, align_corners=False, align_mode=0, data_format=NCHW)'
        )

        module = nn.UpsamplingNearest2D(size=[12, 12])
        self.assertEqual(
            str(module),
            'UpsamplingNearest2D(size=[12, 12], data_format=NCHW)')

        module = nn.UpsamplingBilinear2D(size=[12, 12])
        self.assertEqual(
            str(module),
            'UpsamplingBilinear2D(size=[12, 12], data_format=NCHW)')

        module = nn.Bilinear(in1_features=5, in2_features=4, out_features=1000)
        self.assertEqual(
            str(module),
            'Bilinear(in1_features=5, in2_features=4, out_features=1000, dtype=float32)'
        )

        module = nn.Dropout(p=0.5)
        self.assertEqual(str(module),
                         'Dropout(p=0.5, axis=None, mode=upscale_in_train)')

        module = nn.Dropout2D(p=0.5)
        self.assertEqual(str(module), 'Dropout2D(p=0.5, data_format=NCHW)')

        module = nn.Dropout3D(p=0.5)
        self.assertEqual(str(module), 'Dropout3D(p=0.5, data_format=NCDHW)')

        module = nn.AlphaDropout(p=0.5)
        self.assertEqual(str(module), 'AlphaDropout(p=0.5)')

        module = nn.Pad1D(padding=[1, 2], mode='constant')
        self.assertEqual(
            str(module),
            'Pad1D(padding=[1, 2], mode=constant, value=0.0, data_format=NCL)')

        module = nn.Pad2D(padding=[1, 0, 1, 2], mode='constant')
        self.assertEqual(
            str(module),
            'Pad2D(padding=[1, 0, 1, 2], mode=constant, value=0.0, data_format=NCHW)'
        )

        module = nn.ZeroPad2D(padding=[1, 0, 1, 2])
        self.assertEqual(str(module),
                         'ZeroPad2D(padding=[1, 0, 1, 2], data_format=NCHW)')

        module = nn.Pad3D(padding=[1, 0, 1, 2, 0, 0], mode='constant')
        self.assertEqual(
            str(module),
            'Pad3D(padding=[1, 0, 1, 2, 0, 0], mode=constant, value=0.0, data_format=NCDHW)'
        )

        module = nn.CosineSimilarity(axis=0)
        self.assertEqual(str(module), 'CosineSimilarity(axis=0, eps=1e-08)')

        module = nn.Embedding(10, 3, sparse=True)
        self.assertEqual(str(module), 'Embedding(10, 3, sparse=True)')

        module = nn.Conv1D(3, 2, 3)
        self.assertEqual(str(module),
                         'Conv1D(3, 2, kernel_size=[3], data_format=NCL)')

        module = nn.Conv1DTranspose(2, 1, 2)
        self.assertEqual(
            str(module),
            'Conv1DTranspose(2, 1, kernel_size=[2], data_format=NCL)')

        module = nn.Conv2D(4, 6, (3, 3))
        self.assertEqual(str(module),
                         'Conv2D(4, 6, kernel_size=[3, 3], data_format=NCHW)')

        module = nn.Conv2DTranspose(4, 6, (3, 3))
        self.assertEqual(
            str(module),
            'Conv2DTranspose(4, 6, kernel_size=[3, 3], data_format=NCHW)')

        module = nn.Conv3D(4, 6, (3, 3, 3))
        self.assertEqual(
            str(module),
            'Conv3D(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)')

        module = nn.Conv3DTranspose(4, 6, (3, 3, 3))
        self.assertEqual(
            str(module),
            'Conv3DTranspose(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)')

        module = nn.PairwiseDistance()
        self.assertEqual(str(module), 'PairwiseDistance(p=2.0)')

        module = nn.InstanceNorm1D(2)
        self.assertEqual(str(module),
                         'InstanceNorm1D(num_features=2, epsilon=1e-05)')

        module = nn.InstanceNorm2D(2)
        self.assertEqual(str(module),
                         'InstanceNorm2D(num_features=2, epsilon=1e-05)')

        module = nn.InstanceNorm3D(2)
        self.assertEqual(str(module),
                         'InstanceNorm3D(num_features=2, epsilon=1e-05)')

        module = nn.GroupNorm(num_channels=6, num_groups=6)
        self.assertEqual(
            str(module),
            'GroupNorm(num_groups=6, num_channels=6, epsilon=1e-05)')

        module = nn.LayerNorm([2, 2, 3])
        self.assertEqual(
            str(module),
            'LayerNorm(normalized_shape=[2, 2, 3], epsilon=1e-05)')

        module = nn.BatchNorm1D(1)
        self.assertEqual(
            str(module),
            'BatchNorm1D(num_features=1, momentum=0.9, epsilon=1e-05, data_format=NCL)'
        )

        module = nn.BatchNorm2D(1)
        self.assertEqual(
            str(module),
            'BatchNorm2D(num_features=1, momentum=0.9, epsilon=1e-05)')

        module = nn.BatchNorm3D(1)
        self.assertEqual(
            str(module),
            'BatchNorm3D(num_features=1, momentum=0.9, epsilon=1e-05, data_format=NCDHW)'
        )

        module = nn.SyncBatchNorm(2)
        self.assertEqual(
            str(module),
            'SyncBatchNorm(num_features=2, momentum=0.9, epsilon=1e-05)')

        module = nn.LocalResponseNorm(size=5)
        self.assertEqual(
            str(module),
            'LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=1.0)')

        module = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
        self.assertEqual(str(module),
                         'AvgPool1D(kernel_size=2, stride=2, padding=0)')

        module = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
        self.assertEqual(str(module),
                         'AvgPool2D(kernel_size=2, stride=2, padding=0)')

        module = nn.AvgPool3D(kernel_size=2, stride=2, padding=0)
        self.assertEqual(str(module),
                         'AvgPool3D(kernel_size=2, stride=2, padding=0)')

        module = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
        self.assertEqual(str(module),
                         'MaxPool1D(kernel_size=2, stride=2, padding=0)')

        module = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.assertEqual(str(module),
                         'MaxPool2D(kernel_size=2, stride=2, padding=0)')

        module = nn.MaxPool3D(kernel_size=2, stride=2, padding=0)
        self.assertEqual(str(module),
                         'MaxPool3D(kernel_size=2, stride=2, padding=0)')

        module = nn.AdaptiveAvgPool1D(output_size=16)
        self.assertEqual(str(module), 'AdaptiveAvgPool1D(output_size=16)')

        module = nn.AdaptiveAvgPool2D(output_size=3)
        self.assertEqual(str(module), 'AdaptiveAvgPool2D(output_size=3)')

        module = nn.AdaptiveAvgPool3D(output_size=3)
        self.assertEqual(str(module), 'AdaptiveAvgPool3D(output_size=3)')

        module = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
        self.assertEqual(
            str(module), 'AdaptiveMaxPool1D(output_size=16, return_mask=True)')

        module = nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
        self.assertEqual(str(module),
                         'AdaptiveMaxPool2D(output_size=3, return_mask=True)')

        module = nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
        self.assertEqual(str(module),
                         'AdaptiveMaxPool3D(output_size=3, return_mask=True)')

        module = nn.SimpleRNNCell(16, 32)
        self.assertEqual(str(module), 'SimpleRNNCell(16, 32)')

        module = nn.LSTMCell(16, 32)
        self.assertEqual(str(module), 'LSTMCell(16, 32)')

        module = nn.GRUCell(16, 32)
        self.assertEqual(str(module), 'GRUCell(16, 32)')

        module = nn.PixelShuffle(3)
        self.assertEqual(str(module), 'PixelShuffle(upscale_factor=3)')

        module = nn.SimpleRNN(16, 32, 2)
        self.assertEqual(
            str(module),
            'SimpleRNN(16, 32, num_layers=2\n  (0): RNN(\n    (cell): SimpleRNNCell(16, 32)\n  )\n  (1): RNN(\n    (cell): SimpleRNNCell(32, 32)\n  )\n)'
        )

        module = nn.LSTM(16, 32, 2)
        self.assertEqual(
            str(module),
            'LSTM(16, 32, num_layers=2\n  (0): RNN(\n    (cell): LSTMCell(16, 32)\n  )\n  (1): RNN(\n    (cell): LSTMCell(32, 32)\n  )\n)'
        )

        module = nn.GRU(16, 32, 2)
        self.assertEqual(
            str(module),
            'GRU(16, 32, num_layers=2\n  (0): RNN(\n    (cell): GRUCell(16, 32)\n  )\n  (1): RNN(\n    (cell): GRUCell(32, 32)\n  )\n)'
        )

        module1 = nn.Sequential(
            ('conv1', nn.Conv2D(1, 20, 5)), ('relu1', nn.ReLU()),
            ('conv2', nn.Conv2D(20, 64, 5)), ('relu2', nn.ReLU()))
        self.assertEqual(
            str(module1),
            'Sequential(\n  '\
            '(conv1): Conv2D(1, 20, kernel_size=[5, 5], data_format=NCHW)\n  '\
            '(relu1): ReLU()\n  '\
            '(conv2): Conv2D(20, 64, kernel_size=[5, 5], data_format=NCHW)\n  '\
            '(relu2): ReLU()\n)'
        )

        module2 = nn.Sequential(
            nn.Conv3DTranspose(4, 6, (3, 3, 3)),
            nn.AvgPool3D(kernel_size=2, stride=2, padding=0),
            nn.Tanh(name="Tanh"), module1, nn.Conv3D(4, 6, (3, 3, 3)),
            nn.MaxPool3D(kernel_size=2, stride=2, padding=0), nn.GELU(True))
        self.assertEqual(
            str(module2),
            'Sequential(\n  '\
            '(0): Conv3DTranspose(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)\n  '\
            '(1): AvgPool3D(kernel_size=2, stride=2, padding=0)\n  '\
            '(2): Tanh(name=Tanh)\n  '\
            '(3): Sequential(\n    (conv1): Conv2D(1, 20, kernel_size=[5, 5], data_format=NCHW)\n    (relu1): ReLU()\n'\
            '    (conv2): Conv2D(20, 64, kernel_size=[5, 5], data_format=NCHW)\n    (relu2): ReLU()\n  )\n  '\
            '(4): Conv3D(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)\n  '\
            '(5): MaxPool3D(kernel_size=2, stride=2, padding=0)\n  '\
            '(6): GELU(approximate=True)\n)'
        )
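The strings asserted above are exactly what print(layer) emits, e.g.:

import paddle
print(paddle.nn.GELU(True))  # GELU(approximate=True)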
Example #13
    def __init__(self, _emb_size, _n_layer, _n_head, _voc_size,
                 _max_position_seq_len, _sent_types, hidden_act, _dropout,
                 _attention_dropout, initializer_range):
        super(BertModel, self).__init__()
        self._emb_size = _emb_size
        self._n_layer = _n_layer
        self._n_head = _n_head
        self._voc_size = _voc_size
        self._max_position_seq_len = _max_position_seq_len
        self._sent_types = _sent_types
        if hidden_act == "gelu":
            self._hidden_act = nn.GELU()
        else:
            self._hidden_act = nn.ReLU()
        self._dropout = _dropout
        self._attention_dropout = _attention_dropout

        self._word_emb_name = "word_embedding"
        self._pos_emb_name = "pos_embedding"
        self._sent_emb_name = "sent_embedding"
        self._dtype = "float32"

        self._param_initializer = nn.initializer.TruncatedNormal(
            std=initializer_range)

        self.word_emb = nn.Embedding(
            num_embeddings=self._voc_size,
            embedding_dim=self._emb_size,
            name=self._word_emb_name,
            weight_attr=paddle.ParamAttr(initializer=self._param_initializer),
            sparse=False)
        self.position_emb = nn.Embedding(
            num_embeddings=self._max_position_seq_len,
            embedding_dim=self._emb_size,
            weight_attr=paddle.ParamAttr(name=self._pos_emb_name,
                                         initializer=self._param_initializer),
            sparse=False)
        self.sent_emb = nn.Embedding(num_embeddings=self._sent_types,
                                     embedding_dim=self._emb_size,
                                     weight_attr=paddle.ParamAttr(
                                         name=self._sent_emb_name,
                                         initializer=self._param_initializer),
                                     sparse=False)
        self.enc_pre_process_layer = NormalizeDropLayer(self._dropout,
                                                        self._emb_size,
                                                        name='pre_encoder')
        self._enc_out_layer = Encoder(
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            attention_dropout=self._attention_dropout,
            hidden_act=self._hidden_act,
            param_initializer=self._param_initializer,
            name='encoder')
        self.mask_trans_feat = nn.Linear(
            in_features=self._emb_size,
            out_features=self._emb_size,
            weight_attr=paddle.ParamAttr(name="mask_lm_trans_fc.w_0",
                                         initializer=self._param_initializer),
            bias_attr=paddle.ParamAttr(name='mask_lm_trans_fc.b_0'))
        self.mask_trans_act = self._hidden_act
        self.mask_post_process_layer = NormalizeLayer(self._emb_size,
                                                      name='mask_lm_trans')
        self.mask_lm_out_bias = self.create_parameter(
            shape=[self._voc_size],
            dtype=self._dtype,
            attr=paddle.ParamAttr(
                name="mask_lm_out_fc.b_0",
                initializer=paddle.nn.initializer.Constant(value=0.0)),
            is_bias=True)
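The shared TruncatedNormal initializer draws weights from a normal distribution truncated at two standard deviations; a small hedged demo (shapes are illustrative):

import paddle

emb = paddle.nn.Embedding(
    1000, 64,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.TruncatedNormal(std=0.02)))
print(float(emb.weight.std()))  # a bit below 0.02 because of the truncation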
Example #14

import math
import paddle
from paddle import nn
from .. import PretrainedModel, register_base_model

__all__ = [
    'SqueezeBertModel',
    'SqueezeBertForSequenceClassification',
    'SqueezeBertForTokenClassification',
    'SqueezeBertForQuestionAnswering',
]

ACT2FN = {'gelu': nn.GELU()}


def _convert_attention_mask(attention_mask, inputs):
    if attention_mask.dim() == 3:
        extended_attention_mask = attention_mask.unsqueeze(1)
    elif attention_mask.dim() == 2:
        # (batch, seq_len) -> (batch, 1, 1, seq_len)
        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(1)
    else:
        raise ValueError("attention_mask must be 2-D or 3-D, got {}-D".format(
            attention_mask.dim()))
    extended_attention_mask = paddle.cast(extended_attention_mask,
                                          inputs.dtype)  # fp16 compatibility
    extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
    return extended_attention_mask
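A hedged usage sketch: a 2-D padding mask (1 = keep, 0 = pad) becomes an additive bias broadcastable over (batch, heads, query_len, key_len):

import paddle

mask = paddle.to_tensor([[1.0, 1.0, 0.0]])
bias = _convert_attention_mask(mask, paddle.zeros([1], dtype='float32'))
print(bias.shape)  # [1, 1, 1, 3]; the padded position gets -10000.0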


class SqueezeBertEmbeddings(nn.Layer):