Example #1
    def forward(self, token_inp, pos_inp, type_inp, turn_inp):
        # Sum the token, position, type and turn embeddings, then apply dropout.
        embed = self.token_embedding(token_inp) + \
            self.pos_embedding(pos_inp) + \
            self.type_embedding(type_inp) + \
            self.turn_embedding(turn_inp)
        embed = F.dropout(embed, self.dropout)
        return embed
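Example #1 builds the model input by summing four embedding lookups (token, position, type and dialogue turn) and applying dropout. The original appears to target PaddlePaddle's fluid-era API; below is a minimal PyTorch sketch of the same idea, with hypothetical module and size names, not the original implementation.

import torch.nn as nn
import torch.nn.functional as F

class TransformerEmbedding(nn.Module):
    # Hypothetical stand-in for the embedding module in Example #1 (PyTorch sketch).
    def __init__(self, vocab_size, max_pos, num_types, num_turns, hidden_size, dropout=0.1):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, hidden_size)
        self.pos_embedding = nn.Embedding(max_pos, hidden_size)
        self.type_embedding = nn.Embedding(num_types, hidden_size)
        self.turn_embedding = nn.Embedding(num_turns, hidden_size)
        self.dropout = dropout

    def forward(self, token_inp, pos_inp, type_inp, turn_inp):
        # All four inputs are integer id tensors of shape [batch_size, seq_len].
        embed = (self.token_embedding(token_inp)
                 + self.pos_embedding(pos_inp)
                 + self.type_embedding(type_inp)
                 + self.turn_embedding(turn_inp))
        return F.dropout(embed, self.dropout, training=self.training)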
Example #2
    def forward(self, inp, mask=None, cache=None):
        """
        Forward pass of one transformer layer.

        @param : inp
        @type : Variable(shape: [batch_size, seq_len, hidden_size])

        @param : mask
        @type : Variable(shape: [batch_size, seq_len, seq_len]), optional

        @param : cache
        """
        # Self-attention sub-layer: dropout, residual connection, then layer normalization.
        attn_out = self.attn(inp, mask, cache)
        attn_out = F.dropout(attn_out, self.dropout)
        attn_out = self.attn_norm(attn_out + inp)

        # Feed-forward sub-layer: dropout, residual connection, then layer normalization.
        ff_out = self.ff(attn_out)
        ff_out = F.dropout(ff_out, self.dropout)
        ff_out = self.ff_norm(ff_out + attn_out)

        return ff_out
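Example #2 is a post-layer-norm transformer block: each sub-layer's output is passed through dropout, added back to its input, and then layer-normalized. The following is a minimal PyTorch sketch of that pattern; nn.MultiheadAttention and the GELU feed-forward stack are assumptions standing in for the original self.attn and self.ff, and the incremental-decoding cache is omitted.

import torch.nn as nn
import torch.nn.functional as F

class PostLNTransformerLayer(nn.Module):
    # Sketch of the post-layer-norm pattern in Example #2; sub-module choices are assumptions.
    def __init__(self, hidden_size, num_heads, ff_size, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(hidden_size, num_heads,
                                          dropout=dropout, batch_first=True)
        self.attn_norm = nn.LayerNorm(hidden_size)
        self.ff = nn.Sequential(nn.Linear(hidden_size, ff_size),
                                nn.GELU(),
                                nn.Linear(ff_size, hidden_size))
        self.ff_norm = nn.LayerNorm(hidden_size)
        self.dropout = dropout

    def forward(self, inp, mask=None):
        attn_out, _ = self.attn(inp, inp, inp, attn_mask=mask)
        attn_out = F.dropout(attn_out, self.dropout, training=self.training)
        attn_out = self.attn_norm(attn_out + inp)    # residual + post layer norm

        ff_out = self.ff(attn_out)
        ff_out = F.dropout(ff_out, self.dropout, training=self.training)
        ff_out = self.ff_norm(ff_out + attn_out)     # residual + post layer norm
        return ff_out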
Example #3
    def _attn(self, query, key, value, mask):
        # Scaled dot-product attention scores.
        # shape: [batch_size, num_head, seq_len, seq_len]
        scores = layers.matmul(x=query, y=key, alpha=self.scale)

        if mask is not None:
            # Broadcast the mask over attention heads; 1 marks positions to ignore.
            mask = F.unsqueeze(mask, [1])
            mask = layers.expand(mask, [1, self.num_heads, 1, 1])
            mask.stop_gradient = True
            # Push masked positions to a large negative value before the softmax.
            scores = (1 - mask) * scores + layers.scale(mask, scale=-1e10)

        attn = layers.softmax(scores, axis=-1)
        attn = F.dropout(attn, self.dropout)

        if mask is not None:
            # Zero out any residual attention weight on masked positions.
            attn = (1 - mask) * attn

        out = layers.matmul(x=attn, y=value)
        return out
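Example #3 is scaled dot-product attention where the mask marks positions to ignore with 1: masked scores are pushed to a large negative value (-1e10) before the softmax, and the attention weights on masked positions are zeroed again afterwards. The NumPy sketch below (single head, no dropout, hypothetical function name) illustrates just that masking arithmetic; self.scale in the original is typically 1/sqrt(per-head dimension).

import numpy as np

def masked_attention(query, key, value, mask, scale):
    # NumPy sketch of the masking arithmetic in Example #3 (single head, no dropout).
    # mask: 1 for positions to ignore, 0 for positions to attend to.
    scores = scale * (query @ key.T)                    # [seq_len, seq_len]
    scores = (1 - mask) * scores + mask * -1e10         # large negative at masked slots
    attn = np.exp(scores - scores.max(axis=-1, keepdims=True))
    attn = attn / attn.sum(axis=-1, keepdims=True)      # row-wise softmax
    attn = (1 - mask) * attn                            # re-zero masked positions
    return attn @ value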
Example #4
    def forward(self, x):
        # Project to the inner size, apply dropout, then project back out.
        out = self.linear_hidden(x)
        out = F.dropout(out, self.dropout)
        out = self.linear_out(out)
        return out
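Example #4 is the position-wise feed-forward block: a projection to an inner size, dropout, then a projection back to the model size. A minimal PyTorch sketch follows; the explicit GELU activation is an assumption, since the original may fold the nonlinearity into linear_hidden.

import torch.nn as nn
import torch.nn.functional as F

class FeedForward(nn.Module):
    # Sketch of Example #4; the explicit GELU activation is an assumption.
    def __init__(self, hidden_size, inner_size, dropout=0.1):
        super().__init__()
        self.linear_hidden = nn.Linear(hidden_size, inner_size)
        self.linear_out = nn.Linear(inner_size, hidden_size)
        self.dropout = dropout

    def forward(self, x):
        out = F.gelu(self.linear_hidden(x))
        out = F.dropout(out, self.dropout, training=self.training)
        return self.linear_out(out)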