Example #1
import torch.nn as nn

# `attention` and `modules` are project-local packages that provide
# MultiHeadedAttention, PositionwiseFeedForward and LayerNorm.
class TransformerDecoderLayer(nn.Module):
    def __init__(self, dim, head_count, hidden_size, dropout, max_size=400):
        super(TransformerDecoderLayer, self).__init__()
        # Masked self-attention over the target sequence plus attention
        # over the encoder context, each with its own LayerNorm.
        self.self_attn = attention.MultiHeadedAttention(head_count, dim, dropout)
        self.context_attn = attention.MultiHeadedAttention(head_count, dim, dropout)
        self.feed_forward = modules.PositionwiseFeedForward(dim, hidden_size, dropout)
        self.layer_norm1 = modules.LayerNorm(dim)
        self.layer_norm2 = modules.LayerNorm(dim)
        self.dropout = dropout
        self.drop = nn.Dropout(dropout)
        # Precompute the subsequent (look-ahead) mask and keep it as a
        # non-trainable buffer so it moves with the module's device.
        mask = self._get_attn_subsequent_mask(max_size)
        self.register_buffer('mask', mask)
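The constructor above calls _get_attn_subsequent_mask, which is not shown in the example. As a point of reference, here is a minimal sketch of such a helper, the standard upper-triangular look-ahead mask used by Transformer decoders; this body is an assumption, not code from the example:

import numpy as np
import torch

def _get_attn_subsequent_mask(self, size):
    # Sketch of a TransformerDecoderLayer method: entries above the diagonal
    # are 1 and mark the future positions a target token must not attend to.
    attn_shape = (1, size, size)
    subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(subsequent_mask)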
Example #2
import torch.nn as nn

class TransformerDecoder(nn.Module):
    def __init__(self, embeddings, num_layers, head_count, hidden_size, dropout):
        super(TransformerDecoder, self).__init__()
        self.num_layers = num_layers
        self.embeddings = embeddings
        # Infer the model dimension from the first embedding table.
        dim = embeddings[0].weight.shape[1]
        # Stack of identical decoder layers, closed by a final LayerNorm.
        self.transformer_layers = nn.ModuleList(
            [TransformerDecoderLayer(dim, head_count, hidden_size, dropout)
             for _ in range(num_layers)])
        self.layer_norm = modules.LayerNorm(dim)
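For illustration only, a hypothetical instantiation of this decoder; the vocabulary size and hyperparameter values below are invented, and the project-local attention/modules packages are assumed to be importable:

import torch.nn as nn

# One source of token embeddings; dim is read from its weight matrix (512 here).
embeddings = nn.ModuleList([nn.Embedding(10000, 512)])
decoder = TransformerDecoder(embeddings, num_layers=6, head_count=8,
                             hidden_size=2048, dropout=0.1)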
class TransformerEncoderLayer(nn.Module):
    def __init__(self, dim, head_count, hidden_size, dropout):
        super(TransformerEncoderLayer, self).__init__()
        # Self-attention and position-wise feed-forward sub-modules; this
        # layer keeps a single LayerNorm and a single Dropout module.
        self.self_attn = attention.MultiHeadedAttention(
            head_count, dim, dropout)
        self.feed_forward = modules.PositionwiseFeedForward(
            dim, hidden_size, dropout)
        self.layer_norm = modules.LayerNorm(dim)
        self.dropout = nn.Dropout(dropout)
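These constructors only wire the sub-modules together. For context, a forward pass for such an encoder layer commonly follows a residual pattern like the sketch below; this is an assumption about the codebase, including that MultiHeadedAttention returns a pair of output and attention weights:

def forward(self, inputs, mask):
    # Layer-normalize, self-attend, then add the residual connection
    # before the position-wise feed-forward block.
    input_norm = self.layer_norm(inputs)
    context, _ = self.self_attn(input_norm, input_norm, input_norm, mask=mask)
    out = self.dropout(context) + inputs
    return self.feed_forward(out)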