Example No. 1
import torch.nn as nn

# Norm, MultiHeadedSelfAttention and FeedForward are project-defined
# modules; their definitions are not shown in this listing.
class EncoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super(EncoderLayer, self).__init__()

        self.norm = Norm(d_model)
        self.dropout = nn.Dropout(dropout)
        # one self-attention sublayer and one position-wise feed-forward sublayer
        self.attention_layer = MultiHeadedSelfAttention(heads, d_model, dropout=dropout)
        self.ffnn_layer = FeedForward(d_model, dropout=dropout)
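Each example instantiates a Norm module whose definition the listing omits. A plausible sketch, assuming it is layer normalisation with a learnable gain and bias (the eps default is an assumption):

import torch
import torch.nn as nn

class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        # learnable gain and bias applied after normalisation
        self.alpha = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))
        self.eps = eps

    def forward(self, x):
        # normalise each position over the model dimension
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        return self.alpha * (x - mean) / (std + self.eps) + self.bias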
Example No. 2
# Constructor of an encoder-style layer; the enclosing nn.Module subclass
# is not shown in the source excerpt. Norm, MultiHeadAttention and
# FeedForward are project-defined modules.
def __init__(self, d_model, heads, dropout=0.1):
    super().__init__()
    # one Norm and one Dropout per residual sublayer
    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.ff = FeedForward(d_model, dropout=dropout)
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
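Example No. 2 shows only the constructor. A hypothetical forward pass for a layer with these submodules, assuming the common pre-norm residual pattern (normalise, apply the sublayer, dropout, add back the input); the actual forward is not part of the excerpt:

def forward(self, x, mask):
    # self-attention sublayer with residual connection
    x2 = self.norm_1(x)
    x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
    # feed-forward sublayer with residual connection
    x2 = self.norm_2(x)
    x = x + self.dropout_2(self.ff(x2))
    return x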
Example No. 3
import torch.nn as nn

class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super(DecoderLayer, self).__init__()

        self.norm = Norm(d_model)
        self.dropout = nn.Dropout(dropout)

        # In the decoder, the self-attention sublayer may only attend to
        # earlier positions in the output sequence, unlike its encoder
        # counterpart. The same attention class is reused for the
        # encoder-decoder attention sublayer.
        self.attention_layer = MultiHeadedSelfAttention(heads, d_model, dropout=dropout)
        self.encoder_decoder_attention_layer = MultiHeadedSelfAttention(heads, d_model, dropout=dropout)
        self.ffnn_layer = FeedForward(d_model, dropout=dropout)
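The restriction mentioned in Example No. 3's comment is usually enforced with an upper-triangular mask. A hypothetical helper (not shown in the source) that builds such a causal mask:

import torch

def causal_mask(size):
    # entries above the diagonal mark future positions, which must be hidden;
    # the returned tensor is True where attention is allowed
    future = torch.triu(torch.ones(size, size), diagonal=1)
    return future == 0

For example, causal_mask(3) lets position 0 attend only to itself, position 1 to positions 0-1, and position 2 to all three.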
Example No. 4
# Constructor of a decoder-style layer with two attention sublayers; the
# enclosing nn.Module subclass is not shown in the source excerpt.
def __init__(self, d_model, heads, decoder_extra_layers, dropout=0.1):
    super().__init__()
    # stored for use elsewhere in the class; its purpose is not visible here
    self.decoder_extra_layers = decoder_extra_layers

    # one Norm/Dropout pair per residual sublayer: masked self-attention,
    # encoder-decoder attention, and the feed-forward network
    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.norm_3 = Norm(d_model)

    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
    self.dropout_3 = nn.Dropout(dropout)

    self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.ff = FeedForward(d_model, dropout=dropout)
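As with Example No. 2, only the constructor is shown. A hypothetical forward wiring the three sublayers in the usual decoder order (masked self-attention, encoder-decoder attention, feed-forward); how decoder_extra_layers is used cannot be recovered from the excerpt, so it is ignored here:

def forward(self, x, e_outputs, src_mask, trg_mask):
    # masked self-attention over the target sequence
    x2 = self.norm_1(x)
    x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
    # attention over the encoder outputs
    x2 = self.norm_2(x)
    x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
    # position-wise feed-forward network
    x2 = self.norm_3(x)
    x = x + self.dropout_3(self.ff(x2))
    return x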
Example No. 5
# Constructor of a minimal layer that wraps only a feed-forward network;
# the enclosing nn.Module subclass is not shown in the source excerpt.
def __init__(self, d_model, dropout=0.1):
    super().__init__()
    self.ff = FeedForward(d_model, dropout=dropout)
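Every example delegates to a FeedForward module that the listing never defines. A minimal sketch of the position-wise feed-forward network they presumably wrap; the hidden width d_ff=2048 and the ReLU activation are assumptions taken from the standard Transformer design:

import torch.nn as nn
import torch.nn.functional as F

class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        # expand to d_ff, apply the nonlinearity and dropout, project back
        return self.linear_2(self.dropout(F.relu(self.linear_1(x))))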