Example #1
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()

        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)

        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)
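
A minimal instantiation sketch, assuming TensorFlow is available and the DecoderLayer class above (together with its MultiHeadAttention and point_wise_feed_forward_network helpers) is defined in the same module; the sizes below are the familiar base-transformer defaults, not values taken from this example:

# Sketch only: d_model=512, num_heads=8, dff=2048 are common base-transformer sizes.
decoder_layer = DecoderLayer(d_model=512, num_heads=8, dff=2048, rate=0.1)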
Example #2
    def __init__(self,
                 d_model=512,
                 d_k=64,
                 d_v=64,
                 h=8,
                 d_ff=2048,
                 dropout=.1,
                 identity_map_reordering=False,
                 attention_module=None,
                 attention_module_kwargs=None):
        super(EncoderLayer, self).__init__()
        self.identity_map_reordering = identity_map_reordering
        self.mhatt = MultiHeadAttention(
            d_model,
            d_k,
            d_v,
            h,
            dropout,
            identity_map_reordering=identity_map_reordering,
            attention_module=attention_module,
            attention_module_kwargs=attention_module_kwargs)
        self.pwff = PositionWiseFeedForward(
            d_model,
            d_ff,
            dropout,
            identity_map_reordering=identity_map_reordering)
Example #3
    def __init__(self, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1, self_att_module=None,
                 enc_att_module=None, self_att_module_kwargs=None, enc_att_module_kwargs=None):
        super(MeshedDecoderLayer, self).__init__()
        self.self_att = MultiHeadAttention(d_model, d_k, d_v, h, dropout, can_be_stateful=True,
                                           attention_module=self_att_module,
                                           attention_module_kwargs=self_att_module_kwargs)
        self.enc_att = MultiHeadAttention(d_model, d_k, d_v, h, dropout, can_be_stateful=False,
                                          attention_module=enc_att_module,
                                          attention_module_kwargs=enc_att_module_kwargs)
        self.pwff = PositionWiseFeedForward(d_model, d_ff, dropout)

        self.fc_alpha1 = nn.Linear(d_model + d_model, d_model)
        self.fc_alpha2 = nn.Linear(d_model + d_model, d_model)
        self.fc_alpha3 = nn.Linear(d_model + d_model, d_model)

        self.init_weights()
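
A similar PyTorch-side sketch, assuming torch and the MeshedDecoderLayer above (with its MultiHeadAttention and PositionWiseFeedForward helpers) are importable; the arguments simply restate the defaults from the signature:

# Sketch only: every value matches the defaults declared in __init__ above.
meshed_layer = MeshedDecoderLayer(d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=0.1)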
Example #4
    def __init__(self,
                 hid_dim,
                 emb_dim,
                 score_fn=F.softmax,
                 scaling_energy=False,
                 multi_head=False,
                 d_keys_values=64,
                 dropout=0.):
        super().__init__()
        self.score_fn = score_fn
        self.scale = torch.sqrt(torch.FloatTensor([0.5]))
        self.multi_head = multi_head

        self.scaling_energy = torch.sqrt(torch.Tensor(
            [emb_dim])) if scaling_energy else 1

        self.attention_hid2emb = nn.Linear(hid_dim, emb_dim)
        self.attention_emb2hid = nn.Linear(emb_dim, hid_dim)
        self.p_attention_emb2hid = nn.Linear(emb_dim, hid_dim)

        if multi_head:
            self.multi_head_att = MultiHeadAttention(emb_dim,
                                                     d_keys_values,
                                                     d_keys_values,
                                                     emb_dim // d_keys_values,
                                                     dropout=dropout)
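
Note that with multi_head=True the head count is derived as emb_dim // d_keys_values, so emb_dim should be a multiple of d_keys_values. A hypothetical instantiation sketch (the class name AttentionModule is assumed here, since the snippet does not show it):

# Hypothetical sketch: "AttentionModule" stands in for the unnamed class above.
# emb_dim=512 with d_keys_values=64 yields 512 // 64 = 8 attention heads.
att = AttentionModule(hid_dim=1024, emb_dim=512, multi_head=True, d_keys_values=64, dropout=0.1)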
Example #5
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(LastDecoderLayer, self).__init__()

    self.mha1 = MultiHeadAttention(d_model, num_heads)
    self.mha2 = PointerMultiHeadAttention(d_model, num_heads)

    self.pointer_attention = PointerAttention()

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    
    self.dropout1 = tf.keras.layers.Dropout(rate)
Example #6
    def __init__(self,
                 d_model=512,
                 d_k=64,
                 d_v=64,
                 h=8,
                 d_ff=2048,
                 dropout=.1,
                 identity_map_reordering=False,
                 self_att_module=None,
                 enc_att_module=None,
                 self_att_module_kwargs=None,
                 enc_att_module_kwargs=None):
        super(DecoderLayer, self).__init__()
        self.self_att = MultiHeadAttention(
            d_model,
            d_k,
            d_v,
            h,
            dropout,
            can_be_stateful=True,
            identity_map_reordering=identity_map_reordering,
            attention_module=self_att_module,
            attention_module_kwargs=self_att_module_kwargs)
        self.enc_att = MultiHeadAttention(
            d_model,
            d_k,
            d_v,
            h,
            dropout,
            can_be_stateful=False,
            identity_map_reordering=identity_map_reordering,
            attention_module=enc_att_module,
            attention_module_kwargs=enc_att_module_kwargs)
        self.pwff = PositionWiseFeedForward(
            d_model,
            d_ff,
            dropout,
            identity_map_reordering=identity_map_reordering)