def __init__(self,
             d_sentence_len: int,
             d_model: int,
             q: int,
             v: int,
             h: int,
             attention_size: int = None,
             dropout: float = 0.3):
    """Initialize the Decoder block."""
    super().__init__()

    # This variant always uses vanilla multi-head attention.
    # Note: attention_size is accepted but not forwarded here.
    MHA = MultiHeadAttention
    self._selfAttention = MHA(d_sentence_len, d_model, q, v, h)
    self._encoderDecoderAttention = MHA(d_sentence_len, d_model, q, v, h)
    self._feedForward = PositionwiseFeedForward(d_model)

    # One LayerNorm per sublayer (self-attention, encoder-decoder attention, feed-forward).
    self._layerNorm1 = nn.LayerNorm(d_model)
    self._layerNorm2 = nn.LayerNorm(d_model)
    self._layerNorm3 = nn.LayerNorm(d_model)
    self._dropout = nn.Dropout(p=dropout)
def __init__(self,
             d_model: int,
             q: int,
             v: int,
             h: int,
             attention_size: int = None,
             dropout: float = 0.3,
             chunk_mode: str = 'chunk'):
    """Initialize the Decoder block."""
    super().__init__()

    chunk_mode_modules = {
        'chunk': MultiHeadAttentionChunk,
        'window': MultiHeadAttentionWindow,
    }

    # Select the attention variant; None falls back to vanilla multi-head attention.
    if chunk_mode in chunk_mode_modules:
        MHA = chunk_mode_modules[chunk_mode]
    elif chunk_mode is None:
        MHA = MultiHeadAttention
    else:
        raise NameError(
            f'chunk_mode "{chunk_mode}" not understood. '
            f'Must be one of {", ".join(chunk_mode_modules.keys())} or None.')

    self._selfAttention = MHA(d_model, q, v, h, attention_size=attention_size)
    self._encoderDecoderAttention = MHA(d_model, q, v, h, attention_size=attention_size)
    self._feedForward = PositionwiseFeedForward(d_model)

    # One LayerNorm per sublayer (self-attention, encoder-decoder attention, feed-forward).
    self._layerNorm1 = nn.LayerNorm(d_model)
    self._layerNorm2 = nn.LayerNorm(d_model)
    self._layerNorm3 = nn.LayerNorm(d_model)
    self._dropout = nn.Dropout(p=dropout)
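To make the constructor concrete, here is a minimal usage sketch. The class name `Decoder` and the `forward(x, memory)` call are assumptions inferred from the docstring and the two attention sublayers; neither is shown in this excerpt.

import torch

# Assumed class name and forward signature -- illustrative only.
decoder = Decoder(d_model=64, q=8, v=8, h=4, attention_size=12, chunk_mode=None)
x = torch.randn(16, 100, 64)       # (batch, K time steps, d_model)
memory = torch.randn(16, 100, 64)  # encoder output, same shape convention
out = decoder(x, memory)           # residual sublayers preserve the shape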
def __init__(self,
             d_model: int,
             q: int,
             v: int,
             h: int,
             dropout: float = 0.3):
    """Initialize the Encoder block."""
    super().__init__()

    MHA = MultiHeadAttention
    self._selfAttention = MHA(d_model, q, v, h)
    self._feedForward = PositionwiseFeedForward(d_model)

    # Normalization layers -- though not the kind that restricts the data
    # to [0, 1] or to a Gaussian (normal) distribution.
    self._layerNorm1 = nn.LayerNorm(d_model)
    self._layerNorm2 = nn.LayerNorm(d_model)

    # nn.Dropout's source simply calls the dropout function from torch.nn.functional.
    self._dropout = nn.Dropout(p=dropout)
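Likewise, a minimal sketch for the Encoder block. The class name `Encoder` and a `forward(x)` that chains the self-attention and feed-forward sublayers (each followed by dropout, a residual connection, and LayerNorm) are assumptions based on the modules created above.

import torch

# Assumed class name and forward signature -- illustrative only.
encoder = Encoder(d_model=64, q=8, v=8, h=4, dropout=0.3)
x = torch.randn(16, 100, 64)  # (batch, K time steps, d_model)
out = encoder(x)              # output keeps the input shape (16, 100, 64)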