Example #1
    def __init__(self,
                 size,
                 dropout,
                 head_count=8,
                 hidden_size=2048,
                 context_size=3,
                 padding_idx=1):
        super(HierarchicalContext, self).__init__()

        self.context_size = context_size
        self.padding_idx = padding_idx

        self.layer_norm_query_word = onmt.modules.LayerNorm(size)
        self.layer_norm_query_sent = onmt.modules.LayerNorm(size)
        self.layer_norm_word = onmt.modules.LayerNorm(size)
        self.layer_norm_sent = onmt.modules.LayerNorm(size)

        self.dropout = nn.Dropout(dropout)

        self.sent_attn = onmt.modules.MultiHeadedAttention(head_count,
                                                           size,
                                                           dropout=dropout)
        self.word_attn = onmt.modules.MultiHeadedAttention(head_count,
                                                           size,
                                                           dropout=dropout)

        self.linear = nn.Linear(2 * size, size)
        self.sigmoid = nn.Sigmoid()

        self.feed_forward = PositionwiseFeedForward(size, hidden_size, dropout)
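
A minimal instantiation sketch (not part of the original project): it assumes HierarchicalContext lives in a module that, like the snippets below, already imports torch.nn as nn, onmt, and PositionwiseFeedForward, and the hyperparameter values are illustrative only.

# Hypothetical usage: build the hierarchical context block for a 512-dim model.
# size should match the Transformer hidden size; the remaining arguments keep
# their defaults (head_count=8, hidden_size=2048, context_size=3, padding_idx=1).
context_layer = HierarchicalContext(size=512, dropout=0.1)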
Example #2
File: Transformer.py  Project: mmjaz/hnmt
    def __init__(self, size, dropout, head_count=8, hidden_size=2048):
        super(TransformerEncoderLayer, self).__init__()

        self.self_attn = onmt.modules.MultiHeadedAttention(head_count,
                                                           size,
                                                           dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(size, hidden_size, dropout)
        self.layer_norm = onmt.modules.LayerNorm(size)
        self.dropout = nn.Dropout(dropout)
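
A similarly hedged construction sketch; the values are placeholders, not taken from the hnmt configuration.

# Hypothetical usage: one encoder layer with 8 attention heads over a 512-dim model
# and a 2048-dim position-wise feed-forward sublayer.
encoder_layer = TransformerEncoderLayer(size=512, dropout=0.1,
                                        head_count=8, hidden_size=2048)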
Example #3
File: Transformer.py  Project: mmjaz/hnmt
    def __init__(self, size, dropout, head_count=8, hidden_size=2048):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = onmt.modules.MultiHeadedAttention(head_count,
                                                           size,
                                                           dropout=dropout)
        self.context_attn = onmt.modules.MultiHeadedAttention(head_count,
                                                              size,
                                                              dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(size, hidden_size, dropout)
        self.layer_norm_1 = onmt.modules.LayerNorm(size)
        self.layer_norm_2 = onmt.modules.LayerNorm(size)
        self.dropout = dropout
        self.drop = nn.Dropout(dropout)
        # MAX_SIZE is a module-level constant defined elsewhere in Transformer.py
        # (the maximum sequence length covered by the subsequent-position mask).
        mask = self._get_attn_subsequent_mask(MAX_SIZE)
        # Register self.mask as a buffer in TransformerDecoderLayer, so
        # it gets TransformerDecoderLayer's cuda behavior automatically.
        self.register_buffer('mask', mask)
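
A matching sketch for the decoder layer, again with placeholder values; it assumes MAX_SIZE is already defined at module level so the subsequent-position mask buffer can be built.

# Hypothetical usage: one decoder layer sized to match the encoder layer above.
decoder_layer = TransformerDecoderLayer(size=512, dropout=0.1)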