import torch

# MultiHeadedAttention, LayerNormalization, and PositionwiseFeedForward are
# project-specific modules defined elsewhere in the source codebase.

class TransformerBlock(torch.nn.Module):
    def __init__(self, input_size, n_heads, drop_rate):
        super(TransformerBlock, self).__init__()
        # multi-head attention
        self.attentionMH = MultiHeadedAttention(n_heads, input_size, drop_rate)
        # layer normalization
        self.norm1 = LayerNormalization(input_size)
        self.norm2 = LayerNormalization(input_size)
        # layer feed-forward
        self.layer_ff = PositionwiseFeedForward(input_size, input_size * 4,
                                                input_size, drop_rate)

        self.drop = torch.nn.Dropout(drop_rate)
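
A minimal sketch of the matching forward pass, assuming the standard post-norm residual wiring (attention, then add & norm, then feed-forward, then add & norm); the source shows only the constructor, and the MultiHeadedAttention call signature (query, key, value, mask) is an assumption:

    def forward(self, x, mask=None):
        # self-attention sublayer with dropout, residual connection,
        # and post-layer-norm (assumed wiring; not shown in the source)
        x = self.norm1(x + self.drop(self.attentionMH(x, x, x, mask)))
        # position-wise feed-forward sublayer, same residual pattern
        x = self.norm2(x + self.drop(self.layer_ff(x)))
        return x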
Example #2
    def __init__(self,
                 input_size,
                 n_heads,
                 drop_rate,
                 device=torch.device("cpu")):
        super().__init__()
        # multi-head attention
        self.attnSelf = MultiHeadedAttention_Basic(n_heads, input_size,
                                                   drop_rate).to(device)
        self.attnEnc = MultiHeadedAttention_Basic(n_heads, input_size,
                                                  drop_rate).to(device)
        # layer normalization
        self.norm1 = LayerNormalization(input_size).to(device)
        self.norm2 = LayerNormalization(input_size).to(device)
        self.norm3 = LayerNormalization(input_size).to(device)
        # layer feed-forward
        self.pos_ff = PositionwiseFeedForward_Basic(input_size, input_size * 4,
                                                    input_size,
                                                    drop_rate).to(device)

        self.drop = torch.nn.Dropout(drop_rate).to(device)
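
This variant looks like a decoder block: self-attention plus attention over the encoder output, with a third norm for the feed-forward sublayer. A minimal sketch of the matching forward pass under the same post-norm assumption; the class definition and the attention call signatures are not shown in the source and are assumptions:

    def forward(self, x, enc_out, self_mask=None, enc_mask=None):
        # masked self-attention over the decoder input (assumed signature)
        x = self.norm1(x + self.drop(self.attnSelf(x, x, x, self_mask)))
        # attention over the encoder output (assumed signature)
        x = self.norm2(x + self.drop(self.attnEnc(x, enc_out, enc_out, enc_mask)))
        # position-wise feed-forward sublayer
        x = self.norm3(x + self.drop(self.pos_ff(x)))
        return x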
Example #3
    def __init__(self,
                 vocab_size,
                 hidden_size,
                 factor_size,
                 device=torch.device("cpu")):
        super().__init__()

        # factorized word embedding: vocab -> factor_size, then project to hidden_size
        self.word_embeddings = torch.nn.Embedding(vocab_size, factor_size)
        self.word_trans = torch.nn.Linear(factor_size, hidden_size)
        # positional embedding, likewise built at factor_size and projected up
        self.position_embeddings = PositionalEmbedding(factor_size, device)
        self.position_trans = torch.nn.Linear(factor_size, hidden_size)
        # layer normalization of the combined embedding
        self.norm = LayerNormalization(hidden_size)
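
This constructor factorizes the embeddings through the smaller factor_size before projecting to hidden_size (an ALBERT-style factorized embedding). A minimal sketch of the matching forward pass, assuming the word and position streams are summed before normalization; PositionalEmbedding's call signature is an assumption:

    def forward(self, input_ids):
        # factorized word embedding: vocab -> factor_size -> hidden_size
        words = self.word_trans(self.word_embeddings(input_ids))
        # positional embedding projected to the same hidden_size
        # (PositionalEmbedding call signature is an assumption)
        positions = self.position_trans(self.position_embeddings(input_ids))
        # sum and normalize (assumed combination; not shown in the source)
        return self.norm(words + positions)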