Пример #1
0
    def __init__(
        self,
        input_size,
        n_heads,
        filter_size,
        hidden_size,
        dropout=None,
    ) -> None:
        """Create the sub-modules held by this layer.

        The constructor builds, in this order: a layer norm and a multi-head
        attention module stored as ``self_norm`` / ``self_attention``; a
        second multi-head attention module stored as ``cross_attention``
        together with two layer norms (``cross_norm_source`` and
        ``cross_norm_target``); and a feed-forward block stored as
        ``feed_forward``.

        Args:
            input_size: Passed to every ``nn.LayerNorm`` and, as a
                two-element list ``[input_size, input_size]``, to both
                ``MultiHeadAttention`` modules. Also the first argument of
                ``TransformerFeedForward``.
            n_heads: First argument of both ``MultiHeadAttention`` modules.
            filter_size: Forwarded to ``TransformerFeedForward``.
            hidden_size: Forwarded to ``TransformerFeedForward``.
            dropout: Forwarded unchanged to ``TransformerFeedForward``;
                defaults to ``None``.
        """
        super().__init__()

        def build_attention():
            # Each call gets its own fresh size list, exactly as if the
            # constructor were written out twice.
            return MultiHeadAttention(n_heads, [input_size, input_size])

        # NOTE: creation order is kept deliberately; it can determine
        # sub-module registration order and parameter-initialisation order.
        self.self_norm = nn.LayerNorm(input_size)
        self.self_attention = build_attention()

        self.cross_attention = build_attention()
        self.cross_norm_source = nn.LayerNorm(input_size)
        self.cross_norm_target = nn.LayerNorm(input_size)

        self.feed_forward = TransformerFeedForward(
            input_size,
            filter_size,
            hidden_size,
            dropout,
        )
 def __init__(self, n_heads, filter_size, hidden_size, dropout=None) -> None:
     """Create the sub-modules held by this layer.

     Builds, in this order, a ``LayerNorm`` (``norm``), a
     ``MultiHeadAttention`` (``self_attention``) and a
     ``TransformerFeedForward`` (``feed_forward``).

     Args:
         n_heads: Sole argument of ``MultiHeadAttention``.
         filter_size: Forwarded to ``TransformerFeedForward``.
         hidden_size: Forwarded to ``TransformerFeedForward``.
         dropout: Forwarded unchanged to ``TransformerFeedForward``;
             defaults to ``None``.
     """
     super().__init__()

     # Creation order is kept as-is; it can affect sub-module ordering.
     self.norm = LayerNorm()
     self.self_attention = MultiHeadAttention(n_heads)
     self.feed_forward = TransformerFeedForward(
         filter_size,
         hidden_size,
         dropout,
     )