Example #1
    def forward(self, x):
        """Forward pass of one encoder block.

        Args:
            x (torch.Tensor): The input sequence (B, T, dmodel)
        """
        # Self-attention sub-layer: residual (skip) connection, then layer norm.
        attention = self.attention(x, x, x)
        output_1 = self.norm_1(x + attention)

        # Position-wise feed-forward sub-layer with its own residual and norm.
        # self.norm_1 / self.norm_2 are assumed to be nn.LayerNorm modules created
        # once in __init__ so their affine parameters are registered and trained
        # (see the sketch below).
        feed_forward = self.feed_forward(output_1)
        return self.norm_2(output_1 + feed_forward)
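Both sub-layers follow the post-norm "Add & Norm" pattern of the original Transformer: each sub-layer's output is added back to its input and the sum is layer-normalized. Since the snippet only shows forward, here is a minimal sketch of how the surrounding module might be assembled; the class names EncoderBlock and MultiHeadSelfAttention and the constructor arguments d_model, n_heads, d_ff are assumptions, not part of the original code.

from torch import nn


class MultiHeadSelfAttention(nn.Module):
    """Assumed thin wrapper so that attention(q, k, v) returns a single
    (B, T, d_model) tensor, as the forward pass above expects."""

    def __init__(self, d_model: int, n_heads: int):
        super().__init__()
        self.mha = nn.MultiheadAttention(d_model, n_heads, batch_first=True)

    def forward(self, q, k, v):
        out, _ = self.mha(q, k, v, need_weights=False)
        return out


class EncoderBlock(nn.Module):
    """Hypothetical container for the encoder forward pass above."""

    def __init__(self, d_model: int = 512, n_heads: int = 8, d_ff: int = 2048):
        super().__init__()
        self.attention = MultiHeadSelfAttention(d_model, n_heads)
        # Position-wise feed-forward network applied independently at each time step.
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model)
        )
        # LayerNorms are created once here so their affine parameters are trained.
        self.norm_1 = nn.LayerNorm(d_model)
        self.norm_2 = nn.LayerNorm(d_model)

With the forward method above attached to EncoderBlock, calling the block on a (B, T, d_model) tensor returns a tensor of the same shape.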
Example #2
    def forward(self, x, y):
        """Forward pass of one decoder block.

        Args:
            x (torch.Tensor): The encoded sequence (B, T, dmodel)
            y (torch.Tensor): The shifted decoded sequence (B, T, dmodel)
        """
        # Masked self-attention over the decoded sequence (maskout=True keeps
        # position t from attending to later positions), then add & norm.
        attention_1 = self.attention_1(y, y, y, maskout=True)
        output_1 = self.norm_1(attention_1 + y)

        # Cross-attention: queries from the decoder, keys/values from the encoder.
        attention_2 = self.attention_2(output_1, x, x)
        output_2 = self.norm_2(attention_2 + output_1)

        # Position-wise feed-forward sub-layer with its own add & norm.
        # The self.norm_* modules are assumed to be nn.LayerNorm instances
        # created once in __init__, as in the encoder sketch above.
        ff = self.feed_forward(output_2)
        transformed_skip = self.norm_3(ff + output_2)

        # The encoder output is returned unchanged so the next decoder block can reuse it.
        return x, transformed_skip
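The decoder block differs from the encoder block in two ways: its first self-attention is causally masked (the maskout=True flag), so position t cannot attend to later positions, and a second cross-attention sub-layer takes its queries from the decoder stream and its keys and values from the encoder output x. The snippet does not show how the mask is built; below is a minimal sketch of one common way to do it with torch.nn.MultiheadAttention. The function name causal_mask and the concrete shapes are assumptions, not the author's code, and the author's maskout flag presumably does something equivalent internally.

import torch
from torch import nn


def causal_mask(seq_len: int) -> torch.Tensor:
    """Boolean (T, T) mask where True marks positions that may NOT be attended to."""
    return torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()


# Roughly what maskout=True is presumably doing inside self.attention_1:
mha = nn.MultiheadAttention(embed_dim=512, num_heads=8, batch_first=True)
y = torch.randn(2, 10, 512)                                   # (B, T, dmodel)
out, _ = mha(y, y, y, attn_mask=causal_mask(10), need_weights=False)
print(out.shape)                                              # torch.Size([2, 10, 512])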