Example #1
    def __init__(
        self,
        attention_dropout,
        decoder_attention_heads,
        self_attention_heads,
        decoder_conv_dim,
        # ARBABU: need to remove these two type parameters
        decoder_conv_type,
        attention_type,
        self_attention_type,
        decoder_embed_dim,
        decoder_ffn_embed_dim,
        decoder_glu,
        decoder_normalize_before,
        dropout,
        input_dropout,
        relu_dropout,
        need_attention,
        convolution_type,
        conv=None,
        self_attention=None,
        attention=None,
    ):
        super().__init__()
        self.embed_dim = decoder_embed_dim
        self.conv_dim = decoder_conv_dim
        if decoder_glu:
            self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim)
            self.act = nn.GLU()
        else:
            self.linear1 = Linear(self.embed_dim, self.conv_dim)
            self.act = PlaceholderIdentity()
        self.conv = conv
        self.linear2 = Linear(self.conv_dim, self.embed_dim)

        self.dropout = dropout
        self.relu_dropout = relu_dropout
        self.input_dropout = input_dropout
        self.normalize_before = decoder_normalize_before
        self.conv_layer_norm = LayerNorm(self.embed_dim)

        if attention is None:
            self.no_encoder_attn = True
            self.encoder_attn = PlaceholderAttentionIdentity()
            self.encoder_attn_layer_norm = PlaceholderIdentity()
        else:
            self.no_encoder_attn = False
            self.encoder_attn = attention
            self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)
        if self_attention is None:
            self.has_self_attn = False
            self.self_attn = PlaceholderAttentionIdentity()
        else:
            self.has_self_attn = True
            self.self_attn = self_attention
        self.fc1 = Linear(self.embed_dim, decoder_ffn_embed_dim)
        self.fc2 = Linear(decoder_ffn_embed_dim, self.embed_dim)

        self.final_layer_norm = LayerNorm(self.embed_dim)
        self.need_attn = need_attention
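A note on the GLU branch above: when decoder_glu is set, linear1 doubles the channel count because nn.GLU halves the last dimension again, gating one half with the sigmoid of the other. A minimal sketch of that dimension bookkeeping, using plain torch.nn.Linear in place of the Linear helper assumed by the example:

import torch
import torch.nn as nn

embed_dim, conv_dim = 8, 8                  # illustrative sizes
linear1 = nn.Linear(embed_dim, 2 * conv_dim)  # doubled width feeds the gate
act = nn.GLU()                              # splits last dim in half: a * sigmoid(b)

x = torch.randn(2, 5, embed_dim)            # (batch, seq_len, embed_dim)
y = act(linear1(x))
assert y.shape == (2, 5, conv_dim)          # back to the width the conv layer expects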
Example #2
    def __init__(self, target_dict, embed_tokens, layers, decoder_config):
        super().__init__()
        self.dropout = decoder_config.dropout

        input_embed_dim = embed_tokens.embedding_dim
        embed_dim = decoder_config.decoder_embed_dim
        output_embed_dim = decoder_config.decoder_output_dim

        padding_idx = target_dict.get_pad_index()
        self.max_target_positions = decoder_config.max_target_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)  # todo: try with input_embed_dim
        self.padding_idx = padding_idx

        self.no_token_positional_embeddings = (
            decoder_config.no_token_positional_embeddings
        )
        # creating this is also conditional
        self.project_in_dim = (
            Linear(input_embed_dim, embed_dim)
            if embed_dim != input_embed_dim
            else PlaceholderIdentity()
        )
        self.embed_layer_norm = LayerNorm(embed_dim)
        self.combine_pos_embed = decoder_config.combine_pos_embed.value
        self.embed_positions = build_positional_embedding(
            positional_embedding_type=decoder_config.positional_embedding_type,
            combine_pos_embed=decoder_config.combine_pos_embed,
            max_target_positions=decoder_config.max_target_positions,
            input_embed_dim=input_embed_dim,
            embed_dim=embed_dim,
            padding_idx=padding_idx,
            no_token_positional_embeddings=decoder_config.no_token_positional_embeddings,
        )

        self.layers = nn.ModuleList(layers)

        self.project_out_dim = (
            Linear(embed_dim, output_embed_dim, bias=False)
            if embed_dim != output_embed_dim
            else PlaceholderIdentity()
        )

        self.normalize = decoder_config.decoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = PlaceholderIdentity()
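The project_in_dim / project_out_dim pattern above only allocates a real projection when the dimensions actually differ; otherwise a pass-through module keeps the forward path branch-free. A rough sketch of the same idea, assuming torch.nn.Identity as a stand-in for PlaceholderIdentity and made-up sizes:

import math
import torch.nn as nn

input_embed_dim, embed_dim, output_embed_dim = 256, 512, 256  # hypothetical

# Only project when the widths differ; an identity keeps forward() uniform.
project_in_dim = (
    nn.Linear(input_embed_dim, embed_dim)
    if embed_dim != input_embed_dim
    else nn.Identity()
)
project_out_dim = (
    nn.Linear(embed_dim, output_embed_dim, bias=False)
    if embed_dim != output_embed_dim
    else nn.Identity()
)

# Token embeddings are scaled by sqrt(embed_dim) before positions are combined in.
embed_scale = math.sqrt(embed_dim)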
Example #3
    def __init__(self, src_dict, dst_dict, out_embed_dim=512, *args, **kwargs):
        super().__init__()
        self.linear_projection = Linear(out_embed_dim, len(dst_dict))
        self.reset_parameters()
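For context, linear_projection maps decoder states of width out_embed_dim to one logit per entry of dst_dict. A hypothetical usage sketch with illustrative sizes (not taken from the example):

import torch
import torch.nn as nn

vocab_size, out_embed_dim = 1000, 512       # hypothetical sizes
projection = nn.Linear(out_embed_dim, vocab_size)

hidden = torch.randn(4, 7, out_embed_dim)   # (batch, tgt_len, out_embed_dim)
logits = projection(hidden)                 # (batch, tgt_len, vocab_size)
assert logits.shape == (4, 7, vocab_size)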
Example #4
    def __init__(self, target_dict, embed_tokens, layers, decoder_config):
        super().__init__()
        self.dropout = decoder_config.dropout

        input_embed_dim = embed_tokens.embedding_dim
        embed_dim = decoder_config.decoder_embed_dim
        output_embed_dim = decoder_config.decoder_output_dim

        padding_idx = target_dict.get_pad_index()
        self.max_target_positions = decoder_config.max_target_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)  # todo: try with input_embed_dim
        self.padding_idx = padding_idx

        self.no_token_positional_embeddings = (
            decoder_config.no_token_positional_embeddings
        )
        # creating this is also conditional
        self.project_in_dim = (
            Linear(input_embed_dim, embed_dim)
            if embed_dim != input_embed_dim
            else PlaceholderIdentity()
        )
        self.embed_layer_norm = LayerNorm(embed_dim)
        self.combine_pos_embed = decoder_config.combine_pos_embed.value
        if decoder_config.combine_pos_embed == PostionalEmbedCombine.SUM:
            pos_embed_dim = embed_dim
        elif decoder_config.combine_pos_embed == PostionalEmbedCombine.CONCAT:
            pos_embed_dim = embed_dim - input_embed_dim
        else:
            raise NotImplementedError
        if not decoder_config.no_token_positional_embeddings:
            if decoder_config.positional_embedding_type == PostionalEmbedType.LEARNED:
                self.embed_positions = PositionalEmbedding(
                    decoder_config.max_target_positions,
                    pos_embed_dim,
                    padding_idx,
                )
            elif decoder_config.positional_embedding_type in (
                PostionalEmbedType.SINUSOIDAL,
                PostionalEmbedType.HYBRID,
            ):
                self.embed_positions = SinusoidalPositionalEmbedding(
                    pos_embed_dim,
                    padding_idx,
                    init_size=decoder_config.max_target_positions,
                    learned_embed=(
                        decoder_config.positional_embedding_type
                        == PostionalEmbedType.HYBRID
                    ),
                )
            else:
                raise NotImplementedError(
                    "Positional embedding type not supported")
        else:
            self.embed_positions = PlaceholderIdentity()

        self.layers = nn.ModuleList(layers)

        self.project_out_dim = (
            Linear(embed_dim, output_embed_dim, bias=False)
            if embed_dim != output_embed_dim
            else PlaceholderIdentity()
        )

        self.normalize = decoder_config.decoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = PlaceholderIdentity()
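The SUM / CONCAT branch above is pure dimension bookkeeping: with SUM the positional embedding must match embed_dim so it can be added elementwise, while with CONCAT it only fills the channels left over after the token embedding. A small sketch of that arithmetic (illustrative sizes, not taken from the example):

import torch

input_embed_dim, embed_dim = 256, 512       # hypothetical sizes

# CONCAT: token features plus positional features must add up to embed_dim.
pos_embed_dim = embed_dim - input_embed_dim
tok = torch.randn(2, 5, input_embed_dim)
pos = torch.randn(2, 5, pos_embed_dim)
assert torch.cat([tok, pos], dim=-1).shape[-1] == embed_dim

# SUM: the positional embedding is added elementwise, so it must be embed_dim wide.
pos_sum = torch.randn(2, 5, embed_dim)
summed = torch.zeros(2, 5, embed_dim) + pos_sum
assert summed.shape[-1] == embed_dim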