def __init__(
    self,
    attention_dropout,
    decoder_attention_heads,
    self_attention_heads,
    decoder_conv_dim,
    # ARBABU: need to remove these two type parameters
    decoder_conv_type,
    attention_type,
    self_attention_type,
    decoder_embed_dim,
    decoder_ffn_embed_dim,
    decoder_glu,
    decoder_normalize_before,
    dropout,
    input_dropout,
    relu_dropout,
    need_attention,
    convolution_type,
    conv=None,
    self_attention=None,
    attention=None,
):
    super().__init__()
    self.embed_dim = decoder_embed_dim
    self.conv_dim = decoder_conv_dim
    # Project into the convolution channel dimension; the GLU branch doubles the
    # projection width because nn.GLU() halves it again via gating.
    if decoder_glu:
        self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim)
        self.act = nn.GLU()
    else:
        self.linear1 = Linear(self.embed_dim, self.conv_dim)
        self.act = PlaceholderIdentity()
    self.conv = conv
    self.linear2 = Linear(self.conv_dim, self.embed_dim)
    self.dropout = dropout
    self.relu_dropout = relu_dropout
    self.input_dropout = input_dropout
    self.normalize_before = decoder_normalize_before
    self.conv_layer_norm = LayerNorm(self.embed_dim)
    # Encoder attention is optional; placeholder modules act as no-ops when it is absent.
    if attention is None:
        self.no_encoder_attn = True
        self.encoder_attn = PlaceholderAttentionIdentity()
        self.encoder_attn_layer_norm = PlaceholderIdentity()
    else:
        self.no_encoder_attn = False
        self.encoder_attn = attention
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)
    # Self-attention is likewise optional.
    if self_attention is None:
        self.has_self_attn = False
        self.self_attn = PlaceholderAttentionIdentity()
    else:
        self.has_self_attn = True
        self.self_attn = self_attention
    self.fc1 = Linear(self.embed_dim, decoder_ffn_embed_dim)
    self.fc2 = Linear(decoder_ffn_embed_dim, self.embed_dim)
    self.final_layer_norm = LayerNorm(self.embed_dim)
    self.need_attn = need_attention
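# Hedged illustration (not part of the layer above): why the GLU branch uses
# Linear(embed_dim, 2 * conv_dim). nn.GLU() splits its input in half along the last
# dimension and gates one half with the sigmoid of the other, so the GLU path ends up
# with the same conv_dim channels as the plain Linear(embed_dim, conv_dim) branch.
# The tensors and dimensions below are arbitrary stand-ins.
import torch
import torch.nn as nn

embed_dim, conv_dim = 256, 512
x = torch.randn(10, embed_dim)

glu_path = nn.Sequential(nn.Linear(embed_dim, 2 * conv_dim), nn.GLU(dim=-1))
plain_path = nn.Linear(embed_dim, conv_dim)

assert glu_path(x).shape == plain_path(x).shape == (10, conv_dim)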
def __init__(self, target_dict, embed_tokens, layers, decoder_config):
    super().__init__()
    self.dropout = decoder_config.dropout
    input_embed_dim = embed_tokens.embedding_dim
    embed_dim = decoder_config.decoder_embed_dim
    output_embed_dim = decoder_config.decoder_output_dim
    padding_idx = target_dict.get_pad_index()
    self.max_target_positions = decoder_config.max_target_positions

    self.embed_tokens = embed_tokens
    self.embed_scale = math.sqrt(embed_dim)  # todo: try with input_embed_dim
    self.padding_idx = padding_idx
    self.no_token_positional_embeddings = (
        decoder_config.no_token_positional_embeddings
    )

    # creating this is also conditional
    self.project_in_dim = (
        Linear(input_embed_dim, embed_dim)
        if embed_dim != input_embed_dim
        else PlaceholderIdentity()
    )

    self.embed_layer_norm = LayerNorm(embed_dim)
    self.combine_pos_embed = decoder_config.combine_pos_embed.value
    self.embed_positions = build_positional_embedding(
        positional_embedding_type=decoder_config.positional_embedding_type,
        combine_pos_embed=decoder_config.combine_pos_embed,
        max_target_positions=decoder_config.max_target_positions,
        input_embed_dim=input_embed_dim,
        embed_dim=embed_dim,
        padding_idx=padding_idx,
        no_token_positional_embeddings=decoder_config.no_token_positional_embeddings,
    )

    self.layers = nn.ModuleList(layers)

    self.project_out_dim = (
        Linear(embed_dim, output_embed_dim, bias=False)
        if embed_dim != output_embed_dim
        else PlaceholderIdentity()
    )

    self.normalize = decoder_config.decoder_normalize_before
    if self.normalize:
        self.layer_norm = LayerNorm(embed_dim)
    else:
        self.layer_norm = PlaceholderIdentity()
def __init__(self, src_dict, dst_dict, out_embed_dim=512, *args, **kwargs):
    super().__init__()
    self.linear_projection = Linear(out_embed_dim, len(dst_dict))
    self.reset_parameters()
def __init__(self, target_dict, embed_tokens, layers, decoder_config):
    super().__init__()
    self.dropout = decoder_config.dropout
    input_embed_dim = embed_tokens.embedding_dim
    embed_dim = decoder_config.decoder_embed_dim
    output_embed_dim = decoder_config.decoder_output_dim
    padding_idx = target_dict.get_pad_index()
    self.max_target_positions = decoder_config.max_target_positions

    self.embed_tokens = embed_tokens
    self.embed_scale = math.sqrt(embed_dim)  # todo: try with input_embed_dim
    self.padding_idx = padding_idx
    self.no_token_positional_embeddings = (
        decoder_config.no_token_positional_embeddings
    )

    # creating this is also conditional
    self.project_in_dim = (
        Linear(input_embed_dim, embed_dim)
        if embed_dim != input_embed_dim
        else PlaceholderIdentity()
    )

    self.embed_layer_norm = LayerNorm(embed_dim)
    self.combine_pos_embed = decoder_config.combine_pos_embed.value
    # Positional features either span the full embed_dim (SUM) or fill the
    # channels left over after the token embedding (CONCAT).
    if decoder_config.combine_pos_embed == PostionalEmbedCombine.SUM:
        pos_embed_dim = embed_dim
    elif decoder_config.combine_pos_embed == PostionalEmbedCombine.CONCAT:
        pos_embed_dim = embed_dim - input_embed_dim
    else:
        raise NotImplementedError

    if not decoder_config.no_token_positional_embeddings:
        if decoder_config.positional_embedding_type == PostionalEmbedType.LEARNED:
            self.embed_positions = PositionalEmbedding(
                decoder_config.max_target_positions,
                pos_embed_dim,
                padding_idx,
            )
        elif decoder_config.positional_embedding_type in (
            PostionalEmbedType.SINUSOIDAL,
            PostionalEmbedType.HYBRID,
        ):
            self.embed_positions = SinusoidalPositionalEmbedding(
                pos_embed_dim,
                padding_idx,
                init_size=decoder_config.max_target_positions,
                learned_embed=decoder_config.positional_embedding_type
                == PostionalEmbedType.HYBRID,
            )
        else:
            raise NotImplementedError("Positional embedding type not supported")
    else:
        self.embed_positions = PlaceholderIdentity()

    self.layers = nn.ModuleList(layers)

    self.project_out_dim = (
        Linear(embed_dim, output_embed_dim, bias=False)
        if embed_dim != output_embed_dim
        else PlaceholderIdentity()
    )

    self.normalize = decoder_config.decoder_normalize_before
    if self.normalize:
        self.layer_norm = LayerNorm(embed_dim)
    else:
        self.layer_norm = PlaceholderIdentity()
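# Hedged sketch (not part of the decoder above): the dimension arithmetic behind
# pos_embed_dim. With SUM, positional features must match embed_dim so they can be
# added to the (projected) token embedding; with CONCAT, they occupy the channels
# left over after the raw token embedding, so the concatenation again totals
# embed_dim. All tensors below are arbitrary stand-ins, not the decoder's real
# embeddings or its actual forward pass.
import torch
import torch.nn as nn

input_embed_dim, embed_dim = 128, 160
tok = torch.randn(2, 5, input_embed_dim)  # B x T x input_embed_dim

# CONCAT: pos_embed_dim = embed_dim - input_embed_dim
pos_concat = torch.randn(2, 5, embed_dim - input_embed_dim)
assert torch.cat([tok, pos_concat], dim=-1).shape[-1] == embed_dim

# SUM: pos_embed_dim = embed_dim, so the token embedding is first brought to the
# same width (the role project_in_dim plays above) before the two are added.
projected = nn.Linear(input_embed_dim, embed_dim)(tok)
pos_sum = torch.randn(2, 5, embed_dim)
assert (projected + pos_sum).shape[-1] == embed_dim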