def __init__(
    self, dictionary, lang_dictionary, embedding, lang_embedding,
    embed_dim=512, hidden_size=512, out_embed_dim=512, num_layers=1,
    dropout_in=0.1, dropout_out=0.1, attention=True,
    encoder_output_units=1024, pretrained_embed=None,
    share_input_output_embed=False, adaptive_softmax_cutoff=None,
    lang_embedding_size=32,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.lang_embeddings_size = lang_embedding_size
    self.lang_dictionary = lang_dictionary
    self.embed_langs = lang_embedding
    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = embedding

    self.encoder_output_units = encoder_output_units
    self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
    self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=(
                hidden_size + embed_dim + self.lang_embeddings_size + self.encoder_output_units
                if layer == 0 else hidden_size
            ),
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    self.attention = None
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings, hidden_size, adaptive_softmax_cutoff, dropout=dropout_out
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
def __init__(
    self, dictionary, embed_dim=512, hidden_size=512, out_embed_dim=512,
    num_layers=1, dropout_in=0.1, dropout_out=0.1,
    encoder_output_units=0, attn_type=None, attn_dim=0,
    need_attn=False, residual=False, pretrained_embed=None,
    share_input_output_embed=False, adaptive_softmax_cutoff=None,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    if attn_type is None or attn_type.lower() == 'none':
        # no attention, no encoder output needed (language model case)
        need_attn = False
        encoder_output_units = 0
    self.need_attn = need_attn
    self.residual = residual
    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units

    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=encoder_output_units + (embed_dim if layer == 0 else hidden_size),
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])

    if attn_type is None or attn_type.lower() == 'none':
        self.attention = None
    elif attn_type.lower() == 'bahdanau':
        self.attention = speech_attention.BahdanauAttention(
            hidden_size, encoder_output_units, attn_dim,
        )
    elif attn_type.lower() == 'luong':
        self.attention = speech_attention.LuongAttention(
            hidden_size, encoder_output_units,
        )
    else:
        raise ValueError('unrecognized attention type.')

    if hidden_size + encoder_output_units != out_embed_dim:
        self.additional_fc = Linear(hidden_size + encoder_output_units, out_embed_dim)
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings, hidden_size, adaptive_softmax_cutoff, dropout=dropout_out
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
def __init__(
    self,
    dictionary: Dictionary,
    embed_dim: int = 512,
    hidden_size: int = 512,
    out_embed_dim: int = 512,
    num_layers: int = 1,
    dropout_in: float = 0.1,
    dropout_out: float = 0.1,
    attention: bool = True,
    encoder_embed_dim: int = 512,
    encoder_output_units: int = 512,
    pretrained_embed: Optional[nn.Embedding] = None,
    share_input_output_embed: bool = False,
    adaptive_softmax_cutoff: Optional[int] = None,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.need_attn = True
    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units

    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=hidden_size + embed_dim if layer == 0 else hidden_size,
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    self.attention = AttentionLayer(hidden_size, encoder_output_units, hidden_size) if attention else None
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings, embed_dim, adaptive_softmax_cutoff, dropout=dropout_out
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
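# --- Illustrative sketch (assumption, not part of the variants above): why the
# --- layer-0 LSTMCell takes `hidden_size + embed_dim` inputs. With input feeding,
# --- the embedded target token is concatenated with the previous step's
# --- attentional output before entering the cell. Plain torch modules stand in
# --- for the project's helpers; all sizes below are made-up examples.
import torch
import torch.nn as nn

embed_dim, hidden_size, bsz = 512, 512, 4
cell = nn.LSTMCell(input_size=hidden_size + embed_dim, hidden_size=hidden_size)

tok_emb = torch.randn(bsz, embed_dim)        # embedded target token at step t
input_feed = torch.zeros(bsz, hidden_size)   # previous attentional output (zeros at t=0)
h = torch.zeros(bsz, hidden_size)
c = torch.zeros(bsz, hidden_size)

h, c = cell(torch.cat([tok_emb, input_feed], dim=1), (h, c))  # each (bsz, hidden_size)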
def __init__(
    self, dictionary, lang_dictionary, embed_dim=512, hidden_size=512,
    out_embed_dim=512, num_layers=1, dropout_in=0.1, dropout_out=0.1,
    attention=True, encoder_output_units=512, pretrained_embed=None,
    share_input_output_embed=False, adaptive_softmax_cutoff=None,
    lang_embedding_size=32,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.lang_embedding_size = lang_embedding_size
    self.lang_dictionary = lang_dictionary
    self.embed_langs = nn.Embedding(len(lang_dictionary), lang_embedding_size)
    self.need_attn = False
    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

    self.encoder_output_units = encoder_output_units
    self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
    self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

    input_size = hidden_size + embed_dim + lang_embedding_size + encoder_output_units
    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=input_size if layer == 0 else hidden_size,
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    self.attention = None
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
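# --- Illustrative sketch (assumption): the layer-0 input size above is
# --- hidden_size + embed_dim + lang_embedding_size + encoder_output_units because
# --- each step concatenates the input feed, the token embedding, a target-language
# --- embedding, and the encoder context. torch.nn modules stand in for the
# --- project's helpers; sizes and the language count are made-up.
import torch
import torch.nn as nn

hidden_size, embed_dim, lang_embedding_size, encoder_output_units, bsz = 512, 512, 32, 512, 4
cell = nn.LSTMCell(
    input_size=hidden_size + embed_dim + lang_embedding_size + encoder_output_units,
    hidden_size=hidden_size,
)
embed_langs = nn.Embedding(10, lang_embedding_size)  # hypothetical: 10 target languages

input_feed = torch.zeros(bsz, hidden_size)                    # previous step's output
tok_emb = torch.randn(bsz, embed_dim)                         # embedded target token
lang_emb = embed_langs(torch.zeros(bsz, dtype=torch.long))    # one language id per sentence
enc_ctx = torch.randn(bsz, encoder_output_units)              # e.g. final encoder state

h, c = cell(
    torch.cat([input_feed, tok_emb, lang_emb, enc_ctx], dim=1),
    (torch.zeros(bsz, hidden_size), torch.zeros(bsz, hidden_size)),
)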
def __init__(
    self, dictionary, embed_dim=512, hidden_size=512, out_embed_dim=512,
    num_layers=1, dropout_in=0.1, dropout_out=0.1, attention=True,
    encoder_embed_dim=512, encoder_output_units=512, pretrained_embed=None,
    share_input_output_embed=False, adaptive_softmax_cutoff=None,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.need_attn = True
    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units
    assert encoder_output_units == hidden_size, \
        'encoder_output_units ({}) != hidden_size ({})'.format(encoder_output_units, hidden_size)
    # TODO another Linear layer if not equal

    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=encoder_output_units + embed_dim if layer == 0 else hidden_size,
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    self.attention_1 = AttentionLayer(encoder_output_units, hidden_size) if attention else None
    self.attention_2 = AttentionLayer(encoder_output_units, hidden_size) if attention else None
    # self.attention_combine_fc = Linear(2 * hidden_size, hidden_size)
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings, embed_dim, adaptive_softmax_cutoff, dropout=dropout_out
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    encoder_output_units=0,
    attn_type=None,
    attn_dim=0,
    need_attn=False,
    residual=False,
    pretrained_embed=None,
    share_input_output_embed=False,
    adaptive_softmax_cutoff=None,
    max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
    scheduled_sampling_rate_scheduler=None,
):
    super().__init__(dictionary)
    self.dropout_in_module = FairseqDropout(
        dropout_in * 1.0, module_name=self.__class__.__name__
    )
    self.dropout_out_module = FairseqDropout(
        dropout_out * 1.0, module_name=self.__class__.__name__
    )
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    if attn_type is None or str(attn_type).lower() == "none":
        # no attention, no encoder output needed (language model case)
        need_attn = False
        encoder_output_units = 0
    self.need_attn = need_attn
    self.residual = residual
    self.max_target_positions = max_target_positions
    self.num_layers = num_layers

    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units

    self.layers = nn.ModuleList(
        [
            LSTMCell(
                input_size=encoder_output_units + (embed_dim if layer == 0 else hidden_size),
                hidden_size=hidden_size,
            )
            for layer in range(num_layers)
        ]
    )

    if attn_type is None or str(attn_type).lower() == "none":
        self.attention = None
    elif str(attn_type).lower() == "bahdanau":
        self.attention = speech_attention.BahdanauAttention(
            hidden_size, encoder_output_units, attn_dim,
        )
    elif str(attn_type).lower() == "luong":
        self.attention = speech_attention.LuongAttention(
            hidden_size, encoder_output_units,
        )
    else:
        raise ValueError("unrecognized attention type.")

    if hidden_size + encoder_output_units != out_embed_dim:
        self.additional_fc = Linear(
            hidden_size + encoder_output_units, out_embed_dim
        )
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings,
            hidden_size,
            adaptive_softmax_cutoff,
            dropout=dropout_out,
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

    self.scheduled_sampling_rate_scheduler = scheduled_sampling_rate_scheduler
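# --- Illustrative sketch (assumption): with attn_type=None the code above zeroes
# --- encoder_output_units, so the layer-0 cell sees only the token embedding and
# --- the stack runs as a plain LSTM language model. Plain torch modules are used
# --- as stand-ins for the project's LSTMCell helper; sizes are made-up.
import torch
import torch.nn as nn

embed_dim, hidden_size, encoder_output_units, num_layers, bsz = 512, 512, 0, 2, 4
layers = nn.ModuleList([
    nn.LSTMCell(
        input_size=encoder_output_units + (embed_dim if layer == 0 else hidden_size),
        hidden_size=hidden_size,
    )
    for layer in range(num_layers)
])

x = torch.randn(bsz, embed_dim)  # embedded token; no encoder context to concatenate
states = [(torch.zeros(bsz, hidden_size), torch.zeros(bsz, hidden_size)) for _ in layers]
for i, cell in enumerate(layers):
    h, c = cell(x, states[i])
    states[i] = (h, c)
    x = h  # the output of one layer feeds the next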
def __init__(
    self, dictionary, embed_dim=512, hidden_size=512, out_embed_dim=512,
    num_layers=1, dropout_in=0.1, dropout_out=0.1, attention=True,
    encoder_output_units=512, pretrained_embed=None,
    share_input_output_embed=False, adaptive_softmax_cutoff=None,
    use_scratchpad=False, residual=False,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.need_attn = True
    self.use_scratchpad = use_scratchpad
    self.residual = residual

    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units
    if encoder_output_units != hidden_size:
        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)
    else:
        self.encoder_hidden_proj = self.encoder_cell_proj = None

    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=hidden_size + embed_dim if layer == 0 else hidden_size,
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    if attention:
        # TODO make bias configurable
        self.attention = AttentionLayer(hidden_size, encoder_output_units, hidden_size, bias=False)
    else:
        self.attention = None
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings, embed_dim, adaptive_softmax_cutoff, dropout=dropout_out
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

    # EDITED
    if self.use_scratchpad:
        self.attentive_writer = AttentiveWriter(hidden_size, encoder_output_units, encoder_output_units)
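# --- Illustrative sketch (assumption): when encoder_output_units != hidden_size,
# --- the two projections created above map the encoder's final hidden and cell
# --- states into the decoder's hidden size before they seed the decoder state.
# --- torch.nn.Linear stands in for the project's Linear helper; sizes are made-up.
import torch
import torch.nn as nn

encoder_output_units, hidden_size, bsz = 1024, 512, 4
encoder_hidden_proj = nn.Linear(encoder_output_units, hidden_size)
encoder_cell_proj = nn.Linear(encoder_output_units, hidden_size)

enc_h = torch.randn(bsz, encoder_output_units)  # encoder final hidden state
enc_c = torch.randn(bsz, encoder_output_units)  # encoder final cell state
dec_h0 = encoder_hidden_proj(enc_h)             # (bsz, hidden_size)
dec_c0 = encoder_cell_proj(enc_c)               # (bsz, hidden_size)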