def __init__(
    self,
    conv_layers_before=None,
    input_size=83,
    hidden_size=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    residual=False,
    left_pad=False,
    pretrained_embed=None,
    padding_value=0.,
):
    super().__init__(None)  # no src dictionary
    self.conv_layers_before = conv_layers_before
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size
    self.residual = residual
    self.lstm = nn.ModuleList([
        LSTM(
            input_size=input_size if layer == 0
            else (2 * hidden_size if self.bidirectional else hidden_size),
            hidden_size=hidden_size,
            bidirectional=bidirectional,
        )
        for layer in range(num_layers)
    ])
    self.left_pad = left_pad
    self.padding_value = padding_value

    self.output_units = hidden_size
    if bidirectional:
        self.output_units *= 2
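# Hedged illustration (not from the source): why the nn.ModuleList above uses
# input_size for layer 0 and 2 * hidden_size afterwards when bidirectional=True.
# Plain torch.nn.LSTM stands in for fairseq's LSTM() wrapper, which has the same
# input/output shapes; the sizes below are arbitrary.
import torch
import torch.nn as nn

input_size, hidden_size, num_layers, bidirectional = 83, 512, 3, True
layers = nn.ModuleList([
    nn.LSTM(
        input_size=input_size if layer == 0
        else (2 * hidden_size if bidirectional else hidden_size),
        hidden_size=hidden_size,
        bidirectional=bidirectional,
    )
    for layer in range(num_layers)
])
x = torch.randn(10, 4, input_size)  # (seq_len, batch, features)
for lstm in layers:
    x, _ = lstm(x)  # each layer emits (seq_len, batch, 2 * hidden_size)
print(x.shape)      # torch.Size([10, 4, 1024])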
def __init__(
    self,
    conv_layers_before=None,
    input_size=83,
    hidden_size=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    residual=False,
    left_pad=False,
    padding_value=0.,
    src_bucketed=False,
    max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
):
    super().__init__(None)  # no src dictionary
    self.conv_layers_before = conv_layers_before
    self.num_layers = num_layers
    self.dropout_in_module = FairseqDropout(
        dropout_in, module_name=self.__class__.__name__
    )
    self.dropout_out_module = FairseqDropout(
        dropout_out, module_name=self.__class__.__name__
    )
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size
    self.residual = residual
    self.max_source_positions = max_source_positions
    self.lstm = nn.ModuleList([
        LSTM(
            input_size=input_size if layer == 0
            else (2 * hidden_size if self.bidirectional else hidden_size),
            hidden_size=hidden_size,
            bidirectional=bidirectional,
        )
        for layer in range(num_layers)
    ])
    self.left_pad = left_pad
    self.padding_value = padding_value
    self.src_bucketed = src_bucketed

    self.output_units = hidden_size
    if bidirectional:
        self.output_units *= 2
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    left_pad_source=True,
    pretrained_embed=None,
    padding_idx=None,
    max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    rnn_type="gru",
):
    super().__init__(dictionary)
    self.num_layers = num_layers
    self.dropout_in_module = FairseqDropout(
        dropout_in, module_name=self.__class__.__name__
    )
    self.dropout_out_module = FairseqDropout(
        dropout_out, module_name=self.__class__.__name__
    )
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size
    self.max_source_positions = max_source_positions

    num_embeddings = len(dictionary)
    self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = torch.nn.Embedding(num_embeddings, embed_dim, self.padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.rnn_type = rnn_type
    if rnn_type == "gru":
        self.hidden = GRU(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
        )
    elif rnn_type == "lstm":
        self.hidden = LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
        )

    self.left_pad_source = left_pad_source
    self.output_units = hidden_size
    if bidirectional:
        self.bidir_dense = torch.nn.Linear(2, 1)
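# Side note on the rnn_type switch above (general PyTorch behaviour, not the
# source's forward()): GRU and LSTM return different state objects, which is one
# reason the class records self.rnn_type for later use.
import torch
import torch.nn as nn

x = torch.randn(7, 4, 512)  # (seq_len, batch, embed_dim)
gru = nn.GRU(input_size=512, hidden_size=512)
lstm = nn.LSTM(input_size=512, hidden_size=512)
gru_out, h_n = gru(x)            # GRU: output plus final hidden state only
lstm_out, (h_n, c_n) = lstm(x)   # LSTM: output plus hidden state and cell state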
def __init__(
    self,
    dictionary,
    embed_dim,
    hidden_size=512,
    bidirectional=True,
    num_layers=2,
    no_token_rnn=False,
):
    super().__init__(dictionary)
    self.need_rnn = not no_token_rnn
    self.hidden_size = hidden_size
    self.bidirectional = bidirectional
    self.num_layers = num_layers
    self.rnn = (
        LSTM(
            input_size=embed_dim,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            bidirectional=self.bidirectional,
        )
        if self.need_rnn
        else None
    )
    hidden_size = self.hidden_size if self.need_rnn else embed_dim
    self.classifier = nn.Linear(hidden_size, len(dictionary))
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    left_pad=True,
    pretrained_embed=None,
    padding_value=0.,
):
    super().__init__(dictionary)
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size

    num_embeddings1 = len(dictionary[0])
    num_embeddings2 = len(dictionary[1])
    self.padding_idx_1 = dictionary[0].pad()
    self.padding_idx_2 = dictionary[1].pad()
    if pretrained_embed is None:
        self.embed_tokens_1 = Embedding(num_embeddings1, embed_dim, self.padding_idx_1)
        if bidirectional:
            self.embed_tokens_2 = Embedding(num_embeddings2, 2 * embed_dim, self.padding_idx_2)
        else:
            self.embed_tokens_2 = Embedding(num_embeddings2, embed_dim, self.padding_idx_2)
    else:
        self.embed_tokens_1, self.embed_tokens_2 = pretrained_embed

    self.lstm1 = LSTM(
        input_size=embed_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=self.dropout_out if num_layers > 1 else 0.,
        bidirectional=bidirectional,
    )
    # self.lstm2 = LSTM(
    #     input_size=embed_dim,
    #     hidden_size=hidden_size,
    #     num_layers=num_layers,
    #     dropout=self.dropout_out if num_layers > 1 else 0.,
    #     bidirectional=bidirectional,
    # )
    if self.bidirectional:
        self.fconv2 = FConvEncoder(
            dictionary[1], 2 * embed_dim, convolutions=[(512, 3)] * 15,
            dropout=dropout_in, left_pad=left_pad,
        )
    else:
        self.fconv2 = FConvEncoder(
            dictionary[1], embed_dim, convolutions=[(512, 3)] * 15,
            dropout=dropout_in, left_pad=left_pad,
        )
    self.fconv2.num_attention_layers = 1

    self.left_pad = left_pad
    self.padding_value = padding_value

    self.output_units = hidden_size
    if bidirectional:
        self.output_units *= 2
def __init__(
    self,
    pre_encoder=None,
    input_size=83,
    hidden_size=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    residual=False,
    left_pad=False,
    padding_value=0.0,
    src_bucketed=False,
    max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    multilayer_rnn_as_single_module=False,
):
    super().__init__(None)  # no src dictionary
    self.pre_encoder = pre_encoder
    self.num_layers = num_layers
    self.dropout_in_module = FairseqDropout(
        dropout_in * 1.0, module_name=self.__class__.__name__
    )
    self.dropout_out_module = FairseqDropout(
        dropout_out * 1.0, module_name=self.__class__.__name__
    )
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size
    self.residual = residual
    self.max_source_positions = max_source_positions
    # enforce deterministic behavior (https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
    self.multilayer_rnn_as_single_module = multilayer_rnn_as_single_module
    if self.multilayer_rnn_as_single_module:
        self.lstm = LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
        )
    else:
        self.lstm = nn.ModuleList(
            [
                LSTM(
                    input_size=input_size if layer == 0
                    else (2 * hidden_size if self.bidirectional else hidden_size),
                    hidden_size=hidden_size,
                    bidirectional=bidirectional,
                )
                for layer in range(num_layers)
            ]
        )
    self.left_pad = left_pad
    self.padding_value = padding_value
    self.src_bucketed = src_bucketed

    self.output_units = hidden_size
    if bidirectional:
        self.output_units *= 2
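# Note on the CUBLAS_WORKSPACE_CONFIG line above: the linked torch.nn.LSTM docs
# prescribe ":16:8" or ":4096:8" to avoid non-deterministic cuDNN/cuBLAS RNN
# behaviour on CUDA 10.2+, and the variable is typically set before any CUDA work
# runs. The broader PyTorch reproducibility guide additionally suggests seeding
# and torch.use_deterministic_algorithms(True); a minimal sketch under those
# assumptions (not the source's training setup):
import os
import torch

os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":16:8")
torch.manual_seed(0)
torch.use_deterministic_algorithms(True)  # raises if a non-deterministic op is hit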
def __init__(
    self,
    dictionary,
    embed_tokens,
    embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    left_pad=False,
    padding_value=0.,
    adaptive_softmax=False,
    adaptive_softmax_cutoff=[],
    adaptive_softmax_dropout=0.1,
    adaptive_softmax_factor=None,
):
    super(LSTMTaggerDecoder, self).__init__(dictionary=dictionary)
    # Resolve the embedding dimension regardless of which attribute name the
    # embedding module exposes.
    if hasattr(embed_tokens, "embedded_dim"):
        self.in_embed_dim = embed_tokens.embedded_dim
    elif hasattr(embed_tokens, "embed_dim"):
        self.in_embed_dim = embed_tokens.embed_dim
    elif hasattr(embed_tokens, "embedding_dim"):
        self.in_embed_dim = embed_tokens.embedding_dim
    else:
        raise AttributeError("embed_tokens does not expose an embedding dimension attribute")
    self.output_units = self.embed_dim = embed_dim
    self.out_embed_dim = len(dictionary)
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    if self.bidirectional:
        # self.output_units *= 2
        pass
    self.padding_idx = dictionary.pad()
    self.padding_value = padding_value
    self.left_pad = left_pad
    self.embed_tokens = embed_tokens

    # Optional projections between input embedding, RNN, and output vocabulary sizes.
    self.fc_in = self.fc_out1 = self.fc_out2 = None
    if self.in_embed_dim != self.embed_dim:
        self.fc_in = Linear(self.in_embed_dim, self.embed_dim)
    if self.output_units != self.embed_dim:
        self.fc_out1 = Linear(self.output_units, self.embed_dim)
    if self.embed_dim != self.out_embed_dim:
        self.fc_out2 = Linear(self.embed_dim, self.out_embed_dim)

    self.lstm = LSTM(
        input_size=embed_dim,
        hidden_size=embed_dim,
        num_layers=num_layers,
        dropout=self.dropout_out if num_layers > 1 else 0.,
        bidirectional=bidirectional,
    )
    self.adaptive_softmax = None
    if adaptive_softmax:
        self.adaptive_softmax = AdaptiveSoftmax(
            len(dictionary),
            self.embed_dim,
            adaptive_softmax_cutoff,
            dropout=adaptive_softmax_dropout,
            adaptive_inputs=None,
            factor=adaptive_softmax_factor,
            tie_proj=False,
        )
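# Hedged illustration of the adaptive-softmax idea gated by the adaptive_softmax
# flag above, using PyTorch's stock nn.AdaptiveLogSoftmaxWithLoss in place of
# fairseq's AdaptiveSoftmax (a different class with the same purpose: cheaper
# output projections for large vocabularies). Sizes and cutoffs are arbitrary.
import torch
import torch.nn as nn

vocab_size, embed_dim, bsz = 10000, 512, 8
asm = nn.AdaptiveLogSoftmaxWithLoss(embed_dim, vocab_size, cutoffs=[1000, 5000])
hidden = torch.randn(bsz, embed_dim)
targets = torch.randint(0, vocab_size, (bsz,))
output, loss = asm(hidden, targets)  # loss is the mean negative log-likelihood
log_probs = asm.log_prob(hidden)     # (bsz, vocab_size) full log-probabilities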
def __init__( self, dictionary, rnn_type="lstm", embed_dim=512, hidden_size=512, out_embed_dim=512, num_layers=1, dropout_in=0.1, dropout_out=0.1, attention_type="luong-dot", encoder_output_units=512, pretrained_embed=None, share_input_output_embed=False, adaptive_softmax_cutoff=None, max_target_positions=DEFAULT_MAX_TARGET_POSITIONS, residuals=False, ): super().__init__(dictionary) self.dropout_in_module = FairseqDropout( dropout_in, module_name=self.__class__.__name__ ) self.dropout_out_module = FairseqDropout( dropout_out, module_name=self.__class__.__name__ ) self.hidden_size = hidden_size self.share_input_output_embed = share_input_output_embed self.need_attn = True self.max_target_positions = max_target_positions self.residuals = residuals self.num_layers = num_layers self.adaptive_softmax = None num_embeddings = len(dictionary) padding_idx = dictionary.pad() if pretrained_embed is None: self.embed_tokens = torch.nn.Embedding(num_embeddings, embed_dim, padding_idx) else: self.embed_tokens = pretrained_embed self.encoder_output_units = encoder_output_units if encoder_output_units != hidden_size and encoder_output_units != 0: self.encoder_hidden_proj = torch.nn.Linear(encoder_output_units, hidden_size) self.encoder_cell_proj = torch.nn.Linear(encoder_output_units, hidden_size) else: self.encoder_hidden_proj = self.encoder_cell_proj = None # input feeding is described in arxiv.org/abs/1508.04025 input_feed_size = 0 if encoder_output_units == 0 else hidden_size # For Bahdanau, we compute the context on the input feed bahd_factor = hidden_size \ if attention_type in ["bahdanau-dot", "bahdanau-concat", "bahdanau-general", "bahdanau"] \ else 0 self.rnn_type = rnn_type if rnn_type == "lstm": self.layers = LSTM( input_size=input_feed_size + embed_dim + bahd_factor, hidden_size=hidden_size, num_layers=num_layers ) else: self.layers = GRU( input_size=input_feed_size + embed_dim + bahd_factor, hidden_size=hidden_size, num_layers=num_layers ) if attention_type == "none": self.attention_type = "none" self.attention = None else: self.attention_type = attention_type self.attention = Attention(self.attention_type, hidden_size) if hidden_size != out_embed_dim: self.additional_fc = torch.nn.Linear(hidden_size, out_embed_dim) if adaptive_softmax_cutoff is not None: # setting adaptive_softmax dropout to dropout_out for now but can be redefined self.adaptive_softmax = AdaptiveSoftmax( num_embeddings, hidden_size, adaptive_softmax_cutoff, dropout=dropout_out, ) elif not self.share_input_output_embed: self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)