def test_convert_padding_direction(self):
    pad = 1
    left_pad = torch.LongTensor([
        [2, 3, 4, 5, 6],
        [1, 7, 8, 9, 10],
        [1, 1, 1, 11, 12],
    ])
    right_pad = torch.LongTensor([
        [2, 3, 4, 5, 6],
        [7, 8, 9, 10, 1],
        [11, 12, 1, 1, 1],
    ])
    self.assertAlmostEqual(
        right_pad,
        utils.convert_padding_direction(
            left_pad,
            pad,
            left_to_right=True,
        ),
    )
    self.assertAlmostEqual(
        left_pad,
        utils.convert_padding_direction(
            right_pad,
            pad,
            right_to_left=True,
        ),
    )
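# A quick, self-contained illustration of the behavior the test above checks.
# This is a minimal sketch, assuming fairseq's ``utils.convert_padding_direction``
# with the signature ``(src_tokens, padding_idx, left_to_right=..., right_to_left=...)``;
# the tensors and values below are made up for illustration.
import torch
from fairseq import utils

pad = 1
left_padded = torch.LongTensor([
    [1, 1, 5, 6, 7],   # two pads on the left
    [2, 3, 4, 5, 6],   # no padding
])

# Shift the non-pad tokens to the front; the pads move to the right.
right_padded = utils.convert_padding_direction(left_padded, pad, left_to_right=True)
print(right_padded)
# expected: tensor([[5, 6, 7, 1, 1],
#                   [2, 3, 4, 5, 6]])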
def forward(self, src_tokens, src_lengths):
    # The inputs to the ``forward()`` function are determined by the
    # Task, and in particular the ``'net_input'`` key in each
    # mini-batch. We discuss Tasks in the next tutorial, but for now just
    # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths*
    # has shape `(batch)`.

    # Note that the source is typically padded on the left. This can be
    # configured by adding the `--left-pad-source "False"` command-line
    # argument, but here we'll make the Encoder handle either kind of
    # padding by converting everything to be right-padded.
    if self.args.left_pad_source:
        # Convert left-padding to right-padding.
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            padding_idx=self.dictionary.pad(),
            left_to_right=True
        )

    # Return the Encoder's output. This can be any object and will be
    # passed directly to the Decoder.
    debug_out = self.dictionary.string(src_tokens, bpe_symbol=None, escape_unk=False)
    batch_penalties = []
    for line in debug_out.split("\n"):
        penalties = make_word_penalties_tokens(
            line=line,
            vocab=self.vocab_set,
            mapx=self.mapx,
            dictionary=self.dictionary,
        )
        batch_penalties.append(penalties)
    return batch_penalties
def forward(self, src_tokens, src_lengths):
    # The inputs to the ``forward()`` function are determined by the
    # Task, and in particular the ``'net_input'`` key in each
    # mini-batch. We discuss Tasks in the next tutorial, but for now just
    # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths*
    # has shape `(batch)`.

    # Note that the source is typically padded on the left. This can be
    # configured by adding the `--left-pad-source "False"` command-line
    # argument, but here we'll make the Encoder handle either kind of
    # padding by converting everything to be right-padded.
    # Convert everything to right-padding.
    if self.args.left_pad_source:
        # Convert left-padding to right-padding.
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            padding_idx=self.dictionary.pad(),
            left_to_right=True
        )

    # Embed the source.
    x = self.embed_tokens(src_tokens)

    # Apply dropout.
    x = self.dropout(x)

    # Pack the sequence into a PackedSequence object to feed to the LSTM.
    x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True)

    # Get the output from the LSTM.
    _outputs, (final_hidden, _final_cell) = self.lstm(x)
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:

        def combine_bidir(outs):
            out = outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous()
            return out.view(self.num_layers, bsz, -1)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    # saving the output to a file
    if self._encoder_states_dir:
        self._save_encoder_state(x, "batch-%s.pt")

    return {
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
    }
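# Why the conversion above matters: nn.utils.rnn.pack_padded_sequence assumes the
# padding sits at the *end* of each sequence (and, unless enforce_sorted=False,
# that sequences are sorted by decreasing length). A minimal, hedged sketch of
# that assumption using plain PyTorch only; shapes and values are illustrative.
import torch
import torch.nn as nn

pad = 0
# right-padded batch (B x T), lengths sorted in decreasing order
batch = torch.tensor([
    [4, 5, 6, 7],
    [8, 9, pad, pad],
])
lengths = torch.tensor([4, 2])

emb = nn.Embedding(10, 3, padding_idx=pad)
x = emb(batch)  # B x T x C

# pack_padded_sequence drops the trailing pad positions; if the batch were
# left-padded instead, real tokens would be cut off rather than the padding.
packed = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
out, _ = nn.LSTM(3, 5, batch_first=True)(packed)
unpacked, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
print(unpacked.shape)  # torch.Size([2, 4, 5])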
def _make_sample(self, batch=None, xs=None, ys=None):
    """Generate a sample object that Fairseq expects."""
    # add extra info to samples
    if batch is None and xs is None:
        raise ValueError("Must supply either batch or xs")
    if batch is None and ys is None:
        raise ValueError("Must supply either batch or ys")
    if xs is None:
        xs = batch.text_vec
    if ys is None:
        ys = batch.label_vec
    repadded = convert_padding_direction(xs, self.dict.pad(), right_to_left=True)
    sample = {}
    sample["id"] = torch.arange(len(xs) - 1)
    sample["net_input"] = {
        "src_tokens": repadded,
        "src_lengths": self._seq_length(xs),
    }
    if ys is not None:
        sample["target"] = ys
        sample["ntokens"] = sum(self._seq_length(ys)).item()
        sample["net_input"]["prev_output_tokens"] = self._right_shifted_ys(ys)
    return sample
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.data.new(*state_size).zero_()
    c0 = x.data.new(*state_size).zero_()
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:

        def combine_bidir(outs):
            return torch.cat([
                torch.cat([outs[2 * i], outs[2 * i + 1]], dim=0).view(
                    1, bsz, self.output_units)
                for i in range(self.num_layers)
            ], dim=0)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    return {
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
    }
def forward(self, src_tokens, src_lengths):
    # The inputs to the ``forward()`` function are determined by the
    # Task, and in particular the ``'net_input'`` key in each
    # mini-batch. We discuss Tasks in the next tutorial, but for now just
    # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths*
    # has shape `(batch)`.

    # Note that the source is typically padded on the left. This can be
    # configured by adding the `--left-pad-source "False"` command-line
    # argument, but here we'll make the Encoder handle either kind of
    # padding by converting everything to be right-padded.
    if self.args.left_pad_source:
        # Convert left-padding to right-padding.
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            padding_idx=self.dictionary.pad(),
            left_to_right=True
        )

    bsz, seqlen = src_tokens.size()

    # Embed the source.
    x = self.embed_tokens(src_tokens)

    # Apply dropout.
    x = self.dropout(x)

    # Pack the sequence into a PackedSequence object to feed to the LSTM.
    x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True)

    # Get the output from the LSTM.
    _outputs, (_final_hidden, _final_cell) = self.lstm(x)
    x, _ = nn.utils.rnn.pad_packed_sequence(_outputs, padding_value=0)
    assert list(x.size()) == [seqlen, bsz, 2 * self.hidden_dim]

    final_hidden = torch.mean(x, dim=0)
    assert list(final_hidden.size()) == [bsz, 2 * self.hidden_dim]

    mu = self.context_to_mu(final_hidden)
    logvar = self.context_to_logvar(final_hidden)
    std = torch.exp(0.5 * logvar)
    z = torch.randn(mu.size())
    if torch.cuda.is_available():
        z = z.cuda()
    z = z * std + mu

    # Return the Encoder's output. This can be any object and will be
    # passed directly to the Decoder.
    return {
        # this will have shape `(bsz, hidden_dim)`
        'final_hidden': z,
        'logvar': logvar,
        'mu': mu,
    }
def forward(self, src_tokens, src_lengths):
    src_tokens = src_tokens.t()
    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    # src_tokens: B x T
    x = F.dropout(self.embed_tokens(src_tokens), p=self.dropout,
                  training=self.training)

    # pack embedded source tokens into a PackedSequence
    if not hasattr(self, 'lstm_state') or self.lstm_state[0].size(1) != x.size(1):
        self.lstm_state = tuple((
            x.new_zeros(self.recurrent_layers * 2, x.size(1), x.size(2)),
            x.new_zeros(self.recurrent_layers * 2, x.size(1), x.size(2)),
        ))
        for state in self.lstm_state:
            nn.init.normal_(state, mean=0, std=0.1)
        self.lstm_state = tuple(
            (Parameter(state, requires_grad=self.learn_initial)
             for state in self.lstm_state))

    x, s = self.recurrent(x, self.lstm_state)

    # unpack outputs and apply dropout
    x = F.dropout(x, p=self.dropout, training=self.training)

    encoder_padding_mask = self.create_mask(src_lengths)

    if encoder_padding_mask is not None:
        x = x.masked_fill_(
            encoder_padding_mask.transpose(0, 1).unsqueeze(-1),
            0.0).type_as(x)

    if self.last_state == 'last':
        encoder_hiddens = self.reshape_bidirectional_encoder_state(
            s[0][-2:, ::])
        encoder_cells = self.reshape_bidirectional_encoder_state(
            s[1][-2:, ::])
    elif self.last_state == 'avg':
        encoder_hiddens = x.sum(dim=0) / x.size(0)
        encoder_cells = self.reshape_bidirectional_encoder_state(
            s[1][-2:, ::])
    else:
        raise NotImplementedError()

    return {
        'encoder_out': (x, encoder_hiddens, encoder_cells),
        'encoder_padding_mask': encoder_padding_mask,  # B x T
    }
def forward(self, src_tokens, src_lengths):
    if self.args.left_pad_source:
        src_tokens = utils.convert_padding_direction(
            src_tokens, left_to_right=True,
            padding_idx=self.dictionary.pad())
    x = self.embed_tokens(src_tokens)
    x = self.dropout(x)
    x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True)
    _outputs, (final_hidden, _final_cell) = self.lstm(x)
    return {'final_hidden': final_hidden.squeeze(0)}
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        src_tokens = utils.convert_padding_direction(src_tokens,
                                                     self.padding_idx,
                                                     left_to_right=True)

    # Fetch the batch size and the sequence length
    bsz, seqlen = src_tokens.size()

    # Embed the tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # Pack the embedded source tokens into a packed sequence instance
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # Finally, apply the rnn layers
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # Unpack the outputs
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)

    if self.bidirectional:

        def combine_bidirectional_output(outs):
            out = outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous()
            # flatten forward/backward states to [num_layers, bsz, 2 * hidden]
            return out.view(self.num_layers, bsz, -1)

        final_hiddens = combine_bidirectional_output(final_hiddens)
        final_cells = combine_bidirectional_output(final_cells)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    return {
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
    }
def forward(self, src_tokens, src_lengths):
    if self.left_pad_source:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = Variable(x.data.new(*state_size).zero_())
    c0 = Variable(x.data.new(*state_size).zero_())
    packed_outs, (final_hiddens, final_cells) = self.lstm(
        packed_x,
        (h0, c0),
    )

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:
        bi_final_hiddens, bi_final_cells = [], []
        for i in range(self.num_layers):
            bi_final_hiddens.append(
                torch.cat((final_hiddens[2 * i], final_hiddens[2 * i + 1]),
                          dim=0).view(bsz, self.output_units))
            bi_final_cells.append(
                torch.cat((final_cells[2 * i], final_cells[2 * i + 1]),
                          dim=0).view(bsz, self.output_units))
        return x, bi_final_hiddens, bi_final_cells

    return x, final_hiddens, final_cells
def forward(self, src_tokens, src_lengths: Tensor):
    if self.left_pad:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            torch.zeros_like(src_tokens).fill_(self.padding_idx),
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data)

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_idx * 1.0)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:
        final_hiddens = self.combine_bidir(final_hiddens, bsz)
        final_cells = self.combine_bidir(final_cells, bsz)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    return {
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': (encoder_padding_mask, torch.empty(0), torch.empty(0)),
    }
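# For reference, the conversion itself can be expressed as a pure tensor op:
# count the pads per row, then gather with indices rotated by that count.
# This is a minimal, hedged sketch of the idea, not fairseq's exact
# implementation, and it assumes padding is contiguous at one end of each row.
import torch

def shift_padding(tokens: torch.Tensor, pad: int, left_to_right: bool) -> torch.Tensor:
    pad_mask = tokens.eq(pad)
    if not pad_mask.any():
        return tokens
    max_len = tokens.size(1)
    positions = torch.arange(max_len, device=tokens.device).expand_as(tokens)
    num_pads = pad_mask.long().sum(dim=1, keepdim=True)
    if left_to_right:
        # rotate each row forward by its pad count: real tokens move to the front
        index = torch.remainder(positions + num_pads, max_len)
    else:
        # rotate each row backward: pads move to the front
        index = torch.remainder(positions - num_pads, max_len)
    return tokens.gather(1, index)

# e.g. shift_padding(torch.tensor([[1, 1, 5, 6]]), pad=1, left_to_right=True)
# -> tensor([[5, 6, 1, 1]])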
def _make_sample(self, xs, ys):
    """Generates a sample object that Fairseq expects."""
    # add extra info to samples
    # TODO: should the right/left padding thing be in torch agent?
    repadded = convert_padding_direction(xs, self.dict.pad(), right_to_left=True)
    sample = {}
    sample["net_input"] = {
        "src_tokens": repadded,
        "src_lengths": self._seq_length(xs),
    }
    if ys is not None:
        sample["target"] = ys
        sample["ntokens"] = sum(self._seq_length(ys)).item()
        sample["net_input"]["prev_output_tokens"] = self._right_shifted_ys(ys)
    return sample
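# The two _make_sample helpers above go the opposite way from the encoders:
# they take right-padded text tensors and convert them to left-padding, since
# fairseq translation models left-pad the source batch by default. A minimal,
# hedged sketch of that direction, again assuming fairseq's
# ``utils.convert_padding_direction`` signature; values are illustrative.
import torch
from fairseq import utils

pad = 1
right_padded = torch.LongTensor([
    [4, 5, 6, 7],
    [8, 9, pad, pad],
])

left_padded = utils.convert_padding_direction(right_padded, pad, right_to_left=True)
print(left_padded)
# expected: tensor([[4, 5, 6, 7],
#                   [1, 1, 8, 9]])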
def forward(self, src_tokens, src_lengths):
    if LanguagePairDataset.LEFT_PAD_SOURCE:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            src_lengths,
            self.padding_idx,
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)
    embed_dim = x.size(2)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    h0 = Variable(x.data.new(self.num_layers, bsz, embed_dim).zero_())
    c0 = Variable(x.data.new(self.num_layers, bsz, embed_dim).zero_())
    packed_outs, (final_hiddens, final_cells) = self.lstm(
        packed_x,
        (h0, c0),
    )

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(packed_outs, padding_value=0.)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, embed_dim]

    return x, final_hiddens, final_cells
def p_choose(
    self, query, key, key_padding_mask=None, attn_mask=None, incremental_state=None
):
    """
    query: bsz, tgt_len
    key: bsz, src_len
    key_padding_mask: bsz, src_len
    """
    src_len, bsz, _ = key.size()
    tgt_len, bsz, _ = query.size()
    p_choose = query.new_ones(bsz, tgt_len, src_len)
    p_choose = torch.tril(p_choose, diagonal=self.waitk_lagging - 1)
    p_choose = torch.triu(p_choose, diagonal=self.waitk_lagging - 1)

    if key_padding_mask is not None and key_padding_mask[:, 0].eq(1).any():
        # Left pad source
        # add -1 to the end
        p_choose = p_choose.masked_fill(
            key_padding_mask.float().flip(1).unsqueeze(1).bool(), -1
        )
        p_choose = convert_padding_direction(
            p_choose.view(-1, src_len).long(), padding_idx=-1, right_to_left=True
        )
        p_choose = p_choose.view(bsz, tgt_len, src_len).type_as(query)

        # remove -1
        p_choose[p_choose.eq(-1)] = 0

    # Extend to each head
    p_choose = (
        p_choose.contiguous()
        .unsqueeze(1)
        .expand(-1, self.num_heads, -1, -1)
        .contiguous()
        .view(-1, tgt_len, src_len)
    )

    return p_choose
def forward(self, src_tokens, src_lengths):
    src_tokens1, src_tokens2 = src_tokens
    src_lengths1, src_lengths2 = src_lengths

    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens1 = utils.convert_padding_direction(
            src_tokens1,
            self.padding_idx_1,
            left_to_right=True,
        )
        # src_tokens2 = utils.convert_padding_direction(
        #     src_tokens2,
        #     self.padding_idx_2,
        #     left_to_right=True,
        # )

    bsz1, seqlen1 = src_tokens1.size()
    # bsz2, seqlen2 = src_tokens2.size()

    # embed tokens
    x1 = self.embed_tokens_1(src_tokens1)
    x1 = F.dropout(x1, p=self.dropout_in, training=self.training)
    # x2 = self.embed_tokens_2(src_tokens2)
    # x2 = F.dropout(x2, p=self.dropout_in, training=self.training)
    fconv_dict = self.fconv2(src_tokens2, src_lengths2)
    x2 = fconv_dict["encoder_out"][0]

    # B x T x C -> T x B x C
    x1 = x1.transpose(0, 1)
    x2 = x2.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x1 = nn.utils.rnn.pack_padded_sequence(
        x1, src_lengths1.data.tolist())
    # packed_x2 = nn.utils.rnn.pack_padded_sequence(x2, src_lengths2.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size1 = 2 * self.num_layers, bsz1, self.hidden_size
        # state_size2 = 2 * self.num_layers, bsz2, self.hidden_size
    else:
        state_size1 = self.num_layers, bsz1, self.hidden_size
        # state_size2 = self.num_layers, bsz2, self.hidden_size
    h01 = x1.data.new(*state_size1).zero_()
    c01 = x1.data.new(*state_size1).zero_()
    packed_outs1, (final_hiddens1, final_cells1) = self.lstm1(packed_x1, (h01, c01))
    # h02 = x2.data.new(*state_size2).zero_()
    # c02 = x2.data.new(*state_size2).zero_()
    # packed_outs2, (final_hiddens2, final_cells2) = self.lstm2(packed_x2, (h02, c02))

    # unpack outputs and apply dropout
    x1, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs1, padding_value=self.padding_value)
    x1 = F.dropout(x1, p=self.dropout_out, training=self.training)
    assert list(x1.size()) == [seqlen1, bsz1, self.output_units]
    # x2, _ = nn.utils.rnn.pad_packed_sequence(packed_outs2, padding_value=self.padding_value)
    # x2 = F.dropout(x2, p=self.dropout_out, training=self.training)
    # assert list(x2.size()) == [seqlen2, bsz2, self.output_units]

    if self.bidirectional:

        def combine_bidir_1(outs):
            return outs.view(self.num_layers, 2, bsz1, -1).transpose(1, 2).contiguous().view(
                self.num_layers, bsz1, -1)

        # def combine_bidir_2(outs):
        #     return outs.view(self.num_layers, 2, bsz2, -1).transpose(1, 2).contiguous().view(self.num_layers, bsz2, -1)

        final_hiddens_1 = combine_bidir_1(final_hiddens1)
        final_cells_1 = combine_bidir_1(final_cells1)
        # final_hiddens_2 = combine_bidir_2(final_hiddens2)
        # final_cells_2 = combine_bidir_2(final_cells2)

    encoder_padding_mask_1 = src_tokens1.eq(self.padding_idx_1).t()
    encoder_padding_mask_2 = src_tokens2.eq(self.padding_idx_2).t()

    x = torch.cat([x1, x2])
    encoder_padding_mask = torch.cat(
        [encoder_padding_mask_1, encoder_padding_mask_2])

    # HACK: pass hidden state of source 1 (title) to decoder
    return {
        'encoder_out': (x, final_hiddens_1, final_cells_1),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None,
        'segments': [x1.shape[0]]
    }
def forward(self, src_tokens, src_lengths):
    if self.left_pad and not self.sde:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    if self.sde:
        bsz = len(src_tokens)
    else:
        bsz, seqlen = src_tokens.size()
        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    if self.sde:
        seqlen = x.size(0)
        encoder_padding_mask = []
        for s in src_tokens:
            encoder_padding_mask.append(
                [0 for _ in range(len(s))] + [1 for _ in range(seqlen - len(s))])
        encoder_padding_mask = torch.tensor(encoder_padding_mask,
                                            device=x.device).byte().t()

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    # assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:

        def combine_bidir(outs):
            out = outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous()
            return out.view(self.num_layers, bsz, -1)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)

    return {
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
    }
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(src_tokens,
                                                     self.padding_idx,
                                                     left_to_right=True)
    if self.word_dropout_module is not None:
        src_tokens = self.word_dropout_module(src_tokens)

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # Generate packed seq to deal with varying source seq length
    packed_input, batch_sizes = pack_padded_sequence(x, src_lengths)
    final_hiddens, final_cells = [], []
    next_hiddens = []
    for i, rnn_layer in enumerate(self.layers):
        current_hidden_size = (self.hidden_dim // 2
                               if rnn_layer.is_bidirectional else self.hidden_dim)
        if self.cell_type in ["lstm", "milstm", "layer_norm_lstm"]:
            prev_hidden = (
                x.new(bsz, current_hidden_size).zero_(),
                x.new(bsz, current_hidden_size).zero_(),
            )
        else:
            raise Exception(f"{self.cell_type} not implemented")

        hidden, current_output = rnn_layer.forward(packed_input, prev_hidden,
                                                   batch_sizes)
        next_hiddens.append(hidden)
        prev_hidden = next_hiddens[-1]

        if self.dropout_out != 0:
            current_output = F.dropout(current_output,
                                       p=self.dropout_out,
                                       training=self.training)

        if self.residual_level is not None and i >= self.residual_level:
            packed_input = packed_input.clone() + current_output
        else:
            packed_input = current_output

    final_hiddens, final_cells = zip(*next_hiddens)
    # Reshape to [num_layer, batch_size, hidden_dim]
    final_hiddens = torch.cat(final_hiddens, dim=0).view(
        self.num_layers, *final_hiddens[0].size())
    final_cells = torch.cat(final_cells, dim=0).view(
        self.num_layers, *final_cells[0].size())

    # [max_seqlen, batch_size, hidden_dim]
    unpacked_output, _ = pad_packed_sequence(
        PackedSequence(packed_input, batch_sizes),
        padding_value=self.padding_value)

    return (unpacked_output, final_hiddens, final_cells, src_lengths, src_tokens)
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(src_tokens,
                                                     self.padding_idx,
                                                     left_to_right=True)

    # If we're generating adversarial examples we need to keep track of
    # some internal variables
    self.tracker.reset()

    if self.word_dropout_module is not None:
        src_tokens = self.word_dropout_module(src_tokens)

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    # Track token embeddings
    self.tracker.track(x, "token_embeddings", retain_grad=self.track_gradients)

    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # Allows compatibility with Caffe2 inputs for tracing (int32)
    # as well as the current format of Fairseq-Py inputs (int64)
    if src_lengths.dtype is torch.int64:
        src_lengths = src_lengths.int()

    # Generate packed seq to deal with varying source seq length
    # packed_input is of type PackedSequence, which consists of:
    # element [0]: a tensor, the packed data, and
    # element [1]: a list of integers, the batch size for each step
    packed_input = pack_padded_sequence(x, src_lengths)

    final_hiddens, final_cells = [], []
    for i, rnn_layer in enumerate(self.layers):
        if self.bidirectional and i == 0:
            h0 = x.new(2, bsz, self.hidden_dim // 2).zero_()
            c0 = x.new(2, bsz, self.hidden_dim // 2).zero_()
        else:
            h0 = x.new(1, bsz, self.hidden_dim).zero_()
            c0 = x.new(1, bsz, self.hidden_dim).zero_()

        # apply LSTM along entire sequence
        current_output, (h_last, c_last) = rnn_layer(packed_input, (h0, c0))

        # final state shapes: (bsz, hidden_dim)
        if self.bidirectional and i == 0:
            # concatenate last states for forward and backward LSTM
            h_last = torch.cat((h_last[0, :, :], h_last[1, :, :]), dim=1)
            c_last = torch.cat((c_last[0, :, :], c_last[1, :, :]), dim=1)
        else:
            h_last = h_last.squeeze(dim=0)
            c_last = c_last.squeeze(dim=0)

        final_hiddens.append(h_last)
        final_cells.append(c_last)

        if self.residual_level is not None and i >= self.residual_level:
            packed_input[0] = packed_input.clone()[0] + current_output[0]
        else:
            packed_input = current_output

    # Reshape to [num_layer, batch_size, hidden_dim]
    final_hiddens = torch.cat(final_hiddens, dim=0).view(
        self.num_layers, *final_hiddens[0].size())
    final_cells = torch.cat(final_cells, dim=0).view(
        self.num_layers, *final_cells[0].size())

    # [max_seqlen, batch_size, hidden_dim]
    unpacked_output, _ = pad_packed_sequence(
        packed_input, padding_value=self.padding_value)

    return (unpacked_output, final_hiddens, final_cells, src_lengths, src_tokens)
def forward(self, src_tokens, src_lengths: Tensor):
    if self.left_pad:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            torch.zeros_like(src_tokens).fill_(self.padding_idx),
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # MY MODIFICATIONS (start)
    x = []
    lengths = []
    toks = []
    for line in src_tokens:
        records = []
        words = self.dictionary.string(line).split()
        couples = list(map(' '.join, zip(words[0::2], words[1::2])))
        for couple in couples:
            encoded_feature = self.dictionary.encode_line(couple).tolist()
            records.append(encoded_feature[0:-1])
        lengths.append(len(records))
        toks.append([0] * len(records))
        x.append(records)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    x = torch.tensor(x, dtype=torch.float32, device=torch.device(device))
    src_tokens = torch.tensor(toks, dtype=torch.int32, device=torch.device(device))
    src_lengths = torch.tensor(lengths, dtype=torch.int32, device=torch.device(device))
    seqlen = src_lengths[0]
    # MY MODIFICATIONS (end)

    # embed tokens
    # x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data)

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_idx * 1.0)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:
        final_hiddens = self.combine_bidir(final_hiddens, bsz)
        final_cells = self.combine_bidir(final_cells, bsz)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    return tuple((
        x,  # seq_len x batch x hidden
        final_hiddens,  # num_layers x batch x num_directions*hidden
        final_cells,  # num_layers x batch x num_directions*hidden
        encoder_padding_mask,  # seq_len x batch
    ))
def forward(self, src_tokens=None, src_lengths=None, token_embeds=None):
    if self.left_pad:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    if token_embeds is None:
        bsz, seqlen = src_tokens.size()
        # embed tokens
        x = self.embed_tokens(src_tokens)
    else:
        x = token_embeds
        bsz, seqlen, embed_dim = token_embeds.shape
        assert embed_dim == self.embed_dim

    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.cpu(),
                                                 enforce_sorted=False)

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:

        def combine_bidir(outs):
            out = (outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous())
            return out.view(self.num_layers, bsz, -1)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)

    if src_tokens is not None:
        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()
    else:
        encoder_padding_mask = None

    return {
        "encoder_out": (x, final_hiddens, final_cells),
        "encoder_padding_mask": (encoder_padding_mask
                                 if encoder_padding_mask is not None
                                 and encoder_padding_mask.any() else None),
    }
def forward(
    self,
    src_tokens: Tensor,
    src_lengths: Tensor,
    enforce_sorted: bool = False,
):
    """
    Args:
        src_tokens (LongTensor): tokens in the source language of
            shape `(batch, src_len)`
        src_lengths (LongTensor): lengths of each source sentence of
            shape `(batch)`
        enforce_sorted (bool, optional): if True, `src_tokens` is
            expected to contain sequences sorted by length in a
            decreasing order. If False, this condition is not
            required. Default: False.
    """
    if self.left_pad_source:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            torch.zeros_like(src_tokens).fill_(self.padding_idx),
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = self.dropout_in_module(x)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(
        x, src_lengths.cpu(), enforce_sorted=enforce_sorted, batch_first=True
    )

    packed_outs, hidden = self.hidden(packed_x)

    outputs, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_idx * 1.0, batch_first=True
    )
    if self.bidirectional:
        fwd_final, bwd_final = outputs.view(
            bsz, max(src_lengths), self.hidden_size, 2).permute(3, 0, 1, 2)
        outputs = torch.cat((fwd_final.unsqueeze(-1), bwd_final.unsqueeze(-1)), -1)
        outputs = self.bidir_dense(outputs).squeeze(-1)
    outputs = self.dropout_out_module(outputs)

    if self.rnn_type == "lstm":
        final_hiddens = self.reshape_state(hidden[0], bsz)
        final_cells = self.reshape_state(hidden[1], bsz)
    else:
        final_hiddens, final_cells = self.reshape_state(hidden, bsz), None

    assert list(outputs.size()) == [bsz, seqlen, self.output_units]

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    return tuple(
        (
            outputs,  # batch x seq_len x hidden
            final_hiddens,  # num_layers x batch x num_directions*hidden
            final_cells,  # num_layers x batch x num_directions*hidden
            encoder_padding_mask,  # seq_len x batch
        )
    )
def forward(self, src_tokens, src_lengths):
    if LanguagePairDataset.LEFT_PAD_SOURCE:
        # convert left-padding to right-padding
        src_tokens.data = utils.convert_padding_direction(
            src_tokens.data,
            src_lengths.data,
            self.padding_idx,
            left_to_right=True)
    if self.word_dropout_module is not None:
        src_tokens.data = self.word_dropout_module(src_tokens.data)

    if self.char_rnn_params is not None:
        # x.shape: (max_num_words, batch_size, word_dim)
        x, src_lengths = self.char_rnn_encoder(src_tokens, src_lengths)
        seqlen, bsz, _ = x.size()

        # temporarily sort in descending word-length order
        src_lengths, word_len_order = torch.sort(src_lengths, descending=True)
        x = x[:, word_len_order, :]
        _, inverted_word_len_order = torch.sort(word_len_order)
    else:
        bsz, seqlen = src_tokens.size()
        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

    # Allows compatibility with Caffe2 inputs for tracing (int32)
    # as well as the current format of Fairseq-Py inputs (int64)
    if src_lengths.dtype is torch.int64:
        src_lengths = src_lengths.int()

    # Generate packed seq to deal with varying source seq length
    # packed_input is of type PackedSequence, which consists of:
    # element [0]: a tensor, the packed data, and
    # element [1]: a list of integers, the batch size for each step
    packed_input = pack_padded_sequence(x, src_lengths)

    final_hiddens, final_cells = [], []
    for i, rnn_layer in enumerate(self.layers):
        if self.bidirectional and i == 0:
            h0 = x.data.new(2, bsz, self.hidden_dim // 2).zero_()
            c0 = x.data.new(2, bsz, self.hidden_dim // 2).zero_()
        else:
            h0 = x.data.new(1, bsz, self.hidden_dim).zero_()
            c0 = x.data.new(1, bsz, self.hidden_dim).zero_()

        # apply LSTM along entire sequence
        current_output, (h_last, c_last) = rnn_layer(packed_input, (h0, c0))

        # final state shapes: (bsz, hidden_dim)
        if self.bidirectional and i == 0:
            # concatenate last states for forward and backward LSTM
            h_last = torch.cat((h_last[0, :, :], h_last[1, :, :]), dim=1)
            c_last = torch.cat((c_last[0, :, :], c_last[1, :, :]), dim=1)
        else:
            h_last = h_last.squeeze(dim=0)
            c_last = c_last.squeeze(dim=0)

        final_hiddens.append(h_last)
        final_cells.append(c_last)

        if self.residual_level is not None and i >= self.residual_level:
            packed_input[0] = packed_input.clone()[0] + current_output[0]
        else:
            packed_input = current_output

    # Reshape to [num_layer, batch_size, hidden_dim]
    final_hiddens = torch.cat(final_hiddens, dim=0).view(
        self.num_layers, *final_hiddens[0].size())
    final_cells = torch.cat(final_cells, dim=0).view(
        self.num_layers, *final_cells[0].size())

    # [max_seqlen, batch_size, hidden_dim]
    padding_value = -np.inf if self.add_encoder_output_as_decoder_input else 0
    unpacked_output, _ = pad_packed_sequence(packed_input, padding_value=padding_value)

    if self.char_rnn_params is not None:
        unpacked_output = unpacked_output[:, inverted_word_len_order, :]
        final_hiddens = final_hiddens[:, inverted_word_len_order, :]
        final_cells = final_cells[:, inverted_word_len_order, :]
        src_lengths = src_lengths[inverted_word_len_order]
        src_tokens = src_tokens[inverted_word_len_order, :]

    return (unpacked_output, final_hiddens, final_cells, src_lengths, src_tokens)
def forward(self, tokens, lengths=None, precomputed_embedded=None, **kwargs):
    bsz, seqlen = tokens.size()
    if self.left_pad:
        # convert left-padding to right-padding
        tokens = utils.convert_padding_direction(
            tokens,
            self.padding_idx,
            left_to_right=True,
        )
    if lengths is None:
        lengths = (tokens != self.padding_idx).sum(1)
    if precomputed_embedded is None:
        x = self.embed_tokens(tokens)
    else:
        x = precomputed_embedded
    x = F.dropout(x, p=self.dropout_in, training=self.training)
    if self.fc_in:
        x = self.fc_in(x)

    # sorting sequences by len otherwise pack_padded_sequence will complain
    lengths_sorted, perm_index = lengths.sort(0, descending=True)
    if (lengths_sorted != lengths).sum():
        needs_perm = True
        x = x[perm_index]
        lengths = lengths_sorted
    else:
        needs_perm = False

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    packed_x = torch.nn.utils.rnn.pack_padded_sequence(
        x, lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.embed_dim
    else:
        state_size = self.num_layers, bsz, self.embed_dim
    h0 = x.data.new(*state_size).zero_()
    c0 = x.data.new(*state_size).zero_()
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = torch.nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    # assert list(x.size()) == [seqlen, bsz, self.output_units]

    # T x B x C -> B x T x C
    x = x.transpose(0, 1)

    # restoring original order
    if needs_perm:
        odx = perm_index.view(-1, 1).unsqueeze(1).expand_as(x)
        x = x.gather(0, odx)

    if self.bidirectional:

        def combine_bidir(outs):
            return outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous().view(
                self.num_layers, bsz, -1)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)
        x = x.view(x.size(0), x.size(1), 2, -1).sum(2)

    if self.fc_out1 is not None:
        x = self.fc_out1(x)
    if self.adaptive_softmax is None and self.fc_out2 is not None:
        x = self.fc_out2(x)

    return x, {'hidden_states': (final_hiddens, final_cells)}
def forward( self, src_tokens: Tensor, src_lengths: Tensor, enforce_sorted: bool = True, ): """ Args: src_tokens (LongTensor): tokens in the source language of shape `(batch, src_len)` src_lengths (LongTensor): lengths of each source sentence of shape `(batch)` enforce_sorted (bool, optional): if True, `src_tokens` is expected to contain sequences sorted by length in a decreasing order. If False, this condition is not required. Default: True. """ if self.left_pad: # nn.utils.rnn.pack_padded_sequence requires right-padding; # convert left-padding to right-padding src_tokens = utils.convert_padding_direction( src_tokens, torch.zeros_like(src_tokens).fill_(self.padding_idx), left_to_right=True, ) bsz, seqlen = src_tokens.size() # embed tokens x = self.embed_tokens(src_tokens) x = F.dropout(x, p=self.dropout_in, training=self.training) # B x T x C -> T x B x C x = x.transpose(0, 1) # pack embedded source tokens into a PackedSequence packed_x = nn.utils.rnn.pack_padded_sequence( x, src_lengths.data, enforce_sorted=enforce_sorted) # apply LSTM if self.bidirectional: state_size = 2 * self.num_layers, bsz, self.hidden_size else: state_size = self.num_layers, bsz, self.hidden_size h0 = x.new_zeros(*state_size) c0 = x.new_zeros(*state_size) packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0)) # unpack outputs and apply dropout x, _ = nn.utils.rnn.pad_packed_sequence( packed_outs, padding_value=self.padding_idx * 1.0) x = F.dropout(x, p=self.dropout_out, training=self.training) assert list(x.size()) == [seqlen, bsz, self.output_units] if self.bidirectional: final_hiddens = self.combine_bidir(final_hiddens, bsz) final_cells = self.combine_bidir(final_cells, bsz) encoder_padding_mask = src_tokens.eq(self.padding_idx).t() return tuple(( x, # seq_len x batch x hidden final_hiddens, # num_layers x batch x num_directions*hidden final_cells, # num_layers x batch x num_directions*hidden encoder_padding_mask, # seq_len x batch ))
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)

    # bert embedding
    segments_tensors = torch.zeros_like(src_tokens).to(src_tokens.device)
    # self.bert.eval()
    # with torch.no_grad():
    encoded_layers, _ = self.bert(
        src_tokens, segments_tensors)  # (bsz, length, dimension)
    x = torch.cat((x, encoded_layers[self.layer]), 2)

    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    c0 = x.new_zeros(*state_size)
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:

        def combine_bidir(outs):
            out = outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous()
            return out.view(self.num_layers, bsz, -1)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    return {
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
    }
def forward(self, src_tokens, src_lengths, word_tokens=None, bert_repre=None, **kwargs):
    words = None
    chars = None
    if word_tokens is not None and self.embed_words is not None:
        words = self.embed_words(word_tokens).squeeze().detach()
    if self.char_embed is not None:
        chars = self.char_embed(word_tokens).squeeze()

    if self.left_pad:
        # nn.utils.rnn.pack_padded_sequence requires right-padding;
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(src_tokens,
                                                     self.padding_idx,
                                                     left_to_right=True)

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist(),
                                                 enforce_sorted=False)

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.new_zeros(*state_size)
    if self.rnn_type == "lstm":
        c0 = x.new_zeros(*state_size)
        packed_outs, _ = self.rnn(packed_x, (h0, c0))
    else:
        packed_outs, _ = self.rnn(packed_x, h0)

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    if word_tokens is not None:
        encoder_summary = self.summary_network(x, words, encoder_padding_mask)
    else:
        encoder_summary = None

    return {
        "encoder_out": (x, ),
        "encoder_padding_mask": encoder_padding_mask if encoder_padding_mask.any() else None,
        "encoder_summary": encoder_summary,
        "words": words,
        "chars": chars,
        'bert_repre': bert_repre
    }
def forward(self, src_tokens, src_lengths):
    if self.left_pad:
        # convert left-padding to right-padding
        src_tokens = utils.convert_padding_direction(
            src_tokens,
            self.padding_idx,
            left_to_right=True,
        )

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # pack embedded source tokens into a PackedSequence
    packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

    # apply LSTM
    if self.bidirectional:
        state_size = 2 * self.num_layers, bsz, self.hidden_size
    else:
        state_size = self.num_layers, bsz, self.hidden_size
    h0 = x.data.new(*state_size).zero_()
    c0 = x.data.new(*state_size).zero_()
    packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

    # unpack outputs and apply dropout
    x, _ = nn.utils.rnn.pad_packed_sequence(
        packed_outs, padding_value=self.padding_value)
    x = F.dropout(x, p=self.dropout_out, training=self.training)
    assert list(x.size()) == [seqlen, bsz, self.output_units]

    if self.bidirectional:

        def combine_bidir(outs):
            return torch.cat([
                torch.cat([outs[2 * i], outs[2 * i + 1]], dim=0).view(
                    1, bsz, self.output_units)
                for i in range(self.num_layers)
            ], dim=0)

        final_hiddens = combine_bidir(final_hiddens)
        final_cells = combine_bidir(final_cells)

    encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

    # Set padded outputs to -inf so they are not selected by max-pooling
    padding_mask = src_tokens.eq(self.padding_idx).t().unsqueeze(-1)
    if padding_mask.any():
        x = x.float().masked_fill_(padding_mask, float('-inf')).type_as(x)

    # Build the sentence embedding by max-pooling over the encoder outputs
    # (liwei) comment: should try self-attention here
    sentemb = x.max(dim=0)[0]

    return {
        'sentemb': sentemb,
        'encoder_out': (x, final_hiddens, final_cells),
        'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
    }
def forward( self, src_tokens: Tensor, src_lengths: Tensor, enforce_sorted: bool = True, ): """ Args: src_tokens (LongTensor): tokens in the source language of shape `(batch, src_len)` src_lengths (LongTensor): lengths of each source sentence of shape `(batch)` enforce_sorted (bool, optional): if True, `src_tokens` is expected to contain sequences sorted by length in a decreasing order. If False, this condition is not required. Default: True. """ if self.left_pad: # nn.utils.rnn.pack_padded_sequence requires right-padding; # convert left-padding to right-padding src_tokens = utils.convert_padding_direction( src_tokens, torch.zeros_like(src_tokens).fill_(self.padding_idx), left_to_right=True, ) bsz, seqlen = src_tokens.size() # embed tokens x = self.embed_tokens(src_tokens) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Xử lí cộng thông tin synset được mã hóa # Bước 5: duyệt qua tập các từ, lấy ra danh sách các synset của nó, mặc định chọn synset đầu tiên( improve) # Bước 6: Ánh xạ synset_id của mỗi từ ra index tương ứng. self.to(device) src_emb = [] # document: https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk for sentence in src_tokens: s = [self.dictionary[idx] for idx in sentence] s_pos = nltk.pos_tag(s) wrd_pos = [ self.wnl.lemmatize(w) + '\t' + map_treebankTags_to_wn(pos) for w, pos in s_pos ] emb_sentence = [] for w in wrd_pos: pos = w.split('\t')[1] if pos != 'None': try: synset_name = self.word_synset[w][0][ 1] # lấy synset id đầu tiên và ánh xạ ra synset_name # Ánh xạ từ synset_name ra cluster id cluster_name = self.synset_to_clusterID_per_pos[pos][ synset_name] except: cluster_name = 'None' cluster_id = self.cluster2idx_per_pos[pos][cluster_name] cluster_id = torch.tensor(cluster_id).to(device) emb_sentence.append( self.embed_cluster_per_pos[pos](cluster_id)) else: cluster_id = len(self.cluster2idx_per_pos['n']) - 1 cluster_id = torch.tensor(cluster_id).to(device) emb_sentence.append( self.embed_cluster_per_pos['n'](cluster_id)) src_emb.append(torch.stack(emb_sentence)) # Bước 7: lấy emb tương ứng của mỗi synset dựa vào embedding đã tạo trước đó. x_emb = torch.stack(src_emb).to(device) # Bước 8: cộng ma trận embedding này vào biến x theo kiểu concat vào. x = torch.cat((x, x_emb), 2) x = self.dropout_in_module(x) # B x T x C -> T x B x C x = x.transpose(0, 1) # pack embedded source tokens into a PackedSequence packed_x = nn.utils.rnn.pack_padded_sequence( x, src_lengths.cpu(), enforce_sorted=enforce_sorted) # apply LSTM if self.bidirectional: state_size = 2 * self.num_layers, bsz, self.hidden_size else: state_size = self.num_layers, bsz, self.hidden_size h0 = x.new_zeros(*state_size) c0 = x.new_zeros(*state_size) packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0)) # er # unpack outputs and apply dropout x, _ = nn.utils.rnn.pad_packed_sequence( packed_outs, padding_value=self.padding_idx * 1.0) x = self.dropout_out_module(x) assert list(x.size()) == [seqlen, bsz, self.output_units] if self.bidirectional: final_hiddens = self.combine_bidir(final_hiddens, bsz) final_cells = self.combine_bidir(final_cells, bsz) encoder_padding_mask = src_tokens.eq(self.padding_idx).t() return tuple(( x, # seq_len x batch x hidden final_hiddens, # num_layers x batch x num_directions*hidden final_cells, # num_layers x batch x num_directions*hidden encoder_padding_mask, # seq_len x batch ))
def forward(self, src_tokens, src_lengths):
    if LanguagePairDataset.LEFT_PAD_SOURCE:
        # convert left-padding to right-padding
        src_tokens.data = utils.convert_padding_direction(
            src_tokens.data,
            src_lengths.data,
            self.padding_idx,
            left_to_right=True,
        )
    if self.word_dropout_module is not None:
        src_tokens.data = self.word_dropout_module(src_tokens.data)

    bsz, seqlen = src_tokens.size()

    # embed tokens
    x = self.embed_tokens(src_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # Generate packed seq to deal with varying source seq length
    packed_input, batch_sizes = pack_padded_sequence(
        x,
        src_lengths,
    )
    final_hiddens, final_cells = [], []
    next_hiddens = []
    for i, rnn_layer in enumerate(self.layers):
        current_hidden_size = self.hidden_dim // 2 if \
            rnn_layer.is_bidirectional else self.hidden_dim

        if self.cell_type in ['lstm', 'milstm', 'layer_norm_lstm']:
            prev_hidden = (
                x.data.new(bsz, current_hidden_size).zero_(),
                x.data.new(bsz, current_hidden_size).zero_(),
            )
        else:
            raise Exception('{} not implemented'.format(self.cell_type))

        hidden, current_output = rnn_layer.forward(
            packed_input,
            prev_hidden,
            batch_sizes,
        )
        next_hiddens.append(hidden)
        prev_hidden = next_hiddens[-1]

        if self.dropout_out != 0:
            current_output = F.dropout(
                current_output,
                p=self.dropout_out,
                training=self.training,
            )

        if self.residual_level is not None and i >= self.residual_level:
            packed_input = packed_input.clone() + current_output
        else:
            packed_input = current_output

    final_hiddens, final_cells = zip(*next_hiddens)
    # Reshape to [num_layer, batch_size, hidden_dim]
    final_hiddens = torch.cat(
        final_hiddens,
        dim=0,
    ).view(self.num_layers, *final_hiddens[0].size())
    final_cells = torch.cat(
        final_cells,
        dim=0,
    ).view(self.num_layers, *final_cells[0].size())

    # [max_seqlen, batch_size, hidden_dim]
    padding_value = -np.inf if self.add_encoder_output_as_decoder_input else 0
    unpacked_output, _ = pad_packed_sequence(
        PackedSequence(packed_input, batch_sizes),
        padding_value=padding_value,
    )

    return (
        unpacked_output,
        final_hiddens,
        final_cells,
        src_lengths,
        src_tokens,
    )