Example #1
    def encode(
        self, source_batch
    ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor], torch.Tensor]:
        input_embedded = self.source_embedder(source_batch)

        enc_masks = source_batch.sum(dim=2) != 0
        lengths = enc_masks.sum(dim=1)

        input_embedded_sorted, new_lengths, rest_idxs, _ = sort_batch_by_length(
            input_embedded, lengths)

        # TODO: remove assertion
        assert torch.equal(input_embedded, input_embedded_sorted[rest_idxs])

        enc_hiddens, (last_hidden, last_cell) = self.encoder(
            pack_padded_sequence(input_embedded_sorted,
                                 new_lengths,
                                 batch_first=True))
        enc_hiddens, _ = pad_packed_sequence(enc_hiddens,
                                             padding_value=0,
                                             batch_first=True)

        # restore original ordering
        enc_hiddens = enc_hiddens[rest_idxs]
        last_hidden = last_hidden[:, rest_idxs, :]
        last_cell = last_cell[:, rest_idxs, :]

        init_decoder_hidden = self.h_projection(
            torch.cat([last_hidden[0], last_hidden[1]], dim=1))
        init_decoder_cell = self.c_projection(
            torch.cat([last_cell[0], last_cell[1]], dim=1))
        dec_init_state = (init_decoder_hidden, init_decoder_cell)
        return enc_hiddens, dec_init_state, enc_masks
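
A minimal, self-contained sketch of the sort -> pack -> run -> unpack -> restore round trip that encode() performs above. It assumes AllenNLP's allennlp.nn.util.sort_batch_by_length with the four-tuple return used throughout these examples; the module and tensor shapes here are illustrative only.

import torch
from torch.nn import LSTM
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from allennlp.nn.util import sort_batch_by_length

lstm = LSTM(input_size=4, hidden_size=6, bidirectional=True, batch_first=True)
inputs = torch.rand(3, 5, 4)            # (batch_size, max_len, embedding_dim)
lengths = torch.LongTensor([3, 5, 2])   # unpadded lengths, in original (unsorted) order

sorted_inputs, sorted_lengths, restoration_indices, _ = sort_batch_by_length(inputs, lengths)
packed = pack_padded_sequence(sorted_inputs, sorted_lengths.data.tolist(), batch_first=True)
outputs, (last_hidden, last_cell) = lstm(packed)
outputs, _ = pad_packed_sequence(outputs, batch_first=True)

# Undo the length-sorting: batch is dim 0 for the padded outputs, but dim 1 for the
# (num_layers * num_directions, batch_size, hidden_size) state tensors.
outputs = outputs.index_select(0, restoration_indices)
last_hidden = last_hidden.index_select(1, restoration_indices)
last_cell = last_cell.index_select(1, restoration_indices)
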
Example #2
    def test_forward_pulls_out_correct_tensor_with_unsorted_batches(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2VecWrapper(lstm)

        input_tensor = torch.rand([5, 7, 3])
        input_tensor[0, 3:, :] = 0
        input_tensor[1, 4:, :] = 0
        input_tensor[2, 2:, :] = 0
        input_tensor[3, 6:, :] = 0
        mask = torch.ones(5, 7)
        mask[0, 3:] = 0
        mask[1, 4:] = 0
        mask[2, 2:] = 0
        mask[3, 6:] = 0

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, _ = sort_batch_by_length(input_tensor,
                                                                                              sequence_lengths)
        packed_sequence = pack_padded_sequence(sorted_inputs,
                                               sorted_sequence_lengths.tolist(),
                                               batch_first=True)
        _, state = lstm(packed_sequence)
        # Transpose output state, extract the last forward and backward states and
        # reshape to be of dimension (batch_size, 2 * hidden_size).
        sorted_transposed_state = state[0].transpose(0, 1).index_select(0, restoration_indices)
        reshaped_state = sorted_transposed_state[:, -2:, :].contiguous()
        explicitly_concatenated_state = torch.cat([reshaped_state[:, 0, :].squeeze(1),
                                                   reshaped_state[:, 1, :].squeeze(1)], -1)
        encoder_output = encoder(input_tensor, mask)
        assert_almost_equal(encoder_output.data.numpy(), explicitly_concatenated_state.data.numpy())
Example #3
    def sort_and_run_forward(
            self,
            module: Callable[[PackedSequence, Optional[RnnState]],
                             Tuple[Union[PackedSequence, torch.Tensor],
                                   RnnState]],
            inputs: torch.Tensor,
            mask: torch.Tensor,
            hidden_states: Optional[RnnState] = None,
            reset_hidden_state=False):
        # First count how many sequences are empty.
        batch_size = mask.size(0)
        num_valid = torch.sum(mask[:, 0]).int().item()

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, sorting_indices =\
            sort_batch_by_length(inputs, sequence_lengths)

        # Now create a PackedSequence with only the non-empty, sorted sequences.
        packed_sequence_input = pack_padded_sequence(
            sorted_inputs[:num_valid, :, :],
            sorted_sequence_lengths[:num_valid].data.tolist(),
            batch_first=True)
        # Prepare the initial states.
        initial_states, hidden_states = self._get_initial_states(
            batch_size, num_valid, sorting_indices, hidden_states)

        if reset_hidden_state:
            initial_states = None

        # Actually call the module on the sorted PackedSequence.
        module_output, final_states = module(packed_sequence_input,
                                             initial_states)

        return module_output, final_states, restoration_indices, hidden_states
Example #4
    def test_sort_tensor_by_length(self):
        tensor = torch.rand([5, 7, 9])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 1:, :] = 0
        tensor[3, 5:, :] = 0

        tensor = Variable(tensor)
        sequence_lengths = Variable(torch.LongTensor([3, 4, 1, 5, 7]))
        sorted_tensor, sorted_lengths, reverse_indices, _ = util.sort_batch_by_length(
            tensor, sequence_lengths)

        # Test sorted indices are padded correctly.
        numpy.testing.assert_array_equal(sorted_tensor[1, 5:, :].data.numpy(),
                                         0.0)
        numpy.testing.assert_array_equal(sorted_tensor[2, 4:, :].data.numpy(),
                                         0.0)
        numpy.testing.assert_array_equal(sorted_tensor[3, 3:, :].data.numpy(),
                                         0.0)
        numpy.testing.assert_array_equal(sorted_tensor[4, 1:, :].data.numpy(),
                                         0.0)

        assert sorted_lengths.data.equal(torch.LongTensor([7, 5, 4, 3, 1]))

        # Test restoration indices correctly recover the original tensor.
        assert sorted_tensor.index_select(0, reverse_indices).data.equal(
            tensor.data)
Example #5
    def test_forward_pulls_out_correct_tensor_for_unsorted_batches(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        input_tensor = torch.rand([5, 7, 3])
        input_tensor[0, 3:, :] = 0
        input_tensor[1, 4:, :] = 0
        input_tensor[2, 2:, :] = 0
        input_tensor[3, 6:, :] = 0
        mask = torch.ones(5, 7)
        mask[0, 3:] = 0
        mask[1, 4:] = 0
        mask[2, 2:] = 0
        mask[3, 6:] = 0

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, _ = sort_batch_by_length(input_tensor,
                                                                                              sequence_lengths)
        packed_sequence = pack_padded_sequence(sorted_inputs,
                                               sorted_sequence_lengths.data.tolist(),
                                               batch_first=True)
        lstm_output, _ = lstm(packed_sequence)
        encoder_output = encoder(input_tensor, mask)
        lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
        assert_almost_equal(encoder_output.data.numpy(),
                            lstm_tensor.index_select(0, restoration_indices).data.numpy())
Example #6
    def setUp(self):
        super(TestEncoderBase, self).setUp()
        self.lstm = LSTM(bidirectional=True,
                         num_layers=3,
                         input_size=3,
                         hidden_size=7,
                         batch_first=True)
        self.encoder_base = _EncoderBase(stateful=True)

        tensor = Variable(torch.rand([5, 7, 3]))
        tensor[1, 6:, :] = 0
        tensor[3, 2:, :] = 0
        self.tensor = tensor
        mask = Variable(torch.ones(5, 7))
        mask[1, 6:] = 0
        mask[2, :] = 0  # <= completely masked
        mask[3, 2:] = 0
        mask[4, :] = 0  # <= completely masked
        self.mask = mask

        self.batch_size = 5
        self.num_valid = 3
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        _, _, restoration_indices, sorting_indices = sort_batch_by_length(
            tensor, sequence_lengths)
        self.sorting_indices = sorting_indices
        self.restoration_indices = restoration_indices
Example #7
    def forward(self, seq, seq_lens):
        if not self.batch_first:
            raise ConfigurationError(
                "Our encoder semantics assumes batch is always first!")

        non_zero_length_mask = seq_lens.ne(0).float()
        # make zero lengths into length=1
        seq_lens = seq_lens + seq_lens.eq(0).float()

        sorted_inputs, sorted_sequence_lengths, restoration_indices, sorting_indices =\
                    sort_batch_by_length(seq, seq_lens)

        packed_input = pack(sorted_inputs,
                            sorted_sequence_lengths.data.long().tolist(),
                            batch_first=True)
        outputs, final_states = super(ProperLSTM, self).forward(packed_input)

        unpacked_sequence, _ = unpack(outputs, batch_first=True)
        outputs = unpacked_sequence.index_select(0, restoration_indices)
        new_unsorted_states = [
            self.fix_hidden(state.index_select(1, restoration_indices))
            for state in final_states
        ]

        # To deal with zero length inputs
        outputs = outputs * non_zero_length_mask.view(-1, 1,
                                                      1).expand_as(outputs)
        new_unsorted_states[
            0] = new_unsorted_states[0] * non_zero_length_mask.view(
                1, -1, 1).expand_as(new_unsorted_states[0])
        new_unsorted_states[
            1] = new_unsorted_states[1] * non_zero_length_mask.view(
                1, -1, 1).expand_as(new_unsorted_states[1])

        return outputs, new_unsorted_states
Example #8
    def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
        augmented_lstm = AugmentedLstm(10, 11)
        pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)
        # Initialize all weights to be == 1.
        initializer = InitializerApplicator([(".*", lambda tensor: torch.nn.init.constant_(tensor, 1.))])
        initializer(augmented_lstm)
        initializer(pytorch_lstm)

        initial_state = torch.zeros([1, 5, 11])
        initial_memory = torch.zeros([1, 5, 11])

        # Use bigger numbers to avoid floating point instability.
        sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor * 5., self.sequence_lengths)
        lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)

        augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
        pytorch_output, pytorch_state = pytorch_lstm(lstm_input, (initial_state, initial_memory))
        pytorch_output_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
        augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

        numpy.testing.assert_array_almost_equal(pytorch_output_sequence.data.numpy(),
                                                augmented_output_sequence.data.numpy(), decimal=4)
        numpy.testing.assert_array_almost_equal(pytorch_state[0].data.numpy(),
                                                augmented_state[0].data.numpy(), decimal=4)
        numpy.testing.assert_array_almost_equal(pytorch_state[1].data.numpy(),
                                                augmented_state[1].data.numpy(), decimal=4)
Example #9
    def test_forward_pulls_out_correct_tensor_with_unsorted_batches(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2VecWrapper(lstm)

        tensor = torch.rand([5, 7, 3])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, 6:, :] = 0
        mask = torch.ones(5, 7)
        mask[0, 3:] = 0
        mask[1, 4:] = 0
        mask[2, 2:] = 0
        mask[3, 6:] = 0

        input_tensor = Variable(tensor)
        mask = Variable(mask)
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices = sort_batch_by_length(input_tensor,
                                                                                           sequence_lengths)
        packed_sequence = pack_padded_sequence(sorted_inputs,
                                               sorted_sequence_lengths.data.tolist(),
                                               batch_first=True)
        _, state = lstm(packed_sequence)
        # Transpose output state, extract the last forward and backward states and
        # reshape to be of dimension (batch_size, 2 * hidden_size).
        sorted_transposed_state = state[0].transpose(0, 1).index_select(0, restoration_indices)
        reshaped_state = sorted_transposed_state[:, -2:, :].contiguous()
        explicitly_concatenated_state = torch.cat([reshaped_state[:, 0, :].squeeze(1),
                                                   reshaped_state[:, 1, :].squeeze(1)], -1)
        encoder_output = encoder(input_tensor, mask)
        assert_almost_equal(encoder_output.data.numpy(), explicitly_concatenated_state.data.numpy())
Example #10
    def test_forward_pulls_out_correct_tensor_for_unsorted_batches(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        tensor = torch.rand([5, 7, 3])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, 6:, :] = 0
        mask = torch.ones(5, 7)
        mask[0, 3:] = 0
        mask[1, 4:] = 0
        mask[2, 2:] = 0
        mask[3, 6:] = 0

        input_tensor = Variable(tensor)
        mask = Variable(mask)
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, _ = sort_batch_by_length(input_tensor,
                                                                                              sequence_lengths)
        packed_sequence = pack_padded_sequence(sorted_inputs,
                                               sorted_sequence_lengths.data.tolist(),
                                               batch_first=True)
        lstm_output, _ = lstm(packed_sequence)
        encoder_output = encoder(input_tensor, mask)
        lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
        assert_almost_equal(encoder_output.data.numpy(),
                            lstm_tensor.index_select(0, restoration_indices).data.numpy())
Example #11
 def forward(self, inputs, lengths):
     # 1. run LSTM
     # apply dropout to the input
     # Shape of inputs: (batch_size, sequence_length, embedding_dim)
     embedded_input = self.dropout_on_input_to_LSTM(inputs)
     # Sort the embedded inputs by decreasing order of input length. [ this is done for batching ]
     # sorted_input shape: (batch_size, sequence_length, embedding_dim)
     (sorted_input, sorted_lengths, input_unsort_indices,
      _) = sort_batch_by_length(embedded_input, lengths)
     # Pack the sorted inputs with pack_padded_sequence.
     packed_input = pack_padded_sequence(sorted_input,
                                         sorted_lengths.data.tolist(),
                                         batch_first=True)
     # Run the input through the RNN.
     packed_sorted_output, _ = self.rnn(packed_input)
     # Unpack (pad) the input with pad_packed_sequence
     # Shape: (batch_size, sequence_length, hidden_size)
     sorted_output, _ = pad_packed_sequence(packed_sorted_output,
                                            batch_first=True)
     # Re-sort the packed sequence to restore the initial ordering
     # Shape: (batch_size, sequence_length, hidden_size)
     output = sorted_output[input_unsort_indices]
     # 2. run linear layer
     # apply dropout to input to the linear layer
     # (batch_size, sequence_length, hidden_size)
     input_encoding = self.dropout_on_input_to_linear_layer(output)
     # Run the RNN encoding of the input through the output projection
     # to get scores for each of the classes.
     # (batch_size, sequence_length, 2)
     unnormalized_output = self.output_projection(input_encoding)
     # Normalize with log softmax
     output_distribution = F.log_softmax(unnormalized_output, dim=-1)
     return output_distribution
Example #12
    def forward(
            self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor,
            mask: torch.Tensor,
            hidden_state: torch.Tensor = None) -> torch.Tensor:

        if mask is None:
            return self._module(inputs, hidden_state)[0]

        # In some circumstances you may have sequences of zero length.
        # ``pack_padded_sequence`` requires all sequence lengths to be > 0, so here we
        # adjust the ``mask`` so that every sequence has length at least 1. Then after
        # running the RNN we zero out the corresponding rows in the result.

        # First count how many sequences are empty.
        batch_size, total_sequence_length = mask.size()
        num_valid = torch.sum(mask[:, 0]).int().data[0]

        # Force every sequence to be length at least one. Need to `.clone()` the mask
        # to avoid a RuntimeError from shared storage.
        if num_valid < batch_size:
            mask = mask.clone()
            mask[:, 0] = 1

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices = sort_batch_by_length(
            inputs, sequence_lengths)
        packed_sequence_input = pack_padded_sequence(
            sorted_inputs,
            sorted_sequence_lengths.data.tolist(),
            batch_first=True)

        # Actually call the module on the sorted PackedSequence.
        packed_sequence_output, _ = self._module(packed_sequence_input,
                                                 hidden_state)
        unpacked_sequence_tensor, _ = pad_packed_sequence(
            packed_sequence_output, batch_first=True)

        # We sorted by length, so if there are invalid rows that need to be zeroed out
        # they will be at the end.
        if num_valid < batch_size:
            unpacked_sequence_tensor[num_valid:, :, :] = 0.

        # It's possible to need to pass sequences which are padded to longer than the
        # max length of the sequence to a Seq2SeqEncoder. However, packing and unpacking
        # the sequences mean that the returned tensor won't include these dimensions, because
        # the RNN did not need to process them. We add them back on in the form of zeros here.
        sequence_length_difference = total_sequence_length - unpacked_sequence_tensor.size(
            1)
        if sequence_length_difference > 0:
            zeros = unpacked_sequence_tensor.data.new(
                batch_size, sequence_length_difference,
                unpacked_sequence_tensor.size(-1)).fill_(0)
            zeros = torch.autograd.Variable(zeros)
            unpacked_sequence_tensor = torch.cat(
                [unpacked_sequence_tensor, zeros], 1)

        # Restore the original indices and return the sequence.
        return unpacked_sequence_tensor.index_select(0, restoration_indices)
Example #13
 def test_augmented_lstm_works_with_highway_connections(self):
     augmented_lstm = AugmentedLstm(10, 11, use_highway=True)
     sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(
         self.random_tensor, self.sequence_lengths)
     lstm_input = pack_padded_sequence(sorted_tensor,
                                       sorted_sequence.data.tolist(),
                                       batch_first=True)
     augmented_lstm(lstm_input)
Example #14
    def test_variable_length_sequences_run_backward_return_correctly_padded_outputs(self):
        sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor, self.sequence_lengths)
        tensor = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)
        lstm = AugmentedLstm(10, 11, go_forward=False)
        output, _ = lstm(tensor)
        output_sequence, _ = pad_packed_sequence(output, batch_first=True)

        numpy.testing.assert_array_equal(output_sequence.data[1, 6:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(output_sequence.data[2, 4:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(output_sequence.data[3, 3:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(output_sequence.data[4, 2:, :].numpy(), 0.0)
Example #15
 def test_biaugmented_lstm(self):
     for bidirectional in [True, False]:
         bi_augmented_lstm = BiAugmentedLstm(
             10, 11, 3, recurrent_dropout_probability=0.1, bidirectional=bidirectional
         )
         sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(
             self.random_tensor, self.sequence_lengths
         )
         lstm_input = pack_padded_sequence(
             sorted_tensor, sorted_sequence.data.tolist(), batch_first=True
         )
         bi_augmented_lstm(lstm_input)
Example #16
    def document_representation(self, document_tensor):
        """
        Compute the sentence representation, D.
        :param document_tensor:
            Stacked tensors of the sentences given throughout the document.
            Assumes document_tensor is wrapped with Variable.
        :return: D: The average pooled representation of the document.
        """

        # 1. Pad variable lengths sentences to prevent the model from learning
        #    from the padding.

        # Collect lengths for sorting and padding.
        # Shape: (batch_size,)
        document_mask = (document_tensor != 0)
        sentence_lengths = Variable(document_mask.sum(dim=1))

        # Shape: (batch_size x max sentence length x embedding size)
        embedded_sentences = self.embedding(Variable(document_tensor))
        sorted_embeddings, sorted_lengths, restore_index, permute_index \
            = sort_batch_by_length(embedded_sentences, sentence_lengths)

        sorted_lengths = list(sorted_lengths.data.long())

        packed_sentences = nn.utils.rnn.pack_padded_sequence(sorted_embeddings,
                                                             sorted_lengths,
                                                             batch_first=True)

        # 2. Encode the sentences at the word level.
        # Shape: (batch_size x max sentence length x bidirectional hidden)
        #        (batch_size x bidirectional hidden)
        sentences_out, sentences_hidden = self.word_rnn(packed_sentences)

        padded_sentences, padded_sentences_lengths = \
            nn.utils.rnn.pad_packed_sequence(sentences_out, batch_first=True)

        # Restore order for predictions.
        encoded_sentences_restored = padded_sentences[restore_index]

        # 3. Pool along the length dimension.
        sentence_representations = torch.mean(encoded_sentences_restored, 1)

        # 4. Encode the document at the sentence level.
        doc_out, doc_hiddens = self.sentence_rnn(
            sentence_representations.unsqueeze(0))

        # 5. Average the sentence representations and push through affine.
        pooled_doc_out = torch.mean(doc_out.squeeze(), 0)
        doc_rep = self.encode_document(pooled_doc_out)

        return sentence_representations, doc_rep
Example #17
    def forward(self, inputs, lengths):
        # 1. run LSTM
        # apply dropout to the input
        # Shape of inputs: (batch_size, sequence_length, embedding_dim)
        embedded_input = self.dropout_on_input_to_LSTM(inputs)
        # Sort the embedded inputs by decreasing order of input length.
        # sorted_input shape: (batch_size, sequence_length, embedding_dim)
        (sorted_input, sorted_lengths, input_unsort_indices,
         _) = sort_batch_by_length(embedded_input, lengths)
        # Pack the sorted inputs with pack_padded_sequence.
        packed_input = pack_padded_sequence(sorted_input,
                                            sorted_lengths.data.tolist(),
                                            batch_first=True)
        # Run the input through the RNN.
        packed_sorted_output, _ = self.rnn(packed_input)
        # Unpack (pad) the input with pad_packed_sequence
        # Shape: (batch_size, sequence_length, hidden_size)
        sorted_output, _ = pad_packed_sequence(packed_sorted_output,
                                               batch_first=True)
        # Re-sort the packed sequence to restore the initial ordering
        # Shape: (batch_size, sequence_length, hidden_size)
        output = sorted_output[input_unsort_indices]

        # 2. use attention
        # Shape: (batch_size, sequence_length, 1)
        # Shape: (batch_size, sequence_length) after squeeze
        attention_logits = self.attention_weights(output).squeeze(dim=-1)
        mask_attention_logits = (attention_logits != 0).type(
            torch.cuda.FloatTensor if inputs.is_cuda else torch.FloatTensor)
        # Shape: (batch_size, sequence_length)
        softmax_attention_logits = masked_softmax(attention_logits,
                                                  mask_attention_logits)
        # Shape: (batch_size, 1, sequence_length)
        softmax_attention_logits = softmax_attention_logits.unsqueeze(dim=1)
        # Shape of input_encoding: (batch_size, 1, hidden_size )
        #    output: (batch_size, sequence_length, hidden_size)
        #    softmax_attention_logits: (batch_size, 1, sequence_length)
        input_encoding = torch.bmm(softmax_attention_logits, output)
        # Shape: (batch_size, hidden_size)
        input_encoding = input_encoding.squeeze(dim=1)

        # 3. run linear layer
        # apply dropout to input to the linear layer
        input_encoding = self.dropout_on_input_to_linear_layer(input_encoding)
        # Run the RNN encoding of the input through the output projection
        # to get scores for each of the classes.
        unnormalized_output = self.output_projection(input_encoding)
        # Normalize with log softmax
        output_distribution = F.log_softmax(unnormalized_output, dim=-1)
        return output_distribution
Example #18
    def forward(self, frames: torch.FloatTensor, frame_lens: torch.LongTensor):
        """
        frames: (batch_size, seq_len, num_lmks, lmk_dim)
        frame_lens: (batch_size, )
        """
        if self.frame_processing == 'flatten':
            frames = frames.reshape(frames.shape[0], frames.shape[1], -1)

        # Reverse sorts the batch by unpadded seq_len.
        (sorted_frames, sorted_frame_lens, restoration_indices,
         _) = sort_batch_by_length(frames, frame_lens)

        # Returns a PackedSequence.
        packed_frames = nn.utils.rnn.pack_padded_sequence(
            sorted_frames,
            sorted_frame_lens.data.cpu().numpy()
            if sorted_frame_lens.is_cuda else sorted_frame_lens.data.numpy(),
            batch_first=True)

        # Encoder: feed frames to the model, output hidden states.
        # final_state: (num_layers * num_dir, batch_size, hidden_size) (*2 if LSTM)
        packed_hidden_states, final_state = self.rnn(packed_frames)

        # Unpack encoding, the hidden states, a Tensor.
        # (batch_size, seq_len, num_dir * hidden_size)
        hidden_states, _ = nn.utils.rnn.pad_packed_sequence(
            packed_hidden_states, batch_first=True)

        # (num_layers, batch_size, hidden_size * num_dir) (*2 if LSTM)
        if self.bidirectional:
            final_state = self._cat_directions(final_state)

        hidden_states = hidden_states.index_select(0, restoration_indices)
        if isinstance(final_state, tuple):  # LSTM
            final_state = (final_state[0].index_select(1, restoration_indices),
                           final_state[1].index_select(1, restoration_indices))
        else:
            final_state = final_state.index_select(1, restoration_indices)

        if self.enable_ctc:
            output_logits = self.output_proj(hidden_states)
            output_log_probs = masked_log_softmax(output_logits,
                                                  self.output_mask.expand(
                                                      output_logits.shape[0],
                                                      self.adj_vocab_size),
                                                  dim=-1)
            return output_log_probs, hidden_states, final_state
        else:
            return hidden_states, final_state
Example #19
    def forward(
            self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor,
            mask: torch.Tensor,
            hidden_state: torch.Tensor = None) -> torch.Tensor:

        if mask is None:
            # If a mask isn't passed, there is no padding in the batch of instances, so we can just
            # return the last sequence output as the state.  This doesn't work in the case of
            # variable length sequences, as the last state for each element of the batch won't be
            # at the end of the max sequence length, so we have to use the state of the RNN below.
            return self._module(inputs, hidden_state)[0][:, -1, :]
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices = sort_batch_by_length(
            inputs, sequence_lengths)
        packed_sequence_input = pack_padded_sequence(
            sorted_inputs,
            sorted_sequence_lengths.data.tolist(),
            batch_first=True)

        # Actually call the module on the sorted PackedSequence.
        _, state = self._module(packed_sequence_input, hidden_state)

        # Deal with the fact the LSTM state is a tuple of (state, memory).
        if isinstance(state, tuple):
            state = state[0]

        # Restore the original indices and return the final state of the
        # top layer. Pytorch's recurrent layers return state in the form
        # (num_layers * num_directions, batch_size, hidden_size) regardless
        # of the 'batch_first' flag, so we transpose, extract the relevant
        # layer state (both forward and backward if using bidirectional layers)
        # and return them as a single (batch_size, self.get_output_dim()) tensor.

        # now of shape: (batch_size, num_layers * num_directions, hidden_size).
        unsorted_state = state.transpose(0, 1).index_select(
            0, restoration_indices)

        # Extract the last hidden vector, including both forward and backward states
        # if the cell is bidirectional. Then reshape by concatenation (in the case
        # we have bidirectional states) or just squash the 1st dimension in the non-
        # bidirectional case. Return tensor has shape (batch_size, hidden_size * num_directions).
        try:
            last_state_index = 2 if self._module.bidirectional else 1
        except AttributeError:
            last_state_index = 1
        last_layer_state = unsorted_state[:, -last_state_index:, :]
        return last_layer_state.contiguous().view([-1, self.get_output_dim()])
Example #20
    def test_dropout_version_is_different_to_no_dropout(self):
        augmented_lstm = AugmentedLstm(10, 11)
        dropped_augmented_lstm = AugmentedLstm(
            10, 11, recurrent_dropout_probability=0.9)
        # Initialize all weights to the same constant value (0.5).
        constant_init = Initializer.from_params(
            Params({
                "type": "constant",
                "val": 0.5
            }))
        initializer = InitializerApplicator([(".*", constant_init)])
        initializer(augmented_lstm)
        initializer(dropped_augmented_lstm)

        initial_state = torch.randn([1, 5, 11])
        initial_memory = torch.randn([1, 5, 11])

        # If we use numbers that are too large, as in the PyTorch test, the dropout has no effect.
        sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(
            self.random_tensor, self.sequence_lengths)
        lstm_input = pack_padded_sequence(sorted_tensor,
                                          sorted_sequence.data.tolist(),
                                          batch_first=True)

        augmented_output, augmented_state = augmented_lstm(
            lstm_input, (initial_state, initial_memory))
        dropped_output, dropped_state = dropped_augmented_lstm(
            lstm_input, (initial_state, initial_memory))
        dropped_output_sequence, _ = pad_packed_sequence(dropped_output,
                                                         batch_first=True)
        augmented_output_sequence, _ = pad_packed_sequence(augmented_output,
                                                           batch_first=True)
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(
                dropped_output_sequence.data.numpy(),
                augmented_output_sequence.data.numpy(),
                decimal=4)
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(
                dropped_state[0].data.numpy(),
                augmented_state[0].data.numpy(),
                decimal=4)
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(
                dropped_state[1].data.numpy(),
                augmented_state[1].data.numpy(),
                decimal=4)
Example #21
    def run_lstm(lstm, inputs, lengths):
        """
        Run inputs through an LSTM.

        Args:
            lstm (LSTM): LSTM to use
            inputs (FloatTensor): word embeddings
            lengths (LongTensor): vector with sentence lengths
        """
        inputs, lengths, unsort_idx, _ = sort_batch_by_length(inputs, lengths)
        inputs = pack_padded_sequence(inputs,
                                      lengths.data.tolist(),
                                      batch_first=True)
        lstm.flatten_parameters()
        packed_sorted_output, _ = lstm(inputs)
        sort, _ = pad_packed_sequence(packed_sorted_output, batch_first=True)
        return sort[unsort_idx]
Example #22
    def forward(self,  # pylint: disable=arguments-differ
                inputs: torch.Tensor,
                sequence_lengths: torch.LongTensor = None,
                hidden_state: torch.Tensor = None) -> torch.Tensor:

        if sequence_lengths is None:
            return self._module(inputs, hidden_state)[0]
        sorted_inputs, sorted_sequence_lengths, restoration_indices = sort_batch_by_length(inputs,
                                                                                           sequence_lengths)
        packed_sequence_input = pack_padded_sequence(sorted_inputs,
                                                     sorted_sequence_lengths.data.tolist(),
                                                     batch_first=True)

        # Actually call the module on the sorted PackedSequence.
        packed_sequence_output, _ = self._module(packed_sequence_input, hidden_state)
        unpacked_sequence_tensor, _ = pad_packed_sequence(packed_sequence_output, batch_first=True)
        # Restore the original indices and return the sequence.
        return unpacked_sequence_tensor.index_select(0, restoration_indices)
Example #23
    def forward(self, inputs, lengths):

        embedded_input = self.dropout_on_input_to_LSTM(inputs)

        (sorted_input, sorted_lengths, input_unsort_indices,
         _) = sort_batch_by_length(embedded_input, lengths)

        packed_input = pack_padded_sequence(sorted_input,
                                            sorted_lengths.data.tolist(),
                                            batch_first=True)

        packed_sorted_output, _ = self.rnn(packed_input)

        sorted_output, _ = pad_packed_sequence(packed_sorted_output,
                                               batch_first=True)

        output = sorted_output[input_unsort_indices]

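        # Split the bidirectional RNN output into its forward (l2r) and backward (r2l)
        # halves, then apply multi-head attention over a local context window (window=3)
        # in each direction before concatenating the attended and raw representations.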
        att_vec_dim = int(output.size(2) / 2)
        query_l2r = self.get_query(output[:, :, :att_vec_dim])
        query_r2l = self.get_query(output[:, :, att_vec_dim:])
        context_l2r, context_r2l = self.get_context(output[:, :, :att_vec_dim],
                                                    output[:, :, att_vec_dim:],
                                                    window=3)

        att_l2r = self.multiheadcontextattention(query_l2r, context_l2r,
                                                 context_l2r).view(
                                                     output.size(0),
                                                     output.size(1), -1)
        att_r2l = self.multiheadcontextattention(query_r2l, context_r2l,
                                                 context_r2l).view(
                                                     output.size(0),
                                                     output.size(1), -1)

        att = torch.cat([att_l2r, att_r2l], -1)

        output_cat = torch.cat([output, att], -1)

        input_encoding = self.dropout_on_input_to_linear_layer(output_cat)

        unnormalized_output = self.output_to_label(input_encoding)

        output_distribution = F.log_softmax(unnormalized_output, dim=-1)
        return output_distribution
Example #24
    def test_stacked_bidirectional_lstm_dropout_version_is_different(self, dropout_name: str):
        stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                num_layers=3)
        if dropout_name == 'layer_dropout_probability':
            dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                            num_layers=3,
                                                            layer_dropout_probability=0.9)
        elif dropout_name == 'recurrent_dropout_probability':
            dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                            num_layers=3,
                                                            recurrent_dropout_probability=0.9)
        else:
            raise ValueError('Do not recognise the following dropout name '
                             f'{dropout_name}')
        # Initialize all weights to the same constant value (0.5).
        constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
        initializer = InitializerApplicator([(".*", constant_init)])
        initializer(stacked_lstm)
        initializer(dropped_stacked_lstm)

        initial_state = torch.randn([3, 5, 11])
        initial_memory = torch.randn([3, 5, 11])

        tensor = torch.rand([5, 7, 10])
        sequence_lengths = torch.LongTensor([7, 7, 7, 7, 7])

        sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(tensor, sequence_lengths)
        lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)

        stacked_output, stacked_state = stacked_lstm(lstm_input, (initial_state, initial_memory))
        dropped_output, dropped_state = dropped_stacked_lstm(lstm_input, (initial_state, initial_memory))
        dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
        stacked_output_sequence, _ = pad_packed_sequence(stacked_output, batch_first=True)
        if dropout_name == 'layer_dropout_probability':
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                        stacked_output_sequence.data.numpy(), decimal=4)
        if dropout_name == 'recurrent_dropout_probability':
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_state[0].data.numpy(),
                                                        stacked_state[0].data.numpy(), decimal=4)
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_state[1].data.numpy(),
                                                        stacked_state[1].data.numpy(), decimal=4)
Example #25
    def forward(self, inputs, lengths, char_seqs):

        char_emb_seq = self.char_emb(char_seqs)

        glove_part = inputs[:,:,:300]
        elmo_part = inputs[:,:,300:1324]
        pos_part = inputs[:,:,1324:]

        inputs = torch.cat((glove_part, char_emb_seq), dim=-1)

        inputs = self.highway(inputs)

        inputs = torch.cat([inputs, elmo_part, pos_part], dim=-1)

        embedded_input = self.dropout_on_input_to_LSTM(inputs)
        # Sort the embedded inputs by decreasing order of input length.
        # sorted_input shape: (batch_size, sequence_length, embedding_dim)
        (sorted_input, sorted_lengths, input_unsort_indices, _) = sort_batch_by_length(embedded_input, lengths)
        # Pack the sorted inputs with pack_padded_sequence.
        packed_input = pack_padded_sequence(sorted_input, sorted_lengths.data.tolist(), batch_first=True)
        # Run the input through the RNN.
        packed_sorted_output, _ = self.rnn(packed_input)
        # Unpack (pad) the input with pad_packed_sequence
        # Shape: (batch_size, sequence_length, hidden_size)
        sorted_output, _ = pad_packed_sequence(packed_sorted_output, batch_first=True)
        # Re-sort the packed sequence to restore the initial ordering
        # Shape: (batch_size, sequence_length, hidden_size)
        output = sorted_output[input_unsort_indices]

        input_encoding = self.dropout_on_input_to_linear_layer(output)

        if self.name == 'vua':
            projected_output = self.transform(inputs)
            multiplied_output = projected_output * input_encoding

            features = self.features(multiplied_output)

            unnormalized_output = self.output_projection(features)
        else:
            unnormalized_output = self.output_projection(input_encoding)

        output_distribution = F.log_softmax(unnormalized_output, dim=-1)
        return output_distribution, input_encoding, unnormalized_output
Example #26
    def run_rnn(self, embedded_input, batch, rnn):
        """
        Run embeddings through RNN and return the output.

        Args:
            embedded_input (torch.FloatTensor): batch x seq x dim
            batch (Batch): batch object containing .lengths tensor
            rnn (torch.nn.LSTM): LSTM to run the embeddings through

        Returns:
            torch.FloatTensor: hidden states output of LSTM, batch x seq x dim
        """
        (sorted_input, sorted_lengths, input_unsort_indices, _) = \
            sort_batch_by_length(embedded_input, batch.lengths)
        packed_input = pack(sorted_input, sorted_lengths.data.tolist(),
                            batch_first=True)
        rnn.flatten_parameters()
        packed_sorted_output, _ = rnn(packed_input)
        sorted_output, _ = pad(packed_sorted_output, batch_first=True)
        return sorted_output[input_unsort_indices]
Example #27
    def test_sort_tensor_by_length(self):
        tensor = torch.rand([5, 7, 9])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 1:, :] = 0
        tensor[3, 5:, :] = 0

        sequence_lengths = torch.LongTensor([3, 4, 1, 5, 7])
        sorted_tensor, sorted_lengths, reverse_indices, _ = util.sort_batch_by_length(tensor, sequence_lengths)

        # Test sorted indices are padded correctly.
        numpy.testing.assert_array_equal(sorted_tensor[1, 5:, :].data.numpy(), 0.0)
        numpy.testing.assert_array_equal(sorted_tensor[2, 4:, :].data.numpy(), 0.0)
        numpy.testing.assert_array_equal(sorted_tensor[3, 3:, :].data.numpy(), 0.0)
        numpy.testing.assert_array_equal(sorted_tensor[4, 1:, :].data.numpy(), 0.0)

        assert sorted_lengths.data.equal(torch.LongTensor([7, 5, 4, 3, 1]))

        # Test restoration indices correctly recover the original tensor.
        assert sorted_tensor.index_select(0, reverse_indices).data.equal(tensor.data)
Example #28
    def forward(self,  # pylint: disable=arguments-differ
                inputs: torch.Tensor,
                mask: torch.Tensor,
                hidden_state: torch.Tensor = None) -> torch.Tensor:

        if mask is None:
            return self._module(inputs, hidden_state)[0]

        # In some circumstances you may have sequences of zero length.
        # ``pack_padded_sequence`` requires all sequence lengths to be > 0, so here we
        # adjust the ``mask`` so that every sequence has length at least 1. Then after
        # running the RNN we zero out the corresponding rows in the result.

        # First count how many sequences are empty.
        batch_size = mask.size()[0]
        num_valid = torch.sum(mask[:, 0]).int().data[0]

        # Force every sequence to be length at least one. Need to `.clone()` the mask
        # to avoid a RuntimeError from shared storage.
        if num_valid < batch_size:
            mask = mask.clone()
            mask[:, 0] = 1

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices = sort_batch_by_length(inputs,
                                                                                           sequence_lengths)
        packed_sequence_input = pack_padded_sequence(sorted_inputs,
                                                     sorted_sequence_lengths.data.tolist(),
                                                     batch_first=True)

        # Actually call the module on the sorted PackedSequence.
        packed_sequence_output, _ = self._module(packed_sequence_input, hidden_state)
        unpacked_sequence_tensor, _ = pad_packed_sequence(packed_sequence_output, batch_first=True)

        # We sorted by length, so if there are invalid rows that need to be zeroed out
        # they will be at the end.
        if num_valid < batch_size:
            unpacked_sequence_tensor[num_valid:, :, :] = 0.

        # Restore the original indices and return the sequence.
        return unpacked_sequence_tensor.index_select(0, restoration_indices)
Example #29
 def test_dropout_is_not_applied_to_output_or_returned_hidden_states(self):
     sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(
         self.random_tensor, self.sequence_lengths
     )
     tensor = pack_padded_sequence(
         sorted_tensor, sorted_sequence.data.tolist(), batch_first=True
     )
     lstm = AugmentedLstm(10, 11, recurrent_dropout_probability=0.5)
     output, (hidden_state, _) = lstm(tensor)
     output_sequence, _ = pad_packed_sequence(output, batch_first=True)
     # Test returned output sequence
     num_hidden_dims_zero_across_timesteps = ((output_sequence.sum(1) == 0).sum()).item()
     # If this is not True then dropout has been applied to the output of the LSTM
     assert not num_hidden_dims_zero_across_timesteps
     # Dropout should not be applied to the last hidden state, as it is not used
     # within the LSTM; this keeps the behaviour consistent with `torch.nn.LSTM`,
     # where dropout is not applied to any of its outputs, and with the Keras
     # LSTM implementation.
     hidden_state = hidden_state.squeeze()
     num_hidden_dims_zero_across_timesteps = ((hidden_state == 0).sum()).item()
     assert not num_hidden_dims_zero_across_timesteps
Example #30
    def forward(self,
                feature: torch.Tensor,
                feature_length: torch.Tensor,
                txt_label: Dict[str, torch.Tensor] = None,
                txt_length: torch.Tensor = None,
                meta_data: List[Dict[str, Any]] = None,
                **args: Any) -> Dict[str, torch.Tensor]:
        """
        Parameters:
            feature: (batch, T, feature)
            feature_length: (batch)
            txt_label: {
                        "character": (batch, max_label_length)
                    }
            txt_length: (batch)
        """
        if txt_label is not None:
            txt_label = txt_label['character'].view(-1)
        sorted_feature, sorted_feature_length, restore_idx, _ = sort_batch_by_length(
            feature, feature_length)
        sorted_feature = sorted_feature.transpose(-2, -1).unsqueeze(
            1)  # (batch, 1, feature, T)
        logits, output_lengths = self.speech_model(sorted_feature,
                                                   sorted_feature_length)
        logits = logits.index_select(0, restore_idx)  # (batch, T, num_class)
        output_lengths = output_lengths.index_select(0, restore_idx)  # (batch)
        prob = F.log_softmax(logits, dim=-1)  # (batch, T, num_class)
        output_dict = {}
        if txt_label is not None and txt_length is not None:
            txt_label = txt_label[txt_label.nonzero().squeeze(
                dim=-1)]  # (sum(txt_label))
            loss = F.ctc_loss(log_probs=prob.transpose(0, 1),
                              targets=txt_label.int(),
                              input_lengths=output_lengths.int(),
                              target_lengths=txt_length.int())

            output_dict['loss'] = loss
        return output_dict
Example #31
    def setUp(self):
        super(TestEncoderBase, self).setUp()
        self.lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        self.encoder_base = _EncoderBase(stateful=True)

        tensor = Variable(torch.rand([5, 7, 3]))
        tensor[1, 6:, :] = 0
        tensor[3, 2:, :] = 0
        self.tensor = tensor
        mask = Variable(torch.ones(5, 7))
        mask[1, 6:] = 0
        mask[2, :] = 0  # <= completely masked
        mask[3, 2:] = 0
        mask[4, :] = 0  # <= completely masked
        self.mask = mask

        self.batch_size = 5
        self.num_valid = 3
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        _, _, restoration_indices, sorting_indices = sort_batch_by_length(tensor, sequence_lengths)
        self.sorting_indices = sorting_indices
        self.restoration_indices = restoration_indices
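
These fixtures feed the sort_and_run_forward method shown in the next example (#32): rows 2 and 4 of the mask are fully masked, so num_valid is 3 and those rows are clipped off before the LSTM runs. A hypothetical test using them might look like the sketch below; it is an illustration of that API, not AllenNLP's actual test code.

    def test_sort_and_run_forward_clips_fully_masked_rows(self):
        # Assumes sort_and_run_forward returns (module_output, final_states,
        # restoration_indices), as in the next example.
        output, _, restoration_indices = self.encoder_base.sort_and_run_forward(
            self.lstm, self.tensor, self.mask)
        unpacked, _ = pad_packed_sequence(output, batch_first=True)
        # Only the three non-empty sequences were run through the LSTM ...
        assert unpacked.size(0) == self.num_valid
        # ... but the restoration indices still cover the full batch.
        assert restoration_indices.size(0) == self.batch_size
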
Example #32
    def sort_and_run_forward(self,
                             module: Callable[[PackedSequence, Optional[RnnState]],
                                              Tuple[Union[PackedSequence, torch.Tensor], RnnState]],
                             inputs: torch.Tensor,
                             mask: torch.Tensor,
                             hidden_state: Optional[RnnState] = None):
        """
        This function exists because Pytorch RNNs require that their inputs be sorted
        before being passed as input. As all of our Seq2xxxEncoders use this functionality,
        it is provided in a base class. This method can be called on any module which
        takes as input a ``PackedSequence`` and some ``hidden_state``, which can either be a
        tuple of tensors or a tensor.

        As all of our Seq2xxxEncoders have different return types, we return `sorted`
        outputs from the module, which is called directly. Additionally, we return the
        indices into the batch dimension required to restore the tensor to its correct,
        unsorted order and the number of valid batch elements (i.e. the number of elements
        in the batch which are not completely masked). This un-sorting and re-padding
        of the module outputs is left to the subclasses because their outputs have different
        types and handling them smoothly here is difficult.

        Parameters
        ----------
        module : ``Callable[[PackedSequence, Optional[RnnState]],
                            Tuple[Union[PackedSequence, torch.Tensor], RnnState]]``, required.
            A function to run on the inputs. In most cases, this is a ``torch.nn.Module``.
        inputs : ``torch.Tensor``, required.
            A tensor of shape ``(batch_size, sequence_length, embedding_size)`` representing
            the inputs to the Encoder.
        mask : ``torch.Tensor``, required.
            A tensor of shape ``(batch_size, sequence_length)``, representing masked and
            non-masked elements of the sequence for each element in the batch.
        hidden_state : ``Optional[RnnState]``, (default = None).
            A single tensor of shape (num_layers, batch_size, hidden_size) representing the
            state of an RNN, or a tuple of tensors of shapes
            (num_layers, batch_size, hidden_size) and
            (num_layers, batch_size, memory_size), representing the hidden state and memory
            state of an LSTM-like RNN.

        Returns
        -------
        module_output : ``Union[torch.Tensor, PackedSequence]``.
            A Tensor or PackedSequence representing the output of the Pytorch Module.
            The batch size dimension will be equal to ``num_valid``, as sequences of zero
            length are clipped off before the module is called, as Pytorch cannot handle
            zero length sequences.
        final_states : ``Optional[RnnState]``
            A Tensor representing the hidden state of the Pytorch Module. This can either
            be a single tensor of shape (num_layers, num_valid, hidden_size), for instance in
            the case of a GRU, or a tuple of tensors, such as those required for an LSTM.
        restoration_indices : ``torch.LongTensor``
            A tensor of shape ``(batch_size,)``, describing the re-indexing required to transform
            the outputs back to their original batch order.
        """
        # In some circumstances you may have sequences of zero length. ``pack_padded_sequence``
        # requires all sequence lengths to be > 0, so remove sequences of zero length before
        # calling self._module, then fill with zeros.

        # First count how many sequences are empty.
        batch_size = mask.size(0)
        num_valid = torch.sum(mask[:, 0]).int().item()

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, sorting_indices =\
            sort_batch_by_length(inputs, sequence_lengths)

        # Now create a PackedSequence with only the non-empty, sorted sequences.
        packed_sequence_input = pack_padded_sequence(sorted_inputs[:num_valid, :, :],
                                                     sorted_sequence_lengths[:num_valid].data.tolist(),
                                                     batch_first=True)
        # Prepare the initial states.
        if not self.stateful:
            if hidden_state is None:
                initial_states = hidden_state
            elif isinstance(hidden_state, tuple):
                initial_states = [state.index_select(1, sorting_indices)[:, :num_valid, :].contiguous()
                                  for state in hidden_state]
            else:
                initial_states = hidden_state.index_select(1, sorting_indices)[:, :num_valid, :].contiguous()

        else:
            initial_states = self._get_initial_states(batch_size, num_valid, sorting_indices)

        # Actually call the module on the sorted PackedSequence.
        module_output, final_states = module(packed_sequence_input, initial_states)

        return module_output, final_states, restoration_indices
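
The un-sorting and re-padding that the docstring above leaves to subclasses might look roughly like the sketch below, assuming the wrapped module returns a PackedSequence (the Seq2Seq case). Variable names here are illustrative; this is not AllenNLP's actual wrapper code.

# Hypothetical caller of sort_and_run_forward; a sketch only.
packed_output, final_states, restoration_indices = encoder_base.sort_and_run_forward(
    lstm, inputs, mask)
unpacked, _ = pad_packed_sequence(packed_output, batch_first=True)

# Sequences of zero length were clipped off before the module ran, so pad the batch
# dimension back up to full size with zeros before restoring the original order.
num_valid, sequence_length, encoder_dim = unpacked.size()
batch_size = mask.size(0)
if num_valid < batch_size:
    zeros = unpacked.new_zeros(batch_size - num_valid, sequence_length, encoder_dim)
    unpacked = torch.cat([unpacked, zeros], dim=0)
# (A full wrapper would also pad the time dimension back to mask.size(1) if the
#  longest unpadded sequence was shorter than the padded input.)

# Restore the original (unsorted) batch order.
outputs = unpacked.index_select(0, restoration_indices)
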
Example #33
File: rnn.py Project: vpranav5/NLP4
    def forward(self, passage, question):
        """
        The forward pass of the RNN-based model.

        Parameters
        ----------
        passage: Variable(LongTensor)
            A Variable(LongTensor) of shape (batch_size, passage_length)
            representing the words in the passage for each batch.

        question: Variable(LongTensor)
            A Variable(LongTensor) of shape (batch_size, question_length)
            representing the words in the question for each batch.

        Returns
        -------
        An output dictionary consisting of:
        start_logits: Variable(FloatTensor)
            The first element in the returned tuple. Variable(FloatTensor) of
            shape (batch_size, max_passage_size). Each value is the score
            assigned to a given token. Masked indices are assigned very
            small scores (-1e7).

        end_logits: Variable(FloatTensor)
            The second element in the returned tuple. Variable(FloatTensor) of
            shape (batch_size, max_passage_size). Each value is the score
            assigned to a given token. Masked indices are assigned very
            small scores (-1e7).

        softmax_start_logits: Variable(FloatTensor)
            The third element in the returned tuple. Variable(FloatTensor) of
            shape (batch_size, max_passage_size). Exactly the same as
            start_logits, but with a masked log softmax applied. Represents
            a probability distribution over the passage, indicating the
            probability that any given token is where the answer begins.
            Masked indices are assigned a log-probability of -inf.

        softmax_end_logits: Variable(FloatTensor)
            The fourth entry in the output dictionary. A Variable(FloatTensor) of
            shape (batch_size, max_passage_size). Exactly the same as
            end_logits, but with a masked log softmax applied. Represents
            a log-probability distribution over the passage, indicating the
            probability that any given token is where the answer ends.
            Masked indices have a log-probability of effectively -inf
            (i.e. zero probability mass).
        """
        # Mask: FloatTensor with 0 in positions that are
        # padding (word index 0) and 1 in positions with actual words.
        # Make a mask for the passage. Shape: ?
        # TODO: Your code here.
        passage_mask = (passage != 0).type(
            torch.cuda.FloatTensor if passage.is_cuda else torch.FloatTensor)

        # Make a mask for the question. Shape: ?
        # TODO: Your code here.
        question_mask = (question != 0).type(
            torch.cuda.FloatTensor if question.is_cuda else torch.FloatTensor)

        # Make a LongTensor with the length (number of non-padding words
        # in) each passage.
        # Shape: ?
        # TODO: Your code here.

        # keep as float tensor for use in later methods
        passageLengths = passage_mask.sum(dim=1)

        # Make a LongTensor with the length (number of non-padding words
        # in) each question.
        # Shape: ?
        # TODO: Your code here.

        # keep as a float tensor by summing the mask over the sequence dimension
        questionLengths = question_mask.sum(dim=1)

        # Part 1: Embed the passages and the questions.
        # 1.1. Embed the passage.
        # TODO: Your code here.
        # Shape: ?

        # Get stored passage embedding
        embedded_passage = self.embedding(passage)

        # 1.2. Embed the question.
        # TODO: Your code here.
        # Shape: ?

        # Get stored question embedding
        embedded_question = self.embedding(question)

        # Part 2. Encode the embedded passages with the RNN.
        # 2.1. Sort embedded passages by decreasing order of passage_lengths.
        # Hint: allennlp.nn.util.sort_batch_by_length might be helpful.
        # TODO: Your code here.

        # sort_batch_by_length returns a tuple of
        # (sorted passage, sorted passage lengths, restoration indices, sorting indices)
        sorted_passage, sorted_passage_lengths, passage_restoration, _ = sort_batch_by_length(
            embedded_passage, passageLengths)

        # 2.2. Pack the passages with torch.nn.utils.rnn.pack_padded_sequence.
        # Hint: Make sure you have the proper value for batch_first.
        # TODO: Your code here.

        # packing removes the padded positions; packed_passage is a PyTorch
        # PackedSequence, which flattens the batch of sequences into a 2-D matrix
        # of valid timesteps
        packed_passage = pack_padded_sequence(sorted_passage,
                                              sorted_passage_lengths,
                                              batch_first=True)

        # 2.3. Encode the packed passages with the RNN.
        # TODO: Your code here. Feed the packed passages through the passage GRU.

        # the GRU returns the per-timestep encodings (as a packed sequence)
        # along with the final hidden state
        passageEncoding, passageHidden = self.gruPassage(packed_passage)

        # 2.4. Unpack (pad) the passages with
        # torch.nn.utils.rnn.pad_packed_sequence.
        # Hint: Make sure you have the proper value for batch_first.
        # Shape: ?
        # TODO: Your code here.

        # pad_packed_sequence returns a (padded_output, lengths) tuple;
        # recover the padded passage encodings from the packed sequence
        passage_unpacked, lens_unpacked = pad_packed_sequence(passageEncoding,
                                                              batch_first=True)

        # 2.5. Unsort the unpacked, encoded passage to restore the
        # initial ordering.
        # Hint: Look into torch.index_select or NumPy/PyTorch fancy indexing.
        # Shape: ?
        # TODO: Your code here.

        # index_select(dim, indices): select rows along the batch dimension
        # using the restoration indices to recover the original passage ordering
        unsorted_passage = passage_unpacked.index_select(
            0, passage_restoration)

        # Part 3. Encode the embedded questions with the RNN.
        # 3.1. Sort the embedded questions by decreasing order
        #      of question_lengths.
        # Hint: allennlp.nn.util.sort_batch_by_length might be helpful.
        # TODO: Your code here.

        # Returns a tuple of 4 values
        sorted_question, sorted_question_lengths, question_restoration, _ = sort_batch_by_length(
            embedded_question, questionLengths)

        # 3.2. Pack the questions with pack_padded_sequence.
        # Hint: Make sure you have the proper value for batch_first.
        # TODO: Your code here.

        # Pack questions based on padding
        packed_question = pack_padded_sequence(sorted_question,
                                               sorted_question_lengths,
                                               batch_first=True)

        # 3.3. Encode the questions with the RNN.
        # TODO: Your code here.

        # Encode with question bidirectional GRU
        # output is a packed sequence
        questionEncoding, questionHidden = self.gruQuestion(packed_question)

        # 3.4. Unpack (pad) the questions with pad_packed_sequence.
        # Hint: Make sure you have the proper value for batch_first.
        # Shape: ?
        # TODO: Your code here.

        # recover the padded question encodings
        question_unpacked, lens_unpacked = pad_packed_sequence(
            questionEncoding, batch_first=True)

        # 3.5. Unsort the unpacked, encoded question to restore the
        # initial ordering.
        # Hint: Look into torch.index_select or NumPy/PyTorch fancy indexing.
        # Shape: ?
        # TODO: Your code here.

        # unsort using the question restoration indices to recover the original ordering
        unsorted_question = question_unpacked.index_select(
            0, question_restoration)

        # 3.6. Take the average of the GRU hidden states.
        # Hint: Be careful how you treat padding.
        # Shape: ?
        # TODO: Your code here.

        # zero out padded positions: the question GRU output at padding indices is
        # not necessarily zero, so take the element-wise product of the mask and the
        # unpacked, unsorted question encoding (unsqueeze the mask so it broadcasts
        # over the hidden dimension)
        questionProduct = question_mask.unsqueeze(-1) * unsorted_question

        # sum up non-padded elements of product and get average of gru states
        questionRepresent = (torch.sum(questionProduct, dim=1) /
                             questionLengths.unsqueeze(1))

        # Part 4: Combine the passage and question representations by
        # concatenating the passage and question representations with
        # their product.

        # 4.1. Reshape the question encoding to make it
        # amenable to concatenation
        # Shape: (batchsize, max passage length, hidden size)
        # TODO: Your code here.

        # questionRepresent has shape (batch_size, hidden_size); unsqueeze to
        # (batch_size, 1, hidden_size) and expand to match unsorted_passage
        tiled_encoded_q = questionRepresent.unsqueeze(
            dim=1).expand_as(unsorted_passage)

        # 4.2. Concatenate to make the combined representation.
        # Hint: Use torch.cat
        # Shape:  (batch_size, max_passage_size, 6 * embedding_dim)
        # TODO: Your code here.

        # concatenate the passage encoding, the tiled question representation,
        # and their element-wise product along the last dimension
        combined_x_q = torch.cat([
            unsorted_passage, tiled_encoded_q,
            unsorted_passage * tiled_encoded_q
        ],
                                 dim=-1)

        # Part 5: Compute logits for answer start index.

        # 5.1. Apply the affine transformation, and edit the shape.
        # Shape after affine transformation: ?
        # Shape after editing shape: ?
        # TODO: Your code here.

        # project each position to a single score and squeeze out the last dimension
        start_logits = self.start_output_projection(combined_x_q).squeeze(-1)

        # 5.2. Replace the masked values so they have a very low score (-1e7).
        # This tensor is your start_logits.
        # Hint: allennlp.nn.util.replace_masked_values might be helpful.
        # Shape: ?
        # TODO: Your code here.

        start_logits = replace_masked_values(start_logits, passage_mask, -1e7)

        # 5.3. Apply a padding-aware log-softmax to normalize.
        # This tensor is your softmax_start_logits.
        # Hint: allennlp.nn.util.masked_log_softmax might be helpful.
        # Shape: ?
        # TODO: Your code here.

        softmax_start_logits = masked_log_softmax(start_logits, passage_mask)

        # Part 6: Compute logits for answer end index.

        # 6.1. Apply the affine transformation, and edit the shape.
        # Shape after affine transformation: ?
        # Shape after editing shape: ?
        # TODO: Your code here.
        end_logits = self.end_output_projection(combined_x_q).squeeze(-1)

        # 6.2. Replace the masked values so they have a very low score (-1e7).
        # This tensor is your end_logits.
        # Hint: allennlp.nn.util.replace_masked_values might be helpful.
        # Shape: ?
        # TODO: Your code here.
        end_logits = replace_masked_values(end_logits, passage_mask, -1e7)

        # 6.3. Apply a padding-aware log-softmax to normalize.
        # This tensor is your softmax_end_logits.
        # Hint: allennlp.nn.util.masked_log_softmax might be helpful.
        # Shape: ?
        # TODO: Your code here.
        softmax_end_logits = masked_log_softmax(end_logits, passage_mask)

        # Part 7: Output a dictionary with the start_logits, end_logits,
        # softmax_start_logits, softmax_end_logits.
        # TODO: Your code here.

        return {
            "start_logits": start_logits,
            "end_logits": end_logits,
            "softmax_start_logits": softmax_start_logits,
            "softmax_end_logits": softmax_end_logits
        }
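The docstring above promises that masked positions receive a score of -1e7 and essentially no probability mass after the masked log softmax. A tiny self-contained sketch of that behaviour, using the same allennlp.nn.util helpers as the forward pass (the toy numbers are made up; newer AllenNLP releases may expect boolean masks instead of the float masks used here):

import torch
from allennlp.nn.util import masked_log_softmax, replace_masked_values

logits = torch.tensor([[2.0, 1.0, 0.5, 0.0]])
mask = torch.tensor([[1.0, 1.0, 0.0, 0.0]])  # last two positions are padding

masked_logits = replace_masked_values(logits, mask, -1e7)
# masked_logits -> [[2.0, 1.0, -1e7, -1e7]]

log_probs = masked_log_softmax(logits, mask)
# exp(log_probs) sums to ~1 over the two unmasked positions, while the padded
# positions end up with a log-probability that is effectively -inf
# (i.e. ~0 probability mass).
print(torch.exp(log_probs))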
Example #34
0
    def sort_and_run_forward(
            self,
            module: Callable[[PackedSequence, Optional[RnnState]],
                             Tuple[Union[PackedSequence, torch.Tensor],
                                   RnnState], ],
            inputs: torch.Tensor,
            mask: torch.Tensor,
            hidden_state: Optional[RnnState] = None,
            prevs=None,
            rev_prevs=None):
        # In some circumstances you may have sequences of zero length. ``pack_padded_sequence``
        # requires all sequence lengths to be > 0, so remove sequences of zero length before
        # calling self._module, then fill with zeros.

        # First count how many sequences are non-empty.
        batch_size = mask.size(0)
        num_valid = torch.sum(mask[:, 0]).int().item()

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, sorting_indices = sort_batch_by_length(
            inputs, sequence_lengths)

        prevs = [prevs[i] for i in sorting_indices][:num_valid]
        rev_prevs = [rev_prevs[i] for i in sorting_indices][:num_valid]

        # Now create a PackedSequence with only the non-empty, sorted sequences.
        packed_sequence_input = pack_padded_sequence(
            sorted_inputs[:num_valid, :, :],
            sorted_sequence_lengths[:num_valid].data.tolist(),
            batch_first=True,
        )
        # Prepare the initial states.
        if not self.stateful:
            if hidden_state is None:
                initial_states: Any = hidden_state
            elif isinstance(hidden_state, tuple):
                initial_states = [
                    state.index_select(
                        1, sorting_indices)[:, :num_valid, :].contiguous()
                    for state in hidden_state
                ]
            else:
                initial_states = hidden_state.index_select(
                    1, sorting_indices)[:, :num_valid, :].contiguous()
        else:
            initial_states = self._get_initial_states(batch_size, num_valid,
                                                      sorting_indices)

        # Actually call the module on the sorted PackedSequence.
        module_output, final_states = module(packed_sequence_input,
                                             initial_states, prevs, rev_prevs)

        return module_output, final_states, restoration_indices
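Beyond the usual sorting logic, this variant also threads per-sequence auxiliary structures (prevs and rev_prevs) through the module, so they are re-ordered to match the length-sorted batch and truncated to the non-empty sequences. A minimal sketch of that re-ordering with made-up values (only the list-comprehension pattern comes from the code above):

import torch

prevs = ["seq_a", "seq_b", "seq_c", "seq_d"]   # one entry per batch element
sorting_indices = torch.tensor([2, 0, 3, 1])   # batch order after length-sorting
num_valid = 3                                  # the last sorted sequence is all padding

sorted_prevs = [prevs[i] for i in sorting_indices][:num_valid]
# -> ["seq_c", "seq_a", "seq_d"]: element k now lines up with sequence k of the
# sorted inputs, and the entry for the zero-length sequence is dropped.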
Example #35
0
    def test_augmented_lstm_works_with_highway_connections(self):
        augmented_lstm = AugmentedLstm(10, 11, use_highway=True)
        sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor, self.sequence_lengths)
        lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)
        augmented_lstm(lstm_input)
Example #36
0
    def sort_and_run_forward(
        self,
        module: Callable[
            [PackedSequence, Optional[RnnState]],
            Tuple[Union[PackedSequence, torch.Tensor], RnnState],
        ],
        inputs: torch.Tensor,
        mask: torch.Tensor,
        hidden_state: Optional[RnnState] = None,
    ):
        """
        This function exists because Pytorch RNNs require that their inputs be sorted
        before being passed as input. As all of our Seq2xxxEncoders use this functionality,
        it is provided in a base class. This method can be called on any module which
        takes as input a `PackedSequence` and some `hidden_state`, which can either be a
        tuple of tensors or a tensor.

        As all of our Seq2xxxEncoders have different return types, we return `sorted`
        outputs from the module, which is called directly. Additionally, we return the
        indices into the batch dimension required to restore the tensor to its correct,
        unsorted order and the number of valid batch elements (i.e. the number of elements
        in the batch which are not completely masked). This un-sorting and re-padding
        of the module outputs is left to the subclasses because their outputs have different
        types and handling them smoothly here is difficult.

        # Parameters

        module : `Callable[[PackedSequence, Optional[RnnState]],
                            Tuple[Union[PackedSequence, torch.Tensor], RnnState]]`, required.
            A function to run on the inputs. In most cases, this is a `torch.nn.Module`.
        inputs : `torch.Tensor`, required.
            A tensor of shape `(batch_size, sequence_length, embedding_size)` representing
            the inputs to the Encoder.
        mask : `torch.Tensor`, required.
            A tensor of shape `(batch_size, sequence_length)`, representing masked and
            non-masked elements of the sequence for each element in the batch.
        hidden_state : `Optional[RnnState]`, (default = None).
            A single tensor of shape (num_layers, batch_size, hidden_size) representing the
            state of an RNN, or a tuple of
            tensors of shapes (num_layers, batch_size, hidden_size) and
            (num_layers, batch_size, memory_size), representing the hidden state and memory
            state of an LSTM-like RNN.

        # Returns

        module_output : `Union[torch.Tensor, PackedSequence]`.
            A Tensor or PackedSequence representing the output of the Pytorch Module.
            The batch size dimension will be equal to `num_valid`, as sequences of zero
            length are clipped off before the module is called, as Pytorch cannot handle
            zero length sequences.
        final_states : `Optional[RnnState]`
            A Tensor representing the hidden state of the Pytorch Module. This can either
            be a single tensor of shape (num_layers, num_valid, hidden_size), for instance in
            the case of a GRU, or a tuple of tensors, such as those required for an LSTM.
        restoration_indices : `torch.LongTensor`
            A tensor of shape `(batch_size,)`, describing the re-indexing required to transform
            the outputs back to their original batch order.
        """
        # In some circumstances you may have sequences of zero length. `pack_padded_sequence`
        # requires all sequence lengths to be > 0, so remove sequences of zero length before
        # calling self._module, then fill with zeros.

        # First count how many sequences are non-empty.
        batch_size = mask.size(0)
        num_valid = torch.sum(mask[:, 0]).int().item()

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        (
            sorted_inputs,
            sorted_sequence_lengths,
            restoration_indices,
            sorting_indices,
        ) = sort_batch_by_length(inputs, sequence_lengths)

        # Now create a PackedSequence with only the non-empty, sorted sequences.
        packed_sequence_input = pack_padded_sequence(
            sorted_inputs[:num_valid, :, :],
            sorted_sequence_lengths[:num_valid].data.tolist(),
            batch_first=True,
        )
        # Prepare the initial states.
        if not self.stateful:
            if hidden_state is None:
                initial_states: Any = hidden_state
            elif isinstance(hidden_state, tuple):
                initial_states = [
                    state.index_select(1, sorting_indices)[:, :num_valid, :].contiguous()
                    for state in hidden_state
                ]
            else:
                initial_states = hidden_state.index_select(1, sorting_indices)[
                    :, :num_valid, :
                ].contiguous()

        else:
            initial_states = self._get_initial_states(batch_size, num_valid, sorting_indices)

        # Actually call the module on the sorted PackedSequence.
        module_output, final_states = module(packed_sequence_input, initial_states)

        return module_output, final_states, restoration_indices
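A note on num_valid: any non-empty sequence has a real token at position 0, so summing the first column of the mask counts the non-empty sequences regardless of batch order. A small sketch with made-up values:

import torch

mask = torch.tensor([[1, 1, 1, 0],
                     [1, 1, 0, 0],
                     [0, 0, 0, 0],   # an entirely padded, zero-length sequence
                     [1, 0, 0, 0]])

num_valid = torch.sum(mask[:, 0]).int().item()
# num_valid == 3: only rows with at least one real token contribute, because
# such rows are unmasked at position 0.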