Example #1
    def setUp(self):
        super().setUp()
        self.lstm = LSTM(bidirectional=True,
                         num_layers=3,
                         input_size=3,
                         hidden_size=7,
                         batch_first=True)
        self.rnn = RNN(bidirectional=True,
                       num_layers=3,
                       input_size=3,
                       hidden_size=7,
                       batch_first=True)
        self.encoder_base = _EncoderBase(stateful=True)

        tensor = torch.rand([5, 7, 3])
        tensor[1, 6:, :] = 0
        tensor[3, 2:, :] = 0
        self.tensor = tensor
        mask = torch.ones(5, 7).bool()
        mask[1, 6:] = False
        mask[2, :] = False  # <= completely masked
        mask[3, 2:] = False
        mask[4, :] = False  # <= completely masked
        self.mask = mask

        self.batch_size = 5
        self.num_valid = 3
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        _, _, restoration_indices, sorting_indices = sort_batch_by_length(
            tensor, sequence_lengths)
        self.sorting_indices = sorting_indices
        self.restoration_indices = restoration_indices
Example #2
class Recurrent(Layer):
    def __init__(self, units, length, stateful=False, *args, **kwargs):
        super(Recurrent, self).__init__(*args, **kwargs)
        self.units = units
        self.length = length
        self.output_dim = [length, units]
        self.stateful = stateful
        self.states = None
        if self.input_dim is not None:
            self.build(self.input_dim)

    @property
    def params(self):
        return list(self.layer.parameters())

    def build(self, input_dim):
        self.input_dim = input_dim
        self.layer = TorchRecurrent(self.input_dim, self.units, self.length)

    def clear_states(self):
        self.states = None

    def forward(self, X):
        X = super(Recurrent, self).forward(X)
        if self.stateful and self.states is not None:
            outputs, self.states = self.layer.forward(X, self.states)
        else:
            outputs, self.states = self.layer.forward(X)

        return outputs
Example #3
 def __init__(self, num_words, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, activ=ACTIV):
     self.my_rnn = RNN(
         input_size=num_words,
         hidden_size=hidden_size,
         num_layers=num_layers,
         nonlinearity=activ,
     )
Example #4
class IndRNN(Module):
    def __init__(self, hidden_size, *args, **kwargs):
        super().__init__()
        self.module = RNN(hidden_size=hidden_size, *args, **kwargs, nonlinearity='relu')

        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        # I'm not sure what is going on here, this is what weight_drop does so I stick to it
        self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        # We need to register it in this module to make it work with weight dropout
        w_hh = FloatTensor(hidden_size).type_as(getattr(self.module, 'weight_hh_l0').data)
        w_hh.uniform_(-1, 1)

        getattr(self.module, 'bias_ih_l0').data.fill_(0)
        getattr(self.module, 'bias_hh_l0').data.fill_(0)

        self.register_parameter(name='weight_hh_l0', param=Parameter(w_hh))
        del self.module._parameters['weight_hh_l0']

    def widget_demagnetizer_y2k_edition(*args, **kwargs):
        # We need to replace flatten_parameters with a nothing function
        # It must be a function rather than a lambda as otherwise pickling explodes
        # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION!
        # (╯°□°)╯︵ ┻━┻
        return

    def _setweights(self):
        w_hh = getattr(self, 'weight_hh_l0')
        w_hh = diag(w_hh)
        setattr(self.module, 'weight_hh_l0', w_hh)

    def forward(self, *args):
        self._setweights()
        return self.module.forward(*args)
Example #5
def test_dynamic_rnn(sequence_embedding):
    sequence, mask = sequence_embedding

    hidden_size = 4
    batch_size = 3
    sequence_len = 3

    rnn = RNN(input_size=2,
              hidden_size=4,
              num_layers=2,
              batch_first=True,
              bidirectional=True)

    dynamic_rnn = DynamicRnn(rnn=rnn)

    rnn_output: DynamicRnnOutput = dynamic_rnn(sequence=sequence, mask=mask)

    logging.info(json2str(rnn_output))

    last_layer_h_n: torch.Tensor = rnn_output.last_layer_h_n

    last_layer_h_n_expect_size = (batch_size, hidden_size * 2)

    ASSERT.assertEqual(last_layer_h_n_expect_size, last_layer_h_n.size())

    ASSERT.assertTrue(rnn_output.last_layer_c_n is None)

    sequence_encoding_expect_size = (batch_size, sequence_len, hidden_size * 2)
    sequence_encoding = rnn_output.output
    ASSERT.assertEqual(sequence_encoding_expect_size,
                       sequence_encoding.size())
Example #6
def build_encoder(args, vocab):
    """Builds the encoder to params."""

    input_size = len(vocab.source)
    rnn_layer = None
    bidirectional = args.encoder_mode == 'bigru'
    dropout = args.rnn_dropout if args.encoder_layers != 1 else 0

    if args.encoder_mode == 'rnn':
        rnn_layer = RNN(args.hidden_size,
                        args.hidden_size,
                        num_layers=args.encoder_layers,
                        dropout=dropout,
                        batch_first=True)
    elif args.encoder_mode == 'gru' or args.encoder_mode == 'bigru':
        rnn_layer = GRU(args.hidden_size,
                        args.hidden_size,
                        num_layers=args.encoder_layers,
                        dropout=dropout,
                        bidirectional=bidirectional,
                        batch_first=True)
    else:
        raise ValueError('Invalid encoder mode: %s' % (args.encoder_mode))

    return Encoder(input_size,
                   args.hidden_size,
                   rnn_layer,
                   bidirectional=bidirectional)
Example #7
    def __init__(self,
                 vocab_size,
                 emb_dim,
                 hidden_size,
                 weight,
                 kqv_dim,
                 rnn_type='gru',
                 bidirectional=False,
                 batch_first=False,
                 padding_idx=None):
        super(ZXOTextEncoder, self).__init__()
        self.embed = nn.Embedding(vocab_size,
                                  embedding_dim=emb_dim,
                                  _weight=weight)
        if rnn_type == 'rnn':
            self.rnn = RNN(emb_dim,
                           hidden_size,
                           bidirectional=bidirectional,
                           num_layers=6,
                           batch_first=batch_first)
        elif rnn_type == 'gru':
            self.rnn = GRU(emb_dim,
                           hidden_size,
                           bidirectional=bidirectional,
                           num_layers=6,
                           batch_first=batch_first)
        elif rnn_type == 'lstm':
            self.rnn = LSTM(emb_dim,
                            hidden_size,
                            bidirectional=bidirectional,
                            num_layers=6,
                            batch_first=batch_first)

        self.attn = Attn(emb_dim, kqv_dim)
        self.linear = nn.Linear(emb_dim, 2)
Example #8
 def __init__(self,
              idim: int,
              hdim: int,
              nlayers: int = 1,
              enc_type: str = "blstm"):
     """
      This represents the computation for one RNN layer.
      Uses the packing/padding utilities from PyTorch.

      :param int idim: the input size of the RNN
      :param int hdim: the hidden size of the RNN
      :param int nlayers: number of RNN layers
      :param str enc_type: type of encoder (rnn/gru/lstm; prefix 'b' for bidirectional)
      """
      super(RNNLayer, self).__init__()
      bidir = enc_type.startswith('b')
      enc_type = enc_type[1:] if bidir else enc_type
     if enc_type == "rnn":
         self.elayer = RNN(idim,
                           hdim,
                           nlayers,
                           batch_first=True,
                           bidirectional=bidir)
     elif enc_type == "lstm":
         self.elayer = LSTM(idim,
                            hdim,
                            nlayers,
                            batch_first=True,
                            bidirectional=bidir)
     else:
         self.elayer = GRU(idim,
                           hdim,
                           nlayers,
                           batch_first=True,
                           bidirectional=bidir)
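A hedged construction sketch (not from the original source; it assumes RNNLayer above subclasses torch.nn.Module and that RNN, LSTM and GRU are the torch.nn classes). The 'b' prefix on enc_type selects the bidirectional variant:

layer = RNNLayer(idim=40, hdim=128, nlayers=2, enc_type="blstm")
assert isinstance(layer.elayer, LSTM) and layer.elayer.bidirectional  # a bidirectional LSTM was built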
Example #9
 def forward(self, x):
     "Pass through"
     outs = []
     for l in self.conv1s:
         out = pad_layer(x, l)
         outs.append(out)
     out = torch.cat(outs + [x], dim=1)
     out = F.leaky_relu(out, negative_slope=self.ns)
     out = self.conv_block(out, [self.conv2], [self.ins_norm1, self.drop1],
                           res=False)
     emb2 = self.emb2(out)
     out = self.conv_block(out, [self.conv3, self.conv4],
                           [self.ins_norm2, self.drop2])
     emb4 = self.emb4(out)
     out = self.conv_block(out, [self.conv5, self.conv6],
                           [self.ins_norm3, self.drop3])
     emb6 = self.emb6(out)
     out = self.conv_block(out, [self.conv7, self.conv8],
                           [self.ins_norm4, self.drop4])
     emb8 = self.emb8(out)
     # dense layer
     out = self.dense_block(out, [self.dense1, self.dense2],
                            [self.ins_norm5, self.drop5],
                            res=True)
     embd2 = self.embd2(out)
     out = self.dense_block(out, [self.dense3, self.dense4],
                            [self.ins_norm6, self.drop6],
                            res=True)
     embd4 = self.embd4(out)
     out_rnn = RNN(out, self.RNN)
     embrnn = self.embrnn(out)
     out = torch.cat([out, out_rnn], dim=1)
     out = linear(out, self.linear)
     out = F.leaky_relu(out, negative_slope=self.ns)
     return (out, (emb2, emb4, emb6, emb8, embd2, embd4, embrnn))
Example #10
    def __init__(self, hidden_size, *args, **kwargs):
        super().__init__()
        self.module = RNN(hidden_size=hidden_size, *args, **kwargs, nonlinearity='relu')

        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        # I'm not sure what is going on here, this is what weight_drop does so I stick to it
        self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        # We need to register it in this module to make it work with weight dropout
        w_hh = FloatTensor(hidden_size).type_as(getattr(self.module, 'weight_hh_l0').data)
        w_hh.uniform_(-1, 1)

        getattr(self.module, 'bias_ih_l0').data.fill_(0)
        getattr(self.module, 'bias_hh_l0').data.fill_(0)

        self.register_parameter(name='weight_hh_l0', param=Parameter(w_hh))
        del self.module._parameters['weight_hh_l0']
Example #11
 def __init__(self):
     super(_TolstoiRNNVersion, self).__init__()
     self.lstm = RNN(
         input_size=self.hidden_dim,
         hidden_size=self.hidden_dim,
         num_layers=self.num_layers,
         dropout=0.36,
         batch_first=True,
     )
Example #12
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self._args = args
     self._kwargs = kwargs
     RNN = get_rnn_impl("CPU", self.rnn_type, kwargs["layer_norm"])
     self.rnns = nn.ModuleList()
     for i, o in zip(self._is, self._os):
         # r = RNN(i, o, batch_first=self.batch_first, zoneout=ZONEOUT)
         r = RNN(i, o, batch_first=self.batch_first)
         self.rnns.append(r)
Example #13
 def __init__(self, input_size, args):
     super(RNN_model_linear, self).__init__()
     self.RNN = RNN(input_size=input_size,
                    hidden_size=args.hidden_units,
                    num_layers=args.lstm_layer)
     if 'bi' in args.model:
         bi_num = 2
     else:
         bi_num = 1
     self.classifier = nn.Linear(args.hidden_units * bi_num, args.n_label)
     self.args = args
Example #14
 def __init__(self, skill_size, rnn_h_size, rnn_layer_size, dropout_rate):
     """
      :param skill_size: int, number of skills (knowledge concepts)
      :param rnn_h_size: int, number of RNN hidden units
      :param rnn_layer_size: int, number of RNN layers
      :param dropout_rate: float, dropout probability
     """
     super(DktNet, self).__init__()
     self.rnn = RNN(skill_size * 2, rnn_h_size, rnn_layer_size)
     self.dropout = Dropout(p=dropout_rate)
     self.linear = Linear(rnn_h_size, skill_size)
     self.sigmoid = Sigmoid()
Example #15
def initialize_rnn(rnn: nn.RNN) -> None:
    """ Initializes an RNN so that biases have values of 0 and weights are Xavier normalized.

    Args:
        rnn: nn.RNN. The RNN to initialize.
    """
    for name, param in rnn.named_parameters():
        # Set biases to zero
        if 'bias' in name:
            nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            # Otherwise do Xavier initialization
            nn.init.xavier_normal_(param)
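A minimal usage sketch (assuming initialize_rnn above is in scope; the layer sizes are arbitrary):

import torch
from torch import nn

rnn = nn.RNN(input_size=4, hidden_size=8, num_layers=2, batch_first=True)
initialize_rnn(rnn)

# Every bias vector is now exactly zero; weight matrices are Xavier-normal initialized.
for name, param in rnn.named_parameters():
    if 'bias' in name:
        assert param.abs().sum().item() == 0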
Example #16
def build_rnn_layer(args):
    multiplier = 1
    if args.attention_mode != 'none' and args.encoder_mode == 'bigru':
        multiplier = 3
    elif args.attention_mode != 'none' and args.encoder_mode != 'bigru':
        multiplier = 2

    dropout = args.rnn_dropout if args.decoder_layers != 1 else 0

    if args.decoder_mode == 'rnn':
        return RNN(multiplier * args.hidden_size,
                   args.hidden_size,
                   num_layers=args.decoder_layers,
                   dropout=dropout,
                   batch_first=True)
    elif args.decoder_mode == 'gru':
        return GRU(multiplier * args.hidden_size,
                   args.hidden_size,
                   num_layers=args.decoder_layers,
                   dropout=dropout,
                   batch_first=True)
    else:
        raise ValueError('Invalid decoder mode: %s' % (args.decoder_mode))
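A usage sketch with a hypothetical args namespace (the field names mirror the attributes read above; the values are arbitrary):

from argparse import Namespace

args = Namespace(attention_mode='none', encoder_mode='gru', decoder_mode='gru',
                 decoder_layers=2, rnn_dropout=0.3, hidden_size=64)
decoder_rnn = build_rnn_layer(args)  # GRU(64, 64, num_layers=2, dropout=0.3, batch_first=True)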
Example #17
 def forward(self, x, c):
     "Pass through"
     # emb = self.emb(c)
     (emb2, emb4, emb6, emb8, embd2, embd4, embrnn) = c
     # conv layer
     out = self.conv_block(x, [self.conv1, self.conv2],
                           self.ins_norm1,
                           embrnn,
                           res=True)
     out = self.conv_block(out, [self.conv3, self.conv4],
                           self.ins_norm2,
                           embd4,
                           res=True)
     out = self.conv_block(out, [self.conv5, self.conv6],
                           self.ins_norm3,
                           embd2,
                           res=True)
     # dense layer
     out = self.dense_block(out,
                            emb8, [self.dense1, self.dense2],
                            self.ins_norm4,
                            res=True)
     out = self.dense_block(out,
                            emb6, [self.dense3, self.dense4],
                            self.ins_norm5,
                            res=True)
     out_appended = append_emb(emb4, out.size(2), out)
     # rnn layer
     out_rnn = RNN(out_appended, self.RNN)
     out = torch.cat([out, out_rnn], dim=1)
     out = append_emb(emb2, out.size(2), out)
     out = linear(out, self.dense5)
     out = F.leaky_relu(out, negative_slope=self.ns)
     out = linear(out, self.linear)
     out = out.exp()
     return out
Example #18
from backpack.custom_module.permute import Permute
from backpack.custom_module.reduce_tuple import ReduceTuple

SHARED_SETTINGS = SECONDORDER_SETTINGS
LOCAL_SETTINGS = []
##################################################################
#                         RNN settings                           #
##################################################################
LOCAL_SETTINGS += [
    # RNN settings
    {
        "input_fn":
        lambda: rand(8, 5, 6),
        "module_fn":
        lambda: Sequential(
            RNN(input_size=6, hidden_size=3, batch_first=True),
            ReduceTuple(index=0),
            Permute(0, 2, 1),
            Flatten(),
        ),
        "loss_function_fn":
        lambda: MSELoss(),
        "target_fn":
        lambda: regression_targets((8, 3 * 5)),
    },
    {
        "input_fn":
        lambda: rand(4, 3, 5),
        "module_fn":
        lambda: Sequential(
            LSTM(input_size=5, hidden_size=4, batch_first=True),
Example #19
    def __init__(self,
                 args,
                 emb_index,
                 bidirec,
                 initial_mean_value,
                 overal_maxlen=0):
        super(REGRESSION, self).__init__()
        self.dropout_W = 0.5  # default=0.5
        self.dropout_U = 0.1  # default=0.1
        self.args = args
        cnn_border_mode = 'same'
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
        if args.recurrent_unit == 'lstm':
            from torch.nn import LSTM as RNN
        elif args.recurrent_unit == 'gru':
            from torch.nn import GRU as RNN
        elif args.recurrent_unit == 'simple':
            from torch.nn import RNN as RNN

        self.embed = Embedding(args.vocab_size, args.emb_dim)
        outputdim = args.emb_dim
        if args.cnn_dim > 0:
            self.conv = Conv1DWithMasking(outputdim, args.cnn_dim,
                                          args.cnn_window_size, 1,
                                          (args.cnn_window_size - 1) // 2)
            outputdim = args.cnn_dim
        if args.rnn_dim > 0:
            self.rnn = RNN(outputdim,
                           args.rnn_dim,
                           num_layers=1,
                           bias=True,
                           dropout=self.dropout_W,
                           batch_first=True,
                           bidirectional=bidirec)
            outputdim = args.rnn_dim
            if bidirec == 1:
                outputdim = args.rnn_dim * 2
        if args.dropout_prob > 0:
            self.dropout = Dropout(args.dropout_prob)
        if args.aggregation == 'mot':
            self.mot = MeanOverTime()
        elif args.aggregation.startswith('att'):
            self.att = Attention(outputdim,
                                 op=args.aggregation,
                                 activation='tanh',
                                 init_stdev=0.01)

        self.linear = Linear(outputdim, num_outputs)
        # if not args.skip_init_bias:
        # 	self.linear.bias.data = (torch.log(initial_mean_value) - torch.log(1 - initial_mean_value)).float()
        self.emb_index = emb_index
        if args.emb_path:
            from .w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            self.embed[
                emb_index].weight.data = emb_reader.get_emb_matrix_given_vocab(
                    vocab, model.layers[model.emb_index].get_weights())
        logger.info('  Done')
Example #20
 def build(self, input_dim):
     self.input_dim = input_dim
     self.layer = TorchRecurrent(self.input_dim, self.units, self.length)
Example #21
 def __init__(self, input_size, args):
     super(RNN_model, self).__init__()
     self.RNN = RNN(input_size=input_size,
                    hidden_size=args.hidden_units,
                    num_layers=args.lstm_layer)
     self.args = args
Example #22
class TestEncoderBase(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.lstm = LSTM(bidirectional=True,
                         num_layers=3,
                         input_size=3,
                         hidden_size=7,
                         batch_first=True)
        self.rnn = RNN(bidirectional=True,
                       num_layers=3,
                       input_size=3,
                       hidden_size=7,
                       batch_first=True)
        self.encoder_base = _EncoderBase(stateful=True)

        tensor = torch.rand([5, 7, 3])
        tensor[1, 6:, :] = 0
        tensor[3, 2:, :] = 0
        self.tensor = tensor
        mask = torch.ones(5, 7).bool()
        mask[1, 6:] = False
        mask[2, :] = False  # <= completely masked
        mask[3, 2:] = False
        mask[4, :] = False  # <= completely masked
        self.mask = mask

        self.batch_size = 5
        self.num_valid = 3
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        _, _, restoration_indices, sorting_indices = sort_batch_by_length(
            tensor, sequence_lengths)
        self.sorting_indices = sorting_indices
        self.restoration_indices = restoration_indices

    def test_non_stateful_states_are_sorted_correctly(self):
        encoder_base = _EncoderBase(stateful=False)
        initial_states = (torch.randn(6, 5, 7), torch.randn(6, 5, 7))
        # Check that we sort the state for non-stateful encoders. To test
        # we'll just use a "pass through" encoder, as we aren't actually testing
        # the functionality of the encoder here anyway.
        _, states, restoration_indices = encoder_base.sort_and_run_forward(
            lambda *x: x, self.tensor, self.mask, initial_states)
        # Our input tensor had 2 zero length sequences, so we need
        # to concat a tensor of shape
        # (num_layers * num_directions, batch_size - num_valid, hidden_dim),
        # to the output before unsorting it.
        zeros = torch.zeros([6, 2, 7])

        # sort_and_run_forward strips fully-padded instances from the batch;
        # in order to use the restoration_indices we need to add back the two
        #  that got stripped. What we get back should match what we started with.
        for state, original in zip(states, initial_states):
            assert list(state.size()) == [6, 3, 7]
            state_with_zeros = torch.cat([state, zeros], 1)
            unsorted_state = state_with_zeros.index_select(
                1, restoration_indices)
            for index in [0, 1, 3]:
                numpy.testing.assert_array_equal(
                    unsorted_state[:, index, :].data.numpy(),
                    original[:, index, :].data.numpy())

    def test_get_initial_states(self):
        # First time we call it, there should be no state, so we should return None.
        assert (self.encoder_base._get_initial_states(
            self.batch_size, self.num_valid, self.sorting_indices) is None)

        # First test the case that the previous state is _smaller_ than the current state input.
        initial_states = (torch.randn([1, 3, 7]), torch.randn([1, 3, 7]))
        self.encoder_base._states = initial_states
        # sorting indices are: [0, 1, 3, 2, 4]
        returned_states = self.encoder_base._get_initial_states(
            self.batch_size, self.num_valid, self.sorting_indices)

        correct_expanded_states = [
            torch.cat([state, torch.zeros([1, 2, 7])], 1)
            for state in initial_states
        ]
        # State should have been expanded with zeros to have shape (1, batch_size, hidden_size).
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0].data.numpy(),
            correct_expanded_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1].data.numpy(),
            correct_expanded_states[1].data.numpy())

        # The returned states should be of shape (1, num_valid, hidden_size) and
        # they also should have been sorted with respect to the indices.
        # sorting indices are: [0, 1, 3, 2, 4]

        correct_returned_states = [
            state.index_select(1, self.sorting_indices)[:, :self.num_valid, :]
            for state in correct_expanded_states
        ]

        numpy.testing.assert_array_equal(
            returned_states[0].data.numpy(),
            correct_returned_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            returned_states[1].data.numpy(),
            correct_returned_states[1].data.numpy())

        # Now test the case that the previous state is larger:
        original_states = (torch.randn([1, 10, 7]), torch.randn([1, 10, 7]))
        self.encoder_base._states = original_states
        # sorting indices are: [0, 1, 3, 2, 4]
        returned_states = self.encoder_base._get_initial_states(
            self.batch_size, self.num_valid, self.sorting_indices)
        # State should not have changed, as they were larger
        # than the batch size of the requested states.
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0].data.numpy(),
            original_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1].data.numpy(),
            original_states[1].data.numpy())

        # The returned states should be of shape (1, num_valid, hidden_size) and they
        # also should have been sorted with respect to the indices.
        correct_returned_state = [
            x.index_select(1, self.sorting_indices)[:, :self.num_valid, :]
            for x in original_states
        ]
        numpy.testing.assert_array_equal(
            returned_states[0].data.numpy(),
            correct_returned_state[0].data.numpy())
        numpy.testing.assert_array_equal(
            returned_states[1].data.numpy(),
            correct_returned_state[1].data.numpy())

    def test_update_states(self):
        assert self.encoder_base._states is None
        initial_states = torch.randn([1, 5, 7]), torch.randn([1, 5, 7])

        index_selected_initial_states = (
            initial_states[0].index_select(1, self.restoration_indices),
            initial_states[1].index_select(1, self.restoration_indices),
        )

        self.encoder_base._update_states(initial_states,
                                         self.restoration_indices)
        # State was None, so the updated state should just be the sorted given state.
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0].data.numpy(),
            index_selected_initial_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1].data.numpy(),
            index_selected_initial_states[1].data.numpy())

        new_states = torch.randn([1, 5, 7]), torch.randn([1, 5, 7])
        # tensor has 2 completely masked rows, so the last 2 rows of the _sorted_ states
        # will be completely zero, having been appended after calling the respective encoder.
        new_states[0][:, -2:, :] = 0
        new_states[1][:, -2:, :] = 0

        index_selected_new_states = (
            new_states[0].index_select(1, self.restoration_indices),
            new_states[1].index_select(1, self.restoration_indices),
        )

        self.encoder_base._update_states(new_states, self.restoration_indices)
        # Check that the update _preserved_ the state for the rows which were
        # completely masked (2 and 4):
        for index in [2, 4]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_initial_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_initial_states[1][:, index, :].data.numpy(),
            )
        # Now the states which were updated:
        for index in [0, 1, 3]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_new_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_new_states[1][:, index, :].data.numpy(),
            )

        # Now test the case that the new state is smaller:
        small_new_states = torch.randn([1, 3, 7]), torch.randn([1, 3, 7])
        # pretend the 2nd sequence in the batch was fully masked.
        small_restoration_indices = torch.LongTensor([2, 0, 1])
        small_new_states[0][:, 0, :] = 0
        small_new_states[1][:, 0, :] = 0

        index_selected_small_states = (
            small_new_states[0].index_select(1, small_restoration_indices),
            small_new_states[1].index_select(1, small_restoration_indices),
        )
        self.encoder_base._update_states(small_new_states,
                                         small_restoration_indices)

        # Check the index for the row we didn't update is the same as the previous step:
        for index in [1, 3]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_new_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_new_states[1][:, index, :].data.numpy(),
            )
        # Indices we did update:
        for index in [0, 2]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_small_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_small_states[1][:, index, :].data.numpy(),
            )

        # We didn't update index 4 in the previous step either, so it should be equal to the
        # 4th index of initial states.
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0][:, 4, :].data.numpy(),
            index_selected_initial_states[0][:, 4, :].data.numpy(),
        )
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1][:, 4, :].data.numpy(),
            index_selected_initial_states[1][:, 4, :].data.numpy(),
        )

    def test_reset_states(self):
        # Initialize the encoder states.
        assert self.encoder_base._states is None
        initial_states = torch.randn([1, 5, 7]), torch.randn([1, 5, 7])
        index_selected_initial_states = (
            initial_states[0].index_select(1, self.restoration_indices),
            initial_states[1].index_select(1, self.restoration_indices),
        )
        self.encoder_base._update_states(initial_states,
                                         self.restoration_indices)

        # Check that only some of the states are reset when a mask is provided.
        mask = torch.tensor([True, True, False, False, False])
        self.encoder_base.reset_states(mask)
        # First two states should be zeros
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0][:, :2, :].data.numpy(),
            torch.zeros_like(initial_states[0])[:, :2, :].data.numpy(),
        )
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1][:, :2, :].data.numpy(),
            torch.zeros_like(initial_states[1])[:, :2, :].data.numpy(),
        )
        # Remaining states should be the same
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0][:, 2:, :].data.numpy(),
            index_selected_initial_states[0][:, 2:, :].data.numpy(),
        )
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1][:, 2:, :].data.numpy(),
            index_selected_initial_states[1][:, 2:, :].data.numpy(),
        )

        # Check that error is raised if mask has wrong batch size.
        bad_mask = torch.tensor([True, True, False])
        with self.assertRaises(ValueError):
            self.encoder_base.reset_states(bad_mask)

        # Check that states are reset to None if no mask is provided.
        self.encoder_base.reset_states()
        assert self.encoder_base._states is None

    def test_non_contiguous_initial_states_handled(self):
        # Check that the encoder is robust to non-contiguous initial states.

        # Case 1: Encoder is not stateful

        # A transposition will make the tensors non-contiguous, start them off at the wrong shape
        # and transpose them into the right shape.
        encoder_base = _EncoderBase(stateful=False)
        initial_states = (
            torch.randn(5, 6, 7).permute(1, 0, 2),
            torch.randn(5, 6, 7).permute(1, 0, 2),
        )
        assert not initial_states[0].is_contiguous(
        ) and not initial_states[1].is_contiguous()
        assert initial_states[0].size() == torch.Size([6, 5, 7])
        assert initial_states[1].size() == torch.Size([6, 5, 7])

        # We'll pass them through an LSTM encoder and a vanilla RNN encoder to make sure it works
        # whether the initial states are a tuple of tensors or just a single tensor.
        encoder_base.sort_and_run_forward(self.lstm, self.tensor, self.mask,
                                          initial_states)
        encoder_base.sort_and_run_forward(self.rnn, self.tensor, self.mask,
                                          initial_states[0])

        # Case 2: Encoder is stateful

        # For stateful encoders, the initial state may be non-contiguous if its state was
        # previously updated with non-contiguous tensors. As in the non-stateful tests, we check
        # that the encoder still works on initial states for RNNs and LSTMs.
        final_states = initial_states
        # Check LSTM
        encoder_base = _EncoderBase(stateful=True)
        encoder_base._update_states(final_states, self.restoration_indices)
        encoder_base.sort_and_run_forward(self.lstm, self.tensor, self.mask)
        # Check RNN
        encoder_base.reset_states()
        encoder_base._update_states([final_states[0]],
                                    self.restoration_indices)
        encoder_base.sort_and_run_forward(self.rnn, self.tensor, self.mask)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="requires cuda")
    def test_non_contiguous_initial_states_handled_on_gpu(self):
        # Some PyTorch operations which produce contiguous tensors on the CPU produce
        # non-contiguous tensors on the GPU (e.g. forward pass of an RNN when batch_first=True).
        # Accordingly, we perform the same checks from previous test on the GPU to ensure the
        # encoder is not affected by which device it is on.

        # Case 1: Encoder is not stateful

        # A transposition will make the tensors non-contiguous, start them off at the wrong shape
        # and transpose them into the right shape.
        encoder_base = _EncoderBase(stateful=False).cuda()
        initial_states = (
            torch.randn(5, 6, 7).cuda().permute(1, 0, 2),
            torch.randn(5, 6, 7).cuda().permute(1, 0, 2),
        )
        assert not initial_states[0].is_contiguous(
        ) and not initial_states[1].is_contiguous()
        assert initial_states[0].size() == torch.Size([6, 5, 7])
        assert initial_states[1].size() == torch.Size([6, 5, 7])

        # We'll pass them through an LSTM encoder and a vanilla RNN encoder to make sure it works
        # whether the initial states are a tuple of tensors or just a single tensor.
        encoder_base.sort_and_run_forward(self.lstm.cuda(), self.tensor.cuda(),
                                          self.mask.cuda(), initial_states)
        encoder_base.sort_and_run_forward(self.rnn.cuda(), self.tensor.cuda(),
                                          self.mask.cuda(), initial_states[0])

        # Case 2: Encoder is stateful

        # For stateful encoders, the initial state may be non-contiguous if its state was
        # previously updated with non-contiguous tensors. As in the non-stateful tests, we check
        # that the encoder still works on initial states for RNNs and LSTMs.
        final_states = initial_states
        # Check LSTM
        encoder_base = _EncoderBase(stateful=True).cuda()
        encoder_base._update_states(final_states,
                                    self.restoration_indices.cuda())
        encoder_base.sort_and_run_forward(self.lstm.cuda(), self.tensor.cuda(),
                                          self.mask.cuda())
        # Check RNN
        encoder_base.reset_states()
        encoder_base._update_states([final_states[0]],
                                    self.restoration_indices.cuda())
        encoder_base.sort_and_run_forward(self.rnn.cuda(), self.tensor.cuda(),
                                          self.mask.cuda())
Example #23
    def train_torch(self, X, y_true, batch_size, learning_rate, num_epochs,
                    print_many, verbose):
        self.batch_size = batch_size
        progresses = {
            int(num_epochs // (100 / i)): i
            for i in range(1, 101, 1)
        }
        t0 = counter()
        durations = []

        device = torch.device('cuda:0')
        rnn = RNN(input_size=self.input_dim,
                  hidden_size=self.hidden_dim,
                  num_layers=1,
                  nonlinearity='tanh',
                  bias=True,
                  batch_first=False).to(device)
        fc = FCLayer(self.hidden_dim, self.output_size, bias=True).to(device)
        params = [rnn.parameters(), fc.params()]
        optimizer = SGD(chain(*params), lr=learning_rate)
        for epoch in range(num_epochs):
            epoch_loss = 0
            for i in range(self.max_iters):
                x_batch = X[i * self.batch_size:(i + 1) * self.batch_size]
                x_batch = np.array(
                    [x_batch[:, step, :] for step in range(self.time_steps)])
                y_true_batch = y_true[i * self.batch_size:(i + 1) *
                                      self.batch_size]
                batch_size_local = x_batch.shape[1]

                # convert to pytorch tensor
                y_true_batch = y_true_batch.astype(np.int64)
                y_true_batch = torch.tensor(y_true_batch,
                                            requires_grad=False).to(device)
                x_batch = x_batch.astype(np.float32)
                x_batch = torch.tensor(x_batch, requires_grad=True).to(device)

                # forward pass
                h_stack, h_last = rnn.forward(x_batch, hx=None)
                fc_out = fc.forward(h_last)
                log_y_pred = F.log_softmax(input=fc_out, dim=2)
                log_y_pred = log_y_pred.view(batch_size_local,
                                             self.output_size)
                loss = F.nll_loss(input=log_y_pred,
                                  target=y_true_batch,
                                  reduction='mean')

                # update gradient
                optimizer.zero_grad()
                loss.backward()
                epoch_loss += loss.item()
                optimizer.step()

            durations.append(counter() - t0)
            t0 = counter()
            if (print_many and epoch % 100 == 0) or (not print_many
                                                     and epoch in progresses):
                print(
                    f"after epoch: {epoch}, epoch_losses: {round(epoch_loss / self.max_iters, 3)}"
                )

        if verbose > 0:
            avg_epoch_time = sum(durations) / len(durations)
            print("average epoch time:", round(avg_epoch_time, 3))
            return avg_epoch_time
Example #24
import torch
from torch.nn import RNN, LSTM

rnn = RNN(input_size=4, hidden_size=5, batch_first=True)
inputs = torch.rand(2, 3, 4)  # (batch, seq_len, input_size)

outputs, hn = rnn(inputs)

print(outputs, outputs.shape)  # (2, 3, 5): hidden state at every time step
print(hn, hn.shape)            # (1, 2, 5): final hidden state per layer

lstm = LSTM(input_size=4, hidden_size=6, batch_first=True)
outputs, (hn, cn) = lstm(inputs)

print(outputs, outputs.shape)  # (2, 3, 6)
print(hn, hn.shape)            # (1, 2, 6)
print(cn, cn.shape)            # (1, 2, 6)
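A small extension sketch (not part of the original snippet): an explicit initial hidden state can be passed as the second argument; it must have shape (num_layers * num_directions, batch, hidden_size) and defaults to zeros when omitted.

h0 = torch.zeros(1, 2, 5)      # (num_layers * num_directions, batch, hidden_size)
outputs, hn = rnn(inputs, h0)  # same output shapes as above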
Example #25
 def build(
     self,
     name: str,
     embedding_dim: int,
     hidden_size: int = 32,
     num_filters: int = 1,
     num_heads: int = 3,
     output_dim: int = 30,
     ngram_filter_sizes: Tuple = (1, 2, 3, 4, 5),
     filters: List[List[int]] = [[1, 4], [2, 8], [3, 16], [4, 32], [5, 64]],
     num_highway: int = 2,
     projection_dim: int = 16
 ) -> Callable[[Tensor, Optional[Tensor]], Tensor]:
     encoder = None
     if name in {'boe'}:
         encoder = BagOfEmbeddingsEncoder(embedding_dim=embedding_dim,
                                          averaged=True)
     elif name in {'cnn'}:
         encoder = CnnEncoder(embedding_dim=embedding_dim,
                              num_filters=num_filters,
                              ngram_filter_sizes=ngram_filter_sizes,
                              output_dim=output_dim)
     elif name in {'cnnh'}:
         encoder = CnnHighwayEncoder(embedding_dim=embedding_dim,
                                     filters=filters,
                                     num_highway=num_highway,
                                     projection_dim=projection_dim,
                                     projection_location="after_cnn")
     elif name in {'rnn'}:
         rnn = RNN(input_size=embedding_dim,
                   bidirectional=True,
                   hidden_size=hidden_size,
                   batch_first=True)
         encoder = PytorchSeq2VecWrapper(rnn)
     elif name in {'lstm'}:
         lstm = LSTM(input_size=embedding_dim,
                     bidirectional=True,
                     hidden_size=hidden_size,
                     batch_first=True)
         encoder = PytorchSeq2VecWrapper(lstm)
     elif name in {'gru'}:
         gru = GRU(input_size=embedding_dim,
                   bidirectional=True,
                   hidden_size=hidden_size,
                   batch_first=True)
         encoder = PytorchSeq2VecWrapper(gru)
     elif name in {'intra'}:
         intra = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                               projection_dim=output_dim,
                                               combination="1,2")
         aggr = PytorchSeq2VecWrapper(
             LSTM(input_size=embedding_dim + output_dim,
                  bidirectional=True,
                  hidden_size=hidden_size,
                  batch_first=True))
         encoder = lambda x, y: aggr(intra(x, y), y)
     elif name in {'multihead'}:
         sim = MultiHeadedSimilarity(num_heads, embedding_dim)
         multi = IntraSentenceAttentionEncoder(
             input_dim=embedding_dim,
             projection_dim=embedding_dim,
             similarity_function=sim,
             num_attention_heads=num_heads,
             combination="1+2")
         aggr = PytorchSeq2VecWrapper(
             LSTM(input_size=embedding_dim,
                  bidirectional=True,
                  hidden_size=hidden_size,
                  batch_first=True))
         encoder = lambda x, y: aggr(multi(x, y), y)
     assert encoder is not None
     return encoder
Example #26
#x = x + 10
#Sx_all = torch.cat((Sx_all, x), dim=0)

# Select Training Data.
Sx_tr, y_tr = Sx_all[subset == 0], y_all[subset == 0]

# Standardize the features: zero mean, unit variance.
mu_tr = Sx_tr.mean(dim=0)
std_tr = Sx_tr.std(dim=0)
Sx_tr = (Sx_tr - mu_tr) / std_tr

# Design ML Model
num_inputs = Sx_tr.shape[-1]
num_classes = y_tr.cpu().unique().numel()
model = Sequential(Linear(num_inputs, num_classes), LogSoftmax(dim=1))
rnn = RNN(336, 336)
optimizer = Adam(model.parameters())
criterion = NLLLoss()

if use_cuda:
    model = model.cuda()
    criterion = criterion.cuda()

# Number of signals to use in each gradient descent step (batch).
batch_size = 32
# Number of epochs.
num_epochs = 80
# Learning rate for Adam.
lr = 1e-2

# set number of batches
Example #27
    def __init__(self,
                 nb_features,
                 nb_frames,
                 nb_layers,
                 hidden_size,
                 bidirectional=False,
                 mixture_mean=None,
                 mixture_scale=None,
                 label_mean=None,
                 activation_function="relu",
                 recurrent_layer="lstm"):
        super(Generalised_Recurrent_Model, self).__init__()

        # set the hidden size
        self.hidden_size = hidden_size

        # create parameters with torch tensors for mean and scale
        self.mixture_mean = Parameter(
            torch.from_numpy(np.copy(mixture_mean).astype(np.float32)))

        self.mixture_scale = Parameter(
            torch.from_numpy(np.copy(mixture_scale).astype(np.float32)))

        # fully connected dense layer for input dimensionality reduction
        self.fc_dr = Linear(in_features=nb_features, out_features=hidden_size)

        # different recurrent layers
        recurrent_layers = {
            'lstm':
            LSTM(input_size=hidden_size,
                 hidden_size=hidden_size,
                 num_layers=nb_layers,
                 batch_first=True,
                 bidirectional=bidirectional),
            'gru':
            GRU(input_size=hidden_size,
                hidden_size=hidden_size,
                num_layers=nb_layers,
                batch_first=True,
                bidirectional=bidirectional),
            'rnn':
            RNN(input_size=hidden_size,
                hidden_size=hidden_size,
                num_layers=nb_layers,
                batch_first=True,
                bidirectional=bidirectional)
        }
        # recurrent layer
        self.recurrent_layer = recurrent_layers[recurrent_layer]

        self.lstm_output = hidden_size * 2 if bidirectional else hidden_size

        # fully connected dense layer for input dimensionality expansion
        self.fc_de = Linear(in_features=self.lstm_output,
                            out_features=nb_features)

        # output label scaling
        self.label_scale = Parameter(torch.ones(nb_features))

        # output label mean
        self.label_mean = Parameter(
            torch.from_numpy(np.copy(label_mean).astype(np.float32)))

        # activation function
        activation_functions = {'relu': F.relu, 'tanh': torch.tanh}
        self.activation_function = activation_functions[activation_function]
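A hedged construction sketch (not from the original source; it assumes Generalised_Recurrent_Model above subclasses torch.nn.Module and that numpy, Parameter, Linear, LSTM, GRU and RNN are imported as in the snippet). The statistics arrays are placeholders:

import numpy as np

stats = np.zeros(513, dtype=np.float32)
model = Generalised_Recurrent_Model(nb_features=513, nb_frames=None, nb_layers=2,
                                    hidden_size=256, bidirectional=True,
                                    mixture_mean=stats, mixture_scale=stats + 1.0,
                                    label_mean=stats, recurrent_layer='gru')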