class Recurrent(Layer):
    def __init__(self, units, length, stateful=False, *args, **kwargs):
        super(Recurrent, self).__init__(*args, **kwargs)
        self.units = units
        self.length = length
        self.output_dim = [length, units]
        self.stateful = stateful
        self.states = None
        if self.input_dim is not None:
            self.build(self.input_dim)

    @property
    def params(self):
        return list(self.layer.parameters())

    def build(self, input_dim):
        self.input_dim = input_dim
        self.layer = TorchRecurrent(self.input_dim, self.units, self.length)

    def clear_states(self):
        self.states = None

    def forward(self, X):
        X = super(Recurrent, self).forward(X)
        if self.stateful and self.states is not None:
            outputs, self.states = self.layer.forward(X, self.states)
        else:
            outputs, self.states = self.layer.forward(X)
        return outputs
def __init__(self, num_words, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, activ=ACTIV):
    my_rnn = RNN(
        input_size=num_words,
        hidden_size=hidden_size,
        num_layers=num_layers,
        nonlinearity=activ,
    )
class IndRNN(Module):
    def __init__(self, hidden_size, *args, **kwargs):
        super().__init__()
        self.module = RNN(hidden_size=hidden_size, *args, **kwargs, nonlinearity='relu')

        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        # I'm not sure what is going on here, this is what weight_drop does so I stick to it
        self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        # We need to register it in this module to make it work with weight dropout
        w_hh = FloatTensor(hidden_size).type_as(getattr(self.module, 'weight_hh_l0').data)
        w_hh.uniform_(-1, 1)

        getattr(self.module, 'bias_ih_l0').data.fill_(0)
        getattr(self.module, 'bias_hh_l0').data.fill_(0)

        self.register_parameter(name='weight_hh_l0', param=Parameter(w_hh))
        del self.module._parameters['weight_hh_l0']

    def widget_demagnetizer_y2k_edition(*args, **kwargs):
        # We need to replace flatten_parameters with a nothing function
        # It must be a function rather than a lambda as otherwise pickling explodes
        # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION!
        # (╯°□°)╯︵ ┻━┻
        return

    def _setweights(self):
        w_hh = getattr(self, 'weight_hh_l0')
        w_hh = diag(w_hh)
        setattr(self.module, 'weight_hh_l0', w_hh)

    def forward(self, *args):
        self._setweights()
        return self.module.forward(*args)
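# For clarity, a minimal sketch (not part of the IndRNN class above) of the diagonal
# trick used by _setweights: the learned 1-D vector becomes a purely diagonal
# hidden-to-hidden matrix, so each unit only sees its own previous state, which is
# the IndRNN constraint. The size 4 is illustrative.
import torch

w_hh = torch.empty(4).uniform_(-1, 1)   # one recurrent weight per hidden unit
w_hh_matrix = torch.diag(w_hh)          # (4, 4) matrix, zeros everywhere off the diagonal
assert (w_hh_matrix == torch.diag(torch.diagonal(w_hh_matrix))).all()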
def test_dynamic_rnn(sequence_embedding):
    sequence, mask = sequence_embedding

    hidden_size = 4
    batch_size = 3
    sequence_len = 3

    rnn = RNN(input_size=2, hidden_size=4, num_layers=2, batch_first=True, bidirectional=True)
    dynamic_rnn = DynamicRnn(rnn=rnn)

    rnn_output: DynamicRnnOutput = dynamic_rnn(sequence=sequence, mask=mask)

    logging.info(json2str(rnn_output))

    last_layer_h_n: torch.Tensor = rnn_output.last_layer_h_n
    last_layer_h_n_expect_size = (batch_size, hidden_size * 2)

    ASSERT.assertEqual(last_layer_h_n_expect_size, last_layer_h_n.size())
    ASSERT.assertTrue(rnn_output.last_layer_c_n is None)

    sequence_encoding_expect_size = (batch_size, sequence_len, hidden_size * 2)
    sequence_encoding = rnn_output.output
    ASSERT.assertEqual(sequence_encoding_expect_size, sequence_encoding.size())
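# For context, a hedged sketch of the pack/pad pattern that a dynamic-RNN wrapper such
# as DynamicRnn typically builds on (this is the generic PyTorch idiom, not the
# project's implementation; shapes are illustrative).
import torch
from torch.nn import RNN
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

rnn = RNN(input_size=2, hidden_size=4, num_layers=2, batch_first=True, bidirectional=True)
sequence = torch.rand(3, 3, 2)                      # (batch, seq_len, input_size)
lengths = torch.tensor([3, 2, 1])                   # valid timesteps per sequence
packed = pack_padded_sequence(sequence, lengths, batch_first=True, enforce_sorted=False)
packed_output, h_n = rnn(packed)
output, _ = pad_packed_sequence(packed_output, batch_first=True)   # (3, 3, 2 * 4)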
def build_encoder(args, vocab):
    """Builds the encoder according to params."""
    input_size = len(vocab.source)
    rnn_layer = None
    bidirectional = False if args.encoder_mode != 'bigru' else True
    dropout = args.rnn_dropout if args.encoder_layers != 1 else 0

    if args.encoder_mode == 'rnn':
        rnn_layer = RNN(args.hidden_size,
                        args.hidden_size,
                        num_layers=args.encoder_layers,
                        dropout=dropout,
                        batch_first=True)
    elif args.encoder_mode == 'gru' or args.encoder_mode == 'bigru':
        rnn_layer = GRU(args.hidden_size,
                        args.hidden_size,
                        num_layers=args.encoder_layers,
                        dropout=dropout,
                        bidirectional=bidirectional,
                        batch_first=True)
    else:
        raise ValueError('Invalid encoder mode: %s' % (args.encoder_mode))

    return Encoder(input_size,
                   args.hidden_size,
                   rnn_layer,
                   bidirectional=bidirectional)
def __init__(self, vocab_size, emb_dim, hidden_size, weight, kqv_dim,
             rnn_type='gru', bidirectional=False, batch_first=False, padding_idx=None):
    super(ZXOTextEncoder, self).__init__()
    self.embed = nn.Embedding(vocab_size, embedding_dim=emb_dim, _weight=weight)
    if rnn_type == 'rnn':
        self.rnn = RNN(emb_dim, hidden_size, bidirectional=bidirectional,
                       num_layers=6, batch_first=batch_first)
    elif rnn_type == 'gru':
        self.rnn = GRU(emb_dim, hidden_size, bidirectional=bidirectional,
                       num_layers=6, batch_first=batch_first)
    elif rnn_type == 'lstm':
        self.rnn = LSTM(emb_dim, hidden_size, bidirectional=bidirectional,
                        num_layers=6, batch_first=batch_first)
    self.attn = Attn(emb_dim, kqv_dim)
    self.linear = nn.Linear(emb_dim, 2)
def __init__(self, idim: int, hdim: int, nlayers: int = 1, enc_type: str = "blstm"):
    """
    This represents the computation that happens for one RNN layer.
    Uses the packing/padding utils from PyTorch.

    :param int idim: The input size of the RNN
    :param int hdim: The hidden size of the RNN
    :param int nlayers: Number of RNN layers
    :param str enc_type: Type of encoder - RNN/GRU/LSTM
    """
    super(RNNLayer, self).__init__()
    bidir = True if enc_type[0] == 'b' else False
    enc_type = enc_type[1:] if enc_type[0] == 'b' else enc_type
    if enc_type == "rnn":
        self.elayer = RNN(idim, hdim, nlayers, batch_first=True, bidirectional=bidir)
    elif enc_type == "lstm":
        self.elayer = LSTM(idim, hdim, nlayers, batch_first=True, bidirectional=bidir)
    else:
        self.elayer = GRU(idim, hdim, nlayers, batch_first=True, bidirectional=bidir)
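# Hedged construction sketch for the RNNLayer fragment above (assumes the full class
# and the torch.nn imports are in scope): a leading 'b' in enc_type selects the
# bidirectional variant, so "blstm" yields a bidirectional LSTM. Sizes are illustrative.
layer = RNNLayer(idim=40, hdim=128, nlayers=2, enc_type="blstm")
assert isinstance(layer.elayer, LSTM) and layer.elayer.bidirectional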
def forward(self, x):
    "Pass through"
    outs = []
    for l in self.conv1s:
        out = pad_layer(x, l)
        outs.append(out)
    out = torch.cat(outs + [x], dim=1)
    out = F.leaky_relu(out, negative_slope=self.ns)
    out = self.conv_block(out, [self.conv2], [self.ins_norm1, self.drop1], res=False)
    emb2 = self.emb2(out)
    out = self.conv_block(out, [self.conv3, self.conv4], [self.ins_norm2, self.drop2])
    emb4 = self.emb4(out)
    out = self.conv_block(out, [self.conv5, self.conv6], [self.ins_norm3, self.drop3])
    emb6 = self.emb6(out)
    out = self.conv_block(out, [self.conv7, self.conv8], [self.ins_norm4, self.drop4])
    emb8 = self.emb8(out)
    # dense layer
    out = self.dense_block(out, [self.dense1, self.dense2], [self.ins_norm5, self.drop5], res=True)
    embd2 = self.embd2(out)
    out = self.dense_block(out, [self.dense3, self.dense4], [self.ins_norm6, self.drop6], res=True)
    embd4 = self.embd4(out)
    out_rnn = RNN(out, self.RNN)
    embrnn = self.embrnn(out)
    out = torch.cat([out, out_rnn], dim=1)
    out = linear(out, self.linear)
    out = F.leaky_relu(out, negative_slope=self.ns)
    return (out, (emb2, emb4, emb6, emb8, embd2, embd4, embrnn))
def __init__(self):
    super(_TolstoiRNNVersion, self).__init__()
    self.lstm = RNN(
        input_size=self.hidden_dim,
        hidden_size=self.hidden_dim,
        num_layers=self.num_layers,
        dropout=0.36,
        batch_first=True,
    )
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self._args = args
    self._kwargs = kwargs
    RNN = get_rnn_impl("CPU", self.rnn_type, kwargs["layer_norm"])
    self.rnns = nn.ModuleList()
    for i, o in zip(self._is, self._os):
        # r = RNN(i, o, batch_first=self.batch_first, zoneout=ZONEOUT)
        r = RNN(i, o, batch_first=self.batch_first)
        self.rnns.append(r)
def __init__(self, input_size, args):
    super(RNN_model_linear, self).__init__()
    self.RNN = RNN(input_size=input_size,
                   hidden_size=args.hidden_units,
                   num_layers=args.lstm_layer)
    if 'bi' in args.model:
        bi_num = 2
    else:
        bi_num = 1
    self.classifier = nn.Linear(args.hidden_units * bi_num, args.n_label)
    self.args = args
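# Hypothetical construction of RNN_model_linear above, using a SimpleNamespace in place
# of the project's argparse args; the field names mirror the attributes the constructor
# reads. Note that 'bi' in args.model only widens the classifier here, it does not make
# the RNN itself bidirectional.
from types import SimpleNamespace

args = SimpleNamespace(hidden_units=64, lstm_layer=2, model='bi-rnn', n_label=5)
model = RNN_model_linear(input_size=30, args=args)   # classifier: Linear(128, 5)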
def __init__(self, skill_size, rnn_h_size, rnn_layer_size, dropout_rate):
    """
    :param skill_size: int, number of knowledge concepts (skills)
    :param rnn_h_size: int, number of RNN hidden units
    :param rnn_layer_size: int, number of RNN layers
    :param dropout_rate: float
    """
    super(DktNet, self).__init__()
    self.rnn = RNN(skill_size * 2, rnn_h_size, rnn_layer_size)
    self.dropout = Dropout(p=dropout_rate)
    self.linear = Linear(rnn_h_size, skill_size)
    self.sigmoid = Sigmoid()
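# Hedged illustration (not part of DktNet) of why the RNN input width above is
# skill_size * 2: in DKT an interaction is commonly one-hot encoded over
# (skill, correctness), with the second half of the vector used for correct answers.
# Sizes are illustrative.
import torch

skill_size = 10
skill_id, correct = 3, 1
x = torch.zeros(2 * skill_size)
x[skill_id + correct * skill_size] = 1.0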
def initialize_rnn(rnn: nn.RNN) -> None:
    """
    Initializes an RNN so that biases have values of 0 and weights are Xavier normalized.

    Args:
        rnn: nn.RNN. The RNN to initialize.
    """
    for name, param in rnn.named_parameters():
        # Set biases to zero
        if 'bias' in name:
            nn.init.constant_(param, 0.0)
        # Otherwise do Xavier initialization
        elif 'weight' in name:
            nn.init.xavier_normal_(param)
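# Minimal usage sketch for initialize_rnn (layer sizes are illustrative): after the
# call, every bias parameter is zero and every weight matrix is Xavier-normal initialized.
import torch.nn as nn

rnn = nn.RNN(input_size=8, hidden_size=16, num_layers=2, batch_first=True)
initialize_rnn(rnn)
assert all((p == 0).all() for n, p in rnn.named_parameters() if 'bias' in n)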
def build_rnn_layer(args):
    multiplier = 1
    if args.attention_mode != 'none' and args.encoder_mode == 'bigru':
        multiplier = 3
    elif args.attention_mode != 'none' and args.encoder_mode != 'bigru':
        multiplier = 2

    dropout = args.rnn_dropout if args.decoder_layers != 1 else 0

    if args.decoder_mode == 'rnn':
        return RNN(multiplier * args.hidden_size,
                   args.hidden_size,
                   num_layers=args.decoder_layers,
                   dropout=dropout,
                   batch_first=True)
    elif args.decoder_mode == 'gru':
        return GRU(multiplier * args.hidden_size,
                   args.hidden_size,
                   num_layers=args.decoder_layers,
                   dropout=dropout,
                   batch_first=True)
    else:
        raise ValueError('Invalid decoder mode: %s' % (args.decoder_mode))
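# Hypothetical invocation of build_rnn_layer above, with a SimpleNamespace standing in
# for the project's argparse args; the field names mirror those the function reads, and
# GRU is assumed to be imported as in the snippet.
from types import SimpleNamespace

args = SimpleNamespace(attention_mode='none', encoder_mode='gru', decoder_mode='gru',
                       decoder_layers=2, rnn_dropout=0.3, hidden_size=64)
decoder_rnn = build_rnn_layer(args)   # GRU(64, 64, num_layers=2, dropout=0.3, batch_first=True)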
def forward(self, x, c):
    "Pass through"
    # emb = self.emb(c)
    (emb2, emb4, emb6, emb8, embd2, embd4, embrnn) = c
    # conv layer
    out = self.conv_block(x, [self.conv1, self.conv2], self.ins_norm1, embrnn, res=True)
    out = self.conv_block(out, [self.conv3, self.conv4], self.ins_norm2, embd4, res=True)
    out = self.conv_block(out, [self.conv5, self.conv6], self.ins_norm3, embd2, res=True)
    # dense layer
    out = self.dense_block(out, emb8, [self.dense1, self.dense2], self.ins_norm4, res=True)
    out = self.dense_block(out, emb6, [self.dense3, self.dense4], self.ins_norm5, res=True)
    out_appended = append_emb(emb4, out.size(2), out)
    # rnn layer
    out_rnn = RNN(out_appended, self.RNN)
    out = torch.cat([out, out_rnn], dim=1)
    out = append_emb(emb2, out.size(2), out)
    out = linear(out, self.dense5)
    out = F.leaky_relu(out, negative_slope=self.ns)
    out = linear(out, self.linear)
    out = out.exp()
    return out
from backpack.custom_module.permute import Permute
from backpack.custom_module.reduce_tuple import ReduceTuple

SHARED_SETTINGS = SECONDORDER_SETTINGS
LOCAL_SETTINGS = []

##################################################################
#                          RNN settings                          #
##################################################################
LOCAL_SETTINGS += [
    # RNN settings
    {
        "input_fn": lambda: rand(8, 5, 6),
        "module_fn": lambda: Sequential(
            RNN(input_size=6, hidden_size=3, batch_first=True),
            ReduceTuple(index=0),
            Permute(0, 2, 1),
            Flatten(),
        ),
        "loss_function_fn": lambda: MSELoss(),
        "target_fn": lambda: regression_targets((8, 3 * 5)),
    },
    {
        "input_fn": lambda: rand(4, 3, 5),
        "module_fn": lambda: Sequential(
            LSTM(input_size=5, hidden_size=4, batch_first=True),
def __init__(self, args, emb_index, bidirec, initial_mean_value, overal_maxlen=0):
    super(REGRESSION, self).__init__()
    self.dropout_W = 0.5  # default=0.5
    self.dropout_U = 0.1  # default=0.1
    self.args = args
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.recurrent_unit == 'lstm':
        from torch.nn import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from torch.nn import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from torch.nn import RNN as RNN

    self.embed = Embedding(args.vocab_size, args.emb_dim)
    outputdim = args.emb_dim

    if args.cnn_dim > 0:
        self.conv = Conv1DWithMasking(outputdim, args.cnn_dim,
                                      args.cnn_window_size, 1,
                                      (args.cnn_window_size - 1) // 2)
        outputdim = args.cnn_dim

    if args.rnn_dim > 0:
        self.rnn = RNN(outputdim, args.rnn_dim, num_layers=1, bias=True,
                       dropout=self.dropout_W, batch_first=True,
                       bidirectional=bidirec)
        outputdim = args.rnn_dim
        if bidirec == 1:
            outputdim = args.rnn_dim * 2

    if args.dropout_prob > 0:
        self.dropout = Dropout(args.dropout_prob)

    if args.aggregation == 'mot':
        self.mot = MeanOverTime()
    elif args.aggregation.startswith('att'):
        self.att = Attention(outputdim, op=args.aggregation,
                             activation='tanh', init_stdev=0.01)

    self.linear = Linear(outputdim, num_outputs)
    # if not args.skip_init_bias:
    #     self.linear.bias.data = (torch.log(initial_mean_value) - torch.log(1 - initial_mean_value)).float()

    self.emb_index = emb_index
    if args.emb_path:
        from .w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        self.embed[emb_index].weight.data = emb_reader.get_emb_matrix_given_vocab(
            vocab, model.layers[model.emb_index].get_weights())
        logger.info('  Done')
def __init__(self, input_size, args):
    super(RNN_model, self).__init__()
    self.RNN = RNN(input_size=input_size,
                   hidden_size=args.hidden_units,
                   num_layers=args.lstm_layer)
    self.args = args
class TestEncoderBase(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.lstm = LSTM(bidirectional=True, num_layers=3, input_size=3,
                         hidden_size=7, batch_first=True)
        self.rnn = RNN(bidirectional=True, num_layers=3, input_size=3,
                       hidden_size=7, batch_first=True)
        self.encoder_base = _EncoderBase(stateful=True)

        tensor = torch.rand([5, 7, 3])
        tensor[1, 6:, :] = 0
        tensor[3, 2:, :] = 0
        self.tensor = tensor

        mask = torch.ones(5, 7).bool()
        mask[1, 6:] = False
        mask[2, :] = False  # <= completely masked
        mask[3, 2:] = False
        mask[4, :] = False  # <= completely masked
        self.mask = mask

        self.batch_size = 5
        self.num_valid = 3
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        _, _, restoration_indices, sorting_indices = sort_batch_by_length(
            tensor, sequence_lengths)
        self.sorting_indices = sorting_indices
        self.restoration_indices = restoration_indices

    def test_non_stateful_states_are_sorted_correctly(self):
        encoder_base = _EncoderBase(stateful=False)
        initial_states = (torch.randn(6, 5, 7), torch.randn(6, 5, 7))
        # Check that we sort the state for non-stateful encoders. To test
        # we'll just use a "pass through" encoder, as we aren't actually testing
        # the functionality of the encoder here anyway.
        _, states, restoration_indices = encoder_base.sort_and_run_forward(
            lambda *x: x, self.tensor, self.mask, initial_states)
        # Our input tensor had 2 zero length sequences, so we need
        # to concat a tensor of shape
        # (num_layers * num_directions, batch_size - num_valid, hidden_dim),
        # to the output before unsorting it.
        zeros = torch.zeros([6, 2, 7])

        # sort_and_run_forward strips fully-padded instances from the batch;
        # in order to use the restoration_indices we need to add back the two
        # that got stripped. What we get back should match what we started with.
        for state, original in zip(states, initial_states):
            assert list(state.size()) == [6, 3, 7]
            state_with_zeros = torch.cat([state, zeros], 1)
            unsorted_state = state_with_zeros.index_select(1, restoration_indices)
            for index in [0, 1, 3]:
                numpy.testing.assert_array_equal(
                    unsorted_state[:, index, :].data.numpy(),
                    original[:, index, :].data.numpy())

    def test_get_initial_states(self):
        # First time we call it, there should be no state, so we should return None.
        assert (self.encoder_base._get_initial_states(
            self.batch_size, self.num_valid, self.sorting_indices) is None)

        # First test the case that the previous state is _smaller_ than the current state input.
        initial_states = (torch.randn([1, 3, 7]), torch.randn([1, 3, 7]))
        self.encoder_base._states = initial_states
        # sorting indices are: [0, 1, 3, 2, 4]
        returned_states = self.encoder_base._get_initial_states(
            self.batch_size, self.num_valid, self.sorting_indices)

        correct_expanded_states = [
            torch.cat([state, torch.zeros([1, 2, 7])], 1)
            for state in initial_states
        ]
        # State should have been expanded with zeros to have shape (1, batch_size, hidden_size).
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0].data.numpy(),
            correct_expanded_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1].data.numpy(),
            correct_expanded_states[1].data.numpy())

        # The returned states should be of shape (1, num_valid, hidden_size) and
        # they also should have been sorted with respect to the indices.
        # sorting indices are: [0, 1, 3, 2, 4]
        correct_returned_states = [
            state.index_select(1, self.sorting_indices)[:, :self.num_valid, :]
            for state in correct_expanded_states
        ]
        numpy.testing.assert_array_equal(
            returned_states[0].data.numpy(),
            correct_returned_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            returned_states[1].data.numpy(),
            correct_returned_states[1].data.numpy())

        # Now test the case that the previous state is larger:
        original_states = (torch.randn([1, 10, 7]), torch.randn([1, 10, 7]))
        self.encoder_base._states = original_states
        # sorting indices are: [0, 1, 3, 2, 4]
        returned_states = self.encoder_base._get_initial_states(
            self.batch_size, self.num_valid, self.sorting_indices)
        # State should not have changed, as they were larger
        # than the batch size of the requested states.
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0].data.numpy(),
            original_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1].data.numpy(),
            original_states[1].data.numpy())

        # The returned states should be of shape (1, num_valid, hidden_size) and they
        # also should have been sorted with respect to the indices.
        correct_returned_state = [
            x.index_select(1, self.sorting_indices)[:, :self.num_valid, :]
            for x in original_states
        ]
        numpy.testing.assert_array_equal(
            returned_states[0].data.numpy(),
            correct_returned_state[0].data.numpy())
        numpy.testing.assert_array_equal(
            returned_states[1].data.numpy(),
            correct_returned_state[1].data.numpy())

    def test_update_states(self):
        assert self.encoder_base._states is None
        initial_states = torch.randn([1, 5, 7]), torch.randn([1, 5, 7])

        index_selected_initial_states = (
            initial_states[0].index_select(1, self.restoration_indices),
            initial_states[1].index_select(1, self.restoration_indices),
        )

        self.encoder_base._update_states(initial_states, self.restoration_indices)
        # State was None, so the updated state should just be the sorted given state.
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0].data.numpy(),
            index_selected_initial_states[0].data.numpy())
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1].data.numpy(),
            index_selected_initial_states[1].data.numpy())

        new_states = torch.randn([1, 5, 7]), torch.randn([1, 5, 7])
        # tensor has 2 completely masked rows, so the last 2 rows of the _sorted_ states
        # will be completely zero, having been appended after calling the respective encoder.
        new_states[0][:, -2:, :] = 0
        new_states[1][:, -2:, :] = 0

        index_selected_new_states = (
            new_states[0].index_select(1, self.restoration_indices),
            new_states[1].index_select(1, self.restoration_indices),
        )

        self.encoder_base._update_states(new_states, self.restoration_indices)
        # Check that the update _preserved_ the state for the rows which were
        # completely masked (2 and 4):
        for index in [2, 4]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_initial_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_initial_states[1][:, index, :].data.numpy(),
            )
        # Now the states which were updated:
        for index in [0, 1, 3]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_new_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_new_states[1][:, index, :].data.numpy(),
            )

        # Now test the case that the new state is smaller:
        small_new_states = torch.randn([1, 3, 7]), torch.randn([1, 3, 7])
        # pretend the 2nd sequence in the batch was fully masked.
        small_restoration_indices = torch.LongTensor([2, 0, 1])
        small_new_states[0][:, 0, :] = 0
        small_new_states[1][:, 0, :] = 0

        index_selected_small_states = (
            small_new_states[0].index_select(1, small_restoration_indices),
            small_new_states[1].index_select(1, small_restoration_indices),
        )
        self.encoder_base._update_states(small_new_states, small_restoration_indices)

        # Check the index for the row we didn't update is the same as the previous step:
        for index in [1, 3]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_new_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_new_states[1][:, index, :].data.numpy(),
            )
        # Indices we did update:
        for index in [0, 2]:
            numpy.testing.assert_array_equal(
                self.encoder_base._states[0][:, index, :].data.numpy(),
                index_selected_small_states[0][:, index, :].data.numpy(),
            )
            numpy.testing.assert_array_equal(
                self.encoder_base._states[1][:, index, :].data.numpy(),
                index_selected_small_states[1][:, index, :].data.numpy(),
            )

        # We didn't update index 4 in the previous step either, so it should be equal to the
        # 4th index of initial states.
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0][:, 4, :].data.numpy(),
            index_selected_initial_states[0][:, 4, :].data.numpy(),
        )
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1][:, 4, :].data.numpy(),
            index_selected_initial_states[1][:, 4, :].data.numpy(),
        )

    def test_reset_states(self):
        # Initialize the encoder states.
        assert self.encoder_base._states is None
        initial_states = torch.randn([1, 5, 7]), torch.randn([1, 5, 7])
        index_selected_initial_states = (
            initial_states[0].index_select(1, self.restoration_indices),
            initial_states[1].index_select(1, self.restoration_indices),
        )
        self.encoder_base._update_states(initial_states, self.restoration_indices)

        # Check that only some of the states are reset when a mask is provided.
        mask = torch.tensor([True, True, False, False, False])
        self.encoder_base.reset_states(mask)
        # First two states should be zeros
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0][:, :2, :].data.numpy(),
            torch.zeros_like(initial_states[0])[:, :2, :].data.numpy(),
        )
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1][:, :2, :].data.numpy(),
            torch.zeros_like(initial_states[1])[:, :2, :].data.numpy(),
        )
        # Remaining states should be the same
        numpy.testing.assert_array_equal(
            self.encoder_base._states[0][:, 2:, :].data.numpy(),
            index_selected_initial_states[0][:, 2:, :].data.numpy(),
        )
        numpy.testing.assert_array_equal(
            self.encoder_base._states[1][:, 2:, :].data.numpy(),
            index_selected_initial_states[1][:, 2:, :].data.numpy(),
        )

        # Check that error is raised if mask has wrong batch size.
        bad_mask = torch.tensor([True, True, False])
        with self.assertRaises(ValueError):
            self.encoder_base.reset_states(bad_mask)

        # Check that states are reset to None if no mask is provided.
        self.encoder_base.reset_states()
        assert self.encoder_base._states is None

    def test_non_contiguous_initial_states_handled(self):
        # Check that the encoder is robust to non-contiguous initial states.

        # Case 1: Encoder is not stateful

        # A transposition will make the tensors non-contiguous, start them off at the wrong shape
        # and transpose them into the right shape.
        encoder_base = _EncoderBase(stateful=False)
        initial_states = (
            torch.randn(5, 6, 7).permute(1, 0, 2),
            torch.randn(5, 6, 7).permute(1, 0, 2),
        )
        assert not initial_states[0].is_contiguous() and not initial_states[1].is_contiguous()
        assert initial_states[0].size() == torch.Size([6, 5, 7])
        assert initial_states[1].size() == torch.Size([6, 5, 7])

        # We'll pass them through an LSTM encoder and a vanilla RNN encoder to make sure it works
        # whether the initial states are a tuple of tensors or just a single tensor.
        encoder_base.sort_and_run_forward(self.lstm, self.tensor, self.mask,
                                          initial_states)
        encoder_base.sort_and_run_forward(self.rnn, self.tensor, self.mask,
                                          initial_states[0])

        # Case 2: Encoder is stateful

        # For stateful encoders, the initial state may be non-contiguous if its state was
        # previously updated with non-contiguous tensors. As in the non-stateful tests, we check
        # that the encoder still works on initial states for RNNs and LSTMs.
        final_states = initial_states
        # Check LSTM
        encoder_base = _EncoderBase(stateful=True)
        encoder_base._update_states(final_states, self.restoration_indices)
        encoder_base.sort_and_run_forward(self.lstm, self.tensor, self.mask)
        # Check RNN
        encoder_base.reset_states()
        encoder_base._update_states([final_states[0]], self.restoration_indices)
        encoder_base.sort_and_run_forward(self.rnn, self.tensor, self.mask)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="requires cuda")
    def test_non_contiguous_initial_states_handled_on_gpu(self):
        # Some PyTorch operations which produce contiguous tensors on the CPU produce
        # non-contiguous tensors on the GPU (e.g. forward pass of an RNN when batch_first=True).
        # Accordingly, we perform the same checks from previous test on the GPU to ensure the
        # encoder is not affected by which device it is on.

        # Case 1: Encoder is not stateful

        # A transposition will make the tensors non-contiguous, start them off at the wrong shape
        # and transpose them into the right shape.
        encoder_base = _EncoderBase(stateful=False).cuda()
        initial_states = (
            torch.randn(5, 6, 7).cuda().permute(1, 0, 2),
            torch.randn(5, 6, 7).cuda().permute(1, 0, 2),
        )
        assert not initial_states[0].is_contiguous() and not initial_states[1].is_contiguous()
        assert initial_states[0].size() == torch.Size([6, 5, 7])
        assert initial_states[1].size() == torch.Size([6, 5, 7])

        # We'll pass them through an LSTM encoder and a vanilla RNN encoder to make sure it works
        # whether the initial states are a tuple of tensors or just a single tensor.
        encoder_base.sort_and_run_forward(self.lstm.cuda(), self.tensor.cuda(),
                                          self.mask.cuda(), initial_states)
        encoder_base.sort_and_run_forward(self.rnn.cuda(), self.tensor.cuda(),
                                          self.mask.cuda(), initial_states[0])

        # Case 2: Encoder is stateful

        # For stateful encoders, the initial state may be non-contiguous if its state was
        # previously updated with non-contiguous tensors. As in the non-stateful tests, we check
        # that the encoder still works on initial states for RNNs and LSTMs.
        final_states = initial_states
        # Check LSTM
        encoder_base = _EncoderBase(stateful=True).cuda()
        encoder_base._update_states(final_states, self.restoration_indices.cuda())
        encoder_base.sort_and_run_forward(self.lstm.cuda(), self.tensor.cuda(),
                                          self.mask.cuda())
        # Check RNN
        encoder_base.reset_states()
        encoder_base._update_states([final_states[0]], self.restoration_indices.cuda())
        encoder_base.sort_and_run_forward(self.rnn.cuda(), self.tensor.cuda(),
                                          self.mask.cuda())
def train_torch(self, X, y_true, batch_size, learning_rate, num_epochs, print_many, verbose):
    self.batch_size = batch_size
    progresses = {int(num_epochs // (100 / i)): i for i in range(1, 101, 1)}
    t0 = counter()
    durations = []
    device = torch.device('cuda:0')

    rnn = RNN(input_size=self.input_dim, hidden_size=self.hidden_dim, num_layers=1,
              nonlinearity='tanh', bias=True, batch_first=False).to(device)
    fc = FCLayer(self.hidden_dim, self.output_size, bias=True).to(device)
    params = [rnn.parameters(), fc.params()]
    optimizer = SGD(chain(*params), lr=learning_rate)

    for epoch in range(num_epochs):
        epoch_loss = 0
        for i in range(self.max_iters):
            x_batch = X[i * self.batch_size:(i + 1) * self.batch_size]
            x_batch = np.array([x_batch[:, step, :] for step in range(self.time_steps)])
            y_true_batch = y_true[i * self.batch_size:(i + 1) * self.batch_size]
            batch_size_local = x_batch.shape[1]

            # convert to pytorch tensor
            y_true_batch = y_true_batch.astype(np.int64)
            y_true_batch = torch.tensor(y_true_batch, requires_grad=False).to(device)
            x_batch = x_batch.astype(np.float32)
            x_batch = torch.tensor(x_batch, requires_grad=True).to(device)

            # forward pass
            h_stack, h_last = rnn.forward(x_batch, hx=None)
            fc_out = fc.forward(h_last)
            log_y_pred = F.log_softmax(input=fc_out, dim=2)
            log_y_pred = log_y_pred.view(batch_size_local, self.output_size)
            loss = F.nll_loss(input=log_y_pred, target=y_true_batch, reduction='mean')

            # update gradient
            optimizer.zero_grad()
            loss.backward()
            epoch_loss += loss.item()
            optimizer.step()

        durations.append(counter() - t0)
        t0 = counter()
        if (print_many and epoch % 100 == 0) or (not print_many and epoch in progresses):
            print(f"after epoch: {epoch}, epoch_losses: {round(epoch_loss / self.max_iters, 3)}")

    if verbose > 0:
        avg_epoch_time = sum(durations) / len(durations)
        print("average epoch time:", round(avg_epoch_time, 3))
        return avg_epoch_time
import torch
from torch.nn import RNN, LSTM

rnn = RNN(input_size=4, hidden_size=5, batch_first=True)
inputs = torch.rand(2, 3, 4)
outputs, hn = rnn(inputs)
print(outputs, outputs.shape)
print(hn, hn.shape)

lstm = LSTM(input_size=4, hidden_size=6, batch_first=True)
outputs, (hn, cn) = lstm(inputs)
print(outputs, outputs.shape)
print(hn, hn.shape)
print(cn, cn.shape)
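# A hedged extension of the demo above: passing an explicit initial hidden state to the
# same rnn. Shapes follow the torch.nn.RNN contract
# (num_layers * num_directions, batch, hidden_size).
h0 = torch.zeros(1, 2, 5)
outputs, hn = rnn(inputs, h0)
print(outputs.shape, hn.shape)  # torch.Size([2, 3, 5]) torch.Size([1, 2, 5])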
def build(
    self,
    name: str,
    embedding_dim: int,
    hidden_size: int = 32,
    num_filters: int = 1,
    num_heads: int = 3,
    output_dim: int = 30,
    ngram_filter_sizes: Tuple = (1, 2, 3, 4, 5),
    filters: List[List[int]] = [[1, 4], [2, 8], [3, 16], [4, 32], [5, 64]],
    num_highway: int = 2,
    projection_dim: int = 16
) -> Callable[[Tensor, Optional[Tensor]], Tensor]:
    encoder = None
    if name in {'boe'}:
        encoder = BagOfEmbeddingsEncoder(embedding_dim=embedding_dim, averaged=True)
    elif name in {'cnn'}:
        encoder = CnnEncoder(embedding_dim=embedding_dim,
                             num_filters=num_filters,
                             ngram_filter_sizes=ngram_filter_sizes,
                             output_dim=output_dim)
    elif name in {'cnnh'}:
        encoder = CnnHighwayEncoder(embedding_dim=embedding_dim,
                                    filters=filters,
                                    num_highway=num_highway,
                                    projection_dim=projection_dim,
                                    projection_location="after_cnn")
    elif name in {'rnn'}:
        rnn = RNN(input_size=embedding_dim, bidirectional=True,
                  hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(rnn)
    elif name in {'lstm'}:
        lstm = LSTM(input_size=embedding_dim, bidirectional=True,
                    hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(lstm)
    elif name in {'gru'}:
        gru = GRU(input_size=embedding_dim, bidirectional=True,
                  hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(gru)
    elif name in {'intra'}:
        intra = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                              projection_dim=output_dim,
                                              combination="1,2")
        aggr = PytorchSeq2VecWrapper(
            LSTM(input_size=embedding_dim + output_dim, bidirectional=True,
                 hidden_size=hidden_size, batch_first=True))
        encoder = lambda x, y: aggr(intra(x, y), y)
    elif name in {'multihead'}:
        sim = MultiHeadedSimilarity(num_heads, embedding_dim)
        multi = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                              projection_dim=embedding_dim,
                                              similarity_function=sim,
                                              num_attention_heads=num_heads,
                                              combination="1+2")
        aggr = PytorchSeq2VecWrapper(
            LSTM(input_size=embedding_dim, bidirectional=True,
                 hidden_size=hidden_size, batch_first=True))
        encoder = lambda x, y: aggr(multi(x, y), y)
    assert encoder is not None
    return encoder
# x = x + 10
# Sx_all = torch.cat((Sx_all, x), dim=0)

# Select Training Data.
Sx_tr, y_tr = Sx_all[subset == 0], y_all[subset == 0]

# Set Mean to 0, and variance to 1. -> Normal Distribution
mu_tr = Sx_tr.mean(dim=0)
std_tr = Sx_tr.std(dim=0)
Sx_tr = (Sx_tr - mu_tr) / std_tr

# Design ML Model
num_inputs = Sx_tr.shape[-1]
num_classes = y_tr.cpu().unique().numel()
model = Sequential(Linear(num_inputs, num_classes), LogSoftmax(dim=1))
rnn = RNN(336, 336)
optimizer = Adam(model.parameters())
criterion = NLLLoss()

if use_cuda:
    model = model.cuda()
    criterion = criterion.cuda()

# Number of signals to use in each gradient descent step (batch).
batch_size = 32
# Number of epochs.
num_epochs = 80
# Learning rate for Adam.
lr = 1e-2

# set number of batches
def __init__(self, nb_features, nb_frames, nb_layers, hidden_size,
             bidirectional=False, mixture_mean=None, mixture_scale=None,
             label_mean=None, activation_function="relu", recurrent_layer="lstm"):
    super(Generalised_Recurrent_Model, self).__init__()

    # set the hidden size
    self.hidden_size = hidden_size

    # create parameters with torch tensors for mean and scale
    self.mixture_mean = Parameter(
        torch.from_numpy(np.copy(mixture_mean).astype(np.float32)))
    self.label_scale = Parameter(
        torch.from_numpy(np.copy(mixture_scale).astype(np.float32)))

    # fully connected dense layer for input dimensionality reduction
    self.fc_dr = Linear(in_features=nb_features, out_features=hidden_size)

    # different recurrent layers
    recurrent_layers = {
        'lstm': LSTM(input_size=hidden_size, hidden_size=hidden_size,
                     num_layers=nb_layers, batch_first=True,
                     bidirectional=bidirectional),
        'gru': GRU(input_size=hidden_size, hidden_size=hidden_size,
                   num_layers=nb_layers, batch_first=True,
                   bidirectional=bidirectional),
        'rnn': RNN(input_size=hidden_size, hidden_size=hidden_size,
                   num_layers=nb_layers, batch_first=True,
                   bidirectional=bidirectional)
    }

    # recurrent layer
    self.recurrent_layer = recurrent_layers[recurrent_layer]
    self.lstm_output = hidden_size * 2 if bidirectional else hidden_size

    # fully connected dense layer for output dimensionality expansion
    self.fc_de = Linear(in_features=self.lstm_output, out_features=nb_features)

    # output label scaling
    self.label_scale = Parameter(torch.ones(nb_features))
    # output label mean
    self.label_mean = Parameter(
        torch.from_numpy(np.copy(label_mean).astype(np.float32)))

    # activation function
    activation_functions = {'relu': F.relu, 'tanh': torch.tanh}
    self.activation_function = activation_functions[activation_function]
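# Hedged construction sketch for Generalised_Recurrent_Model above; the statistics
# arrays are dummy placeholders and all sizes are illustrative.
import numpy as np

stats = np.zeros(128, dtype=np.float32)
model = Generalised_Recurrent_Model(nb_features=128, nb_frames=100, nb_layers=2,
                                    hidden_size=64, bidirectional=True,
                                    mixture_mean=stats,
                                    mixture_scale=np.ones(128, dtype=np.float32),
                                    label_mean=stats,
                                    activation_function='tanh',
                                    recurrent_layer='gru')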