def __init__(self, vocab_size, hidden_size=32):
    super(CharRNN, self).__init__()
    # one LSTM layer followed by a dense layer projecting back onto the vocabulary
    self.rnn = autograd.LSTM(vocab_size, hidden_size)
    self.dense = autograd.Linear(hidden_size, vocab_size)
    self.optimizer = opt.SGD(0.01)
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    # initial hidden and cell states for a batch of size 1
    self.hx = tensor.Tensor((1, self.hidden_size))
    self.cx = tensor.Tensor((1, self.hidden_size))
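# A minimal sketch (not part of the original example) of how one training step
# could use this CharRNN. It assumes the caller supplies `x_batch`, a list of
# (1, vocab_size) one-hot character tensors, and `y_batch`, the matching one-hot
# targets, and that opt.SGD exposes update(param, grad) as in SINGA's autograd
# examples; everything else mirrors the calls in the constructor above.
def train_one_step(model, x_batch, y_batch):
    autograd.training = True          # enable gradient tracking
    hx, cx = model.hx, model.cx
    loss = None
    for x, y in zip(x_batch, y_batch):
        # feed one character at a time, carrying the LSTM state forward
        hs, hx, cx = model.rnn([x], (hx, cx))
        logits = model.dense(hs[0])
        l = autograd.softmax_cross_entropy(logits, y)
        loss = l if loss is None else autograd.add(loss, l)
    # back-propagate and apply SGD to every parameter
    for p, gp in autograd.backward(loss):
        model.optimizer.update(p, gp)
    return loss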
def test_LSTM_gpu_tiny_ops_shape_check(self):
    # gradients shape check.
    inputs, target, h0 = prepare_inputs_targets_for_rnn_test()
    c_0 = np.random.random((2, 1)).astype(np.float32)
    c0 = tensor.Tensor(device=gpu_dev, data=c_0)

    rnn = autograd.LSTM(3, 2)

    hs, _, _ = rnn(inputs, (h0, c0))

    # accumulate the per-step softmax cross-entropy losses into a single loss
    loss = autograd.softmax_cross_entropy(hs[0], target[0])
    for i in range(1, len(hs)):
        l = autograd.softmax_cross_entropy(hs[i], target[i])
        loss = autograd.add(loss, l)
    # d = autograd.infer_dependency(loss.creator)
    # print(d)

    # every gradient returned by backward() must match its tensor's shape
    for t, dt in autograd.backward(loss):
        self.check_shape(t.shape, dt.shape)
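# For context, a plausible sketch of the helper both tests rely on; the exact
# values in the SINGA test file may differ, but the shapes follow from
# autograd.LSTM(3, 2) above: a batch of 2 samples, 3 input features per step,
# and a hidden size of 2.
def prepare_inputs_targets_for_rnn_test():
    # three time steps of (batch=2, feature=3) inputs
    x = [np.random.random((2, 3)).astype(np.float32) for _ in range(3)]
    inputs = [tensor.Tensor(device=gpu_dev, data=xi) for xi in x]

    # one-hot targets of shape (batch=2, hidden=2) for each step
    t = [np.array([[1, 0], [0, 1]]).astype(np.float32) for _ in range(3)]
    target = [tensor.Tensor(device=gpu_dev, data=ti) for ti in t]

    # initial hidden state: zeros of shape (batch=2, hidden=2)
    h_0 = np.zeros((2, 2)).astype(np.float32)
    h0 = tensor.Tensor(device=gpu_dev, data=h_0)
    return inputs, target, h0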
def test_numerical_gradients_check_for_lstm(self):
    inputs, target, h0 = prepare_inputs_targets_for_rnn_test()
    c_0 = np.zeros((2, 2)).astype(np.float32)
    c0 = tensor.Tensor(device=gpu_dev, data=c_0)

    rnn = autograd.LSTM(3, 2)

    def lstm_forward():
        hs, _, _ = rnn(inputs, (h0, c0))

        # sum the per-step losses so a single scalar drives the gradient check
        loss = autograd.softmax_cross_entropy(hs[0], target[0])
        for i in range(1, len(hs)):
            l = autograd.softmax_cross_entropy(hs[i], target[i])
            loss = autograd.add(loss, l)
        return loss

    loss1 = lstm_forward()
    auto_grads = autograd.gradients(loss1)

    # compare the autograd gradient of every LSTM parameter against a
    # numerical (finite-difference) estimate
    for param in rnn.params:
        auto_grad = tensor.to_numpy(auto_grads[param])
        self.gradients_check(lstm_forward, param, auto_grad)
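# A minimal sketch of what a gradients_check helper like the one used above
# typically does; the signature, step size, and tolerance here are assumptions,
# not the test suite's actual implementation. Each parameter entry is perturbed
# by +/- delta, the scalar loss is re-evaluated, and the central difference is
# compared against the autograd gradient at that entry.
def gradients_check(self, forward, param, auto_grad, delta=0.05, epsilon=0.05):
    p_np = tensor.to_numpy(param)
    it = np.nditer(p_np, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        original = p_np[idx]

        # loss at (param + delta)
        p_np[idx] = original + delta
        param.copy_from_numpy(p_np)
        loss_plus = tensor.to_numpy(forward())[0]

        # loss at (param - delta)
        p_np[idx] = original - delta
        param.copy_from_numpy(p_np)
        loss_minus = tensor.to_numpy(forward())[0]

        # restore the parameter, then compare central difference vs autograd
        p_np[idx] = original
        param.copy_from_numpy(p_np)
        numerical = (loss_plus - loss_minus) / (2.0 * delta)
        self.assertAlmostEqual(numerical, auto_grad[idx], delta=epsilon)
        it.iternext()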