def check_gradien_on_captioning_rnn(): """ perform numeric gradient checking on the CaptioningRNN class; you should see errors around the order of e-6 or less. """ np.random.seed(231) batch_size = 2 timesteps = 3 input_dim = 4 wordvec_dim = 5 hidden_dim = 6 word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3} vocab_size = len(word_to_idx) captions = np.random.randint(vocab_size, size=(batch_size, timesteps)) features = np.random.randn(batch_size, input_dim) model = CaptioningRNN(word_to_idx, input_dim=input_dim, wordvec_dim=wordvec_dim, hidden_dim=hidden_dim, cell_type='rnn', dtype=np.float64) loss, grads = model.loss(features, captions) for param_name in sorted(grads): f = lambda _: model.loss(features, captions)[0] param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6) e = rel_error(param_grad_num, grads[param_name]) print('%s relative error: %e' % (param_name, e))
def check_lstm_captioning_model(): """You should see a difference on the order of e-10 or less.""" N, D, W, H = 10, 20, 30, 40 word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3} V = len(word_to_idx) T = 13 model = CaptioningRNN(word_to_idx, input_dim=D, wordvec_dim=W, hidden_dim=H, cell_type='lstm', dtype=np.float64) # Set all model parameters to fixed values for k, v in model.params.items(): model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape) features = np.linspace(-0.5, 1.7, num=N * D).reshape(N, D) captions = (np.arange(N * T) % V).reshape(N, T) loss, grads = model.loss(features, captions) expected_loss = 9.82445935443 print('loss: ', loss) print('expected loss: ', expected_loss) print('difference: ', abs(loss - expected_loss))
def check_rnn_for_image_captioning(): """ Now that you have implemented the necessary layers, you can combine them to build an image captioning model. Open the file cs231n/classifiers/rnn.py and look at the CaptioningRNN class. Implement the forward and backward pass of the model in the loss function. For now you only need to implement the case where cell_type='rnn' for vanialla RNNs; you will implement the LSTM case later. After doing so, run the following to check your forward pass using a small test case; you should see error on the order of e-10 or less. """ N, D, W, H = 10, 20, 30, 40 word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3} V = len(word_to_idx) T = 13 model = CaptioningRNN(word_to_idx, input_dim=D, wordvec_dim=W, hidden_dim=H, cell_type='rnn', dtype=np.float64) # Set all model parameters to fixed values for k, v in model.params.items(): model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape) features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D) captions = (np.arange(N * T) % V).reshape(N, T) loss, grads = model.loss(features, captions) expected_loss = 9.83235591003 print('loss: ', loss) print('expected loss: ', expected_loss) print('difference: ', abs(loss - expected_loss))
model = CaptioningRNN(word_to_idx, input_dim=D, wordvec_dim=W, hidden_dim=H, cell_type='lstm', dtype=np.float64) # Set all model parameters to fixed values for k, v in model.params.iteritems(): model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape) features = np.linspace(-0.5, 1.7, num=N*D).reshape(N, D) captions = (np.arange(N * T) % V).reshape(N, T) loss, grads = model.loss(features, captions) expected_loss = 9.82445935443 print 'loss: ', loss print 'expected loss: ', expected_loss print 'difference: ', abs(loss - expected_loss) small_data = load_coco_data(max_train=50) small_lstm_model = CaptioningRNN( cell_type='lstm', word_to_idx=data['word_to_idx'], input_dim=data['train_features'].shape[1], hidden_dim=512, wordvec_dim=256, dtype=np.float32,
model = CaptioningRNN(word_to_idx, input_dim=D, wordvec_dim=W, hidden_dim=H, cell_type='rnn', dtype=np.float64) # Set all model parameters to fixed values for k, v in model.params.items(): model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape) features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D) captions = (np.arange(N * T) % V).reshape(N, T) loss, grads = model.loss(features, captions) expected_loss = 9.83235591003 print('loss: ', loss) print('expected loss: ', expected_loss) print('difference: ', abs(loss - expected_loss)) # Run the following cell to perform numeric gradient checking on the `CaptioningRNN` class; you should errors around `5e-6` or less. # In[ ]: np.random.seed(231) batch_size = 2 timesteps = 3
model = CaptioningRNN(word_to_idx, input_dim=D, wordvec_dim=W, hidden_dim=H, cell_type='rnn', dtype=np.float64) # Set all model parameters to fixed values for k, v in model.params.iteritems(): model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape) features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D) captions = (np.arange(N * T) % V).reshape(N, T) loss, grads = model.loss(features, captions) expected_loss = 9.83235591003 print 'loss: ', loss print 'expected loss: ', expected_loss print 'difference: ', abs(loss - expected_loss) batch_size = 2 timesteps = 3 input_dim = 4 wordvec_dim = 5 hidden_dim = 6 word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3} vocab_size = len(word_to_idx) captions = np.random.randint(vocab_size, size=(batch_size, timesteps))